关联:python爬虫学习、Python安全

网页爬取过程

  1. 获取网页内容
  2. 解析网页内容为可读形式
  3. 储存或分析数据
import requests  
from bs4 import BeautifulSoup  
  
  
# Browser-style request headers (Chrome on Windows).
# NOTE(review): nothing in the visible script passes this dict to a request,
# and its "Host" value names movie.douban.com rather than the site fetched
# below -- confirm before wiring it into requests.get(..., headers=head).
head = {
    "Host": "movie.douban.com",
    "Connection": "keep-alive",
    "sec-ch-ua-platform": "Windows",
    "Upgrade-Insecure-Requests": "1",
    # Long values are split with implicit string concatenation; the resulting
    # strings are identical to the single-line originals.
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/142.0.0.0 Safari/537.36"
    ),
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/avif,image/webp,image/apng,*/*;q=0.8,"
        "application/signed-exchange;v=b3;q=0.7"
    ),
}
  
# Step 1: download the catalogue page.
# requests applies no timeout unless one is given, so pass one explicitly to
# keep the script from blocking forever on an unresponsive server.
response = requests.get("https://books.toscrape.com/", timeout=10)
print(response)
print(response.status_code)
print(response.ok)

# Step 2: parse the HTML and collect every <p class="price_color"> element.
content = response.text
soup = BeautifulSoup(content, "html.parser")
# find_all is the current method name; findAll is only kept as a legacy alias.
all_price = soup.find_all("p", attrs={"class": "price_color"})

# Step 3: print each price, numbered from 1.
for index, price in enumerate(all_price, start=1):
    # Remove the leading currency marker by character instead of slicing off a
    # fixed two characters, so the digits survive whether the symbol arrives as
    # one character or two.
    # NOTE(review): presumably the prefix is a pound sign that shows up as
    # "Â£" when the page is decoded with the wrong charset -- confirm.
    # get_text() is used because .string is None for tags with nested markup.
    amount = price.get_text(strip=True).lstrip("Â£")
    # NOTE(review): the message labels the amount as 元; verify that matches
    # the currency the site actually lists.
    print(f"[*]第{index}本书的价格为{amount}元")