关联:python爬虫学习、Python安全
网页爬取过程
- 获取网页内容
- 解析网页内容为可读形式
- 储存或分析数据
import requests
from bs4 import BeautifulSoup
# Browser-like request headers (desktop Chrome on Windows).
# Note that "Host" names movie.douban.com; the request visible in this file
# targets a different site and does not pass this dict.
head = {
    "Host": "movie.douban.com",
    "Connection": "keep-alive",
    "sec-ch-ua-platform": "Windows",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/142.0.0.0 Safari/537.36"
    ),
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/avif,image/webp,image/apng,*/*;q=0.8,"
        "application/signed-exchange;v=b3;q=0.7"
    ),
}
# Fetch the demo bookshop's front page. The timeout stops the script from
# hanging forever if the server never answers. `head` is not passed here:
# its Host header names movie.douban.com, not this site.
response = requests.get("https://books.toscrape.com/", timeout=10)
print(response)
print(response.status_code)
print(response.ok)

# Parse the HTML and collect every <p class="price_color"> element.
content = response.text
soup = BeautifulSoup(content, "html.parser")
all_price = soup.find_all("p", attrs={"class": "price_color"})

# Print one numbered line per price, counting from 1.
for index, price in enumerate(all_price, start=1):
    # Strip the leading currency marker instead of blindly cutting two
    # characters. Presumably the pound sign arrives mis-decoded as "Â£"
    # (which is why the original sliced [2:]); lstrip handles both that
    # and a correctly decoded "£".
    amount = price.get_text().lstrip("Â£")
    print(f"[*]第{index}本书的价格为{amount}元")