import requests
from bs4 import BeautifulSoup
# Scrape the Douban Books Top 250 list and save the result to a text file.
def parse_html(num):
    """Fetch one page of the Douban Books Top 250 list and format its entries.

    Each book's fields are looked up inside that book's own container, so a
    book that lacks one field (for example, no ``span.inq`` quote) gets an
    empty value for it instead of shifting every later book's fields.

    Args:
        num: Value sent as the ``start`` query parameter of the list URL.

    Returns:
        A string with one formatted record per book found on the page
        (title, info line, rating, quote, separator line). Empty if the page
        contains no ``div.pl2`` elements.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP response.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
    }
    response = requests.get(
        f'https://book.douban.com/top250?start={num}',
        headers=headers,
        timeout=10,  # never hang forever on a stalled connection
    )
    # Fail loudly instead of silently parsing an error page into ''.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    def text_of(tag):
        # A missing element becomes an empty field rather than an exception.
        return tag.get_text() if tag is not None else ''

    records = []
    for title_div in soup.find_all('div', class_='pl2'):
        # Scope the remaining lookups to this book only.
        # NOTE(review): assumes each book sits in its own table row; falls
        # back to the title's direct parent when there is no enclosing <tr>.
        scope = title_div.find_parent('tr') or title_div.parent

        # Title
        link = title_div.find('a')
        if link is None:
            name = ''
        else:
            name = link.get('title') or link.get_text(strip=True)
        # Book info line (labelled as author in the output)
        info = text_of(scope.find('p', class_='pl'))
        # Rating
        rate = text_of(scope.find('span', class_='rating_nums'))
        # One-line quote / introduction
        inq = text_of(scope.find('span', class_='inq'))

        records.append(
            f'书名:{name}\n作者:{info}\n评分:{rate}\n简介:{inq}\n=======================\n'
        )
    return ''.join(records)
if __name__ == '__main__':
    # Fetch every page first (start offsets 0, 25, ..., 225) and only then
    # open the output file, so the file is written in one go.
    page_offsets = range(0, 250, 25)
    content = ''.join(parse_html(offset) for offset in page_offsets)

    out_path = '豆瓣图书Top250.txt'
    with open(out_path, 'w', encoding='utf-8') as out:
        out.write(content)
# Python crawler: Douban Books
# You may also like
# Reposted from blog.csdn.net/qq_40925239/article/details/82890316
# Today's recommendations
# Weekly ranking