python爬虫豆瓣推理书籍及链接

前言

因为喜欢的妹子特别喜欢推理书籍,因此就想用python把豆瓣上的推理书籍给爬下来,让她惊喜惊喜,但是好像没达到预期的目的。先上一张爬到的数据图片。
(图片:爬取到的数据截图)

代码

我的代码比较简单,只实现了基本的功能,大家可以在此基础上自行修改。

# -*- coding: utf8 -*-
from bs4 import BeautifulSoup
import requests
from openpyxl import Workbook  # NOTE(review): imported but never used in this script
import sys

# Python-2-only hack: re-expose sys.setdefaultencoding (hidden by site.py)
# and switch the implicit str<->unicode codec to utf8, so the byte-string
# literals in get_con() can be concatenated with the unicode values that
# BeautifulSoup returns without raising UnicodeDecodeError.
# Guarded behind a version check: on Python 3 `reload` is not a builtin
# (the bare call crashed the script at import time) and str is already
# unicode, so the hack is unnecessary there.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 -- builtin on Python 2 only
    sys.setdefaultencoding('utf8')

# Shared output file: get_con() appends one record per book, main() closes it.
f = open("D:\\python\\demos\\crawler\\DouBanSpider\\dybooks.txt", "a")
def get_html(url, timeout=10):
    """Fetch *url* and return the raw response body as bytes.

    A browser User-Agent header is sent because douban.com rejects the
    default python-requests UA.  *timeout* (seconds, new parameter with a
    backward-compatible default) prevents a single stalled connection from
    hanging the entire crawl forever, which the original timeout-less
    ``requests.get`` call could do.
    """
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0'}
    # .content (not .text): hand undecoded bytes to BeautifulSoup and let
    # it sniff the page encoding itself.
    html = requests.get(url, headers=header, timeout=timeout).content
    return html
def get_con(html):
    """Parse one tag-listing page and append each book's record to ``f``.

    For every ``<li class="subject-item">`` on the page, extract the book
    title, detail-page link, publication info and rating, and write a
    four-line record to the shared module-level output file.

    Robustness fix: the original iterated directly over
    ``href.find(class_='rating_nums')`` / ``find(class_='pub')`` — for
    books without a rating (too few votes) ``find`` returns ``None`` and
    ``for ... in None`` raised ``TypeError``, killing the whole crawl.
    It could also reuse a stale ``name``/``links`` from the previous item
    when an item had fewer than two ``<a>`` tags.  Every lookup is now
    guarded and missing fields are written as empty strings instead.
    """
    soup = BeautifulSoup(html, 'html.parser')
    for item in soup.find_all('li', class_='subject-item'):
        # Original heuristic: the second <a> inside the item is the title
        # link (the first wraps the cover image).
        anchors = item.find_all('a')
        link_tag = anchors[1] if len(anchors) > 1 else None
        links = (link_tag.get('href') or '') if link_tag is not None else ''
        name = (link_tag.get('title') or '') if link_tag is not None else ''

        pub_tag = item.find(class_='pub')
        details = pub_tag.get_text(strip=True) if pub_tag is not None else ''

        rank_tag = item.find(class_='rating_nums')
        rating = rank_tag.get_text(strip=True) if rank_tag is not None else ''

        f.write("书籍名称: " + name + "\n")
        f.write("信息: " + details + "\n")
        f.write("评分: " + rating + "\n")
        f.write("链接: " + links + "\n")
        f.write("\n\n")
def main(pages=90):
    """Crawl *pages* result pages of the Douban "推理" (mystery) book tag.

    Each listing page shows 20 books, so page *i* starts at offset
    ``i * 20``.  *pages* is a new parameter with a backward-compatible
    default of 90 (the original hard-coded loop bound).

    Fix: the original only closed the shared output file after the loop
    completed, so any network error mid-crawl lost the buffered records;
    the ``finally`` guarantees the file is flushed and closed either way.
    """
    url = 'https://book.douban.com/tag/推理?start='
    try:
        for page in range(pages):
            page_url = url + str(page * 20) + "&type=T"
            get_con(get_html(page_url))
    finally:
        # Close the module-level output file even on error so buffered
        # records reach the disk.
        f.close()
if __name__ == '__main__':
    main()

猜你喜欢

转载自blog.csdn.net/u013309870/article/details/80184592
今日推荐