scrapy 最简易爬虫

创建一个项目
scrapy startproject baidu   # 最后一个参数 baidu 是项目名称，可自行命名

# 运行项目：需要先 cd 进入项目目录才可以运行；crawl 后面跟的是爬虫类里的 name（本例为 "quotes"），而不是项目名
scrapy crawl quotes


import scrapy
from bs4 import BeautifulSoup as bs

#scrapy crawl quotes

class QuotesSpider(scrapy.Spider):
    """Spider that fetches a Baidu "top" ranking page and saves it as CSV.

    Run it from inside the Scrapy project directory with
    ``scrapy crawl quotes`` (the argument is this class's ``name``).
    """

    name = "quotes"

    def start_requests(self):
        """Yield one request per ranking URL, each handled by ``parse``."""
        urls = [
            'http://top.baidu.com/buzz?b=353&c=10&fr=topcategory_c10',
        ]
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Extract (title, link) pairs from the page and write them to a CSV file.

        Every ``td.keyword`` cell is parsed with BeautifulSoup; the first
        ``<a>`` element in the cell supplies the title (its text) and the
        link (its ``href``). Cells without an ``<a>`` element are skipped,
        and a missing ``href`` is stored as an empty string.

        The rows are written to ``小说排行榜.csv`` in the current working
        directory: column 1 is the title, column 2 is the link.
        """
        # Imported locally so this method does not depend on edits to the
        # file-level import block.
        import csv

        rows = []
        for cell_html in response.css('td.keyword').getall():
            anchor = bs(cell_html, 'lxml').a
            if anchor is None:
                # No link in this cell; nothing to record.
                continue
            title = anchor.text
            self.log(title)
            rows.append((title, anchor.attrs.get('href', '')))

        # The previous version saved an openpyxl workbook under a ".csv"
        # name, which produced an XLSX container with the wrong extension.
        # Write genuine CSV instead. "utf-8-sig" adds a BOM so spreadsheet
        # tools detect the encoding of the Chinese titles; newline='' is
        # required by the csv module to avoid blank lines on Windows.
        with open('小说排行榜.csv', 'w', newline='', encoding='utf-8-sig') as out:
            csv.writer(out).writerows(rows)

        self.log('保存完成')


猜你喜欢

转载自blog.csdn.net/AnYeZhiYin/article/details/105455709