云起小说爬虫（Scrapy Spider：抓取云起书院小说列表与详情页统计数据）

# -*- coding: utf-8 -*-
import scrapy
from ..items import BookListItem,BookDetailItem


class YunqiSpider(scrapy.Spider):
    """Crawl novel listings on yunqi.qq.com.

    Yields one ``BookListItem`` per novel on each listing page, follows every
    novel link to scrape its click / popularity / recommendation statistics
    into a ``BookDetailItem``, and paginates via the pager's last link.
    """

    name = 'yunqi'
    allowed_domains = ['qq.com']
    start_urls = ['http://yunqi.qq.com/bk/so2/n10p1']

    def parse(self, response):
        """Parse one listing page: emit BookListItems, schedule a detail
        request per novel, then schedule the next listing page."""
        # Each novel lives in its own <div class="book">.
        for novel in response.xpath('//div[@class="book"]'):
            novel_link = novel.xpath('a/@href').extract_first('')
            img_src = novel.xpath('a/img/@src').extract_first('')
            novel_id = novel.xpath('div/h3/a/@id').extract_first('')
            novel_title = novel.xpath('div/h3/a/text()').extract_first('')

            # <dd> text nodes in page order: author, category, status,
            # update date, word count.  Pad so a short or missing <dl>
            # cannot raise IndexError (the original crashed here).
            info = novel.xpath('div/dl/dd//text()').extract()
            info = (info + [''] * 5)[:5]

            yield BookListItem(
                novel_link=novel_link,
                img_src=[img_src],
                novel_id=novel_id,
                novel_title=novel_title,
                novel_auth=info[0],
                novel_categray=info[1],
                novel_status=info[2],
                novel_date=info[3],
                novel_numbers=info[4],
            )

            # Follow the detail page.  Skip novels without an href:
            # scrapy.Request raises ValueError on an empty URL.  urljoin
            # also tolerates relative hrefs (no-op for absolute ones).
            if novel_link:
                yield scrapy.Request(
                    url=response.urljoin(novel_link),
                    callback=self.parse_detail,
                    meta={'novel_id': novel_id},
                )

        # The last <a> inside the pager is the "next page" link.  Guard
        # against an empty/missing pager, where pages[-1] would raise.
        pages = response.xpath('//div[@id="pageHtml2"]/a')
        if pages:
            next_link = pages[-1].xpath('@href').extract_first('')
            if next_link:
                # No explicit callback: defaults back to self.parse.
                yield scrapy.Request(url=response.urljoin(next_link))

    @staticmethod
    def _stat(infos, index):
        """Return the text after the colon in ``infos[index]``, or '' when
        the stats table had fewer cells than expected."""
        try:
            return infos[index].split(':')[-1]
        except IndexError:
            return ''

    def parse_detail(self, response):
        """Parse a novel's detail page into a BookDetailItem, keyed by the
        novel_id forwarded through request.meta."""
        novel_id = response.meta.get('novel_id')
        # Tag line is "label:tag tag ..." — keep the part after the colon,
        # stripped of surrounding CR/LF and spaces.
        tags = response.xpath(
            '//div[@class="tags"]/text()'
        ).extract_first('').split(':')[-1].strip('\r\n ')

        # Flat list of the stats table's cell texts; fixed positions hold
        # the total/monthly/weekly click, popularity and recommendation
        # counters (indices 3-11, as in the original page layout).
        infos = response.xpath(
            '//div[@id="novelInfo"]/table/tr/td//text()').extract()

        yield BookDetailItem(
            novel_id=novel_id,
            tags=tags,
            novelAllClick=self._stat(infos, 3),
            novelMonClick=self._stat(infos, 6),
            novelWeekClick=self._stat(infos, 9),
            novelAllPopular=self._stat(infos, 4),
            novelMonPopular=self._stat(infos, 7),
            novelWeekPopular=self._stat(infos, 10),
            novelAllComm=self._stat(infos, 5),
            novelMonComm=self._stat(infos, 8),
            novelWeekComm=self._stat(infos, 11),
            CommentNums=response.xpath(
                '//span[@id="novelInfo_commentCount"]/text()'
            ).extract_first(''),
        )























猜你喜欢

转载自：https://blog.csdn.net/baidu_32542573/article/details/80444534