爬取虎牙标题、作者、热度

# -*- coding: utf-8 -*-
import scrapy
from huyaAll1.items import Huyaall1Item


class HuyaSpider(scrapy.Spider):
    """Spider that scrapes title, author and popularity from a Huya category.

    The first page is taken from the HTML in ``start_urls``; further pages
    are requested manually through the ``cache.php`` endpoint in ``url``.
    """

    name = 'huya'
    # allowed_domains = ['www.xxx.com']
    start_urls = ['https://www.huya.com/g/xingxiu']

    # Generic URL template for the paginated requests; ``%d`` is the page number.
    url = "https://www.huya.com/cache.php?m=LiveList&do=getLiveListByPage&gameId=1663&tagAll=0&page=%d"

    def parse(self, response):
        """Yield one item per live-room entry, then request pages 2 to 4.

        Args:
            response: The response for a URL from ``start_urls``.

        Yields:
            ``Huyaall1Item`` objects with ``title``, ``author`` and ``hot``
            set, followed by ``scrapy.Request`` objects for the extra pages.
        """
        li_list = response.xpath('//*[@id="js-live-list"]/li')
        for li in li_list:
            title = li.xpath('./a[2]/text()').extract_first()
            author = li.xpath('./span/span[1]/i/text()').extract_first()
            hot = li.xpath('./span/span[2]/i[2]/text()').extract_first()
            # Instantiate the item object and fill in the scraped fields.
            item = Huyaall1Item()
            item['title'] = title
            item['author'] = author
            item['hot'] = hot
            yield item

        # Send the follow-up requests manually.
        for page in range(2, 5):
            # Substitute the loop's page number; the original always used 1,
            # so every iteration produced the same URL.
            new_url = self.url % page
            # Issue a GET request handled by parse_othor.
            yield scrapy.Request(url=new_url, callback=self.parse_othor)

    # Callback modelled on parse; it must accept the same parameters as parse.
    def parse_othor(self, response):
        """Print the raw body of a paginated response.

        Args:
            response: The response for a URL built from ``url``.
        """
        print(response.text)

猜你喜欢

转载自www.cnblogs.com/zhang-da/p/12432110.html