About Python web scraping

Scraping Huya live-room information

1. Code

import requests
from lxml import etree
import json

# Append a record to the output file
def create_file(file_path, msg):
    f = open(file_path, "a", encoding='utf-8')
    f.write(msg)
    f.close()

start_url = 'https://www.huya.com/l'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'}

# Fetch the live-list page and read the total page count from the data-pages attribute
html_obj = requests.get(url=start_url, headers=headers).text
page_obj = etree.HTML(html_obj)
page_list = page_obj.xpath('//div[@class="list-page"]/@data-pages')

for i in page_list:
    numbers = range(0, int(i))
    for number in numbers:
        # Paged JSON endpoint that returns the live-room list
        url = 'https://www.huya.com/cache.php?m=LiveList&do=getLiveListByPage&tagAll=0&page=' + str(number)
        response = requests.get(url=url, headers=headers).text
        datas = json.loads(response)
        data1 = datas["data"]["datas"]
        for data2 in data1:
            print("Room type: " + data2['gameFullName'])
            print("Room name: " + data2['roomName'])
            print("Streamer:  " + data2['nick'])
            print("Fans:      " + str(data2['totalCount']))
            print("-" * 100)
            content = ("\nRoom type: " + data2['gameFullName'] +
                       "\nRoom name: " + data2['roomName'] +
                       "\nStreamer:  " + data2['nick'] +
                       "\nFans:      " + str(data2['totalCount']))
            create_file("d:\\huya.txt", content)
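One practical note: create_file() reopens the output file for every room record. A context-manager version (a minimal sketch, not from the original post; the name append_record is hypothetical) does the same job while guaranteeing the file handle is closed even if the write fails:

def append_record(file_path, msg):
    # "with" closes the file automatically, even if write() raises an exception
    with open(file_path, "a", encoding="utf-8") as f:
        f.write(msg)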
 

2. Structures and syntax used

The script uses file creation and appending, nested for loops, sending HTTP requests, receiving the responses, and processing the response data into the desired output; the core request/response pattern is sketched below.
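The pattern is: build the paged URL, send a GET request, parse the JSON body, and loop over the room entries. A minimal sketch of that request/response handling, assuming the same endpoint and the "data"/"datas" field names used in the script above (the fetch_rooms name, the timeout, and the status check are additions here for robustness, not part of the original post):

import json
import requests

HEADERS = {'User-Agent': 'Mozilla/5.0'}

def fetch_rooms(page, headers=HEADERS):
    # Same paged endpoint as in the script above
    url = ('https://www.huya.com/cache.php?m=LiveList'
           '&do=getLiveListByPage&tagAll=0&page=' + str(page))
    resp = requests.get(url, headers=headers, timeout=10)
    resp.raise_for_status()          # stop on HTTP errors instead of parsing an error page
    payload = json.loads(resp.text)  # equivalent to resp.json()
    return payload["data"]["datas"]  # list of room dicts, per the script above

# Usage: print the room name and streamer nick for the first page
for room in fetch_rooms(0):
    print(room.get('roomName'), room.get('nick'))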

3. Run results

(Screenshot of the run output.)


Reposted from blog.csdn.net/weixin_41340417/article/details/107218908