爬取buff商城饰品的最低价(scrapy)

1.创建scrapy项目

安装包

pip install scrapy

然后进入项目文件夹

scrapy startproject 项目名
scrapy genspider 爬虫名 域名

以buff为例

scrapy startproject buff
scrapy genspider buffprice www.xxx.com 

创建完成后
在这里插入图片描述

2.获取网易buff饰品的api和参数

在这里插入图片描述
在这里插入图片描述
url:https://buff.163.com/api/market/goods

3.编写爬虫

（下面的代码用到了 random、time 和 scrapy，爬虫文件开头需要 import random、import time、import scrapy）

class BuffpriceSpider(scrapy.Spider):
    name = 'buffprice'
    urlp='https://buff.163.com/api/market/goods/price_history/buff'


    cookies=[{
    
    'Device-Id': 'DHupMTHY21dU88DHKKxm', ' Locale-Supported': 'zh-Hans', ' game': 'csgo', ' NTES_YD_SESS': 'sdRnq33P8uTwN2I2T.Axt5ZsW1pp1BTXHPCMK5t9QQ3a8Ftg8d_0OTbKnOw.peSOyyyDJLbZIJUImbyxKp7aaAOMuS2KxTi30YeARv7oz3XyhNlQRrqf.USZjJuDGZpTo2wmqT3iCGlGTbK565Uvfb.cjfSje2TaZmLB4lJOiOOovY2KF0nkpTyzJ7SJvPLjxpXvBFD7fBdYFhXW4LirCUwfLWXubq3aPqUMqUwkouyz.', ' S_INFO': '1681704803|0|0&60##|16581939586', ' P_INFO': '16581939586|1681704803|1|netease_buff|00&99|sic&1681652238&netease_buff#sic&510100#10#0#0|&0|null|16581939586', ' remember_me': 'U1106380554|Pj2YpQ0dSCQgZHWo7BmX3jA2wXRqrE2V', ' session': '1-xOpClokz5XCP2uTasjgKvqzYAfLeb88QyDW32zL9l6HO2034014290', ' csrf_token': 'IjQ3ZmM4OGNlMjdiMDE3OTZlZmE4YWMzOTUxM2NjYWE3YTFlYmIwM2Qi.Fx5Y9A.c7j1TlR1aO4xZydZbWkLJb2f-ik'},{
    
    'Device-Id': 'Qd13dM33YjFu5uEyOUVS', ' Locale-Supported': 'zh-Hans', ' game': 'csgo', ' NTES_YD_SESS': 'BERnNqenNhpaucVh8kJsjAPXbrbFeg4pUOEy8neUuuTstMe.tG1Vzrs2hE227KgKO7KpArtQohZQhKNq0weIGbSYTD5aGpmTUyVAUcHRJfJ3Y1Gmxla66AHqmRdLkqFraCIb5rTPEkDkr48nYnQj74JKm7ZmpCrsqbgf0DRzPzzPBvVV4rCHOgKslzOmHgvsstPr34SKbMI938rA0uNuBFcEEhfHag5c1w4NywkGpcnvJ', ' S_INFO': '1681704859|0|0&60##|17030031991', ' P_INFO': '17030031991|1681704859|1|netease_buff|00&99|sic&1681696671&netease_buff#sic&510100#10#0#0|&0|null|17030031991', ' remember_me': 'U1106382607|PQD0AqsSJTaP3alxmZCpFdROPToSdNaY', ' session': '1-myjrLAIqOnbtflxm5vLjJz4ty8l_Wb48tWMRjLKVJMa72034012247', ' csrf_token': 'IjFiZjFlNTZmZDMxZTM2MzdkN2NlZWM0NDhmMDgyMTRjMmYwYWQ3YzEi.Fx5ZIA.MHVgUgEfQOTTXQXd18QiAapUJlM'},{
    
    'Device-Id': 'rKHlSmE6bFUmCirkgjDk', ' Locale-Supported': 'zh-Hans', ' game': 'csgo', ' NTES_YD_SESS': 'QFAzQExV0cDcg34ReIIaSG3gAEDH2ouJgcb21vZfHHJUSnZrSDeaKRZjluKgwHQ5xag0LXxXWURcnIUea684IOuph1GduKWywqrMRcJcHuNWjgk2EsSAqBqMxil3LM5R9mVwyRJYbLWLRI1vzv7EdIh_xdFxqmRUMwTuoWiKYKKlPNuZ9QbSzRH71l4dLH7N32M5_2QqH_v6Ad9qPl08as6vE46FR5WM5R04tPOyMFt.h', ' S_INFO': '1681704912|0|0&60##|16537761833', ' P_INFO': '16537761833|1681704912|1|netease_buff|00&99|sic&1681696581&netease_buff#sic&510100#10#0#0|&0|null|16537761833', ' remember_me': 'U1106387923|aqxeUnC6vHMes8UTpb84UL7CRqhzUIJS', ' session': '1-KaY6HG6_1rCK3vhnT8h1Wm1D2-ia45IK-dr4nkE68MsF2034023563', ' csrf_token': 'ImUyOWYzYzUwNzE1ODVkODdhMjM5ODBkNGNlY2NiYzU5ZDgzNGRmNWIi.Fx5ZVw.THhhI3En7ft8vNNBT6K--d5s3qY'}]
    # Number of the listing page requested most recently; parse() increments
    # it before asking for the next page.
    a=1
    #allowed_domains = ['www.xxx.com']
    # Begin at page 1 so the first response lines up with the counter above
    # (starting at page_num=4 skipped page 1 and fetched page 4 twice).
    start_urls = ['https://buff.163.com/api/market/goods?game=csgo&page_num=1&use_suggestion=0&_=1681384999985']

我们从第一页开始爬取所以起始url用第一页的url
由于这些数据都要登录后才能查看，所以我们要记录cookie

    def parse(self, response):
        """Yield one item per good on this page, then request the next page.

        The JSON body is expected to hold the goods under
        ``data -> items``.  Each good is emitted as a plain dict with the
        keys ``id``, ``name``, ``appid``, ``sell_num`` and
        ``sell_min_price`` so that pipelines / feed exports receive the
        scraped prices.  When the page carries no items, pagination stops
        instead of requesting ever-higher page numbers.

        Args:
            response: the Scrapy response for one goods-listing page.

        Yields:
            dict: one per good found on the page.
            scrapy.FormRequest: a GET request for the following page.
        """
        payload = response.json()
        goods = (payload.get('data') or {}).get('items') or []
        if not goods:
            # Presumably we are past the last page or the API refused the
            # request (e.g. expired login) -- either way there is nothing
            # left to paginate over.
            self.logger.info('No items in response for %s; stopping.', response.url)
            return

        for good in goods:
            yield {
                'id': good['id'],
                'name': good['name'],
                'appid': good['appid'],
                'sell_num': good['sell_num'],
                'sell_min_price': good['sell_min_price'],
            }

        # Advance the page counter kept on the spider and ask for that page.
        self.a = self.a + 1
        data = {
            'game': 'csgo',
            'page_num': str(self.a),
            'use_suggestion': '0',
            '_': tim(),  # millisecond timestamp expected in the "_" parameter
        }
        yield scrapy.FormRequest(
            url='https://buff.163.com/api/market/goods',
            cookies=random.choice(self.cookies),  # pick one of the stored logins
            method='GET',
            callback=self.parse,
            formdata=data,
        )
def tim():
    """Return the current Unix time in milliseconds as a decimal string."""
    millis = round(time.time() * 1000)
    return f"{millis}"

我们将数据转换为json格式便于数据的提取
这里tim()函数用来生成_参数所需的毫秒级时间戳
因为请求需要携带参数，所以这里使用scrapy.FormRequest
method='GET' 声明发送方法
callback=self.parse 声明获取的数据传输给哪个函数处理
formdata=data 类似于requests.get中的params、requests.post中的data
cookies= 添加cookie，这个参数需要先把浏览器里复制的cookie字符串解析成字典

cookie='Device-Id=rKHlSmE6bFUmCirkgjDk; Locale-Supported=zh-Hans; game=csgo; NTES_YD_SESS=QFAzQExV0cDcg34ReIIaSG3gAEDH2ouJgcb21vZfHHJUSnZrSDeaKRZjluKgwHQ5xag0LXxXWURcnIUea684IOuph1GduKWywqrMRcJcHuNWjgk2EsSAqBqMxil3LM5R9mVwyRJYbLWLRI1vzv7EdIh_xdFxqmRUMwTuoWiKYKKlPNuZ9QbSzRH71l4dLH7N32M5_2QqH_v6Ad9qPl08as6vE46FR5WM5R04tPOyMFt.h; S_INFO=1681704912|0|0&60##|16537761833; P_INFO=16537761833|1681704912|1|netease_buff|00&99|sic&1681696581&netease_buff#sic&510100#10#0#0|&0|null|16537761833; remember_me=U1106387923|aqxeUnC6vHMes8UTpb84UL7CRqhzUIJS; session=1-KaY6HG6_1rCK3vhnT8h1Wm1D2-ia45IK-dr4nkE68MsF2034023563; csrf_token=ImUyOWYzYzUwNzE1ODVkODdhMjM5ODBkNGNlY2NiYzU5ZDgzNGRmNWIi.Fx5ZVw.THhhI3En7ft8vNNBT6K--d5s3qY'
# Turn the raw "k1=v1; k2=v2; ..." Cookie header string into a dict.
cookies = {}
for pair in cookie.split(';'):
    # strip() drops the blank that follows each ';' so keys do not start
    # with a space; partition() cuts at the first '=' only, so a value
    # that itself contains '=' is kept whole.
    key, sep, value = pair.strip().partition('=')
    if sep:  # ignore empty / malformed segments that have no '='
        cookies[key] = value
print(cookies)

4.setting设置

ROBOTSTXT_OBEY = False  # 改为False
COOKIES_ENABLED = False  # 取消注释
DEFAULT_REQUEST_HEADERS = {
'cookie': 'Device-Id=DHupMTHY21dU88DHKKxm; Locale-Supported=zh-Hans; game=csgo; NTES_YD_SESS=sdRnq33P8uTwN2I2T.Axt5ZsW1pp1BTXHPCMK5t9QQ3a8Ftg8d_0OTbKnOw.peSOyyyDJLbZIJUImbyxKp7aaAOMuS2KxTi30YeARv7oz3XyhNlQRrqf.USZjJuDGZpTo2wmqT3iCGlGTbK565Uvfb.cjfSje2TaZmLB4lJOiOOovY2KF0nkpTyzJ7SJvPLjxpXvBFD7fBdYFhXW4LirCUwfLWXubq3aPqUMqUwkouyz.; S_INFO=1681704803|0|0&60##|16581939586; P_INFO=16581939586|1681704803|1|netease_buff|00&99|sic&1681652238&netease_buff#sic&510100#10#0#0|&0|null|16581939586; remember_me=U1106380554|Pj2YpQ0dSCQgZHWo7BmX3jA2wXRqrE2V; session=1-xOpClokz5XCP2uTasjgKvqzYAfLeb88QyDW32zL9l6HO2034014290; csrf_token=IjQ3ZmM4OGNlMjdiMDE3OTZlZmE4YWMzOTUxM2NjYWE3YTFlYmIwM2Qi.Fx5Y9A.c7j1TlR1aO4xZydZbWkLJb2f-ik',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.34'

}
#为所有请求（包括起始请求）添加默认的cookie请求头；注意COOKIES_ENABLED = False时cookie中间件被关闭，请求里的cookies=参数不会生效，实际发送的是这里的cookie
~~~## 标题

猜你喜欢

转载自blog.csdn.net/qq_62975494/article/details/130200618