spider随机请求头和ip

#创建爬虫

scrapy genspider randomIp_spider "taobao.com"

#把需要请求的url放到一个url列表中,每次随机选取其中一个作为起始链接,避免因总是访问同一页面而被检测到

import random

# Pool of product pages to choose the crawl entry point from.
# NOTE(review): presumably this fragment is meant to live in the body of the
# spider class created by `scrapy genspider` -- confirm and indent accordingly
# when pasting it there.
url_list = [
    'https://detail.tmall.com/item.htm?id=522194707780&ali_refid=a3_430583_1006:1109696291:N:%E6%B6%88%E9%98%B2%E5%BA%94%E6%80%A5%E7%81%AF:eb9682757281a9ec406cb4647d3f584a&ali_trackid=1_eb9682757281a9ec406cb4647d3f584a&spm=a230r.1.14.3',
    'https://item.taobao.com/item.htm?spm=a219r.lmn002.14.1.f3b87156TcpPbp&id=587398066660&ns=1&abbucket=16',
    'https://item.taobao.com/item.htm?spm=a230r.1.14.50.1af3248cr0GGyM&id=576997844987&ns=1&abbucket=16#detail',
]

# Pick one link at random. ``start_urls`` has to be a list of URLs: a bare
# string would be iterated character by character, so the single random
# choice is wrapped in a list.
start_urls = [random.choice(url_list)]

#到middlewares.py文件中去

#设置随机请求头
class UserAgentDownloadMiddleware(object):
    """Downloader middleware that stamps a random User-Agent on each request."""

    # Pool of User-Agent strings; one is drawn at random per request.
    USER_AGENTS = [
        "Mozilla/5.0 (Macintosh; U; PPC Mac OS X; pl-PL; rv:1.0.1) Gecko/20021111 Chimera/0.6",
        "Mozilla/5.0 (Macintosh; U; PPC Mac OS X; pl-PL; rv:1.0.1)   Gecko/20021111 Chimera/0.6",
        "Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-US; rv:1.0.1) Gecko/20021111 Chimera/0.6",
        "Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-US; rv:1.0.1) Gecko/20021104 Chimera/0.6",
        "Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.0.1) Gecko/20030111 Chimera/0.6",
        "Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.0.1) Gecko/20030109 Chimera/0.6",
        "Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.0.1) Gecko/20021220 Chimera/0.6",
        "Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.0.1) Gecko/20021216 Chimera/0.6",
    ]

    def process_request(self, request, spider):
        """Overwrite the request's ``User-Agent`` header with a random pool entry.

        ``spider`` is accepted but not used. Nothing is returned explicitly.
        """
        request.headers["User-Agent"] = random.choice(self.USER_AGENTS)
#设置随机ip
class IPProxyDownloadMiddleware(object):
    """Downloader middleware that sends each request through a random proxy."""

    # Proxy pool, stored as "host:port" entries.
    # NOTE(review): the last entry has five dot-separated groups, so it is not
    # a valid IPv4 address -- confirm the intended address before relying on it.
    PROXIES = [
        '222.190.163.141:45334',
        '183.143.73.146:31998',
        '115.216.58.182:43060',
        '116.209.129.167:27158',
        '60.167.23.29.205:44728'
    ]

    def process_request(self, request, spider):
        """Assign a random proxy from ``PROXIES`` to ``request.meta['proxy']``.

        Entries without a URL scheme are prefixed with ``http://`` because the
        proxy value is expected to be a full proxy URL, not a bare host:port.
        The chosen proxy is printed for debugging. ``spider`` is not used.

        Raises:
            OSError: if ``texr.json`` cannot be opened.
            KeyError: if ``texr.json`` lacks ``BaseSettings`` / ``size``.
        """
        # Imported here so the method does not depend on a module-level
        # ``import json`` being present in the file.
        import json

        proxy = random.choice(self.PROXIES)
        if '://' not in proxy:
            proxy = 'http://' + proxy
        print('+' * 40)
        print(proxy)
        request.meta['proxy'] = proxy

        # Context manager so the file handle is released on every request
        # instead of leaking one handle per call.
        with open("texr.json", encoding='utf-8') as f:
            setting = json.load(f)
        # NOTE(review): this value is read but not used afterwards -- confirm
        # whether the settings lookup is still needed here.
        family = setting['BaseSettings']['size']

#到settings.py文件中去,修改如下配置

# Downloader middlewares to enable, mapped to their order values.
DOWNLOADER_MIDDLEWARES = {
    # Random User-Agent header
    'taobao_for_attack.middlewares.UserAgentDownloadMiddleware': 543,
    # Random proxy IP
    'taobao_for_attack.middlewares.IPProxyDownloadMiddleware': 124,
}

猜你喜欢

转载自www.cnblogs.com/shaoqizhi/p/10485835.html