Python 小白学习记录:Scrapy 设置随机请求头、设置免费代理 IP

from scrapy import signals
import random
class Test001UseragentMiddleware(object):
    """Middleware-style class that sets a randomly chosen User-Agent on each request."""

    # Pool of User-Agent strings to pick from, grouped by browser family.
    USER_AGENT = [
        # Chrome
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
        # Firefox
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
        # Internet Explorer
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; GTB7.0)",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)",
        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)",
        # The original entry ended with "Trident/5.0;" and never closed the
        # parenthesis, producing a malformed header value.
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)",
        "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0;rv:11.0) like Gecko",
        # Sogou browser
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1;Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)",
        # 360 browser
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1;360SE)",
        # QQ browser
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1;TencentTraveler 4.0)",
    ]

    def process_request(self, request, spider):
        """Store a random entry from ``USER_AGENT`` under the ``user-agent`` header.

        Args:
            request: Object exposing a mutable ``headers`` mapping.
            spider: Unused here.

        Returns:
            None.
        """
        user_agent = random.choice(self.USER_AGENT)
        request.headers["user-agent"] = user_agent
class CheckUA:
    """Middleware-style class that marks requests with non-200 responses as unfiltered."""

    def process_response(self, request, response, spider):
        """Set ``request.dont_filter`` when the response status is not 200.

        Args:
            request: The request that produced ``response``.
            response: The response; its ``status`` attribute is inspected.
            spider: Unused here.

        Returns:
            The same ``response`` object, unchanged.
        """
        # Scrapy exposes the HTTP status as an integer. The original compared
        # it with the string '200', which is never equal, so every request
        # (including successful ones) was flagged.
        if response.status != 200:
            # Lets this request pass the duplicate filter if it is scheduled again.
            request.dont_filter = True
        # The response must be returned so that processing continues.
        return response
class RandomProxy(object):
    """Middleware-style class that assigns a proxy URL to each request."""

    # Candidate proxy endpoints, written scheme-relative ("//host:port").
    IP = ['//118.31.250.72:8080']

    def process_request(self, request, spider):
        """Pick a random entry from ``IP`` and store it in ``request.meta['proxy']``.

        The stored value is the chosen entry prefixed with ``http:``.
        ``spider`` is unused here, and nothing is returned.
        """
        request.meta['proxy'] = "http:%s" % random.choice(self.IP)

猜你喜欢

转载自www.cnblogs.com/jswf/p/12340684.html