import random

from scrapy import signals


class Test001UseragentMiddleware(object):
    """Downloader middleware that puts a random User-Agent on each request."""

    # Pool of User-Agent strings; one is picked at random per request.
    USER_AGENT = [
        # Chrome
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
        # Firefox
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
        # Internet Explorer
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)",
        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; GTB7.0)",
        "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)",
        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0;",
        "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0;rv:11.0) like Gecko",
        # Sogou browser
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1;Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)",
        # 360 browser
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1;360SE)",
        # QQ browser
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1;TencentTraveler 4.0)",
    ]

    def process_request(self, request, spider):
        """Overwrite the request's ``user-agent`` header with a random pool entry.

        Returns ``None`` implicitly.
        """
        request.headers["user-agent"] = random.choice(self.USER_AGENT)


class CheckUA:
    """Downloader middleware that flags requests whose response is not HTTP 200."""

    def process_response(self, request, response, spider):
        """Set ``dont_filter`` on the request when the status is not 200.

        The response itself is always returned unchanged.
        """
        # The status is compared as an integer: the previous comparison against
        # the string '200' was always true, so every request was flagged.
        if response.status != 200:
            # Let the request object re-enter the queue if it is sent again,
            # instead of being dropped as a duplicate.
            request.dont_filter = True
        # The return must not be omitted.
        return response


class RandomProxy(object):
    """Downloader middleware that routes each request through a random proxy."""

    # Proxy addresses without the scheme; "http:" is prepended when applied.
    IP = [
        '//118.31.250.72:8080'
    ]

    def process_request(self, request, spider):
        """Store a randomly chosen proxy URL in ``request.meta['proxy']``.

        Returns ``None`` implicitly.
        """
        proxy = random.choice(self.IP)
        request.meta['proxy'] = "http:%s" % proxy
Python 小白学习记录：Scrapy 设置随机请求头、设置免费代理 IP
猜你喜欢
转载自www.cnblogs.com/jswf/p/12340684.html
今日推荐
周排行