"""Custom Scrapy downloader middlewares: rotate User-Agent headers and proxy IPs.

Configure ``USER_AGENTS`` and ``IPLIST`` in settings.py and activate these
middlewares in ``DOWNLOADER_MIDDLEWARES``.
"""
import random


class RandomUserAgent:
    """Downloader middleware that assigns a random User-Agent to each request."""

    def __init__(self, agents):
        # Candidate User-Agent strings to choose from.
        self.agents = agents

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware from the USER_AGENTS list in crawler settings."""
        return cls(crawler.settings.getlist('USER_AGENTS'))

    def process_request(self, request, spider):
        """Set a random User-Agent header on the outgoing request.

        Returns None so Scrapy continues processing the request normally.
        """
        # Guard: settings.getlist returns [] when USER_AGENTS is unset;
        # random.choice on an empty list would raise IndexError per request.
        if not self.agents:
            return
        # setdefault preserves any User-Agent the spider set explicitly.
        request.headers.setdefault('User-Agent', random.choice(self.agents))


class RandomProxy:
    """Downloader middleware that routes each request through a random proxy."""

    def __init__(self, iplist):
        # Candidate proxy URLs to choose from.
        self.iplist = iplist

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware from the IPLIST list in crawler settings."""
        return cls(crawler.settings.getlist('IPLIST'))

    def process_request(self, request, spider):
        """Attach a randomly chosen proxy to request.meta['proxy'].

        Returns None so Scrapy continues processing the request normally.
        """
        # Guard: an unset/empty IPLIST would make random.choice raise IndexError.
        if not self.iplist:
            return
        request.meta['proxy'] = random.choice(self.iplist)
Custom Scrapy downloader middlewares for rotating the User-Agent header and the proxy IP
猜你喜欢
Reposted from blog.csdn.net/qq_42034590/article/details/80778691
今日推荐
周排行