Scrapy 设置代理
- 在 settings.py 中添加代理列表 PROXIES
# Proxy pool. Each entry is a dict with:
#   'ip_port'     - "host:port" of the proxy server
#   'user_passwd' - "username:password" credentials string
# The values below are placeholders and must be replaced with real ones.
PROXIES = [
    {
        'ip_port': 'xxx.xxx.x.xx(ip地址):xxxx(端口号)',
        'user_passwd': 'username:passwd(付费代理)',
    },
]
- 自定义中间件
import random,base64
from MinYan.settings import PROXIES
class RandomProxy(object):
    """Downloader middleware that sends each request through a random proxy.

    For every request one entry is picked at random from ``PROXIES``. Each
    entry is a dict with an ``'ip_port'`` string ("host:port") and an
    optional ``'user_passwd'`` string ("username:password").
    """

    def process_request(self, request, spider):
        """Attach a randomly chosen proxy, and its credentials if any.

        Args:
            request: Object exposing dict-like ``meta`` and ``headers``.
            spider: Not used by this middleware.

        Returns:
            None.
        """
        proxy = random.choice(PROXIES)
        # The proxy URL is the same whether or not credentials are present.
        request.meta['proxy'] = "http://" + proxy['ip_port']

        # A missing key, None, or an empty string all mean "no authentication".
        credentials = proxy.get('user_passwd')
        if not credentials:
            return

        # b64encode only accepts bytes on Python 3; encode text first.
        if not isinstance(credentials, bytes):
            credentials = credentials.encode('utf-8')
        # The header value is the scheme, a single space, then the token.
        token = base64.b64encode(credentials)
        request.headers['Proxy-Authorization'] = b'Basic ' + token
- 在 settings.py 中开启 DOWNLOADER_MIDDLEWARES，例如：DOWNLOADER_MIDDLEWARES = {'MinYan.middlewares.RandomProxy': 100}（模块路径请以实际项目为准）