问题描述:
比如一个网站,我们在浏览器中可以正常打开,但是使用selenium驱动的浏览器却打不开,这说明请求被网站的反爬机制拦截了。可以尝试通过添加代理插件的方法来解决这一问题。
代码如下:
定义一个获取代理插件zip文件的函数
import json
import os
import re
import time
import zipfile

from selenium import webdriver
def _escape_for_js_string(value):
    """Return *value* escaped for use inside a double-quoted JS string literal."""
    # json.dumps produces a valid double-quoted literal; the surrounding quotes
    # are dropped because the background.js template already supplies them.
    return json.dumps(value)[1:-1]


def get_chrome_proxy_extension(proxy):
    """Return the path of a Chrome extension zip configured for *proxy*.

    The extension is assembled from the ``manifest.json`` and ``background.js``
    templates found in ``CHROME_PROXY_HELPER_DIR``; the placeholders in
    ``background.js`` are replaced with the proxy address, port, username and
    password. The zip is stored in ``CUSTOM_CHROME_PROXY_EXTENSIONS_DIR`` and
    reused on later calls with the same proxy string.

    Args:
        proxy: Proxy specification in the form ``username:password@ip:port``.

    Returns:
        Path of the extension zip file.

    Raises:
        Exception: If *proxy* does not match ``username:password@ip:port``.
        OSError: If a template cannot be read or the zip cannot be written.
    """
    match = re.search(r'([^:]+):([^@]+)@([\d.]+):(\d+)', proxy)
    if not match:
        raise Exception('Invalid proxy format. Should be username:password@ip:port')
    username, password, ip, port = match.groups()

    # exist_ok avoids the check-then-create race and also creates parent dirs.
    os.makedirs(CUSTOM_CHROME_PROXY_EXTENSIONS_DIR, exist_ok=True)
    extension_file_path = os.path.join(
        CUSTOM_CHROME_PROXY_EXTENSIONS_DIR,
        '{}.zip'.format(proxy.replace(':', '_')),
    )
    if os.path.exists(extension_file_path):
        # An extension for this proxy was already generated; reuse it.
        return extension_file_path

    # Fill in the template before touching the output file, so that a missing
    # template never leaves a zip behind.
    template_path = os.path.join(CHROME_PROXY_HELPER_DIR, 'background.js')
    with open(template_path, encoding='utf-8') as template:
        background_content = template.read()
    # 'YOU_PROXY_ADDRESS' (sic) is the exact placeholder used by the template.
    background_content = background_content.replace('YOU_PROXY_ADDRESS', ip)
    background_content = background_content.replace('YOUR_PROXY_PORT', port)
    # Credentials land inside JS string literals, so quotes and backslashes
    # must be escaped or the generated script would be syntactically broken.
    background_content = background_content.replace(
        'YOUR_PROXY_USERNAME', _escape_for_js_string(username))
    background_content = background_content.replace(
        'YOUR_PROXY_PASSWORD', _escape_for_js_string(password))

    try:
        with zipfile.ZipFile(extension_file_path, mode='w') as zf:
            zf.write(os.path.join(CHROME_PROXY_HELPER_DIR, 'manifest.json'), 'manifest.json')
            zf.writestr('background.js', background_content)
    except Exception:
        # Do not leave a partial zip on disk: the existence check above would
        # otherwise hand the corrupt file back on every later call.
        if os.path.exists(extension_file_path):
            os.remove(extension_file_path)
        raise
    return extension_file_path
background.js文件内容
// Background script template for the proxy extension.
// The tokens YOU_PROXY_ADDRESS, YOUR_PROXY_PORT, YOUR_PROXY_USERNAME and
// YOUR_PROXY_PASSWORD are placeholders that get substituted before the
// script is packaged; they must stay exactly as written.

// Send traffic through a single fixed HTTP proxy, bypassing it for the
// hosts in bypassList.
var proxyRules = {
    singleProxy: {
        scheme: "http",
        host: "YOU_PROXY_ADDRESS",
        port: parseInt(YOUR_PROXY_PORT)
    },
    bypassList: ["foobar.com"]
};

chrome.proxy.settings.set(
    {
        value: {mode: "fixed_servers", rules: proxyRules},
        scope: "regular"
    },
    function () {}
);

// Answer authentication challenges with the configured credentials.
// The listener is registered as 'blocking' so its return value is used
// as the response to the challenge.
function supplyProxyCredentials(details) {
    var credentials = {
        username: "YOUR_PROXY_USERNAME",
        password: "YOUR_PROXY_PASSWORD"
    };
    return {authCredentials: credentials};
}

chrome.webRequest.onAuthRequired.addListener(
    supplyProxyCredentials,
    {urls: ["<all_urls>"]},
    ['blocking']
);
manifest.json文件内容
{
"version": "1.0.0",
"manifest_version": 2,
"name": "Chrome Proxy",
"permissions": [
"proxy",
"tabs",
"unlimitedStorage",
"storage",
"<all_urls>",
"webRequest",
"webRequestBlocking"
],
"background": {
"scripts": ["background.js"]
},
"minimum_chrome_version":"22.0.0"
}
目录结构
if __name__ == "__main__":
    # Manual smoke test: start Chrome with the generated proxy extension and
    # load a page through it.
    options = webdriver.ChromeOptions()
    # Install the custom proxy extension (fixed proxy with username/password
    # authentication). The proxy string below is a placeholder; replace it
    # with a real username:password@ip:port value before running.
    options.add_extension(get_chrome_proxy_extension(proxy='username:pwd@host:port'))
    # NOTE(review): `chrome_options` / `executable_path` are the keyword names
    # this script was written against; newer Selenium releases may expect
    # `options=` and a Service object instead - confirm for the installed version.
    driver = webdriver.Chrome(chrome_options=options, executable_path=r'F:\MyDatas\chromedriver.exe')
    try:
        # Open the target page to check that the proxy configuration works.
        driver.get('https://makeabooking.flyscoot.com/Member?at=signup&culture=zh-CN')
        # Keep the window open long enough to inspect the result by hand.
        time.sleep(30)
    finally:
        # Always shut the browser and driver process down, even on failure.
        driver.quit()
经过测试ok