关于selenium中chrome被反扒的问题(1)

问题描述:

       比如一个网站,我们在浏览器可以正常打开,但是使用selenium模拟器却不可以,这是被反扒了,或许尝试,添加代理插件的方法,可以解决这一问题.

代码如下:

定义一个获取代理插件zip文件的函数

from selenium import webdriver
import os
import re
import zipfile


def get_chrome_proxy_extension(proxy):
    """获取一个Chrome代理扩展,里面配置有指定的代理(带用户名密码认证)
    proxy - 指定的代理,格式: username:password@ip:port
    """
    m = re.compile('([^:]+):([^\@]+)\@([\d\.]+):(\d+)').search(proxy)
    if m:
        # 提取代理的各项参数
        username = m.groups()[0]
        password = m.groups()[1]
        ip = m.groups()[2]
        port = m.groups()[3]
        # 创建一个定制Chrome代理扩展(zip文件)
        if not os.path.exists(CUSTOM_CHROME_PROXY_EXTENSIONS_DIR):
            os.mkdir(CUSTOM_CHROME_PROXY_EXTENSIONS_DIR)
        extension_file_path = os.path.join(CUSTOM_CHROME_PROXY_EXTENSIONS_DIR, '{}.zip'.format(proxy.replace(':', '_')))
        if not os.path.exists(extension_file_path):
            # 扩展文件不存在,创建
            zf = zipfile.ZipFile(extension_file_path, mode='w')
            zf.write(os.path.join(CHROME_PROXY_HELPER_DIR, 'manifest.json'), 'manifest.json')
            # 替换模板中的代理参数
            background_content = open(os.path.join(CHROME_PROXY_HELPER_DIR, 'background.js')).read()
            background_content = background_content.replace('YOU_PROXY_ADDRESS', ip)
            background_content = background_content.replace('YOUR_PROXY_PORT', port)
            background_content = background_content.replace('YOUR_PROXY_USERNAME', username)
            background_content = background_content.replace('YOUR_PROXY_PASSWORD', password)
            zf.writestr('background.js', background_content)
            zf.close()
        return extension_file_path
    else:
        raise Exception('Invalid proxy format. Should be username:password@ip:port')

background.js文件内容

var config = {
    mode: "fixed_servers",
    rules: {
      singleProxy: {
        scheme: "http",
        host: "YOU_PROXY_ADDRESS",
        port: parseInt(YOUR_PROXY_PORT)
      },
      bypassList: ["foobar.com"]
    }
  };

chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

function callbackFn(details) {
    return {
        authCredentials: {
            username: "YOUR_PROXY_USERNAME",
            password: "YOUR_PROXY_PASSWORD"
        }
    };
}

chrome.webRequest.onAuthRequired.addListener(
        callbackFn,
        {urls: ["<all_urls>"]},
        ['blocking']
);

manifest.json文件内容

{
    "version": "1.0.0",
    "manifest_version": 2,
    "name": "Chrome Proxy",
    "permissions": [
        "proxy",
        "tabs",
        "unlimitedStorage",
        "storage",
        "<all_urls>",
        "webRequest",
        "webRequestBlocking"
    ],
    "background": {
        "scripts": ["background.js"]
    },
    "minimum_chrome_version":"22.0.0"
}

目录结构

if __name__ == "__main__":
    # 测试
    options = webdriver.ChromeOptions()
    # 添加一个自定义的代理插件(配置特定的代理,含用户名密码认证)
    options.add_extension(get_chrome_proxy_extension(proxy='username:pwd@host:port'))
    driver = webdriver.Chrome(chrome_options=options,executable_path=r'F:\MyDatas\chromedriver.exe')
    # 访问一个IP回显网站,查看代理配置是否生效了
    driver.get('https://makeabooking.flyscoot.com/Member?at=signup&culture=zh-CN')
    time.sleep(30)

 经过测试ok

猜你喜欢

转载自blog.csdn.net/qq_38839677/article/details/83303215