Selenium 记录 performance 日志

做爬虫时,有时需要的数据位于页面加载的资源请求当中。通常的做法是先分析请求,再拼接 URL 获取数据;但如果拼接的参数经过加密,又无法模拟算法生成正确的参数,就很头疼。而通过 Selenium 读取浏览器的 performance 日志,可以获得网站加载时的资源请求信息,利用这一特点即可获取请求的 URL 和数据。

import time

from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
import json

class Mychrome:
    """Chrome driver wrapper that records the browser's performance log.

    The performance log contains the DevTools events emitted while a page
    loads (for example ``Network.requestWillBeSent`` and
    ``Network.responseReceived``), which makes it possible to read the
    resource requests a page issues without rebuilding their URLs by hand.

    Attributes are plain instance fields:
        options: the ``webdriver.ChromeOptions`` used to start Chrome.
        flash_urls: URL patterns written to the
            ``profile.managed_plugins_allowed_for_urls`` preference.
        driver: the running ``webdriver.Chrome`` instance.
    """

    def __init__(self, flash_urls=None):
        """Build the Chrome options and start the browser.

        Args:
            flash_urls: Optional iterable of URL patterns to store in the
                ``profile.managed_plugins_allowed_for_urls`` preference.
                Defaults to no URLs, which matches the original behavior.
        """
        self.options = webdriver.ChromeOptions()
        self.flash_urls = list(flash_urls) if flash_urls else []
        self.set_browser()

    def set_browser(self):
        """Configure preferences and logging, then launch Chrome.

        Sets ``self.driver`` to the started ``webdriver.Chrome`` instance.
        """
        prefs = {
            # 1 is Chrome's "allow" content setting, i.e. images are loaded.
            "profile.managed_default_content_settings.images": 1,
        }
        if self.flash_urls:
            prefs['profile.managed_plugins_allowed_for_urls'] = self.flash_urls
        self.options.add_experimental_option('prefs', prefs)

        # Optional (left disabled, as in the original): excluding the
        # 'enable-automation' switch makes window.navigator.webdriver
        # undefined to dodge anti-crawler checks; per the original author's
        # note it has no effect in headless mode.
        # self.options.add_experimental_option("excludeSwitches", ['enable-automation'])

        # Enable the performance log. Current chromedriver only honors the
        # vendor-prefixed 'goog:loggingPrefs' capability; the legacy
        # 'loggingPrefs' key set through DesiredCapabilities is not picked
        # up, and the `desired_capabilities=` keyword no longer exists in
        # Selenium 4, so the capability is carried by the options object.
        self.options.set_capability('goog:loggingPrefs', {'performance': 'ALL'})
        self.driver = webdriver.Chrome(options=self.options)

    @staticmethod
    def _event_params(entry):
        """Return the DevTools event ``params`` dict of one log entry.

        A performance log entry stores a JSON string under ``'message'``;
        that JSON wraps the event as ``{"message": {"method": ...,
        "params": {...}}}``. Missing levels yield an empty dict.

        Args:
            entry: One dict from ``driver.get_log('performance')``.

        Returns:
            The event's ``params`` dict, or ``{}`` when absent.
        """
        payload = json.loads(entry.get('message') or '{}')
        event = payload.get('message') or {}
        return event.get('params') or {}

    def gethtml(self, url='http://www.baidu.com'):
        """Load ``url`` and print the requests/responses seen while loading.

        Args:
            url: Page to open. Defaults to the URL the original hard-coded.
        """
        self.driver.get(url)
        # get_log() empties the driver's log buffer on every call, so the
        # entries are fetched exactly once and reused below.
        entries = self.driver.get_log('performance')
        print(entries)
        print('-' * 60)
        for entry in entries:
            params = self._event_params(entry)
            request = params.get('request')
            response = params.get('response')
            if request is not None:
                print(request)  # outgoing request, including failed ones
            if response is not None:
                print(response)  # response that was actually received

if __name__ == '__main__':
    # Keep a handle on the wrapper itself (gethtml() returns None) so the
    # Chrome process can be shut down even if loading the page fails.
    browser = Mychrome()
    try:
        browser.gethtml()
    finally:
        browser.driver.quit()
发布了77 篇原创文章 · 获赞 3 · 访问量 1万+

猜你喜欢

转载自blog.csdn.net/qq_24137739/article/details/100551059