Python 使用 pyppeteer 的一些姿势

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/weixin_39198406/article/details/86693240
# Launch the browser.
# executablePath points at a specific local Chromium binary, headless=False
# requests a non-headless (visible) browser, and args forwards extra
# command-line flags to Chromium (here: a --proxy-server setting).
browser = await pyppeteer.launch(
        executablePath="/home/nick/.local/share/puppeteer/local-chromium/609904/chrome-linux/chrome",
        headless=False,
        args=['--proxy-server=114.115.252.102:10000']
    )

# Open a new page (tab) object
page = await browser.newPage()

# Set the viewport size (width x height in pixels)
await page.setViewport({"width": 1900, "height": 1100})
# Set the browser User-Agent string
await page.setUserAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36")
# Navigate the page to the target URL
await page.goto('https://blog.csdn.net/weixin_39198406')

# HTML source of the current page
content = await page.content()
# Cookies of the current page session
cookies = await page.cookies()

# 截图和保存pdf
await page.screenshot({'path': 'example.png'})
await page.pdf({'path': 'example.png'})

# J-css选择器定位元素 click-点击元素 等待页面加载完成-waitForNavigation(goto不需要用)
# 延时多久执行 {'delay': input_time_random() + 300}
while True:
    ele = await page.J(".js-page-next")  # 如果元素不存在,ele=None
    if ele:
        await page.click(".js-page-next", {'delay': input_time_random() + 300})
        await page.waitForNavigation({'waitUntil': 'load'})
    else:
        break

# evaluate runs JavaScript code in the page; the function below returns the
# document's client width/height and the window's device pixel ratio.
dimensions = await page.evaluate('''() => {
    return {
        width: document.documentElement.clientWidth,
        height: document.documentElement.clientHeight,
        deviceScaleFactor: window.devicePixelRatio,
    }
}''')
print(dimensions)

# Locate elements with an XPath expression
# NOTE(review): per the pyppeteer docs, xpath() returns a list of matching
# element handles (empty when nothing matches) — confirm before treating
# ua_ele as a single element.
ua_ele = await page.xpath("//input[@id='userName']")

# Walk the page's frame list to find an iframe by its name
frame_list = page.frames
for frame in frame_list:
    if frame.name == "mainFrame":
        # The original literal was missing its closing quote (a syntax error).
        print("iframe")

# 设置cookies,假设源cookies是dict
cookies_list = []
if cookies:
    for c in cookies:
        cookie_item = dict()
        cookie_item['name'] = c
        cookie_item['value'] = cookies[c]
        cookie_item['url'] = url
        cookies_list.append(cookie_item)
for cookie in cookies_list:
    await page.setCookie(cookie)

# Close the browser
await browser.close()

# Start the program: obtain the event loop, schedule the main() coroutine
# as a task, and block until that task has finished.
loop = asyncio.get_event_loop()
task = asyncio.ensure_future(main())
loop.run_until_complete(task)
# Scroll to the bottom: scroll the window down by the document body's height
await page.evaluate("() => {window.scrollBy(0, document.body.scrollHeight)}")
# Wait for the XPath held in need_scroll_wait_for (defined elsewhere;
# presumably an XPath string for content loaded by the scroll — verify)
await page.waitForXPath(need_scroll_wait_for)
# NOTE(review): a numeric waitFor argument is presumably a fixed pause in
# milliseconds — confirm against the pyppeteer docs
await page.waitFor(1000)

注意
如果 click() 会触发导航事件,而 waitForNavigation() 是在点击之后才单独调用的,就可能出现竞争条件(导航可能在开始等待之前就已完成),从而产生意外结果。单击并等待导航的正确模式如下:

# Run the navigation wait and the click concurrently so that the wait is
# already in place when the click triggers the navigation.
# waitOptions, selector and clickOptions are placeholders supplied by the caller.
await asyncio.gather(
    page.waitForNavigation(waitOptions),
    page.click(selector, clickOptions),
)

猜你喜欢

转载自blog.csdn.net/weixin_39198406/article/details/86693240