版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
Selenium 网页截图总结(持续更新)——不废话,直接上代码
方法一
- 优点:(可视化浏览器)可以截长图
import pdfkit
from selenium import webdriver
# Method 1: load the page in Chrome, then let pdfkit/wkhtmltopdf export the
# URL the browser ended up on as a PDF.
driver = webdriver.Chrome(executable_path=r".\chromedriver.exe")
try:
    driver.get('https://blog.csdn.net/weixin_43695063/article/details/100701356')  # URL of a blog post that has a scroll bar
    ac = driver.current_url
    # pdfkit needs to be told where the wkhtmltopdf executable lives;
    # adjust this path for your machine.
    confg = pdfkit.configuration(wkhtmltopdf=r'E:\install\wkhtmltopdf\bin\wkhtmltopdf.exe')
    pdfkit.from_url(ac, './jmeter11.pdf', configuration=confg)
finally:
    # Close the browser even when the page load or the PDF export fails,
    # otherwise a stray Chrome/chromedriver process is left running.
    driver.quit()
方法二
- 全网页截图
- 优点:PhantomJS可以截全图
- 缺点:Chrome和Firefox只能截可视区域
from selenium import webdriver
# Method 2: screenshot of whatever the browser window currently shows.
driver = webdriver.Chrome(executable_path=r".\chromedriver.exe")
try:
    driver.get('https://blog.csdn.net/weixin_43695063/article/details/100701356')  # URL of a blog post that has a scroll bar
    # save_screenshot reports a write failure by returning False rather than
    # raising, so check the result instead of failing silently.
    if not driver.save_screenshot('screenshot.png'):
        raise OSError("could not write screenshot.png")
finally:
    # Close the browser even when loading or saving fails.
    driver.quit()
方法三
- 定位区块截图
- 优点:PhantomJS可以截全图
- 缺点:Chrome和Firefox只能截可视区域
from selenium import webdriver
from PIL import Image
# Method 3: take a screenshot, then crop it down to one element's rectangle.
driver = webdriver.Chrome(executable_path=r".\chromedriver.exe")
try:
    driver.get('https://www.baidu.com')
    # save_screenshot returns False when the file cannot be written; without
    # this check a stale img.png from an earlier run would be cropped instead.
    if not driver.save_screenshot(r'img.png'):
        raise OSError("could not write img.png")

    # find_element("id", ...) works on Selenium 3 and 4; the
    # find_element_by_id shortcut was removed in Selenium 4.3.
    baidu = driver.find_element("id", 'su')  # the "百度一下" search button

    left = baidu.location['x']  # x of the element's top-left corner in the page
    top = baidu.location['y']  # y of the element's top-left corner in the page
    right = left + baidu.size['width']  # x of the bottom-right corner
    bottom = top + baidu.size['height']  # y of the bottom-right corner
    print({"left": left, "top": top, "right": right, "bottom": bottom})
    print("baidu.size['width']:%s" % baidu.size['width'])
    print("baidu.size['height']:%s" % baidu.size['height'])

    # Element coordinates are CSS pixels, while the screenshot is in device
    # pixels; on scaled (HiDPI) displays the two differ by devicePixelRatio.
    scale = driver.execute_script("return window.devicePixelRatio") or 1
    box = tuple(int(round(value * scale)) for value in (left, top, right, bottom))
finally:
    # Close the browser even when a step above fails.
    driver.quit()

# Crop the element out of the saved screenshot; the context manager closes
# the source image file once the crop has been written.
with Image.open(r'img.png') as picture:
    picture.crop(box).save(r'img1.png')
方法四
- 元素截图(对 id 为 container 的元素单独截图)
- 优点:PhantomJS可以截全图
- 缺点:Chrome和Firefox只能截可视区域
from selenium import webdriver
import os

# Method 4: ask WebDriver for a PNG of a single element and write it to disk.
driver = webdriver.Chrome(executable_path=r".\chromedriver.exe")
try:
    driver.get('https://blog.csdn.net/weixin_43695063/article/details/100701356')  # URL of a blog post that has a scroll bar
    # screenshot_as_png returns the PNG bytes; they must be kept in a variable
    # so they can be written below. find_element("xpath", ...) works on
    # Selenium 3 and 4 (find_element_by_xpath was removed in Selenium 4.3).
    img = driver.find_element("xpath", '//*[@id="container"]').screenshot_as_png
    # open() does not create missing directories.
    os.makedirs('image', exist_ok=True)
    with open('image/img.png', 'wb') as f:
        f.write(img)
finally:
    # Close the browser even when locating the element or writing fails.
    driver.quit()
方法五
- 整页另存为 PDF(通过 Chrome 的打印对话框)
- 优点:模拟鼠标键盘操作浏览器
- 缺点:脚本运行期间,不可以动光标和键盘
import time
import pyperclip,pyautogui
import win32api
import win32con
from selenium import webdriver
# Method 5 setup: start Chrome via the given chromedriver executable and load
# the page that will be saved through the print dialog.
CHROMEDRIVER_PATH = r".\chromedriver.exe"
PAGE_URL = 'https://blog.csdn.net/weixin_43695063/article/details/100701356'  # URL of a blog post that has a scroll bar

driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH)
driver.get(PAGE_URL)
def _tap_key(vk_code):
    """Press and release one virtual key through win32api.keybd_event."""
    win32api.keybd_event(vk_code, 0, 0, 0)
    win32api.keybd_event(vk_code, 0, win32con.KEYEVENTF_KEYUP, 0)


def baocun_pdf(save_path=r'C:\install\.pdf'):
    """Save the focused browser page as a PDF by typing into the print dialog.

    The function sends synthetic key presses to whatever window has keyboard
    focus, so the mouse and keyboard must not be touched while it runs.

    Args:
        save_path: Text pasted into the "save as" file-name field (folder plus
            file name). Defaults to the path the original script used.

    NOTE(review): the Tab/Down counts below were presumably tuned for one
    specific Chrome version and UI language; verify them on the target machine.
    """
    vk_p = ord('P')  # virtual-key code 80

    # Ctrl+P: open the print dialog.
    time.sleep(2)
    win32api.keybd_event(win32con.VK_CONTROL, 0, 0, 0)
    win32api.keybd_event(vk_p, 0, 0, 0)
    win32api.keybd_event(win32con.VK_CONTROL, 0, win32con.KEYEVENTF_KEYUP, 0)
    win32api.keybd_event(vk_p, 0, win32con.KEYEVENTF_KEYUP, 0)
    time.sleep(1)

    # Tab twice.
    _tap_key(win32con.VK_TAB)
    _tap_key(win32con.VK_TAB)
    time.sleep(1)

    # Arrow-down once, then Tab seven more times with a short pause before each.
    _tap_key(win32con.VK_DOWN)
    for _ in range(7):
        time.sleep(0.3)
        _tap_key(win32con.VK_TAB)
    time.sleep(3)

    # Enter: activate the save button.
    _tap_key(win32con.VK_RETURN)
    time.sleep(1)

    # Put the target folder + file name on the clipboard and paste it.
    # pyautogui key names are lowercase; an uppercase 'V' makes pyautogui add
    # Shift, turning the chord into Ctrl+Shift+V.
    pyperclip.copy(save_path)
    pyautogui.hotkey('ctrlleft', 'v')
    time.sleep(1)

    # Enter: confirm the save.
    pyautogui.press('enter')
    time.sleep(1)

    # Best-effort follow-up: Left arrow then Enter (presumably answers an
    # "overwrite existing file?" prompt -- TODO confirm). Failures here are
    # reported but do not abort the run; unlike a bare ``except`` this does
    # not swallow KeyboardInterrupt.
    try:
        _tap_key(win32con.VK_LEFT)
        time.sleep(1)
        pyautogui.press('enter')
    except Exception as exc:
        print("baocun_pdf: follow-up key presses failed: %s" % exc)
try:
    baocun_pdf()
finally:
    # Close the browser even when the keyboard automation raises.
    driver.quit()
方法六
- 可视化浏览器截图
- 优点:可视化浏览器截带有滚动条的长图
- 缺点: 自定义截取高度和次数,图片与图片之间可能会有重复部分,完美衔接需要调试滑动的高度
import fitz
import pdfkit
import requests
from selenium import webdriver
import time
import glob
import os
import warnings
# Method 6 setup: a visible, maximised Chrome window showing the target page.
warnings.simplefilter("ignore", ResourceWarning)

chrome_options = webdriver.ChromeOptions()
# Pass the switch exactly as Chrome spells it, with no surrounding whitespace.
chrome_options.add_argument('--start-maximized')
# Options for a headless run with a fixed window size instead:
#   chrome_options.add_argument('--headless')
#   chrome_options.add_argument('--disable-gpu')
#   chrome_options.add_argument("window-size=1920,1080")
driver = webdriver.Chrome(executable_path=r".\chromedriver.exe", options=chrome_options)
driver.get('https://blog.csdn.net/weixin_43695063/article/details/100701356')  # URL of a blog post that has a scroll bar
driver.maximize_window()
driver.fullscreen_window()
def _call_first(obj, names, *args):
    """Call the first method listed in *names* that *obj* actually provides."""
    for name in names:
        method = getattr(obj, name, None)
        if method is not None:
            return method(*args)
    raise AttributeError("%r has none of: %s" % (obj, ", ".join(names)))


def get_img_pdf_return_url(driver, step=936, total_height=7198,
                           out_dir=r"C:\Users\Administrator\Desktop\img_pdf",
                           pdf_name="aa.pdf"):
    """Screenshot a long page piece by piece and bundle the shots into a PDF.

    The page is captured, scrolled down by ``step`` pixels, captured again,
    and so on until the scroll offset exceeds ``total_height``. Every
    screenshot then becomes one page of the resulting PDF.

    Args:
        driver: Selenium WebDriver with the target page already loaded.
        step: Pixels scrolled between screenshots; pick it to match the
            visible height of the browser window so the shots line up.
        total_height: Scroll offset after which capturing stops.
        out_dir: Directory for the screenshots and the PDF (created if
            missing).
        pdf_name: File name of the PDF inside ``out_dir``; an existing file
            of that name is replaced.

    Returns:
        Path of the written PDF.

    Raises:
        ValueError: If ``step`` is not positive (the loop would never end).
        OSError: If a screenshot cannot be written.
    """
    if step <= 0:
        raise ValueError("step must be positive")
    os.makedirs(out_dir, exist_ok=True)

    # Remember the files written during this run, in capture order. Globbing
    # the directory afterwards would sort "img10.png" before "img2.png" and
    # would also pick up PNGs left over from earlier runs.
    shots = []
    num_while = step
    while True:
        shot_path = os.path.join(out_dir, "img{}.png".format(len(shots)))
        # save_screenshot returns False instead of raising on a write error.
        if not driver.save_screenshot(shot_path):
            raise OSError("could not write %s" % shot_path)
        shots.append(shot_path)

        driver.execute_script(
            "var q=document.documentElement.scrollTop={}".format(num_while))
        time.sleep(0.2)  # give the page a moment to settle after scrolling
        print("截图:", num_while, "成功")

        if num_while > total_height:
            break
        num_while += step

    pdf_path = os.path.join(out_dir, pdf_name)
    doc = fitz.open()
    try:
        for img in shots:
            print(img)
            # Turn the image into a one-page PDF held in memory. PyMuPDF
            # renamed its camelCase methods to snake_case; try the current
            # name first and fall back to the legacy one.
            imgdoc = fitz.open(img)
            try:
                pdfbytes = _call_first(imgdoc, ("convert_to_pdf", "convertToPDF"))
            finally:
                imgdoc.close()

            # Append that page to the output document.
            imgpdf = fitz.open("pdf", pdfbytes)
            try:
                _call_first(doc, ("insert_pdf", "insertPDF"), imgpdf)
            finally:
                imgpdf.close()

        if os.path.exists(pdf_path):
            os.remove(pdf_path)
        doc.save(pdf_path)
    finally:
        doc.close()
    return pdf_path
try:
    get_img_pdf_return_url(driver)
finally:
    # Close the browser even when capturing or PDF assembly raises.
    driver.quit()
方法七(明天写…)
- 优点:
- 缺点: