Herramienta de prueba de XPath para páginas web dinámicas, basada en Selenium

Recientemente me dediqué a escribir reglas XPath para varias páginas web y descubrí que en Internet hay muchas herramientas para probar XPath, pero todas trabajan sobre páginas estáticas. Hasta el momento no he encontrado ninguna herramienta de prueba de XPath que funcione con páginas dinámicas, así que, para facilitar las pruebas posteriores, escribí una yo mismo.

from tkinter import *
import tkinter as tk
import re
from lxml import etree
from selenium import webdriver

# Create a headless browser: try Chrome first and fall back to Firefox.
try:
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')
    # Use the ``options=`` keyword (same as the Firefox call below); the old
    # ``chrome_options=`` keyword is deprecated and no longer accepted by
    # recent Selenium 4 releases, which made this branch always fail.
    driver = webdriver.Chrome(options=chrome_options)
except Exception:
    # Chrome (or its driver) could not be started: use headless Firefox.
    # If Firefox fails as well, the exception propagates and the script stops.
    fire_profile = webdriver.FirefoxOptions()
    fire_profile.add_argument('--disable-gpu')
    fire_profile.add_argument('-headless')  # run Firefox in headless mode
    driver = webdriver.Firefox(options=fire_profile)
# Page source captured from the browser by jiazai() and queried by ceshi().
source = ""
# Helper field: URL remembered by jiazai() to avoid reloading the same page.
old_url = ""


def result_to_string(result):
    """
    Convert an XPath evaluation result into two display strings.

    :param result: value returned by an lxml ``xpath()`` call: a list whose
        items are strings or elements, a single string, or a number/boolean
        (e.g. from ``count(...)``)
    :return: ``(text, html)`` tuple. For a list, ``text`` holds each item's
        text and ``html`` each item's serialized markup, one item per line.
        For a string or scalar both entries are the same string. Any other
        value yields ``("", "")``.
    """
    if isinstance(result, list):
        text_parts = []
        html_parts = []
        for one in result:
            if isinstance(one, str):
                text_parts.append(one)
                html_parts.append(one)
            else:
                # ``.text`` is None when an element has no leading text;
                # treat that as an empty line instead of raising TypeError.
                text_parts.append(one.text or "")
                html_parts.append(
                    etree.tostring(one, pretty_print=True, encoding="utf-8").decode("utf-8")
                )
        out_str = "\n".join(text_parts)
        out_html = "\n".join(html_parts)
        # NOTE(review): every space is removed, which also strips the spaces
        # between tag names and attributes in the HTML output -- confirm
        # this is intended before relying on the markup.
        return out_str.replace(" ", "").strip("\n"), out_html.replace(" ", "").strip("\n")
    if isinstance(result, str):
        compact = result.replace(" ", "")
        return compact, compact
    if isinstance(result, (bool, int, float)):
        # XPath functions such as count() or boolean() return scalars.
        scalar = str(result)
        return scalar, scalar
    return "", ""


def _show_load_error(message):
    """Show *message* in the source pane and forget any previously loaded page."""
    global old_url, source
    # Clearing the cached state keeps ceshi() from querying a stale page and
    # lets the same URL be retried after a failure.
    source = ""
    old_url = ""
    source_text.delete("1.0", "end")
    source_text.insert("insert", message)


def jiazai():
    """
    Load the URL typed in ``url_text`` with the browser and show its source.

    The URL must start with ``http://`` or ``https://`` and contain a dot;
    otherwise an error message is shown in ``source_text``. A URL that is
    already loaded and displayed is not fetched again. On success the page
    source is stored in the global ``source`` and the URL in ``old_url``;
    on any failure both are reset to ``""``.

    :return: None
    """
    global old_url, source
    url = url_text.get().strip()
    if not url.startswith(("http://", "https://")):
        _show_load_error("请检查是否添加http或https前缀!!!")
        return None
    if "." not in url:
        _show_load_error("请输入正确格式的网址!!!")
        return None
    # Same URL as the last successful load and the pane still has content:
    # nothing to do.
    if old_url == url and source_text.get("1.0", "1.1") != "":
        return None
    try:
        driver.get(url)
        page = driver.page_source
    except Exception:
        # Tk callback boundary: report the failure in the UI instead of
        # letting it surface only on stderr.
        _show_load_error("此网址无法解析,请输入其他的网址!!!")
        return None
    # The browser returns this skeleton document when nothing was loaded.
    if page == '<html xmlns="http://www.w3.org/1999/xhtml"><head></head><body></body></html>':
        _show_load_error("请输入真实的网址!!!")
        return None
    source = page
    old_url = url
    source_text.delete("1.0", "end")
    source_text.insert("insert", page)
    return None


def ceshi():
    """
    Evaluate the XPath rule from ``xpath_text`` against the loaded page.

    Does nothing when no page source is loaded or the rule is empty.
    Otherwise both result panes are cleared; the matched text goes to
    ``result_text`` and the matched markup to ``result_source_text``.
    An invalid rule is reported in ``result_text`` instead of raising
    inside the Tk callback.

    :return: None
    """
    rule = xpath_text.get()
    if source == "" or rule == "":
        return
    result_text.delete(1.0, "end")
    result_source_text.delete(1.0, "end")
    try:
        result = etree.HTML(source).xpath(rule)
    except etree.XPathError as exc:
        # lxml raises XPathError subclasses for malformed or unevaluable rules.
        result_text.insert("insert", "XPath规则有误: " + str(exc))
        return
    out_string, out_html = result_to_string(result)
    result_text.insert("insert", out_string)
    result_source_text.insert("insert", out_html)


# Main window.
window = tk.Tk()
window.title('动态网页XPATH验证工具    Designed by Mr.Li')
window.geometry('750x560')

# One frame per row; they are packed top to bottom in creation order.
url_frame = tk.Frame(window)
url_frame.pack()
xpath_frame = tk.Frame(window)
xpath_frame.pack()
result_frame = tk.Frame(window)
result_frame.pack()
html_frame = tk.Frame(window)
html_frame.pack()
source_frame = tk.Frame(window)
source_frame.pack()

# URL row: label, entry, and the button that loads the page via jiazai().
url_label = tk.Label(url_frame, text='请输入网址:')
url_label.pack(side=LEFT)
url_text = tk.Entry(url_frame, show=None, width=45)
url_text.pack(side=LEFT)
button1var = StringVar()
button1var.set("加载网页")
url_button = tk.Button(url_frame, textvariable=button1var, width=10, command=jiazai)
url_button.pack(side=RIGHT)

# XPath rule row: label, entry, and the button that runs ceshi().
xpath_label = tk.Label(xpath_frame, text='请输入规则:')
xpath_label.pack(side=LEFT)
xpath_text = tk.Entry(xpath_frame, show=None, width=45)
xpath_text.pack(side=LEFT)
xpath_button = tk.Button(xpath_frame, text="测试", width=10, command=ceshi)
xpath_button.pack(side=RIGHT)

# Result rows: matched text, then matched markup.
# (``show`` is not a Text option; the original ``show=None`` was ignored by
# tkinter, so it is omitted here.)
result_label = tk.Label(result_frame, text='结果文字:')
result_label.pack(side=LEFT)
result_text = tk.Text(result_frame, height=9, width=80)
result_text.pack(side=LEFT)
result_source_label = tk.Label(html_frame, text='结果源码:')
result_source_label.pack(side=LEFT)
result_source_text = tk.Text(html_frame, height=9, width=80)
result_source_text.pack(side=LEFT)

# Page source row.
source_label = tk.Label(source_frame, text='网页源码:')
source_label.pack(side=LEFT)
source_text = tk.Text(source_frame, height=17, width=80)
source_text.pack(side=LEFT)

try:
    window.mainloop()
finally:
    # Shut the headless browser down when the window closes; otherwise the
    # browser and driver processes keep running after the script exits.
    driver.quit()

La siguiente figura muestra la interfaz de inicio:
Inserte la descripción de la imagen aquí
La siguiente figura muestra el resultado de ejecución:
Inserte la descripción de la imagen aquí

Supongo que te gusta

Origin blog.csdn.net/mrliqifeng/article/details/88582976
Recomendado
Clasificación