Xpath test tool for dynamic web pages based on selenium

Recently I have been writing XPath extraction rules for web pages. There are many XPath testing tools on the Internet, but all the ones I found work only on static pages; I could not find one that works on dynamically rendered pages. To make later testing easier, I wrote one myself.

from tkinter import *
import tkinter as tk
import re
from lxml import etree
from selenium import webdriver

# Create a headless browser: try Chrome first and fall back to Firefox.
driver = ""
try:
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('--headless')  # run without a visible window
    chrome_options.add_argument('--disable-gpu')
    # Pass the options through ``options=`` (as the Firefox branch below does).
    # Selenium deprecated the ``chrome_options`` keyword in favour of
    # ``options``; with a release that no longer accepts it, the old call
    # raised and Chrome was silently skipped.
    driver = webdriver.Chrome(options=chrome_options)
except Exception:
    # Catch Exception rather than using a bare ``except`` so that
    # KeyboardInterrupt / SystemExit are not swallowed during start-up.
    fire_profile = webdriver.FirefoxOptions()
    fire_profile.add_argument('--disable-gpu')
    fire_profile.add_argument('-headless')  # run without a visible window
    driver = webdriver.Firefox(options=fire_profile)
# Source of the most recently loaded (rendered) page.
source = ""
# URL of the page currently held in ``source``; used to skip redundant reloads.
old_url = ""


def result_to_string(result):
    """
    Convert an XPath evaluation result into two display strings.

    :param result: value returned by an lxml ``xpath()`` call: a list of
        elements and/or strings, a single string, a number or a boolean.
    :return: tuple ``(text, html)``. For a list, ``text`` holds each item's
        text and ``html`` each item's serialized markup, one item per line.
        For a scalar, both entries are the scalar rendered as a string.
        Anything else (e.g. ``None``) yields ``("", "")``.
    """
    if isinstance(result, str):
        compact = result.replace(" ", "")
        return compact, compact
    # XPath functions such as count() or boolean() return a float / bool;
    # show the value instead of an empty result.
    if isinstance(result, (bool, int, float)):
        shown = str(result)
        return shown, shown
    if not isinstance(result, list):
        return "", ""

    texts = []
    markups = []
    for one in result:
        if isinstance(one, str):
            texts.append(one)
            markups.append(one)
        else:
            # ``text`` is None for an element without leading text; treat it
            # as empty instead of failing on ``None + str``.
            texts.append(one.text or "")
            markups.append(
                etree.tostring(one, pretty_print=True, encoding="utf-8").decode("utf-8")
            )
    out_str = "\n".join(texts)
    out_html = "\n".join(markups)
    # NOTE(review): every space is removed, including the ones inside tags
    # (``<a href=...>`` becomes ``<ahref=...>``); kept as in the original
    # output format -- confirm whether that is intended.
    return out_str.replace(" ", "").strip("\n"), out_html.replace(" ", "").strip("\n")


def jiazai():
    """
    Load the URL from the URL entry in the headless browser and display the
    rendered page source in the source text box.

    Validation problems and load failures are reported as a message in the
    source text box. A URL is only remembered as "already loaded" after its
    source was displayed successfully, so the same URL can be loaded again
    after any error message.

    :return: None
    """
    global old_url, source
    url = url_text.get()
    if not url.startswith(("http://", "https://")):
        # The box now shows an error, so nothing counts as loaded any more.
        old_url = ""
        source_text.delete(1.0, 'end')
        source_text.insert("insert", "请检查是否添加http或https前缀!!!")
        return None
    if "." not in url:
        old_url = ""
        source_text.delete(1.0, 'end')
        source_text.insert("insert", "请输入正确格式的网址!!!")
        return None
    # Skip the reload when this URL's source is already on display.
    if url == old_url and source_text.get(1.0, 1.1) != "":
        return None

    source_text.delete(1.0, 'end')
    # Forget the previous URL until this load has actually succeeded;
    # otherwise a failed attempt would block retrying the same URL.
    old_url = ""
    try:
        driver.get(url)
        source = driver.page_source
        # An empty document is what the browser reports when nothing was loaded.
        if source == '<html xmlns="http://www.w3.org/1999/xhtml"><head></head><body></body></html>':
            source_text.insert("insert", "请输入真实的网址!!!")
        else:
            source_text.insert("insert", source)
            old_url = url
    except Exception:
        source_text.insert("insert", "此网址无法解析,请输入其他的网址!!!")
    return None


def ceshi():
    """
    Evaluate the XPath rule from the rule entry against the loaded page
    source and show the matches.

    The matched text goes to the result text box and the matched markup to
    the result source box. Nothing happens when no page source is loaded or
    the rule is empty. An invalid XPath expression is reported in the result
    text box instead of raising inside the Tk callback.

    :return: None
    """
    global source
    rule = xpath_text.get()
    if source == "" or rule == "":
        return
    print(rule)
    result_text.delete(1.0, "end")
    result_source_text.delete(1.0, "end")
    try:
        result = etree.HTML(source).xpath(rule)
    except etree.XPathError as e:
        # Covers both syntax and evaluation errors raised by lxml.
        result_text.insert("insert", "XPath规则有误: " + str(e))
        return
    out_string, out_html = result_to_string(result)
    result_text.insert("insert", out_string)
    result_source_text.insert("insert", out_html)


# Build the main window.
window = tk.Tk()
window.title('动态网页XPATH验证工具    Designed by Mr.Li')
window.geometry('750x560')

# One frame per row, stacked top to bottom.
url_frame = tk.Frame(window)
url_frame.pack()
xpath_frame = tk.Frame(window)
xpath_frame.pack()
result_frame = tk.Frame(window)
result_frame.pack()
html_frame = tk.Frame(window)
html_frame.pack()
source_frame = tk.Frame(window)
source_frame.pack()

# URL row: label, entry and the "load page" button.
url_label = tk.Label(url_frame, text='请输入网址:')
url_label.pack(side=tk.LEFT)
url_text = tk.Entry(url_frame, show=None, width=45)
url_text.pack(side=tk.LEFT)
button1var = tk.StringVar()
button1var.set("加载网页")
url_button = tk.Button(url_frame, textvariable=button1var, width=10, command=jiazai)
url_button.pack(side=tk.RIGHT)

# XPath rule row: label, entry and the "test" button.
xpath_label = tk.Label(xpath_frame, text='请输入规则:')
xpath_label.pack(side=tk.LEFT)
xpath_text = tk.Entry(xpath_frame, show=None, width=45)
xpath_text.pack(side=tk.LEFT)
xpath_button = tk.Button(xpath_frame, text="测试", width=10, command=ceshi)
xpath_button.pack(side=tk.RIGHT)

# Result rows: matched text, then matched markup.
# (Text has no ``show`` option, so the no-op ``show=None`` is not passed.)
result_label = tk.Label(result_frame, text='结果文字:')
result_label.pack(side=tk.LEFT)
result_text = tk.Text(result_frame, height=9, width=80)
result_text.pack(side=tk.LEFT)
result_source_label = tk.Label(html_frame, text='结果源码:')
result_source_label.pack(side=tk.LEFT)
result_source_text = tk.Text(html_frame, height=9, width=80)
result_source_text.pack(side=tk.LEFT)

# Page source row.
source_label = tk.Label(source_frame, text='网页源码:')
source_label.pack(side=tk.LEFT)
source_text = tk.Text(source_frame, height=17, width=80)
source_text.pack(side=tk.LEFT)
try:
    window.mainloop()
finally:
    # Shut the headless browser down when the window is closed; otherwise
    # the browser/driver process keeps running after the tool exits.
    driver.quit()

The following figure shows the startup interface:
Insert picture description here
The following figure shows the running result:
Insert picture description here

Guess you like

Origin blog.csdn.net/mrliqifeng/article/details/88582976