用Python实现笔趣阁小说爬取 GUI版

上篇文章《用Python实现笔趣阁小说爬取》中，我们实现了一个简单的小说爬虫。
这篇文章带大家为它添加一个GUI界面。

GUI是用经典的tkinter完成的。
界面初始化

    window = tk.Tk()
    window.title('笔趣阁爬虫')
    width = 300
    height = 200

    # Centre the window: place its top-left corner at half of the space left
    # over once the window size is subtracted from the screen size.
    screenwidth = window.winfo_screenwidth()
    screenheight = window.winfo_screenheight()
    alignstr = '%dx%d+%d+%d' % (width, height, (screenwidth - width) // 2, (screenheight - height) // 2)
    window.geometry(alignstr)
    # Width is fixed, height may still be resized by the user.
    window.resizable(width=False, height=True)

    L1 = tk.Label(window, text='输入网址:')
    L1.grid(row=0, column=0)

    E1 = tk.Entry(window)
    E1.grid(row=0, column=1)

    # Hand the callback itself to Tk.  A wrapper such as `lambda: thread_it`
    # merely returns the function object without calling it, so clicking the
    # button would do nothing.
    B1 = tk.Button(window, text='开始爬取', command=thread_it)
    B1.grid(row=0, column=2)

    T1 = tk.Text(window, width=40, height=10)
    T1.grid(row=1, columnspan=3)
    window.mainloop()

实现的过程中遇到了界面卡死的问题：爬取任务如果直接在主线程里运行，就会阻塞tkinter的事件循环（mainloop）。
我用多线程来解决这个问题：
将爬取单独放到一个线程中执行，
这样就不会影响主线程（界面）的运行。

def thread_it():
    """Start ``main`` on a background thread so the Tk event loop is not blocked."""
    # daemon=True replaces the deprecated Thread.setDaemon(); a daemon thread
    # does not keep the process alive once the main thread has exited.
    t = threading.Thread(target=main, daemon=True)
    t.start()

完整代码如下：

import requests
import time
from lxml import etree

import tkinter as tk
import threading

# Chapter URLs: appended to by find_url() and iterated over by main().
url_list = []


def get_tag(response, tag):
    """Parse *response* as HTML and return what the XPath expression *tag* selects."""
    return etree.HTML(response).xpath(tag)


def parse_url(url, timeout=10):
    """Download *url* and return its body decoded as GBK text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.  Without a
            timeout ``requests`` may wait indefinitely, which would leave the
            crawl thread hanging on a stalled connection.

    Returns:
        The response body as a ``str``.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    response = requests.get(url, timeout=timeout)
    # Force GBK decoding instead of relying on the encoding requests guesses
    # (presumably the target site serves GBK pages -- confirm if it changes).
    response.encoding = 'gbk'
    return response.text


def find_url(response):
    """Append the URL of every chapter linked from the index page to ``url_list``.

    Args:
        response: HTML text of the book's index page.

    The links are resolved against the address currently typed into the
    ``E1`` entry widget.  ``urljoin`` is used rather than plain string
    concatenation so that absolute and root-relative hrefs are resolved
    correctly; relative hrefs against a base ending in ``/`` yield the same
    result as concatenation did.
    """
    from urllib.parse import urljoin

    base_url = E1.get()
    hrefs = get_tag(response, '//*[@id="list"]/dl/dd/a/@href')
    url_list.extend(urljoin(base_url, href) for href in hrefs)


def find_content(url):
    """Download one chapter page and append it to the book's text file.

    Args:
        url: Address of the chapter page.

    The chapter heading and the text nodes of the ``#content`` element are
    appended to ``<title>.txt`` (``title`` is the module-level name set by
    ``main``).  Progress is shown in the ``T1`` text widget, which is cleared
    again after a two-second pause between requests.

    NOTE(review): this runs on the worker thread yet touches the Tk widget
    ``T1`` directly -- confirm this is safe with the Tcl/Tk build in use.
    """
    response = parse_url(url)
    headings = get_tag(response, '//*[@id="box_con"]/div[2]/h1/text()')
    if not headings:
        # A page without the expected heading would otherwise raise
        # IndexError and silently kill the whole crawl thread; report it and
        # move on to the next chapter instead.
        T1.insert("end", '未找到章节标题，已跳过:' + url + '\n')
        return
    chapter = headings[0]
    content = get_tag(response, '//*[@id="content"]/text()')

    T1.insert("end", '正在爬取:' + chapter)
    # The with-statement closes the file; no explicit close() is needed.
    with open('{}.txt'.format(title), 'at', encoding='utf-8') as j:
        j.write(chapter)
        # Text nodes consisting solely of a CRLF are dropped.
        j.writelines(piece for piece in content if piece != '\r\n')
    T1.insert("end", chapter + ':保存完毕')

    # Pause between chapters, then wipe the status box for the next one.
    time.sleep(2)
    T1.delete("1.0", tk.END)


def main():
    """Crawl the whole book whose index URL is typed into the ``E1`` entry.

    Fetches the index page, stores the book title in the module-level
    ``title`` (used by ``find_content`` as the output file name), collects
    the chapter URLs via ``find_url`` and downloads them one by one.

    Raises:
        IndexError: If the index page has no ``#info h1`` title element.
    """
    global title
    T1.insert('end', '开始爬取，请耐心等候')
    start_url = E1.get()
    response = parse_url(start_url)
    title = get_tag(response, '//*[@id="info"]/h1/text()')[0]

    # url_list is module-level and find_url() only appends to it; empty it
    # first so that a second click does not re-download every chapter that
    # an earlier run already collected.
    url_list.clear()
    find_url(response)
    for url in url_list:
        find_content(url)


def thread_it():
    """Start ``main`` on a background thread so the Tk event loop is not blocked."""
    # daemon=True replaces the deprecated Thread.setDaemon(); a daemon thread
    # does not keep the process alive once the main thread has exited.
    t = threading.Thread(target=main, daemon=True)
    t.start()


if __name__ == '__main__':
    # Build the window.  E1 (URL entry) and T1 (status box) are module-level
    # names because the crawler functions read and write them directly.
    window = tk.Tk()
    window.title('笔趣阁爬虫')
    width = 300
    height = 200

    # Centre the window: place its top-left corner at half of the space left
    # over once the window size is subtracted from the screen size.  The
    # geometry string already carries the size, so no second geometry() call
    # is needed.
    screenwidth = window.winfo_screenwidth()
    screenheight = window.winfo_screenheight()
    alignstr = '%dx%d+%d+%d' % (width, height, (screenwidth - width) // 2, (screenheight - height) // 2)
    window.geometry(alignstr)
    # Width is fixed, height may still be resized by the user.
    window.resizable(width=False, height=True)

    L1 = tk.Label(window, text='输入网址:')
    L1.grid(row=0, column=0)

    E1 = tk.Entry(window)
    E1.grid(row=0, column=1)

    # thread_it takes no arguments, so it can be used as the callback as is.
    B1 = tk.Button(window, text='开始爬取', command=thread_it)
    B1.grid(row=0, column=2)

    T1 = tk.Text(window, width=40, height=10)
    T1.grid(row=1, columnspan=3)
    window.mainloop()

效果图如下：
在这里插入图片描述
一起学习python，小白指导，教学分享记得私信我

用Python实现笔趣阁小说爬取 GUI版

猜你喜欢