利用 Python 快速获取知乎高赞答案(附源码)

闲话少说,直接上代码:

import tkinter as tk
from tkinter.filedialog import askdirectory
import requests
import re
import os
import time




class crawl_picture():
    """Tkinter front end that saves the answers of a Zhihu question to a text file.

    The user types a question id, picks a target directory and presses the
    start button.  Answers are fetched page by page from the Zhihu answers
    API, stripped of HTML tags and appended to
    ``<chosen dir>/<question id>/知乎回答<question id>.txt``.

    The root window, canvas and status variable are class attributes, so they
    are created as soon as the class body is executed.
    """

    window = tk.Tk()
    window.title("【星云随风_倚码为诗】之自动爬取知乎回答")
    window.geometry("500x500")

    canvas = tk.Canvas(window,
                       width=800,  # canvas width
                       height=500,  # canvas height
                       bg='#E6E8FA')  # canvas background colour
    canvas.pack()
    # Text shown by the status label; updating it refreshes the label.
    notice_str = tk.StringVar()

    # Target directory; stays empty until select_path() gets a real choice, so
    # the start button can be pressed before a directory was picked.
    path = ""

    # Compiled once: matches any HTML tag inside an answer body.
    _TAG_RE = re.compile(r'<[^>]+>', re.S)
    # Number of answers requested per API call (also the offset step).
    _PAGE_SIZE = 20
    # Seconds to wait for the API before giving up instead of hanging the GUI.
    _TIMEOUT = 10

    def start_interface(self):
        """Build all widgets and run the Tk main loop (blocks until closed)."""
        self.notice = tk.Label(self.window, textvariable=self.notice_str)
        self.notice_str.set("尚未开始")
        self.notice.place(x=50, y=50)

        tk.Label(self.window, text="问题号:").place(x=50, y=100)
        tk.Label(self.window, text="保存路径:").place(x=50, y=150)

        # Only one label is placed at (50, 250); stacking two at the same
        # coordinates just hides the lower one.
        tk.Label(self.window, bg='#E6E8FA', text="对这个程序有什么想法可以加我好友交流哦").place(x=50, y=250)
        tk.Label(self.window, bg='#E6E8FA', text="B站up主", font="Arial 20 bold", fg="#4169E1").place(x=50, y=270)
        tk.Label(self.window, bg='#E6E8FA', text="欢迎三连支持一下,谢谢", font="Arial 15 bold", fg="red").place(x=50, y=300)

        self.keyWord = tk.StringVar()
        self.entry = tk.Entry(self.window, textvariable=self.keyWord)
        self.entry.place(x=150, y=100)
        self.keyWord.set("请输入知乎问题号")

        # Small text box that displays the chosen directory.
        self.text1 = tk.Text(self.window)
        self.text1.place(x=150, y=150, width=150, height=25)

        tk.Button(self.window, text="选择路径", command=self.select_path).place(x=320, y=152)
        # The lambda reads the entry and the path at click time, not at build time.
        tk.Button(self.window, text="开始爬取",
                  command=lambda: self.craw(self.entry.get(), self.path, self.window)).place(x=200, y=200)
        self.window.mainloop()

    def text(self):
        """Clear the status message."""
        # Only the StringVar is reset; self.notice keeps pointing at the Label.
        self.notice_str.set("")

    def select_path(self):
        """Ask for a target directory and show it in the path text box."""
        chosen = askdirectory(title='选择文件')
        if not chosen:
            # Dialog was cancelled: keep whatever was selected before.
            return
        self.path = chosen
        print(self.path)
        # Replace the text box content with the new path.
        self.text1.delete(1.0, tk.END)
        self.text1.insert(tk.END, self.path, 'red')

    def getAnser(self, qid, offset):
        """Fetch one page of answers from the Zhihu API.

        Args:
            qid: Zhihu question id.
            offset: index of the first answer of the requested page.

        Returns:
            The decoded JSON body of the response.

        Raises:
            requests.RequestException: on network errors, timeouts or a
                non-success HTTP status.
            ValueError: if the body is not valid JSON.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36 QIHU 360SE'
        }
        url = "https://www.zhihu.com/api/v4/questions/{}/answers?include=content&limit={}&offset={}&platform=desktop&sort_by=default".format(
            qid, self._PAGE_SIZE, offset)
        res = requests.get(url, headers=headers, timeout=self._TIMEOUT)
        # Fail loudly on 4xx/5xx instead of parsing an error payload as answers.
        res.raise_for_status()
        res.encoding = 'utf-8'
        return res.json()

    def getAnswers(self, qid, save_path):
        """Download every answer of a question and append it to a text file.

        Pages are requested until the API returns an empty ``data`` list.
        The status label is updated after each answer.

        Args:
            qid: Zhihu question id.
            save_path: existing directory that receives ``知乎回答<qid>.txt``.

        Raises:
            ValueError: if a response has no ``data`` field.
            requests.RequestException: propagated from getAnser().
            OSError: if the output file cannot be opened or written.
        """
        offset = 0
        self.num = 1
        out_path = os.path.join(save_path, "知乎回答%s.txt" % qid)
        # Explicit UTF-8: answers routinely contain characters (emoji, rare
        # CJK) that a platform default such as GBK cannot encode.  The with
        # block closes the file even when a request fails mid-way.
        with open(out_path, "a", encoding="utf-8") as f:
            while True:
                print('Offset =', offset)
                data = self.getAnser(qid, offset)
                if not isinstance(data, dict) or 'data' not in data:
                    raise ValueError("unexpected API response: no 'data' field")
                answers = data['data']
                if not answers:
                    break
                for line in answers:
                    # Strip HTML tags, keeping only the text of the answer.
                    result = self._TAG_RE.sub('', line.get('content') or '')
                    f.write("\n【第%d个回答】" % self.num)
                    f.write(result)
                    self.notice_str.set("正在爬取第%d个" % self.num)
                    # The crawl runs on the GUI thread, so repaint manually.
                    self.window.update()
                    self.num += 1

                offset += self._PAGE_SIZE
                time.sleep(1)  # be polite to the API between pages

    def craw(self, keyword, path, enter_w):
        """Validate the input, run the crawl and report the outcome.

        Args:
            keyword: question id as typed by the user.
            path: base directory chosen by the user.
            enter_w: parent window for the success pop-up.
        """
        key = keyword.strip()
        if not key.isdigit():
            # Covers the untouched placeholder text as well as typos.
            self.notice_str.set("请输入正确的知乎问题号")
            return
        if not path:
            self.notice_str.set("请先选择保存路径")
            return

        self.notice_str.set("开始爬取")
        # Answers go into a sub-directory named after the question id.
        save_dir = os.path.join(path, key)
        try:
            os.makedirs(save_dir, exist_ok=True)
            self.getAnswers(key, save_dir)
        except (requests.RequestException, ValueError, OSError) as err:
            # Show the failure in the window instead of only on the console.
            self.notice_str.set("爬取失败: %s" % err)
            return
        self.new_window(enter_w)

    def new_window(self, enter_w):
        """Open a small pop-up that announces a successful crawl."""
        window_one = tk.Toplevel(enter_w)
        window_one.geometry('100x50')
        window_one.title('星云随风_倚码为诗')
        Lab = tk.Label(window_one, text='爬取成功', compound=tk.CENTER)
        Lab.pack()

if __name__ == '__main__':
    # Script entry point: create the application object, then hand control to
    # the Tk event loop (returns only when the main window is closed).
    app = crawl_picture()
    app.start_interface()

创作不易,点个赞吧!!

版权声明:如无特殊说明,文章均为本站原创,转载请注明出处
本文链接:https://blog.csdn.net/wsad861512140

猜你喜欢

转载自blog.csdn.net/wsad861512140/article/details/106426425
今日推荐