import re
import time
from datetime import datetime
from tkinter import *
import threading
from tkinter import messagebox

import requests
from lxml import etree
from pyquery import PyQuery as pq

# Seconds to wait on the site before a request is abandoned; without a
# timeout a stalled connection would block the worker thread forever.
REQUEST_TIMEOUT = 15

# XPath position of the first <dd> entry read from the chapter list.
# NOTE(review): presumably skips a "latest chapters" section at the top of
# the index page - confirm against the live site.
FIRST_CHAPTER_POSITION = 13

# File that downloaded chapters are appended to.
OUTPUT_PATH = r'F:\小说1.txt'


# Fetch the text of one chapter ---------------------------------------------
def content(a):
    """Download one chapter page and return its text.

    Args:
        a: URL of the chapter page.

    Returns:
        The text nodes of the element whose id is "content", joined into one
        string, with every non-breaking space (U+00A0) replaced by CRLF.

    Raises:
        requests.RequestException: if the request cannot be completed or
            exceeds REQUEST_TIMEOUT.
    """
    response = requests.get(a, headers=header, timeout=REQUEST_TIMEOUT)
    response.encoding = 'utf-8'
    dom = etree.HTML(response.text)
    text_nodes = dom.xpath('//*[@id="content"]/text()')
    return ''.join(text_nodes).replace(u'\xa0', '\r\n')


# Fetch the chapter list ------------------------------------------------------
def get_mulu_url(index_url):
    """Collect chapter links and titles from a book's index page.

    Reads the module-level ``end`` (number of chapters wanted) and appends
    one href list to ``index_list`` and one title list to ``index_name_list``
    for each chapter entry found.

    Args:
        index_url: URL of the book's index page.

    Raises:
        requests.RequestException: if the request cannot be completed or
            exceeds REQUEST_TIMEOUT.
    """
    response = requests.get(
        index_url, headers=header, timeout=REQUEST_TIMEOUT).text
    dom = etree.HTML(response)
    for position in range(FIRST_CHAPTER_POSITION, end + FIRST_CHAPTER_POSITION):
        link = '//*[@id="list"]/dl/dd[' + str(position) + ']/a'
        href = dom.xpath(link + '/@href')
        title = dom.xpath(link + '/text()')
        if not href:
            # No link at this position (e.g. more chapters requested than the
            # book has). Recording an empty entry would make download() fetch
            # the index page itself and write an empty chapter.
            continue
        index_name_list.append(title)
        index_list.append(href)
    print('获取目录成功')


# Download the chapters -------------------------------------------------------
def download(output_path=OUTPUT_PATH):
    """Download every collected chapter and append it to a text file.

    Walks ``index_list`` / ``index_name_list`` in step, fetches each chapter
    once, writes "<title>CRLF<text>CRLF" to the output file and reports
    progress through ``var_size`` and stdout. Sleeps one second between
    chapters.

    Args:
        output_path: File to append to; defaults to OUTPUT_PATH.

    Any failure is reported in a message box and printed instead of being
    raised, because this function is used as a thread target.
    """
    try:
        # newline='' keeps the explicit '\r\n' sequences untouched, as the
        # original codecs-based writer did.
        with open(output_path, 'a', encoding='utf-8', newline='') as f:
            for href, title in zip(index_list, index_name_list):
                chapter_url = index_url + ''.join(href)
                title_name = ''.join(title)
                # Fetch once and reuse the result; each page used to be
                # requested twice.
                chapter_text = content(chapter_url)
                f.write(title_name)
                f.write('\r\n')
                f.write(chapter_text)
                f.write('\r\n')
                # Keep finished chapters on disk even if a later one fails.
                f.flush()
                print('下载' + title_name + '章完成')
                var_size.set('下载' + title_name + '章完成')
                time.sleep(1)
        var_size.set('下载完成')
    except Exception as exc:  # top-level boundary of the worker thread
        # Print the real cause; the dialog text alone cannot distinguish them.
        print('error', repr(exc))
        # requests.RequestException derives from OSError, so exclude it when
        # deciding whether this was a local file problem.
        is_file_error = (isinstance(exc, OSError)
                         and not isinstance(exc, requests.RequestException))
        if is_file_error:
            messagebox.askokcancel('警告', '无法写入文件: ' + output_path)
        else:
            messagebox.askokcancel('警告', '网站被检测')

# Worker entry point: build the chapter list, then start the download thread --
def main():
    """Read the form, collect the chapter list and start the download.

    Stores the book URL and chapter count in the module-level ``index_url``
    and ``end``, fills the chapter lists through get_mulu_url(), waits two
    seconds, then starts download() on a new thread kept in the global ``t``.

    Raises:
        ValueError: if the chapter-count field does not hold an integer
            (one() checks this before starting the worker).
    """
    global index_url, end, t
    var_size.set('正在获取目录和章节')
    print('正在获取目录和章节')
    index_url = E1.get()
    end = int(E2.get())
    print(index_url)
    # Start from empty lists so a second run does not download the chapters
    # of the previous run again. Cleared in place because other functions
    # hold the same list objects.
    del index_list[:]
    del index_name_list[:]
    get_mulu_url(index_url)
    time.sleep(2)
    started_at = datetime.now()
    var_size.set('开始下载')
    print('开始下载时间为', started_at)
    # A Thread object can be started only once, so build a new one per run.
    t = threading.Thread(target=download)
    t.start()


# Button callback: validate the form and launch the worker thread -------------
def one():
    """Validate both entry fields and start main() on a fresh thread.

    Shows a warning dialog and does nothing else when a field is empty, when
    the chapter count is not an integer, or when a previous run is still in
    progress.
    """
    global t1
    if E1.get().strip() == '' or E2.get().strip() == '':
        messagebox.askokcancel('警告-warning', '书籍链接和章节不可为空!')
        return
    try:
        int(E2.get())
    except ValueError:
        messagebox.askokcancel('警告-warning', '章节数必须是整数!')
        return
    # Both workers share the chapter lists, so refuse overlapping runs.
    if t1.is_alive() or t.is_alive():
        messagebox.askokcancel('警告-warning', '正在下载,请稍候!')
        return
    t1 = threading.Thread(target=main)
    t1.start()


if __name__ == "__main__":
    app = Tk()
    app.title('小说下载器')
    app.geometry('370x200')
    v1 = StringVar()
    v2 = StringVar()
    # Chapter hrefs and titles; filled by get_mulu_url(), read by download().
    index_list = []
    index_name_list = []
    # index_url='https://www.qu.la/book/44121/'
    header = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3695.400 SLBrowser/10.0.3710.400'}

    # Status line shown in the window; updated by main() and download().
    var_size = StringVar()

    # pack() returns None, so the widgets are not bound to names.
    Label(app, text='小说官网:https://www.qu.la/').pack()
    Label(app, textvariable=var_size).pack()

    E2 = Entry(app, textvariable=v2, bd=5)  # number of chapters
    E1 = Entry(app, textvariable=v1, bd=5)  # book index URL
    E2.pack()
    E1.pack()

    # Placeholders so one() can call is_alive() before the first run; they
    # are replaced by fresh Thread objects each time a run starts.
    t = threading.Thread(target=download)  # download worker
    t1 = threading.Thread(target=main)  # chapter-list worker

    Button(app, text='开始下载', command=one).pack()
    Label(app, text='1.章节号 2.书籍链接').pack()
    app.mainloop()

# python爬取笔趣阁小说(TK可视化)
# 猜你喜欢
# 转载自www.cnblogs.com/liubingzhe/p/11314704.html
# 今日推荐
# 周排行