Crawling Biquge (笔趣阁) novels with Python (Tkinter GUI)

  1 import requests
  2 from lxml import etree
  3 from pyquery import PyQuery as pq 
  4 import re
  5 import time
  6 from datetime import datetime
  7 from tkinter import *
  8 import threading
  9 from tkinter import messagebox
 10  
 11 #获取小说内容-------------------------------------------------------------
 12 def content(a):
 13     response=requests.get(a,headers=header)
 14     response.encoding = 'utf-8'    
 15         dom = etree.HTML(response.text)
 16     content3=dom.xpath('//*[@id="content"]/text()')
 17     b=''.join(content3).replace(u'\xa0','\r\n')
 18     return b
 19 #获取小说目录-------------------------------------------------------------
 20 def get_mulu_url(index_url):
 21     response=requests.get(index_url,headers=header).text
 22     dom=etree.HTML(response)
 23     for i in range(13,end+13):
 24         herf = dom.xpath('//*[@id="list"]/dl/dd['+str(i)+']/a/@href')        
 25         title =dom.xpath('//*[@id="list"]/dl/dd['+str(i)+']/a/text()')        
 26         index_name_list.append(title)
 27         index_list.append (Herf)
 28      Print ( ' for catalog success ' )
 29  # novel download ------------------------------- -------------------------------- 
30  DEF downloads ():
 31 is      the try :
 32          for I in Range (0, len (index_list)):
 33 is              A + = index_url '' .join (index_list [I])
 34 is              Content (A)
 35              TITLE_NAME = '' .join (index_name_list [I])
 36              Print ()
 37 [              Import the codecs
 38 is             with codecs.open('F:\小说1.txt','a+',encoding='utf-8') as f:
 39             #file=open('F:\小说.txt','a+',)
 40                 f.write(title_name)
 41                 f.write('\r\n')
 42                 f.write(content(a))
 43                 f.write('\r\n')
 44             #print(index_list)
 45             #print('下载'+''.join(index_name_list[i]+'完成'))
 46            #del index_list [0] 
47             # del index_name_list [0] 
48             # title_name1 = ''. the Join (index_name_list [I]) 
49              Print ( ' download ' + TITLE_NAME + ' Cap complete ' )
 50              var_size.set ( ' download ' + TITLE_NAME + ' Chapter complete ' )
 51 is              the time.sleep (. 1 )
 52 is          var_size.set ( ' download completes ' )
 53 is      the except :
 54 is          R & lt messagebox.askokcancel = ( 'Warning ' , ' site has been detected ' )
 55          Print ( ' error ' ) #pass 
# Main worker: fetch the catalogue, then start the download thread -----------
def main():
    """Read the user's input, collect the chapter list and start downloading.

    Sets the module-level ``index_url`` (text of entry ``e1``) and ``end``
    (text of entry ``e2`` converted to int), calls ``get_mulu_url`` and then
    starts the download thread ``t``.

    Raises:
        ValueError: if the chapter-count entry does not hold an integer.
    """
    global index_url, end
    var_size.set(' being acquired contents and the chapter ')
    print(' are acquired contents and the chapter ')
    index_url = e1.get()
    end = int(e2.get())
    print(index_url)
    get_mulu_url(index_url)
    time.sleep(2)
    a = datetime.now()
    var_size.set(' begin download ')
    print(' start the download time ', a)
    # NOTE: a Thread object can be started only once, so this script supports
    # a single download run per launch.
    t.start()


# Button callback: validate the input and start the catalogue thread ---------
def one():
    """Start the catalogue thread, or warn if either entry field is empty."""
    if e1.get() == '' or e2.get() == '':
        messagebox.askokcancel(' warning -warning ', ' ! Books and chapters link is not empty ')
    else:
        # NOTE: like ``t``, ``t1`` can be started only once; a second click
        # raises RuntimeError inside the Tk callback.
        t1.start()


if __name__ == "__main__":
    app = Tk()
    app.title(' novel Downloader ')
    app.geometry('370x200')
    v1 = StringVar()
    v2 = StringVar()
    index_list = []       # per-chapter href lists, filled by get_mulu_url
    index_name_list = []  # per-chapter title lists, filled by get_mulu_url
    # Example index URL: 'https://www.qu.la/book/44121/'
    header = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3695.400 SLBrowser/10.0.3710.400'}

    var_size = StringVar()  # status line shown in the window
    var_ck = StringVar()

    Label(app, text='小说官网:https://www.qu.la/').pack()
    Label(app, textvariable=var_size).pack()

    e2 = Entry(app, textvariable=v2, bd=5)  # number of chapters
    e1 = Entry(app, textvariable=v1, bd=5)  # novel index URL
    e2.pack()
    e1.pack()

    t = threading.Thread(target=downloads)  # download worker
    t1 = threading.Thread(target=main)      # catalogue worker

    Button(app, text=' begin download ', command=one).pack()
    Label(app, text=' No. 1. Chapter 2. Books link ').pack()
    app.mainloop()

 

Guess you like

Origin www.cnblogs.com/liubingzhe/p/11314704.html
Recommended