from urllib import parse
from urllib import request

from lxml import etree

import time


class Novel:
    """A downloaded novel: a title plus an ordered set of chapter texts."""

    def __init__(self, *args):
        # args[0]: novel title (used as the output file name)
        # args[1]: dict mapping str(chapter index) -> chapter text
        self.name = args[0]
        self.dict = args[1]
        # Keys are produced as str(count) in downdload_txt, so sort them
        # numerically — plain string sort would put '10' before '2'.
        self.txt = ''.join(self.dict[key] for key in sorted(self.dict, key=int))

    def write(self):
        """Write the collected text to '<name>.txt' in the current directory."""
        # Context manager guarantees the handle is closed even on error;
        # explicit UTF-8 keeps the output independent of the platform locale.
        with open(self.name + '.txt', 'w', encoding='utf-8') as f:
            f.write(self.txt)


# Fetch a web page's source
def get_http_page(url, **kw):
    """Fetch *url* and return the decoded page text.

    Keyword args:
        encoding: charset used to decode the response body
                  (default 'gbk', the site's usual encoding).
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36"
    }
    req = request.Request(url, headers=headers)
    response = request.urlopen(req)
    try:
        page = response.read()
    finally:
        response.close()
    encoding = kw.get('encoding', 'gbk')
    return page.decode(encoding)


# Fetch the novel's chapter directory
def get_comics_directory(url):
    """From a search-result page, follow the first hit and return the list
    of chapter-index page URLs for that novel.
    """
    url_list = []
    page = get_http_page(url, encoding='utf-8')
    html = etree.HTML(page)
    result = html.xpath('/html/body/div[2]/div/div[2]/h3/a')
    elment_select = None
    if len(result):
        url2 = result[0].get('href')
        if url2:
            page = get_http_page(url2)
            html = etree.HTML(page)
            # <select> whose <option>s hold the paginated index URLs.
            elment_select = html.xpath('/html/body/div[4]/div[9]/span[2]/select')
    # Guard against the first xpath finding nothing (elment_select is None).
    if elment_select and len(elment_select):
        for option in elment_select[0].findall('option'):
            url_list.append('https://m.wenxuemi6.com{}'.format(option.get('value')))
    return url_list


def downdload_txt(url_list, **kw):
    """Download chapter texts for the index pages in *url_list*.

    Keyword args:
        start, stop: optional half-open range [start, stop) of index pages
                     to process; invalid bounds fall back to the full list.

    Returns:
        dict mapping str(index-page number) -> concatenated chapter text.
    """
    # Default to the whole list; a valid start/stop pair narrows the range.
    # (Defaults first avoids a NameError when kw is present but invalid.)
    count = 0
    count_max = len(url_list)
    if kw:
        start = int(kw['start'])
        stop = int(kw['stop'])
        if 0 <= start < len(url_list) and start < stop <= len(url_list):
            count = start
            count_max = stop
    print('Crawling directory and chapter addresses, please wait......')
    d = {}
    while count < count_max:
        url = url_list[count]
        page = get_http_page(url)
        html = etree.HTML(page)
        # Links to the individual chapters on this index page.
        result = html.xpath('/html/body/div[4]/ul[2]/li/a')
        txt = ''
        if isinstance(result, list):
            for l in result:
                url = 'https://m.wenxuemi6.com{}'.format(l.get('href'))
                print('Download chapters by URL:{}'.format(url))
                page = get_http_page(url)
                html = etree.HTML(page)
                t = html.xpath('//*[@id="nr1"]/text()')
                t2 = html.xpath('//*[@id="nr1"]/p')
                txt_title = ''
                txt_title_list = html.xpath('//*[@id="nr_title"]/text()')
                if isinstance(txt_title_list, list) and len(txt_title_list) == 1:
                    txt_title = txt_title_list[0]
                txt = txt + txt_title + '\r\n'
                for l2 in t:
                    txt = txt + l2 + '\r\n'
                # A <p> inside the content area marks a chapter that
                # continues on a second page ('..._2.html').
                if isinstance(t2, list) and len(t2) == 1:
                    url = 'https://m.wenxuemi6.com{}'.format(l.get('href')[:-5] + '_2.html')
                    print('Download chapters by URL:{}'.format(url))
                    page = get_http_page(url)
                    html = etree.HTML(page)
                    t = html.xpath('//*[@id="nr1"]/text()')
                    for l2 in t:
                        txt = txt + l2 + '\r\n'
                # Throttle requests so we don't hammer the server.
                time.sleep(.1)
        d['{}'.format(count)] = txt
        count += 1
    return d


if __name__ == '__main__':
    txt_name = input("Please enter the title to be searched:")
    url = 'https://m.wenxuemi6.com/search.php?keyword={}'.format(parse.quote(txt_name))
    url_list = get_comics_directory(url)
    # Preview: download only the chapters on the first index page.
    d = downdload_txt(url_list, start=0, stop=1)
    n1 = Novel(txt_name, d)
    # Writes <txt_name>.txt to the current directory.
    n1.write()
    # Download the complete novel (all index pages).
    d2 = downdload_txt(url_list)
    n2 = Novel(txt_name, d2)
    # Writes <txt_name>.txt to the current directory (overwrites the preview).
    n2.write()