小说下载脚本

#coding:utf-8

from requests import get
import re
from bs4 import BeautifulSoup
from time import sleep,ctime

from threading import Thread


import sys
# Python 2 only: setdefaultencoding is not reachable on a freshly imported
# sys (the interpreter's site module removes it at startup), so the module is
# reloaded first. The call switches the implicit str <-> unicode codec to
# UTF-8 for the whole process.
# NOTE(review): the functions in this file mix byte strings and unicode and
# presumably rely on this setting -- confirm before removing it.
reload(sys)
sys.setdefaultencoding('utf-8')
# Echo the default encoding now in effect.
print sys.getdefaultencoding()

# HTTP request headers carrying a desktop Chrome User-Agent string; passed to
# the chapter page requests issued in Download_all.
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.91 Safari/537.36'}

def Usage():
    """Print the one-line command-line usage hint for this script."""
    usage_text = "\n[Usage]python 笔趣阁.py"
    print(usage_text)

def List_Fiction(name):
    """Query the site's search page for *name* and return the response text.

    Args:
        name: Search keyword (the novel title typed by the user); it is
            appended unchanged to the search URL.

    Returns:
        The body of the search-result page, as given by the response's
        ``text`` attribute.
    """
    search_endpoint = "http://www.biquge5200.com/modules/article/search.php?searchkey="
    Search_Url = search_endpoint + name
    # Show the exact URL being requested.
    print(Search_Url)
    # The URL is decoded as GBK before being handed to requests.
    response = get(Search_Url.decode('gbk'))
    return response.text
    
def Make_Form(html):
    """Show the search hits, let the user pick one, and return its chapters.

    Args:
        html: Text of the search-result page returned by List_Fiction.

    Returns:
        A list of ``(chapter_url, chapter_name)`` tuples extracted from the
        selected novel's index page.

    When the search page contains no hits, the usage hint is printed and the
    script exits.
    """
    result = re.findall(r'<td class="odd"><a href="(.*?)">(.*?)</a></td>', html)
    # re.findall returns an empty list when nothing matches. The previous
    # check (result[0] == "") raised IndexError in that case and could never
    # be true otherwise, because result[0] is a tuple.
    if not result:
        Usage()
        exit()

    Fiction_urls = [hit[0] for hit in result]
    Fiction_names = [hit[1] for hit in result]

    # Author cells share the "odd" class with the title cells but contain no
    # link, which is how the two are told apart.
    odd_cells = re.findall(r'<td class="odd">(.*?)</td>', html)
    Authors = [cell for cell in odd_cells if "<a" not in cell]

    for num, url in enumerate(Fiction_urls):
        # Tolerate a row without an author cell instead of aborting the listing.
        author = Authors[num] if num < len(Authors) else ""
        print("[" + str(num) + "]" + url + "\t[name]" + Fiction_names[num] + "\t[Author]" + author)

    number = raw_input("选择想下载的小说(填写编号):")
    content = get(Fiction_urls[int(number)]).text
    Chapter = re.findall(r'<dd><a href="(.*?)">(.*?)</a></dd>', content)
    return Chapter

def Write_into_TXT(cont):
    """Append one chapter to the novel's text file.

    Every ``<...>`` span in *cont* is replaced by a newline before writing.
    The target file is ``Fiction_Name + '.txt'`` (a relative path), where
    ``Fiction_Name`` is the module-level name set in the ``__main__`` block.

    Args:
        cont: Chapter text, possibly still containing HTML tags.
    """
    pattern = '<(.*?)>'
    # Do the substitution before opening the file so a failure here cannot
    # leave an open handle behind.
    CH_cont = re.sub(pattern, '\n', cont)
    # The context manager closes the file even if the write raises.
    with open(Fiction_Name + '.txt', 'ab+') as f:
        f.write(CH_cont)

def Download_all(Chapter_urls, max_retries=3):
    """Download every chapter in *Chapter_urls* and append it to the text file.

    For each URL the page is fetched, the ``<h1>`` text is taken as the
    chapter title, the ``<div id="content">`` element is taken as the body,
    and both are handed to Write_into_TXT.

    Args:
        Chapter_urls: Sequence of chapter page URLs, processed in order.
        max_retries: Maximum number of attempts per chapter. A chapter that
            still fails after this many attempts is reported and skipped so
            the remaining chapters are still downloaded.

    The earlier implementation retried by recursing through Download_Part
    with no limit, so a chapter that could never be parsed kept retrying
    until the recursion limit was hit and the whole run was aborted.
    """
    for url in Chapter_urls:
        for _attempt in range(max_retries):
            try:
                Code = get(url, headers=headers).content
                soup = BeautifulSoup(Code, "html.parser", from_encoding="utf8")
                Content = soup.find('div', id="content")
                # re.search returns None when the page has no <h1>, and
                # soup.find returns None when the content div is missing;
                # either case raises AttributeError below and counts as a
                # failed attempt.
                H1 = re.search(r'<h1>(.*)</h1>', Code).group(1)
                cont = H1 + "\n" + Content.encode('gbk')
                Write_into_TXT(cont)
                # Short pause between successful requests.
                sleep(0.3)
                print("[Successful]%s" % H1)
                break
            except Exception:
                # Exception (not a bare except) so Ctrl-C still interrupts.
                print("[%s]Error Happenning!" % ctime())
                sleep(2)
        else:
            # Every attempt failed: report the chapter and move on.
            print("[Failed]%s" % url)

def Download_Part(Chapter_urls,num1,num2):
    """Download only the chapters in the slice ``Chapter_urls[num1:num2]``.

    Args:
        Chapter_urls: Full sequence of chapter URLs.
        num1: Index of the first chapter to download.
        num2: Index one past the last chapter to download.
    """
    Download_all(Chapter_urls[num1:num2])

def Select_Chapter(html):
    """List the chapters and download the range chosen by the user.

    Args:
        html: Iterable of ``(chapter_url, chapter_name)`` pairs, as returned
            by Make_Form.

    An empty answer or ``all`` downloads every chapter. An answer of the form
    ``start-end`` downloads ``Chapter_urls[start:end]``. Any other answer is
    rejected and the prompt is shown again; previously such input raised
    AttributeError or ValueError and ended the run.
    """
    Chapter_urls = []
    for num, chapter in enumerate(html):
        Chapter_urls.append(chapter[0])
        print("[%s]%s[Chapter_name]%s" % (num, chapter[0], chapter[1]))
    print('''[选择要下载的章节数]\n\n===>[Default][all]所有章节\n===>[0-100]0到99章\n''')

    while True:
        Chapter_Numbers = raw_input("[章节数]:")
        if Chapter_Numbers == "" or Chapter_Numbers == "all":
            Download_all(Chapter_urls)
            return

        result = re.search(r'(.*?)-(.*)', Chapter_Numbers)
        if result is not None:
            try:
                number1 = int(result.group(1))
                number2 = int(result.group(2))
            except ValueError:
                # Non-numeric bound: fall through to the hint and ask again.
                pass
            else:
                Download_Part(Chapter_urls, number1, number2)
                return

        print("[Error]请输入 all 或 起始-结束 (例如 0-100)")

if __name__ == "__main__":
    # Script entry point: ask for a novel title, search for it, let the user
    # pick a hit and a chapter range, then download.
    # Fiction_Name is deliberately a module-level name: Write_into_TXT reads
    # it to build the output file name.
    try:
        Fiction_Name = raw_input("[小说名称]:")
        # Keep asking until a non-empty title is entered.
        while Fiction_Name == "":
            Fiction_Name = raw_input("[小说名称]:")
        print(Fiction_Name)
        html = List_Fiction(Fiction_Name)
        Chapter_Html = Make_Form(html)
        Select_Chapter(Chapter_Html)
    except KeyboardInterrupt:
        # Ctrl-C is a normal way to stop the script; leave quietly.
        sys.exit()
    except Exception as err:
        # Report the failure instead of exiting silently, so network or
        # parsing problems are visible to the user.
        print("[Error]%s: %s" % (type(err).__name__, err))
        sys.exit(1)

猜你喜欢

转载自blog.csdn.net/github_38641765/article/details/79587298