Download storytelling

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# __Author__: yunrui
# __Date__:   2019/8/16

# 从中国评书网下载评书《射雕英雄传》

import os
import re
import time
import urllib
import urllib.request

import requests

from getHeaders import getHeaders

# Configuration
# Name of the output folder
novel_file_name = '射雕英雄传'
# Filename prefix for each episode's mp3 file
novel_chinese_name = '射雕英雄传_刘少佐'
# ID of the target pingshu, as shown on the zgpingshu.com site
novel_id = 5277
# Likewise taken from the site: how many episodes the target pingshu has
novel_max_count = 200

# Create the output folder if it is missing. exist_ok=True replaces the
# separate exists() check, which was racy (the folder could appear between
# the check and the makedirs call) and makes re-running the script safe.
os.makedirs(novel_file_name, exist_ok=True)


# Matches the real mp3 link on an episode's download page.
_DOWNLOAD_LINK_RE = re.compile(r'<a href="(.*?)" id=\'down\'')


def _episode_page_url(episode):
    """Return the download-page URL for the given 1-based episode number."""
    # The first episode's page URL has no episode number, so it is special.
    if episode == 1:
        return "http://www.zgpingshu.com/down/%d/" % novel_id
    return "http://www.zgpingshu.com/down/%d/%s.html" % (novel_id, episode)


def _download_episode(episode):
    """Download one episode's mp3 into the output folder unless it exists.

    Prints the completion message whether the file was just downloaded or
    was already present from an earlier run.

    Raises:
        RuntimeError: if the download page contains no download link.
    """
    name = '%s_%d' % (novel_chinese_name, episode)
    # os.path.join instead of a hard-coded '\\' so the script also works
    # outside Windows.
    target = os.path.join(novel_file_name, '%s.mp3' % name)

    if not os.path.exists(target):
        page_url = _episode_page_url(episode)
        # The context manager closes the response even if an error occurs.
        with requests.get(url=page_url, headers=getHeaders()) as response:
            response.encoding = 'gb2312'
            html = response.text

        match = _DOWNLOAD_LINK_RE.search(html)
        if match is None:
            raise RuntimeError('no download link found on %s' % page_url)

        # Download to a temporary name and rename on success, so that an
        # interrupted transfer never leaves a truncated .mp3 that a later
        # run would mistake for a finished download.
        partial = target + '.part'
        urllib.request.urlretrieve(match.group(1), partial)
        os.replace(partial, target)

        # Original author's note: close the response and pause between
        # requests, otherwise the site may treat the traffic as an attack
        # and drop the connection.
        time.sleep(1)

    print('%s下载完毕' % name)


for _episode in range(1, novel_max_count + 1):
    _download_episode(_episode)

`getHeaders` is a simple helper function I wrote myself that returns randomly generated request headers; its code is in another of my blog posts.
If error 10061 occurs, it is in most cases a network problem rather than a bug in the code; simply try running the script again.

Guess you like

Origin www.cnblogs.com/RyanZhou/p/11367079.html