Python 爬取喜马拉雅音频

import os
import re

import requests


class SpiderHimalaya(object):
    """Download the audio tracks listed on a Himalaya (喜马拉雅) album page.

    The spider fetches a JSON track listing, builds one detail URL per track,
    resolves each detail URL to the actual audio URL and saves the audio bytes
    as ``<save_dir>/<sanitized title>.mp3``.

    Args:
        page_url: Format template for a listing page; ``{}`` receives the
            1-based page number. Defaults to the empty template of the
            original script.
        audio_url: Format template for a track detail URL; ``{}`` receives the
            track id. Defaults to the empty template of the original script.
        save_dir: Directory the ``.mp3`` files are written to. It is created
            on demand.
    """

    # Characters removed from a track title before it is used as a file name:
    # slash, backslash, colon, asterisk, double quote, angle brackets, pipe,
    # question mark -- and, because of the spaces inside the class, spaces too.
    _UNSAFE_NAME_CHARS = re.compile(r"[/ \\ : * \" < > | ?]+")

    def __init__(self, page_url="", audio_url="", save_dir="三国志"):
        self.headers = {"User-Agent": "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50"}
        self.page_url = page_url
        self.audio_url = audio_url
        self.save_dir = save_dir

    def get_page_url(self):
        """Return the listing URLs for pages 1 through 12."""
        return [self.page_url.format(page) for page in range(1, 13)]

    def get_response(self, url, timeout=10):
        """Fetch ``url`` with the spider's headers.

        Args:
            url: Address to request.
            timeout: Seconds to wait before giving up, so that a stalled
                connection cannot block the whole run.

        Returns:
            The response object when the status code is 200; otherwise the
            response is printed and ``None`` is returned.
        """
        resp = requests.get(url, headers=self.headers, timeout=timeout)
        if resp.status_code == 200:
            return resp
        print(resp)
        return None

    def get_item_id(self):
        """Collect the tracks listed on the first listing page.

        Returns:
            A list of single-entry dicts mapping the track's detail URL
            (``audio_url`` formatted with its ``trackId``) to its ``title``.
            The list is empty when the listing page could not be fetched.
        """
        # NOTE: only the first of the generated page URLs is queried, exactly
        # as in the original script.
        first_page = self.get_page_url()[0]
        resp = self.get_response(url=first_page)
        if resp is None:
            return []
        tracks = resp.json()['data']['tracks']
        return [
            {self.audio_url.format(track['trackId']): track['title']}
            for track in tracks
        ]

    def down_mp3(self, item):
        """Download one track and store it as an ``.mp3`` file.

        Args:
            item: Single-entry dict ``{detail_url: title}`` as produced by
                ``get_item_id``.

        Returns:
            The path of the written file, or ``None`` when the track was
            skipped because a request failed or no audio URL was provided.

        Raises:
            ValueError: If ``item`` does not contain exactly one entry.
        """
        (url, name), = item.items()  # strict unpack: exactly one entry expected
        resp = self.get_response(url)
        if resp is None:
            return None
        file_name = self._UNSAFE_NAME_CHARS.sub('', name)
        print(file_name)
        mp3_url = resp.json()['data']['src']
        if not mp3_url:
            print('no audio url for {}'.format(name))
            return None
        audio = self.get_response(mp3_url)
        if audio is None:
            return None
        # The target directory may not exist yet; create it instead of letting
        # open() fail with FileNotFoundError.
        if self.save_dir:
            os.makedirs(self.save_dir, exist_ok=True)
        path = os.path.join(self.save_dir, file_name + '.mp3')
        with open(path, 'wb') as f:
            f.write(audio.content)
        return path

    def run(self):
        """Download every track found by ``get_item_id``."""
        for item in self.get_item_id():
            self.down_mp3(item)

# Script entry point: build a spider and start the download run.
if __name__ == '__main__':
    spider = SpiderHimalaya()
    spider.run()
发布了127 篇原创文章 · 获赞 25 · 访问量 3万+

猜你喜欢

转载自 https://blog.csdn.net/weixin_44224529/article/details/104836401