Python 下载 MP3

import requests,os,time,sys,re
import urllib.request
import requests
from bs4 import BeautifulSoup
import os
from scrapy.selector import Selector

class wangyiyun():
    """Download the songs of a music.163.com playlist as MP3 files.

    Typical use is ``wangyiyun().work(playlist_id, count, target_dir)``.

    Attributes:
        headers: HTTP headers sent with every request made through ``session``.
        main_url: Base URL that playlist and song paths are appended to.
        session: ``requests.Session`` used for all page and file requests.
        song_list: Maps the running song number to the path of every file
            written by ``download_song``; ``fileSize`` removes the entries
            whose files it deletes or can no longer find.
    """

    # Translation table that strips the characters Windows forbids in file
    # names:  \ / : * ? " < > |
    _ILLEGAL_CHARS_TABLE = str.maketrans('', '', '\\/:*?"<>|')
    # Seconds to wait on the server before a request is abandoned; without a
    # timeout a stalled connection would block the whole run.
    _REQUEST_TIMEOUT = 30
    # Files smaller than this many bytes (1 MiB) are treated as failed downloads.
    _MIN_FILE_BYTES = 1024 * 1024

    def __init__(self):
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
            'Referer': 'http://music.163.com/'}
        self.main_url = 'http://music.163.com/'
        self.session = requests.Session()
        self.session.headers = self.headers
        self.song_list = {}

    @staticmethod
    def _clean_filename(name):
        """Return ``name`` with every character illegal in a Windows file name removed."""
        return name.translate(wangyiyun._ILLEGAL_CHARS_TABLE)

    def get_songurls(self, playlist, long):
        """Return the relative URLs of the first ``long`` songs of a playlist.

        Args:
            playlist: Integer playlist id (the ``id`` parameter of the
                playlist page URL).
            long: Maximum number of song URLs to return.

        Returns:
            A list of hrefs taken from the playlist page, e.g.
            ``['/song?id=64006', '/song?id=63959']``.
        """
        url = self.main_url + 'playlist?id=%d' % playlist
        response = self.session.get(url, timeout=self._REQUEST_TIMEOUT)
        sel = Selector(text=response.text)
        songurls = sel.xpath('//ul[@class="f-hide"]/li/a/@href').extract()
        # Only the first ``long`` entries of the playlist are wanted.
        return songurls[:long]

    def get_songinfo(self, songurl):
        """Fetch a song page and return the song id and a file-system-safe name.

        Args:
            songurl: Relative song URL as returned by ``get_songurls``,
                e.g. ``'/song?id=64006'``.

        Returns:
            A ``(song_id, songname)`` tuple of strings where ``songname`` is
            ``'<singers joined by &> - <title>'`` with illegal file-name
            characters removed.

        Raises:
            ValueError: If no song title can be found on the page.
        """
        # main_url already ends with '/', so drop the href's leading slash
        # instead of requesting '//song?id=...'.
        url = self.main_url + songurl.lstrip('/')
        response = self.session.get(url, timeout=self._REQUEST_TIMEOUT)
        sel = Selector(text=response.text)
        song_id = url.split('=')[1]
        song_name = sel.xpath("//em[@class='f-ff2']/text()").extract_first()
        if song_name is None:
            # extract_first() yields None when the element is absent; fail with
            # a clear message instead of a TypeError on string concatenation.
            raise ValueError('no song title found on page %s' % url)
        singer = '&'.join(sel.xpath("//p[@class='des s-fc4']/span/a/text()").extract())
        songname = self._clean_filename(singer + ' - ' + song_name)
        return str(song_id), songname

    def download_song(self, i, songurl, dir_path):
        """Download one song into ``dir_path`` unless the file already exists.

        Any failure is reported on stdout and swallowed so that one bad song
        does not stop the rest of the playlist from downloading.

        Args:
            i: Running number of the song, used in messages and as the key
                in ``song_list``.
            songurl: Relative song URL, e.g. ``'/song?id=64006'``.
            dir_path: Directory the ``.mp3`` file is written to.
        """
        # Until the title is known, the URL is the only way to name the song
        # in an error message.
        label = songurl
        try:
            song_id, songname = self.get_songinfo(songurl)
            label = songname
            song_url = 'http://music.163.com/song/media/outer/url?id=%s.mp3' % song_id
            path = os.path.join(dir_path, songname + '.mp3')
            if os.path.exists(path):
                print(str(i), songname + '     exists!', sep='. ')
                return
            song = self.session.get(song_url, timeout=self._REQUEST_TIMEOUT)
            # Do not save an HTTP error page under an .mp3 name.
            song.raise_for_status()
            with open(path, 'wb') as f:
                print(str(i), songname + '.mp3', sep='. ')
                f.write(song.content)
            self.song_list[i] = path
        except Exception as error_info:
            # Deliberately broad: this is the per-song best-effort boundary.
            print('Error! ======= %s (%s)' % (label, error_info))

    def fileSize(self):
        """Delete every downloaded file that is smaller than 1 MiB.

        Entries whose file was deleted, or has disappeared in the meantime,
        are dropped from ``song_list`` so the method can safely run again.
        """
        # Iterate over a snapshot because entries are removed inside the loop.
        for num, file in list(self.song_list.items()):
            try:
                fsize = os.path.getsize(file)
            except OSError:
                del self.song_list[num]
                continue
            if fsize < self._MIN_FILE_BYTES:
                os.unlink(file)
                del self.song_list[num]
                print(str(num), os.path.basename(file) + '                Small than 1M. Has been deleted.', sep='. ')

    def work(self, playlist, long, path):
        """Download the first ``long`` songs of ``playlist`` into ``path``.

        Args:
            playlist: Integer playlist id.
            long: Maximum number of songs to download.
            path: Target directory; it is created when it does not exist.
        """
        if path:
            os.makedirs(path, exist_ok=True)
        songurls = self.get_songurls(playlist, long)
        for i, songurl in enumerate(songurls, start=1):
            self.download_song(i, songurl, path)
        # Remove undersized files once all downloads have been attempted.
        self.fileSize()
        

if __name__ == '__main__':
    # The playlist id is the value of the "id" parameter in the playlist's
    # URL on the web version of the site.
    playlist_id = 400931460
    target_dir = r'D:\备份\新建文件夹'
    # Ask how many songs from the start of the playlist should be fetched.
    song_count = int(input("How many songs do you want to download?\n"))
    wangyiyun().work(playlist_id, song_count, target_dir)

以上内容来自网络,如有侵权,请联系删除。

猜你喜欢

转载自www.cnblogs.com/wztshine/p/12365231.html