版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/sinat_34233802/article/details/79465015
爬取网易云音乐 2011-2017 年 Melon 年榜歌单并保存
# -*- coding: utf-8 -*-
"""
Created on Thu Jan 25 19:57:09 2018
@author: marson
"""
import requests
from bs4 import BeautifulSoup
import json
import create_songlist
def _field_value(sentence):
    """Return the text after the first ':' of a ``label:value`` sentence.

    Raises IndexError when the sentence has no ':' separator, which is what
    the previous ``sentence.split(':')[1]`` lookup did as well.
    """
    _label, sep, value = sentence.partition(':')
    if not sep:
        raise IndexError('no ":" separator in description part: %r' % sentence)
    return value


def get_ist(i,id):
    """Scrape one NetEase Cloud Music playlist and record every song in it.

    The playlist page is fetched, each song link inside its ``ul.f-hide``
    element is followed, and the song page's ``application/ld+json`` script
    is parsed. For every singer credited on a song, one line
    ``rank|title|singer|album|i`` is appended to the text file named after
    ``i``, and the tuple ``(rank, title, singer, album, i)`` is appended to
    the module-level ``data`` list.

    This function reads the module-level names ``headers`` (request headers)
    and ``data`` (result list); both are set up by the ``__main__`` block.

    Args:
        i: Label for this playlist (the script passes the chart year). Used
            as the output file name and as the last column of each record.
        id: Playlist id appended to the playlist URL.

    Raises:
        AttributeError: If the playlist page has no ``ul.f-hide`` element or
            a song page has no ``application/ld+json`` script.
        IndexError: If a song description does not have the expected
            ``label:singers。label:album`` layout.
    """
    play_url = 'http://music.163.com/playlist?id='+str(id)
    # Playlist ids used by the script, by chart year:
    #   560117127  -- 2011
    #   560095569  -- 2012
    #   19020312   -- 2013
    #   559454416  -- 2014
    #   159677693  -- 2015
    #   560080737  -- 2016
    #   2074743371 -- 2017
    # One session serves the playlist page and every song page, and the
    # ``with`` block closes it even if a request or a parse step fails.
    with requests.session() as session:
        playlist_page = BeautifulSoup(session.get(play_url,headers = headers).content,'lxml')
        main = playlist_page.find('ul',{'class':'f-hide'})
        # Save locally; ``with`` guarantees the file is closed on any error.
        with open('E:\\python\\melon\\榜单\\'+str(i)+'.txt','a',encoding='utf-8') as f:
            # m is the 1-based position of the song in the playlist; all
            # singers credited on the same song share that position.
            for m, music in enumerate(main.find_all('a'), start=1):
                singer_url = 'http://music.163.com'+music['href']
                song_page = BeautifulSoup(session.get(singer_url,headers = headers).content,'lxml')
                des = song_page.find('script',type="application/ld+json").get_text()
                desb = json.loads(des)
                # The description is split into '。'-terminated sentences: the
                # first carries the singers, the second the album.
                sentences = desb['description'].split('。')
                # Take everything after the first ':' so that a singer or
                # album name containing ':' is not cut short.
                singer = _field_value(sentences[0])
                album = _field_value(sentences[1])
                for ss in singer.split(','):
                    f.write(str(m)+'|'+desb['title']+'|'+ss+'|'+album+'|'+str(i)+'\n')
                    data.append((m,desb['title'],ss,album,i))
if __name__ == '__main__':
    # Request headers; get_ist reads this module-level name for every request.
    headers = {
        'Referer': 'http://music.163.com/',
        'Host': 'music.163.com',
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Firefox/38.0 Ic',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    }
    # Rows collected by get_ist; it appends to this module-level list.
    data = []
    # Playlist ids, one per chart year, in order starting from 2011.
    bd = [
        560117127,
        560095569,
        19020312,
        559454416,
        159677693,
        560080737,
        2074743371,
    ]

    create_songlist.create_table()
    # Pair each playlist with its year: the first entry is 2011, then +1 each.
    for year, playlist_id in enumerate(bd, start=2011):
        get_ist(year, playlist_id)
    create_songlist.insert_table(data)
    # Load the stored rows back as a DataFrame.
    df = create_songlist.get_data()
create_songlist.py 文件:
import sqlite3
import pandas as pd
def create_table():
    """Create the ``MUSIC`` table in ``melon.db`` if it does not exist yet.

    Columns: ``ID`` (INT, required), ``song`` (TEXT, required), ``singer``
    (TEXT, required), ``album`` (TEXT) and ``year`` (INT).

    Calling this again on an existing database is a no-op, so the script can
    be re-run without first deleting ``melon.db``.
    """
    conn = sqlite3.connect('melon.db')
    try:
        print ("Opened database successfully")
        # singer is TEXT: with an INT column SQLite would coerce a
        # numeric-looking artist name (e.g. "1415") into an integer.
        conn.execute('''CREATE TABLE IF NOT EXISTS MUSIC
               (ID INT NOT NULL,
               song TEXT NOT NULL,
               singer TEXT NOT NULL,
               album TEXT,
               year INT);''')
        print ("Table created successfully")
        conn.commit()
    finally:
        # Release the connection even when the statement fails.
        conn.close()
def insert_table(data):
    """Insert rows into the ``MUSIC`` table of ``melon.db``.

    Args:
        data: Iterable of 5-item sequences ``(ID, song, singer, album, year)``,
            bound to the parameterized INSERT statement one row at a time.
            An empty iterable inserts nothing.
    """
    conn = sqlite3.connect('melon.db')
    try:
        c = conn.cursor()
        print("Opened database successfully")
        sql_word = "INSERT INTO MUSIC (ID,song,singer,album,year) VALUES (?,?,?,?,?);"
        c.executemany(sql_word,data)
        conn.commit()
        print( "Records created successfully")
    finally:
        # Close the connection even if the insert or commit raises.
        conn.close()
def get_data():
    """Return every row of the ``MUSIC`` table in ``melon.db``.

    Returns:
        A pandas DataFrame holding the result of ``SELECT * from MUSIC``.
    """
    conn = sqlite3.connect('melon.db')
    try:
        print ("Opened database successfully")
        sql_word = "SELECT * from MUSIC"
        data1 = pd.read_sql(sql_word,conn)
    finally:
        # Close the connection even if the query fails (e.g. missing table).
        conn.close()
    return data1
def test():
conn = sqlite3.connect('melon.db')
c = conn.cursor()
c.execute("select singer,count(*) from music group by 1 having count(*)>5 order by 2 desc")
AN = c.fetchall()
conn.close()
print (AN)