网易云歌单爬取并保存

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/sinat_34233802/article/details/79465015

爬取网易云音乐上 2011–2017 年的 Melon 年榜歌单,并保存到本地文本文件和 SQLite 数据库。

# -*- coding: utf-8 -*-
"""
Created on Thu Jan 25 19:57:09 2018

@author: marson
"""

import requests
from bs4 import BeautifulSoup
import json
import create_songlist

def get_ist(i,id):
    """Scrape one NetEase Cloud Music playlist and record every track.

    Fetches the playlist page for ``id``, follows every song link found in
    the ``<ul class="f-hide">`` element, and reads the JSON-LD metadata of
    each song page.  For every (song, singer) pair one ``|``-separated line
    is appended to ``<i>.txt`` in the hard-coded Windows output directory,
    and the same record is appended to the module-level ``data`` list as a
    ``(rank, title, singer, album, i)`` tuple.

    Relies on two module-level names created by the ``__main__`` block:
    ``headers`` (HTTP request headers) and ``data`` (the accumulator list).

    Args:
        i: Label of the playlist (the chart year in the calling script);
            used as the output file name and as the last field of a record.
        id: NetEase playlist id appended to the playlist URL.

    Raises:
        AttributeError: If the playlist page has no ``ul.f-hide`` element or
            a song page has no ``application/ld+json`` script.
        IndexError: If a song description does not have the expected
            "<label>:<singers>。<label>:<album>" layout.
    """
    play_url = 'http://music.163.com/playlist?id='+str(id)
    # Playlist ids used by the calling script:
    #   560117127 -- 2011    560095569 -- 2012    19020312   -- 2013
    #   559454416 -- 2014    159677693 -- 2015    560080737  -- 2016
    #   2074743371 -- 2017

    # One session serves the playlist page and every song page; the context
    # manager releases its connections even when a request or parse fails.
    with requests.session() as session:
        playlist_page = BeautifulSoup(session.get(play_url,headers = headers).content,'lxml')
        main = playlist_page.find('ul',{'class':'f-hide'})

        # Append mode: running the scraper again adds to the existing file.
        # The `with` block closes the file on every exit path.
        with open('E:\\python\\melon\\榜单\\'+str(i)+'.txt','a',encoding='utf-8') as f:
            # m is the 1-based position of the song inside the playlist.
            for m, music in enumerate(main.find_all('a'), start=1):
                song_url = 'http://music.163.com'+music['href']
                song_page = BeautifulSoup(session.get(song_url,headers = headers).content,'lxml')
                des = song_page.find('script',type="application/ld+json").get_text()
                desb = json.loads(des)

                # The description is split on '。' into sentences; the text
                # after ':' in the first one is taken as the singer list and
                # in the second one as the album name.
                sentences = desb['description'].split('。')
                singer = sentences[0].split(':')[1]
                album = sentences[1].split(':')[1]

                # A song with several singers yields one record per singer,
                # all sharing the same rank m.
                for ss in singer.split(','):
                    f.write(str(m)+'|'+desb['title']+'|'+ss+'|'+album+'|'+str(i)+'\n')
                    data.append((m,desb['title'],ss,album,i))

if __name__ == '__main__':
    # Script entry point: scrape every yearly playlist, then persist the
    # collected records through the create_songlist helper module.

    # HTTP headers sent with every request; get_ist() reads this name as a
    # module-level global.
    headers = {
        'Referer': 'http://music.163.com/',
        'Host': 'music.163.com',
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Firefox/38.0 Ic',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    }

    # Playlist ids in chart-year order; the first entry is labelled 2011.
    bd = [
        560117127,
        560095569,
        19020312,
        559454416,
        159677693,
        560080737,
        2074743371,
    ]

    # Accumulator filled by get_ist() (also read there as a global).
    data = []

    create_songlist.create_table()

    # Pair each playlist with its year label: 2011, 2012, ...
    for year, playlist_id in enumerate(bd, start=2011):
        get_ist(year, playlist_id)

    create_songlist.insert_table(data)
    df = create_songlist.get_data()






create_songlist 文件

import sqlite3
import pandas as pd
def create_table():
    """Create the ``MUSIC`` table in ``melon.db`` if it does not exist yet.

    Columns: ``ID`` (INT, not null), ``song`` (TEXT, not null), ``singer``
    (TEXT, not null), ``album`` (TEXT) and ``year`` (INT).

    The statement uses ``IF NOT EXISTS`` so the function can be called on
    every run instead of raising ``sqlite3.OperationalError`` once the table
    is there.  An already existing table is left untouched, including its
    original column types.

    Raises:
        sqlite3.Error: If the database cannot be opened or written.
    """
    conn = sqlite3.connect('melon.db')
    try:
        print ("Opened database successfully")
        c = conn.cursor()
        # singer is declared TEXT: with an INT declaration SQLite's integer
        # affinity would store an all-digit artist name as a number.
        c.execute('''CREATE TABLE IF NOT EXISTS MUSIC
           (ID INT      NOT NULL,
           song           TEXT    NOT NULL,
           singer            TEXT    NOT NULL,
           album        TEXT,
           year         INT);''')
        print ("Table created successfully")
        conn.commit()
    finally:
        # Release the connection even when the statement fails.
        conn.close()

def insert_table(data):
    """Insert a batch of records into the ``MUSIC`` table of ``melon.db``.

    Args:
        data: Iterable of 5-item sequences ``(ID, song, singer, album,
            year)``, bound positionally to the parameterized INSERT.

    Raises:
        sqlite3.Error: If the table is missing or a row violates the schema.
            Nothing is committed in that case.
    """
    conn = sqlite3.connect('melon.db')
    try:
        c = conn.cursor()
        print("Opened database successfully")
        sql_word = "INSERT INTO MUSIC (ID,song,singer,album,year) VALUES (?,?,?,?,?);"
        c.executemany(sql_word,data)

        # Commit only after the whole batch was accepted.
        conn.commit()
        print( "Records created successfully")
    finally:
        # Release the connection even when the insert fails.
        conn.close()

def get_data():
    """Load the whole ``MUSIC`` table from ``melon.db``.

    Returns:
        pandas.DataFrame: One row per stored record, with the table's
        columns as DataFrame columns.

    Raises:
        Exception: Whatever ``pandas.read_sql`` raises when the query fails
            (for example when the table does not exist).
    """
    conn = sqlite3.connect('melon.db')
    try:
        print ("Opened database successfully")
        sql_word = "SELECT *  from MUSIC"
        data1 = pd.read_sql(sql_word,conn)
    finally:
        # Release the connection even when the query fails.
        conn.close()
    return data1

def test():
    """Print every singer with more than five rows in ``MUSIC``.

    Runs a grouped count over the ``singer`` column of ``melon.db`` and
    prints the resulting list of ``(singer, count)`` tuples, ordered by
    count in descending order.

    Raises:
        sqlite3.Error: If the database or the table cannot be read.
    """
    conn = sqlite3.connect('melon.db')
    try:
        c = conn.cursor()
        c.execute("select singer,count(*) from music group by 1 having count(*)>5 order by 2 desc")
        AN = c.fetchall()
    finally:
        # Release the connection even when the query fails.
        conn.close()
    print (AN)

猜你喜欢

转载自blog.csdn.net/sinat_34233802/article/details/79465015