Web crawler example: NetEase Cloud Music playlists

Technical points:

Crawling data with Selenium

Converting the scraped data to CSV format

 

As long as NetEase Cloud Music has not changed its anti-crawler strategy, the code should run as-is.

The site's restrictions on crawlers are light: I tried it, it is very simple to use, and about one day is enough to crawl nearly everything directly.

Example code:

import csv
import json
import time

from selenium import webdriver
from selenium.webdriver.common.by import By


class WangyiMusicSpider():
    """Scrape a NetEase Cloud Music playlist listing page with Selenium.

    The spider loads a page in Chrome, reads every ``<li>`` under
    ``ul#m-pl-container`` inside the ``g_iframe`` frame, stores the extracted
    records in ``wangyimisic.json`` and then converts that file to
    ``wangyimusic.csv``. Both files are written to the current working
    directory.
    """

    def __init__(self, url):
        """Start a Chrome session and load *url*.

        Args:
            url: Address of the page to scrape.
        """
        # NOTE(review): passing the driver path positionally is the Selenium 3
        # calling convention; recent Selenium 4 releases appear to expect a
        # Service object instead - confirm against the installed version.
        self.browser = webdriver.Chrome('chromedriver')
        self.browser.get(url)
        # Fixed pause after navigation, presumably to let the page finish
        # rendering before the DOM is queried.
        time.sleep(2)

    def json_to_scv(self):
        """Convert ``wangyimisic.json`` into ``wangyimusic.csv``.

        The JSON file must contain a list of flat dicts. The keys of the first
        dict become the CSV header row and every dict contributes one data row
        made of its values. When the list is empty the CSV file is still
        created (truncated) but left empty, since there is no record to derive
        a header from.
        """
        with open("wangyimisic.json", "r", encoding="utf-8") as r:
            results = json.load(r)

        # newline='' lets the csv module control line endings itself; without
        # it every row is followed by a blank line on Windows. The context
        # manager closes the file even if a write fails.
        with open('wangyimusic.csv', 'w', encoding='utf-8', newline='') as f:
            if not results:
                return
            csv_writer = csv.writer(f)
            csv_writer.writerow(results[0].keys())
            csv_writer.writerows(result.values() for result in results)

    @staticmethod
    def _parse_item(li):
        """Build one record dict from a single playlist ``<li>`` element."""
        return {
            'photo': li.find_element(By.XPATH, './/img').get_attribute('src'),
            'music_link': li.find_element(By.XPATH, './/div/a').get_attribute('href'),
            'hot': li.find_element(By.XPATH, './/span[@class="nb"]').text,
            'title': li.find_element(By.XPATH, './p/a').text,
            'name': li.find_element(By.XPATH, './/p[last()]/a').text,
        }

    def main(self):
        """Scrape the loaded page and write the JSON and CSV output files."""
        # The playlist markup is looked up inside the 'g_iframe' frame, so the
        # driver has to switch into that frame before querying it.
        iframe_element = self.browser.find_element(By.ID, 'g_iframe')
        self.browser.switch_to.frame(iframe_element)

        data_list = self.browser.find_elements(
            By.XPATH, './/ul[@id="m-pl-container"]/li')
        results = [self._parse_item(li) for li in data_list]

        with open('wangyimisic.json', 'w', encoding='utf-8') as a:
            json.dump(results, a)

        self.json_to_scv()

    def quit(self):
        """Close the browser and end the WebDriver session."""
        self.browser.quit()


if __name__ == '__main__':
    # Entry point: scrape the playlist discovery page and export the results.
    url = 'https://music.163.com/#/discover/playlist'
    obj = WangyiMusicSpider(url)
    try:
        obj.main()
    finally:
        # Quit the browser even when scraping fails, so no Chrome/chromedriver
        # process is left running.
        obj.quit()

  

Guess you like

Origin www.cnblogs.com/renoyuan/p/11401602.html