技术点:
用 csv 模块转换数据格式(JSON → CSV)
网易云没换反爬虫策略的话,代码应该是可以直接跑的。
爬虫的底细试了一下还是很简单的,上手一天差不多就可以直接爬了。
案例代码:
import csv
import json
import time

from selenium import webdriver


class WangyiMusicSpider:
    """Scrape NetEase Cloud Music playlist cards and export them to JSON and CSV.

    Workflow: open the playlist page, switch into its iframe, collect one dict
    per playlist card, dump the list to ``wangyimisic.json``, then convert that
    JSON file into ``wangyimusic.csv``.
    """

    def __init__(self, url):
        # NOTE(review): the positional driver-path argument and the
        # find_element_by_* calls below are Selenium 3 style; Selenium 4
        # removed them (use Service(...) and find_element(By...)). Kept
        # as-is to match the environment this script was written for.
        self.browser = webdriver.Chrome('chromedriver')
        self.browser.get(url)
        time.sleep(2)  # crude fixed wait for the page to render; an explicit wait would be more robust

    def json_to_scv(self):
        """Convert the scraped JSON file into a CSV file.

        Method name keeps the original 'scv' typo so existing callers keep
        working. Filenames ('wangyimisic.json', 'wangyimusic.csv') are also
        preserved byte-for-byte.
        """
        with open('wangyimisic.json', 'r', encoding='utf-8') as r:
            results = json.load(r)
        if not results:
            # Nothing was scraped: bail out instead of raising IndexError
            # on results[0] below.
            return
        # newline='' is required by the csv module; without it Windows
        # output gets a blank row after every record.
        with open('wangyimusic.csv', 'w', encoding='utf-8', newline='') as f:
            csv_writer = csv.writer(f)
            # Header row from the first record's keys; all records share
            # the same keys because main() builds them identically.
            csv_writer.writerow(results[0].keys())
            for result in results:
                csv_writer.writerow(result.values())

    def main(self):
        """Collect playlist data from the page, save JSON, then convert to CSV."""
        # The playlist grid is rendered inside an iframe; switch into it
        # before querying, or the xpaths find nothing.
        iframe_element = self.browser.find_element_by_id('g_iframe')
        self.browser.switch_to.frame(iframe_element)
        data_list = self.browser.find_elements_by_xpath('.//ul[@id="m-pl-container"]/li')
        results = []
        for li in data_list:
            item = {
                'photo': li.find_element_by_xpath('.//img').get_attribute('src'),
                'music_link': li.find_element_by_xpath('.//div/a').get_attribute('href'),
                'hot': li.find_element_by_xpath('.//span[@class="nb"]').text,
                'title': li.find_element_by_xpath('./p/a').text,
                'name': li.find_element_by_xpath('.//p[last()]/a').text,
            }
            results.append(item)
        # ensure_ascii=False keeps Chinese titles human-readable in the
        # JSON file instead of \uXXXX escapes.
        with open('wangyimisic.json', 'w', encoding='utf-8') as a:
            a.write(json.dumps(results, ensure_ascii=False))
        self.json_to_scv()

    def quit(self):
        """Close the browser and release the WebDriver session."""
        self.browser.quit()


if __name__ == '__main__':
    url = 'https://music.163.com/#/discover/playlist'
    obj = WangyiMusicSpider(url)
    try:
        obj.main()
    finally:
        # Always release the browser, even when scraping raises (e.g. the
        # site changed its markup and an xpath lookup fails).
        obj.quit()