Principle: see my previous blog post.
import time

import requests
from bs4 import BeautifulSoup


def get_html(url):
    """Fetch a page and return its HTML text.

    Args:
        url: Address of the page to download.

    Returns:
        The response body as text when the server answers with status
        200, otherwise ``None``.

    Raises:
        requests.RequestException: On connection errors or when the
            server does not answer within the timeout.
    """
    headers = {
        'user-agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/75.0.3770.142 Safari/537.36'
        )
    }
    # A timeout keeps the script from hanging forever on a stalled server.
    response = requests.get(url, headers=headers, timeout=10)
    if response.status_code == 200:
        return response.text
    return None


def get_infos(html):
    """Extract the chart rows from a ranking page and print them.

    Each row is printed as a dict holding the ranking number, the
    "song - singer" link text and the play time.

    Args:
        html: HTML text of a ranking page, or ``None`` when the download
            failed (in which case nothing is printed).
    """
    if not html:
        # get_html() returns None on a non-200 response; nothing to parse.
        return

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(html, 'html.parser')

    # Ranking number
    ranks = soup.select(
        '#rankWrap > div.pc_temp_songlist > ul > li > span.pc_temp_num')
    # Singer + song name
    names = soup.select('#rankWrap > div.pc_temp_songlist > ul > li > a')
    # Play time
    times = soup.select(
        '#rankWrap > div.pc_temp_songlist > ul > li > '
        'span.pc_temp_tips_r > span')

    # Print one record per chart row.
    for r, n, t in zip(ranks, names, times):
        # The rank and time cells contain layout whitespace; drop it.
        r = r.get_text().replace('\n', '').replace('\t', '').replace('\r', '')
        n = n.get_text()
        t = t.get_text().replace('\n', '').replace('\t', '').replace('\r', '')
        data = {
            'Ranking': r,
            'song - Singer': n,
            'play time': t,
        }
        print(data)


def main():
    """Download ranking pages 1 through 5 and print their rows."""
    urls = [
        'https://www.kugou.com/yy/rank/home/{}-23784.html?from=rank'.format(i)
        for i in range(1, 6)
    ]
    for url in urls:
        html = get_html(url)
        get_infos(html)
        # Throttle: wait one second before requesting the next page.
        time.sleep(1)


if __name__ == '__main__':
    main()
Result: