[Python] [Web crawler] Scraping the Kugou Music "internet hit songs" chart

How it works: see my previous blog post.

import requests
import time
from bs4 import BeautifulSoup


def get_html(url, timeout=10):
    """Download a page and return its HTML text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        The response body as text when the server answers with HTTP 200,
        otherwise ``None``.

    Raises:
        requests.RequestException: If the request fails at the network level
            or the timeout expires.
    """
    # Adjacent literals are concatenated at compile time. A backslash
    # continuation inside one literal would embed the next line's indentation
    # in the header value.
    headers = {
        'user-agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/75.0.3770.142 Safari/537.36'
        ),
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code == 200:
        return response.text
    return None


def get_infos(html):
    """Extract the chart entries from a ranking page and print them.

    For every song on the page one dictionary is printed, holding the rank,
    the "song - singer" title and the play time.

    Args:
        html: HTML text of a ranking page. ``None`` or an empty string (for
            example after a failed download) is accepted and prints nothing.
    """
    if not html:
        # Nothing was downloaded, so there is nothing to parse.
        return

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(html, 'html.parser')

    row = '#rankWrap > div.pc_temp_songlist > ul > li'
    # Rank
    ranks = soup.select(row + ' > span.pc_temp_num')
    # Singer + song
    names = soup.select(row + ' > a')
    # Play time
    times = soup.select(row + ' > span.pc_temp_tips_r > span')

    # Drop line breaks and tabs that surround the text inside the page markup.
    layout_chars = str.maketrans('', '', '\n\t\r')

    # Print information
    for rank_tag, name_tag, time_tag in zip(ranks, names, times):
        data = {
            'Ranking': rank_tag.get_text().translate(layout_chars),
            'song - Singer': name_tag.get_text(),
            'play time': time_tag.get_text().translate(layout_chars),
        }
        print(data)


def main():
    """Crawl chart pages 1 through 5 and print the entries found on each."""
    urls = [
        'https://www.kugou.com/yy/rank/home/{}-23784.html?from=rank'
        .format(page)
        for page in range(1, 6)
    ]
    for url in urls:
        html = get_html(url)
        # get_html returns None for a non-200 response; skip such pages.
        if html is not None:
            get_infos(html)
        # Pause between requests to avoid hammering the server.
        time.sleep(1)


# Run the crawler only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

result:

Guess you like

Source: www.cnblogs.com/HGNET/p/12083066.html