Python Crawler Day 4

'''
Crawling Wandoujia app data
- Request URL
Page 2:
https://www.wandoujia.com/wdjweb/api/category/more?catId=6001&subCatId=0&page=2&ctoken=vbw9lj1sRQsRddx0hD-XqCNF
'''
import requests
from bs4 import BeautifulSoup
import re
'''
The three steps of a crawler
'''
# 1. Send the request
def get_page(url):
    response = requests.get(url)
    return response
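
# Not in the original post: a hedged variant of get_page for the case where
# the site rejects bare requests. The User-Agent value here is only an
# illustrative assumption.
def get_page_with_headers(url):
    headers = {'User-Agent': 'Mozilla/5.0'}
    response = requests.get(url, headers=headers)
    return response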

# 2. Parse the data
def parse_data(text):
    soup = BeautifulSoup(text, 'lxml')
    # print(soup)
    li_list = soup.find_all(name='li', class_="card")
    # print(li_list)
    for li in li_list:
        # print(li)
        # print('tank' * 100)
        app_name = li.find(name='a', class_="name").text
        # print(app_name)

        app_url = li.find(name='a', class_="name").attrs.get('href')
        # print(app_url)

        download_num = li.find(name='span', class_="install-count").text
        # print(download_num)

        app_size = li.find(name='span', attrs={"title": re.compile(r'\d+MB')}).text
        # print(app_size)

        app_data = '''
        Game name: {}
        Game URL: {}
        Downloads: {}
        Game size: {}
        \n
        '''.format(app_name, app_url, download_num, app_size)
        print(app_data)
        # Append each record to a local text file
        with open('wandoujia.txt', 'a', encoding='utf-8') as f:
            f.write(app_data)
            f.flush()
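
# A self-contained check of the parsing logic above (my own sketch; the
# markup is made up but mirrors the class names the real page uses).
def _demo_parse():
    sample = '''
    <li class="card">
        <a class="name" href="https://www.wandoujia.com/apps/demo">Demo Game</a>
        <span class="install-count">10,000 installs</span>
        <span title="15MB">15MB</span>
    </li>
    '''
    parse_data(sample)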

if __name__ == '__main__':
    # range(1, 2) only crawls page 1; widen the range to fetch more pages
    for line in range(1, 2):
        url = 'https://www.wandoujia.com/wdjweb/api/category/more?catId=6001&subCatId=0&page={}&ctoken=vbw9lj1sRQsRddx0hD-XqCNF'.format(line)
        print(url)
        # 1. Send the request to the API and get the response data
        response = get_page(url)
        # print(response.text)
        # import json
        # json.loads(response.text)
        # print(type(response.json()))
        # print('tank' * 1000)

        # Deserialize the JSON response into a Python dictionary
        data = response.json()

        # print(data['state'])

        # Pull the li text (an HTML fragment) out of the dictionary
        text = data.get('data').get('content')

        # 2. Parse the data
        parse_data(text)
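
For reference, here is a minimal standalone sketch (not from the original post) that only fetches one page and prints the top-level JSON keys, assuming the endpoint and ctoken shown in the docstring are still valid:

import requests

url = ('https://www.wandoujia.com/wdjweb/api/category/more'
       '?catId=6001&subCatId=0&page=2&ctoken=vbw9lj1sRQsRddx0hD-XqCNF')
resp = requests.get(url)
data = resp.json()
print(data.keys())          # expect keys such as 'state' and 'data'
print(data['data'].keys())  # 'content' holds the HTML card list parsed above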

 

