Day 03: Crawling Wandoujia (the "pea pod" app store)

import requests
from bs4 import BeautifulSoup
# Request URL: https://www.wandoujia.com/category/6001
# Request method: GET

def have_title(tag):
    """Return *tag* if it is a ``<span>`` element with a ``title`` attribute.

    Written to be passed as a filter callable to BeautifulSoup's ``find``:
    a truthy return value marks the tag as a match.

    Args:
        tag: An object exposing ``name`` and ``has_attr(key)``.

    Returns:
        The same *tag* when it matches, otherwise ``None``.
    """
    # The literals must be exact: padded values such as ' span ' never match.
    if tag.name == 'span' and tag.has_attr('title'):
        return tag
    return None

# acquires the web page 
def get_page(url, timeout=10):
    """Fetch *url* with an HTTP GET request and return the response.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the script forever.

    Returns:
        The response object produced by ``requests.get``.
    """
    return requests.get(url, timeout=timeout)
# parse pages 
def parse_detail(html):
    """Extract app information from a category page and format it as text.

    Every ``<li class="card">`` element is treated as one app. For each
    card the function reads the app name (``alt`` of the image inside the
    first link), the detail-page URL (``href`` of that link), the download
    count (text of the ``install-count`` element) and the app size (text of
    the first ``<span>`` with a ``title`` attribute inside the ``meta`` div).

    Args:
        html: Markup of the category page as a string.

    Returns:
        A single string with one formatted record per card; an empty string
        when the page contains no cards.
    """
    soup = BeautifulSoup(html, 'lxml')
    cards = soup.find_all(name='li', class_='card')

    records = []
    for card in cards:
        link = card.a
        app_name = link.img.attrs['alt']
        detail_url = link.attrs['href']

        # Download count and size both live inside the card's "meta" div.
        meta = card.find(name='div', class_='meta')
        download_num = meta.find(class_='install-count').text
        app_size = meta.find(have_title).text

        records.append(f"""
                Name: {app_name}
                details page url: {detail_url}
                number of downloads: {download_num}
                App Size: {app_size}

                """)

    # Join once instead of growing a string with += inside the loop.
    return "".join(records)

# save data 
def save_games(data, path='games.txt'):
    """Write the scraped text to a UTF-8 encoded file.

    Args:
        data: Text to store.
        path: Destination file; defaults to ``games.txt`` in the current
            working directory. An existing file is overwritten.
    """
    with open(path, 'w', encoding='utf-8') as f:
        f.write(data)

if __name__ == '__main__':
    # Download the category page, extract the app records, and save them.
    url = 'https://www.wandoujia.com/category/6001'
    # Reuse the file's own fetch helper rather than repeating the request.
    index_res = get_page(url)
    index_detail = index_res.text
    data = parse_detail(index_detail)
    save_games(data)

 

Guess you like

Origin www.cnblogs.com/zzf0601/p/11129414.html