'''
Crawl wandoujia (peas pod) app-store data.

Example request URL (page 2):
https://www.wandoujia.com/wdjweb/api/category/more?catId=6001&subCatId=0&page=2&ctoken=vbw9lj1sRQsRddx0hD-XqCNF
'''
import re

import requests
from bs4 import BeautifulSoup


# Crawler step 1: send the request.
def get_page(url):
    """Fetch *url* with a plain GET and return the requests.Response."""
    response = requests.get(url)
    return response


# Crawler step 2: parse the data.
def parse_data(text):
    """Parse the HTML fragment *text*, print each app's details and
    append them to wandoujia.txt.

    Each app is an <li class="card"> element containing the app name/link
    (<a class="name">), the install count (<span class="install-count">)
    and the size (a <span> whose title attribute looks like "123MB").
    """
    soup = BeautifulSoup(text, 'lxml')
    li_list = soup.find_all(name='li', class_="card")
    for li in li_list:
        app_name = li.find(name='a', class_="name").text
        app_url = li.find(name='a', class_="name").attrs.get('href')
        download_num = li.find(name='span', class_="install-count").text
        # Raw string so \d is a regex digit class, not an escape.
        app_size = li.find(name='span', attrs={"title": re.compile(r'\d+MB')}).text
        app_data = '''
game name: {}
game address: {}
number of downloads: {}
game size: {}
'''.format(app_name, app_url, download_num, app_size)
        print(app_data)
        # Append each record; flush so partial runs still leave data on disk.
        with open('wandoujia.txt', 'a', encoding='utf-8') as f:
            f.write(app_data)
            f.flush()


if __name__ == '__main__':
    # range(1, 2) crawls only page 1; widen the range for more pages.
    for page in range(1, 2):
        url = ('https://www.wandoujia.com/wdjweb/api/category/more'
               '?catId=6001&subCatId=0&page={}'
               '&ctoken=vbw9lj1sRQsRddx0hD-XqCNF').format(page)
        print(url)
        # 1. Send the request to the API endpoint.
        response = get_page(url)
        # The endpoint returns JSON; convert it to a Python dict.
        data = response.json()
        # The <li> markup to parse lives under data -> content.
        text = data.get('data').get('content')
        # 2. Parse it and persist the results.
        parse_data(text)