1. Open the Web page https://tophub.today/
2. Press Ctrl + U to open the page source
3. Locate the data you want to crawl
4. Write the crawler code:
import requests
import pandas as pd
from bs4 import BeautifulSoup

# Page being crawled.
URL = 'https://tophub.today/'
# Offset of the target (Zhihu) hot-list entries within the page's full
# sequence of 't' (title) / 'e' (heat) spans.
# NOTE(review): assumed from the original's hard-coded `i + 50` indexing —
# confirm against the site's current markup.
ZHIHU_OFFSET = 50


def get(url):
    """Fetch *url* and return its decoded HTML text.

    Returns the sentinel string "abnormal" on any request/HTTP failure
    (preserving the original script's error contract).
    """
    headers = {
        # Browser-like User-Agent so the site does not reject the crawler.
        'User-Agent': ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                       'AppleWebKit/537.36 (KHTML, like Gecko) '
                       'Chrome/80.0.3987.122 Safari/537.36')
    }
    try:
        # Bind the response to its own name; the original rebound `url`,
        # shadowing the parameter.
        resp = requests.get(url, timeout=30, headers=headers)
        resp.raise_for_status()
        resp.encoding = 'utf-8'
        return resp.text
    except requests.RequestException:
        # Narrowed from the original bare `except:` — only network/HTTP
        # errors are treated as "abnormal"; programming errors still raise.
        return "abnormal"


def create(lst, html, num, offset=ZHIHU_OFFSET):
    """Parse *html*, print the top *num* entries, and append them to *lst*.

    Each appended row is ``[rank, title, heat]``. *offset* selects where the
    target list starts among all matching spans (default keeps the original
    behavior of skipping the first 50 entries).
    """
    soup = BeautifulSoup(html, 'html.parser')
    titles = soup.find_all('span', class_='t')
    heats = soup.find_all('span', class_='e')
    row_fmt = '{:^10}\t{:^30}\t{:^10}'
    print(row_fmt.format('rank', 'title', 'heat'))
    for i in range(num):
        title = titles[i + offset].string
        heat = heats[i + offset].string
        print(row_fmt.format(i + 1, title, heat))
        lst.append([i + 1, title, heat])


def main():
    """Crawl the hot list and save the top 10 entries to an Excel file."""
    lst = []
    html = get(URL)
    if html == "abnormal":
        # The original passed the sentinel straight into BeautifulSoup and
        # crashed with IndexError; bail out explicitly instead.
        print("request failed")
        return
    create(lst, html, 10)
    df = pd.DataFrame(lst, columns=['rank', 'title', 'heat'])
    # Raw string so the Windows backslashes are not treated as escapes.
    df.to_excel(r'E:\New Folder\zhihu_top10.xlsx')


if __name__ == '__main__':
    main()
5. Run the script: the crawled rank/title/heat data is printed and saved to an Excel file.