# Fetch a university ranking page, pull the first three cells of every table
# row, and print the leading rows as a centred, tab-separated table.
#
# The third-party packages (requests, bs4) are imported inside the functions
# that use them, so the module -- and the pure formatting helper in
# particular -- can be imported without the scraping dependencies installed.


def gethtmltext(url):
    """Return the decoded HTML text of *url*, or ``''`` if the request fails.

    Args:
        url: Address of the page to download.

    Returns:
        The response body as text, or an empty string when the request
        raises a ``requests`` error (connection failure, timeout, or a
        non-success HTTP status).
    """
    import requests

    try:
        r = requests.get(url, timeout=30)
        # Turn a non-success HTTP status into an exception so that it is
        # handled the same way as a connection failure.
        r.raise_for_status()
        # Decode with the encoding detected from the content itself.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        return ''


def fillunivlist(ulist, html):
    """Append one ``[cell0, cell1, cell2]`` entry to *ulist* per table row.

    Args:
        ulist: List that receives the extracted rows; mutated in place.
        html: HTML text to parse.

    Only tag children of the first ``<tbody>`` are considered. Nothing is
    appended when the document has no ``<tbody>`` (for example when *html*
    is empty), and rows with fewer than three ``<td>`` cells are skipped.
    """
    from bs4 import BeautifulSoup
    from bs4.element import Tag

    soup = BeautifulSoup(html, 'html.parser')
    tbody = soup.find('tbody')
    if tbody is None:
        return
    for tr in tbody.children:
        # The children of <tbody> also include plain strings between the
        # rows; only real tags can hold <td> cells.
        if not isinstance(tr, Tag):
            continue
        tds = tr('td')  # calling a tag is shorthand for find_all
        if len(tds) < 3:
            continue
        ulist.append([td.string for td in tds[:3]])


def printunivlist(ulist, num):
    """Print a header line followed by at most *num* rows of *ulist*.

    Args:
        ulist: Sequence of rows, each holding at least three fields.
        num: Maximum number of rows to print. If *ulist* is shorter, only
            the available rows are printed; zero or a negative value prints
            the header alone.

    Fields that are ``None`` are printed as blank columns.
    """
    row_format = '{:^10}\t{:^6}\t{:^10}'
    print(row_format.format('rank', 'school name', 'out'))
    # max() keeps a negative num from turning into a "drop from the end" slice.
    for u in ulist[:max(num, 0)]:
        # None does not accept the '^' alignment spec, so blank it out first.
        fields = ['' if u[i] is None else u[i] for i in range(3)]
        print(row_format.format(*fields))


def main():
    """Download the 2016 ranking page and print its first 20 rows."""
    uinfo = []
    url = 'http://www.zuihaodaxue.cn/zuihaodaxuepaiming2016.html'
    html = gethtmltext(url)
    fillunivlist(uinfo, html)
    printunivlist(uinfo, 20)


if __name__ == '__main__':
    main()
The output is as follows: