Learning Python web crawlers, part 1

import bs4
import requests
from bs4 import BeautifulSoup
 . 4  DEF gethtmltext (URL): # Get html content, using the try and except frame may throw an exception
 . 5      try :
 . 6          R & lt requests.get = (URL, timeout = 30 ) # Get url, the time is 30 seconds
 . 7          r.raise_for_status () # 200 checks whether the connection state, i.e. the normal connection, except as otherwise if the thrown exception
 . 8          r.encoding = r.apparent_encoding # determines the encoding
 . 9          return r.text # returns html content of
 10      the except : 
 . 11          return  '' 
12 is         
13 is  
def fillunivlist(ulist, html):
    """Parse *html* and append one record per table row to *ulist*.

    Only the first ``<tbody>`` element is examined. For each of its direct
    child tags, the ``.string`` of the first three ``<td>`` cells is
    appended to *ulist* as a three-item list.

    Args:
        ulist: List that receives the extracted rows; mutated in place.
        html: HTML text of the page.

    Returns:
        None. Nothing is appended when the page has no ``<tbody>``.
    """
    soup = BeautifulSoup(html, 'html.parser')
    tbody = soup.find('tbody')
    if tbody is None:
        # Empty or unexpected page (e.g. the download failed): no rows.
        return
    for row in tbody.children:
        # .children also yields the whitespace strings between tags;
        # only real tags can contain <td> cells.
        if not isinstance(row, bs4.element.Tag):
            continue
        cells = row.find_all('td')
        if len(cells) < 3:
            # Rows without three cells cannot supply a full record.
            continue
        ulist.append([cells[0].string, cells[1].string, cells[2].string])


def printunivlist(ulist, num):
    """Print a header line followed by up to *num* rows of *ulist*.

    Columns are centred in widths 10, 6 and 10 and separated by tabs.

    Args:
        ulist: Sequence of rows; the first three items of each row are
            printed.
        num: Maximum number of rows to print. If *ulist* has fewer rows,
            all of them are printed; zero or a negative value prints
            only the header.
    """
    row_format = '{:^10}\t{:^6}\t{:^10}'
    print(row_format.format('rank', 'school name', 'out'))
    # Slicing (instead of indexing range(num)) tolerates short lists.
    for row in ulist[:max(num, 0)]:
        # None does not accept an alignment spec, so render it as empty.
        first, second, third = (
            '' if row[i] is None else row[i] for i in range(3)
        )
        print(row_format.format(first, second, third))


def main():
    """Fetch the ranking page, parse it and print the first 20 rows.

    Raises:
        SystemExit: If the page could not be downloaded (gethtmltext
            returned an empty string).
    """
    url = 'http://www.zuihaodaxue.cn/zuihaodaxuepaiming2016.html'
    html = gethtmltext(url)
    if not html:
        # gethtmltext signals failure with an empty string; stop here
        # rather than handing an empty document to the parser.
        raise SystemExit('failed to download ' + url)
    uinfo = []
    fillunivlist(uinfo, html)
    # Never request more rows than were actually parsed.
    printunivlist(uinfo, min(20, len(uinfo)))


if __name__ == '__main__':
    main()

The effect is as follows:

 

Guess you like

Origin www.cnblogs.com/dataxiong/p/11711786.html