Python crawler: clearing the Heibanke ("blackboard") lesson - level one

 

#!/usr/bin/python
# -*- coding: utf-8 -*-
# Author: LiTianle 
# Time: 2019/9/24 15:36 
'''
<h3>You need to append the number 53639 to the URL</h3>
<h3>The next number you need to enter is 10963.</h3>
'''
import re

import requests
    
def get_num(s):
    """Follow the chain of numbered pages of the heibanke.com crawler lesson.

    Starting at ``s``, download the page, read the number announced in its
    ``<h3>`` heading, append that number to the lesson's base URL and fetch
    the resulting page in turn. Every newly built URL is printed. As soon as
    a page no longer announces a number, the link to the next level is
    printed and the function returns.

    Args:
        s: URL of the page to start from.

    Returns:
        None. Progress and the final result are reported via ``print``.

    Raises:
        requests.RequestException: if a page cannot be downloaded (including
            when the per-request timeout expires).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/75.0.3770.100 Safari/537.36',
    }
    base_url = 'http://www.heibanke.com/lesson/crawler_ex00/'

    # The heading reads either "...<number-word>53639" or
    # "...<number-word> is 10963", so allow at most one non-digit character
    # between the word and the digits, and stay inside the <h3> text.
    # NOTE(review): "\u6570\u5b57" is the Chinese word for "number"; it was
    # reconstructed from a machine-translated copy of this pattern -- confirm
    # against the live page.
    number_pattern = r'<h3>[^<]*?\u6570\u5b57\D?(\d+)'
    # Link of the "next level" button shown once the chain is finished.
    next_level_pattern = r'<a href="([^"]*)" class="btn btn-primary'

    url = s
    # Iterate instead of recursing: the chain can be long and a loop cannot
    # run into the interpreter's recursion limit.
    while True:
        # Fetch the page content; the timeout keeps a stalled server from
        # hanging the crawl forever.
        page_text = requests.get(url=url, headers=headers, timeout=10).text
        result = re.findall(number_pattern, page_text, re.S)
        if not result:
            break
        # Build the URL of the next page from the announced number.
        url = base_url + result[0]
        print(url)

    links = re.findall(next_level_pattern, page_text, re.S)
    if links:
        print('Passed, next level: http://www.heibanke.com' + links[0])
    else:
        # Neither a number nor a next-level button: report it rather than
        # failing with an IndexError on an empty match list.
        print('No number and no next-level link found at ' + url)

if __name__ == '__main__':
    # Script entry point: start at the lesson's landing page and let
    # get_num follow the chain of numbered pages from there.
    get_num('http://www.heibanke.com/lesson/crawler_ex00/')

 

Guess you like

Origin www.cnblogs.com/tianleblog/p/11672675.html