import time

import requests
from selenium import webdriver

# Local chromedriver binary used by Selenium to drive Chrome.
CHROMEDRIVER_PATH = r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe'
# JSON index of finance news articles.
NEWS_INDEX_URL = "https://shankapi.ifeng.com/spring/finance/index/newInfoIndex/75219"


def grasp(urlT):
    """Fetch the news index at ``urlT``, scrape each article's body text with
    Selenium, and append one record per article to ./news.txt.

    For every entry in the index JSON the title/time/source/url are collected;
    the article page is then opened in Chrome and the node with class
    'text-3zQ3cZD4' is read as the body. Articles whose page layout does not
    match are recorded without a body.
    """
    driver = webdriver.Chrome(CHROMEDRIVER_PATH)
    res_all = []  # accumulated per-article records
    try:
        # Parse the response once instead of calling res.json() per field.
        data = requests.get(urlT).json()['data']
        # Iterate over whatever the index actually returned (the original
        # hard-coded range(0, 29) and could raise IndexError).
        for i, item in enumerate(data):
            # Fresh dict per article: the original reused one shared dict,
            # so every appended record aliased the last article's data.
            rest = {}
            print(item['title'])
            # 'newsTime' is not present on every entry; fall back to 'None'
            # (the literal string, matching the original output).
            news_time = item.get('newsTime', 'None')
            print(news_time)
            print(item['source'])
            print(item['url'])
            rest['title'] = item['title']
            rest['newsTime'] = news_time
            rest['source'] = item['source']
            url = item['url']
            rest['url'] = url
            try:
                driver.get(url)
                time.sleep(4)  # allow the page to render before reading it
                contend = driver.find_element_by_class_name('text-3zQ3cZD4').text
                rest['contend'] = str(contend)
                print(contend)
                driver.back()
                time.sleep(6)
            except Exception:
                # Page layout differs from the expected template; keep the
                # metadata-only record and continue with the next article.
                print(f'News {i} failed')
                print('# ----------------------- some formats do not match ------------------------ #')
            res_all.append(rest)
            with open('./news.txt', 'a+', encoding='utf-8') as f:
                try:
                    f.write(''.join(res_all[i].values()) + '\n')
                except Exception:
                    print('写入失败')
    finally:
        driver.quit()  # always release the browser (original leaked it)


class Grasp:
    """Class-based variant of :func:`grasp` bound to ``NEWS_INDEX_URL``."""

    def __init__(self):
        # Selenium driver used to open each article page.
        self.driver = webdriver.Chrome(CHROMEDRIVER_PATH)
        self.resAll = []  # accumulated per-article records
        self.rest = {}    # most recently built record
        # Fetch the index once at construction time.
        self.res = requests.get(NEWS_INDEX_URL)

    def run(self):
        """Scrape every article in the fetched index into ./news.txt."""
        data = self.res.json()['data']  # parse the JSON once
        for i, item in enumerate(data):
            # Fresh dict per article (the original appended the same
            # self.rest object every iteration, aliasing all records).
            rest = {}
            print(item['title'])           # article headline
            news_time = item.get('newsTime', 'None')
            print(news_time)               # publication time (or 'None')
            print(item['source'])          # publishing source
            print(item['url'])             # article link
            rest['title'] = item['title']
            rest['newsTime'] = news_time
            rest['source'] = item['source']
            self.url = item['url']
            rest['url'] = self.url
            try:
                # Original navigated with a bare `url` name (undefined in
                # this scope); use the stored self.url instead.
                self.driver.get(self.url)
                time.sleep(4)  # allow the page to render
                self.contend = self.driver.find_element_by_class_name('text-3zQ3cZD4').text
                rest['contend'] = str(self.contend)
                print(f'News {i} succeeded')
                self.driver.back()
                time.sleep(4)
            except Exception:
                print(f'News {i} failed')
                print('# ----------------------- some formats do not match ------------------------ #')
            self.rest = rest
            self.resAll.append(rest)
            with open('./news.txt', 'a+', encoding='utf-8') as f:
                try:
                    f.write(f'News {i} start')
                    f.write(''.join(self.resAll[i].values()) + '\n')
                    f.write(f'News {i} end')
                except Exception:
                    print('写入失败')


if __name__ == "__main__":
    # Guarded so importing this module does not trigger network traffic.
    grasp(NEWS_INDEX_URL)

    g = Grasp()
    g.run()
The exact rules for writing the records to the file need to be decided by your own judgment.
Hope this helps everyone.