# Scrape headline news from the Ifeng finance API and the linked article pages,
# appending each record to ./news.txt.  Reconstructed from a machine-garbled
# paste; element class names are taken verbatim from the original source.
import requests
from selenium import webdriver
import time


def grasp(urlT):
    """Fetch up to 29 news items from the JSON API at *urlT*, scrape each
    article body with Selenium, and append the records to ./news.txt.

    Articles whose body element cannot be found are treated as "nested"
    topic pages: the links they contain are collected and scraped in a
    second pass.

    Parameters
    ----------
    urlT : str
        URL of the news-index JSON API endpoint.
    """
    # NOTE(review): find_element_by_* was removed in Selenium 4; this code
    # targets Selenium 3.  The path below is the local chromedriver binary.
    driver = webdriver.Chrome(r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe')
    try:
        resAll = []   # records accumulated for the current write batch
        urls = []     # article links harvested from "nested" topic pages
        # Parse the JSON payload ONCE (the original re-parsed it per field).
        data = requests.get(urlT).json()['data']

        # First pass: the API index itself.  The original hard-coded 29 items;
        # clamp to the payload size so short responses don't raise IndexError.
        for i in range(min(29, len(data))):
            item = data[i]
            print(f'第{i+1}条新闻开始')
            print(item['title'])
            # 'newsTime' is absent from some items.
            print(item.get('newsTime', 'None'))
            print(item['source'])

            # Build a FRESH dict every iteration -- the original reused one
            # dict, so every element of resAll aliased the same object.
            rest = {
                'title': item['title'],
                'newsTime': item.get('newsTime', 'None'),
                'source': item['source'],
                'url': item['url'],
            }

            try:
                driver.get(item['url'])
                time.sleep(4)
                contend = driver.find_element_by_class_name('text-3zQ3cZD4').text
                rest['contend'] = str(contend)
                print(f'第{i+1}条新闻结束')
                time.sleep(6)
            except Exception:
                # No article body found: treat as a nested topic page.  Record
                # the marker and harvest its links for the second pass.
                rest['contend'] = '嵌套'
                time.sleep(6)
                paragraphs = driver.find_elements_by_xpath("//p[@class='text-3YbAxaNR']")
                links = driver.find_elements_by_xpath("//p[@class='text-3YbAxaNR']/a")
                # The original indexed links with len(paragraphs); clamp so a
                # mismatch between the two lists cannot raise IndexError.
                for link in links[:len(paragraphs)]:
                    href = link.get_attribute('href')
                    if href:
                        urls.append(str(href))
                print(urls)

            resAll.append(rest)
            with open('./news.txt', 'a+', encoding='utf-8') as f:
                try:
                    # str() each value defensively; ''.join raises on non-str.
                    f.write(''.join(str(v) for v in rest.values()) + '\n')
                except Exception:
                    print('写入失败')

        # Second pass: scrape the article pages collected from nested topics.
        resAll.clear()
        print(urls)
        for link in urls:
            try:
                driver.get(link)
                rest = {
                    'title1': driver.find_element_by_class_name('topic-3bY8Hw-9').text,
                    'source1': driver.find_element_by_class_name('source-2pXi2vGI').text,
                    'newsTime1': driver.find_element_by_xpath('//p[@class="time-hm3v7ddj"]/span').text,
                    'contend1': driver.find_element_by_class_name('text-3zQ3cZD4').text,
                }
                resAll.append(rest)
                time.sleep(4)
                with open('./news.txt', 'a+', encoding='utf-8') as f:
                    time.sleep(5)
                    f.write(''.join(str(v) for v in rest.values()) + '\n')
            except Exception:
                # Server throttled/blocked the request; skip this link.
                print('内容太多，被服务器禁止')
    finally:
        driver.quit()  # the original leaked the browser process


if __name__ == '__main__':
    url = "https://shankapi.ifeng.com/spring/finance/index/newInfoIndex/75219"  # finance API
    grasp(url)
This script retrieves news information, including articles nested inside topic pages, from the website's API and can be used directly as-is.