Applet 1: Use Selenium to collect the titles of all newly published articles on cnblogs (Blog Garden)

Idea: open the page with Selenium, read each article title by XPath, then click the "Next" link and repeat the cycle for every page.

 

from copy import copy
from time import sleep

from selenium import webdriver
from selenium.webdriver.common.by import By
 4  
5  
6  
def get_pro_titles(page):
    """Collect post titles from the cnblogs.com front-page listing.

    Opens https://www.cnblogs.com in a headless Chrome session, reads the
    20 title links of each listing page by XPath, and follows the pager's
    "Next >" link until ``page`` pages have been read.

    Args:
        page: Number of listing pages to read; any value ``int()`` accepts.

    Returns:
        dict: Maps the label ``'第{n}页'`` (n counted from 1) to the list of
        title strings read from that page, in on-page order.

    Raises:
        ValueError: If ``page`` cannot be converted to an integer.
        selenium.common.exceptions.NoSuchElementException: If a title link
            or the "Next >" pager link is not present in the page.
    """
    # Validate the argument before paying for a browser start-up.
    total_pages = int(page)

    title_xpath = '//*[@id="post_list"]/div[{}]/div[2]/h3/a'
    next_xpath = '//div[@id="pager_bottom"]/div/div/a[text()="Next >"]'

    all_title = dict()
    option = webdriver.ChromeOptions()
    option.add_argument('--headless')
    option.add_argument("--disable-gpu")
    option.add_argument("--window-size=1280,800")
    d = webdriver.Chrome(options=option)
    try:
        d.get('https://www.cnblogs.com')
        for i in range(1, total_pages + 1):
            # Wait once per page for the listing to render, instead of
            # sleeping before every single title lookup.
            sleep(2)
            one_page_title = []
            for j in range(1, 21):
                link = d.find_element(By.XPATH, title_xpath.format(j))
                one_page_title.append(link.text)
            # A fresh list is built on every pass, so it can be stored
            # directly without copying.
            all_title['第{}页'.format(i)] = one_page_title

            # Only navigate when another page is still wanted; clicking
            # after the last requested page is wasted work and fails when
            # no "Next >" link exists.
            if i < total_pages:
                sleep(2)
                # Scroll down so the bottom pager is in view before clicking.
                d.execute_script('document.documentElement.scrollTop=10000;')
                sleep(2)
                d.find_element(By.XPATH, next_xpath).click()
    finally:
        # Always shut the browser down, even when a lookup raised.
        d.quit()
    return all_title
34 
35 
# Demo run: scrape the first two listing pages and print the result.
# Guarded so that importing this module does not start a browser.
if __name__ == "__main__":
    print(get_pro_titles(2))

 

Guess you like

Origin www.cnblogs.com/qingy/p/11800800.html