selenium + headless browser

PhantomJS is a headless browser. Previously, we used Selenium to drive PhantomJS in order to load dynamically loaded data.

PhantomJS is no longer being updated, but you can use Google Chrome in headless mode in place of PhantomJS to accomplish the same thing.

Example code using headless Google Chrome:

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from time import sleep
# Configure Chrome to run without a visible window.
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

# Pass the options via `options=`: the older `chrome_options=` keyword was
# deprecated and is no longer accepted by recent Selenium 4 releases.
bro = webdriver.Chrome(options=chrome_options)
try:
    bro.get('https://www.baidu.com')
    # Crude fixed wait before reading the page.
    sleep(3)

    print(bro.page_source)
    bro.save_screenshot('1.png')
finally:
    # Always end the driver session, even if one of the steps above fails,
    # so no browser/driver process is left running.
    bro.quit()

 

Performing a scroll-down operation

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time

# Configure Chrome to run without a visible window.
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

# Pass the options via `options=`: the older `chrome_options=` keyword was
# deprecated and is no longer accepted by recent Selenium 4 releases.
bro = webdriver.Chrome(options=chrome_options)
try:
    bro.get(url='https://movie.douban.com/typerank?type_name=%E7%88%B1%E6%83%85&type=13&interval_id=100:90&action=')

    time.sleep(3)

    # Screenshot before scrolling. NOTE: the 'baidu' directory must already
    # exist; save_screenshot() reports failure by returning False.
    bro.save_screenshot('baidu/aiqing.png')

    # Have the browser execute JavaScript directly to simulate scrolling
    # down to the bottom of the page.
    js = 'window.scrollBy(500,100000)'
    bro.execute_script(js)
    # Fixed wait after scrolling, before capturing the page again.
    time.sleep(3)

    # Screenshot after scrolling, for comparison with the first one.
    bro.save_screenshot('baidu/aiqing2.png')

    # Save the page source to a file.
    html = bro.page_source
    with open('douban.html', 'w', encoding='utf8') as f:
        f.write(html)
finally:
    # Always end the driver session, even if one of the steps above fails.
    bro.quit()

Crawling lazy-loaded images with Selenium by scrolling down

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time

# Configure Chrome to run without a visible window.
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

# Pass the options via `options=`: the older `chrome_options=` keyword was
# deprecated and is no longer accepted by recent Selenium 4 releases.
bro = webdriver.Chrome(options=chrome_options)
try:
    bro.get(url='http://sc.chinaz.com/tupian/ribenmeinv.html')
    time.sleep(2)

    # Snapshot of the page source before scrolling.
    with open('lanjiazai.html', 'w', encoding='utf8') as f:
        f.write(bro.page_source)
    # Optional: bro.save_screenshot('lanjiazai.png')

    # Scroll down, then wait before taking the second snapshot.
    bro.execute_script('window.scrollBy(0,10000)')
    time.sleep(3)

    # Snapshot of the page source after scrolling, for comparison with
    # lanjiazai.html.
    with open('lanjiazai2.html', 'w', encoding='utf8') as f:
        f.write(bro.page_source)
    # Optional: bro.save_screenshot('lanjiazai2.png')
    time.sleep(1)
finally:
    # quit() ends the whole driver session, matching the other examples;
    # close() only closes the current window.
    bro.quit()

 

Guess you like

Origin www.cnblogs.com/zhangjian0092/p/11407618.html