PhantomJS Download: http://phantomjs.org/download.html
Basic usage:

```python
from selenium import webdriver
# Keys is needed to simulate keyboard key presses
from selenium.webdriver.common.keys import Keys
import time

# Create a browser object for the PhantomJS executable at the given path
driver_path = r'I:\reptile_demo\phantomjs-2.1.1-windows\bin\phantomjs.exe'
driver = webdriver.PhantomJS(executable_path=driver_path)
# If PhantomJS is on the PATH environment variable, the path can be omitted:
# driver = webdriver.PhantomJS(executable_path="./phantomjs")

# get() waits until the page is fully loaded before the program continues;
# in testing, a short sleep is usually added here as well
driver.get("http://www.baidu.com/")
time.sleep(2)

# Get the text of the element with id="wrapper" and print it
data = driver.find_element_by_id("wrapper").text
print(data)

# Print the page title
print(driver.title)

# Take a snapshot of the current page and save it
driver.save_screenshot("baidu.png")

# id="kw" is Baidu's search input box; type the string "街拍" (street snap)
driver.find_element_by_id("kw").send_keys("街拍")
# id="su" is Baidu's search button; click() simulates a mouse click
driver.find_element_by_id("su").click()

# Take a snapshot of the new page
driver.save_screenshot("街拍.png")

# Print the rendered page source
print(driver.page_source)
# Print the current page's cookies
print(driver.get_cookies())

# Ctrl+A selects the input box contents
driver.find_element_by_id("kw").send_keys(Keys.CONTROL, 'a')
# Ctrl+X cuts the selected contents
driver.find_element_by_id("kw").send_keys(Keys.CONTROL, 'x')
# Type new content into the input box
driver.find_element_by_id("kw").send_keys("atguigu")
# Simulate the Enter key
driver.find_element_by_id("su").send_keys(Keys.RETURN)
# Clear the input box
driver.find_element_by_id("kw").clear()
# Take another snapshot of the page
driver.save_screenshot("atguigu.png")

# Print the current URL
print(driver.current_url)

# close() closes the current page; if it is the only page, the browser exits
# driver.close()
# quit() shuts down the browser
driver.quit()
```
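The fixed `time.sleep(2)` above is fragile: it wastes time when the page loads quickly and fails when it loads slowly. Selenium's `WebDriverWait` solves this by polling a condition until a timeout. The idea can be sketched in pure Python (the helper name `wait_until` and the stand-in condition are made up for illustration, not part of Selenium):

```python
import time

def wait_until(condition, timeout=10.0, poll=0.5):
    """Poll `condition` until it returns a truthy value or `timeout` elapses.

    Mirrors the idea behind Selenium's WebDriverWait: instead of sleeping a
    fixed amount, keep checking and return as soon as the page is ready.
    """
    deadline = time.monotonic() + timeout
    while True:
        result = condition()
        if result:
            return result
        if time.monotonic() >= deadline:
            raise TimeoutError("condition not met within %.1fs" % timeout)
        time.sleep(poll)

# Stand-in condition: "the element appears" on the third poll
state = {"calls": 0}
def fake_element_present():
    state["calls"] += 1
    return "element" if state["calls"] >= 3 else None

print(wait_until(fake_element_present, timeout=5, poll=0.01))  # -> element
```

In real Selenium code the condition would be a lambda that calls `driver.find_element_by_id(...)` and returns the element once it exists.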
Locating elements
find_element_by_id()
find_element_by_name()
find_element_by_class_name()
find_element_by_tag_name()
find_element_by_link_text()
find_element_by_partial_link_text()
find_element_by_xpath()
find_element_by_css_selector()
Notes:
1. find_element_by_xxx finds the first element that matches; find_elements_by_xxx finds all matching elements.
2. Lookups by ID, CSS selector, and XPath all return results of exactly the same kind.
3. In addition, Selenium provides the generic method find_element(), which takes two arguments: the locating strategy (By) and the value. It is the generic version of the find_element_by_* methods; for example, find_element_by_id(id) is equivalent to find_element(By.ID, id), and both return identical results.
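The relationship described in note 3 can be illustrated with a small dispatch sketch. This is a toy model of the design, not Selenium's actual implementation; `FakeDriver` and its contents are made up for the example:

```python
# Locator strategies, mirroring selenium.webdriver.common.by.By
class By:
    ID = "id"
    NAME = "name"
    XPATH = "xpath"
    CSS_SELECTOR = "css selector"

class FakeDriver:
    """Toy driver: find_element(by, value) is the one generic entry point;
    the find_element_by_* shorthands just fill in the strategy for you."""
    def __init__(self, elements):
        # elements: dict mapping (strategy, value) -> element
        self.elements = elements

    def find_element(self, by, value):
        return self.elements[(by, value)]

    # Shorthand wrappers, each equivalent to one generic call
    def find_element_by_id(self, value):
        return self.find_element(By.ID, value)

    def find_element_by_name(self, value):
        return self.find_element(By.NAME, value)

driver = FakeDriver({(By.ID, "kw"): "<input id='kw'>"})
# The shorthand and the generic form return the identical result
assert driver.find_element_by_id("kw") == driver.find_element(By.ID, "kw")
print(driver.find_element_by_id("kw"))  # -> <input id='kw'>
```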
Crawling Douban
```python
from selenium import webdriver
import time

if __name__ == '__main__':
    # This page's URL loads more data dynamically as it is scrolled
    url = 'https://movie.douban.com/typerank?type_name=%E6%81%90%E6%80%96&type=20&interval_id=100:90&action='
    path = r'I:\reptile_demo\phantomjs-2.1.1-windows\bin\phantomjs.exe'
    # Create a headless browser object
    bro = webdriver.PhantomJS(path)
    # Issue the request for the url
    bro.get(url)
    time.sleep(3)
    # Screenshot of the initial page
    bro.save_screenshot('1.png')
    # JS code that scrolls the scrollbar to the bottom of the page
    # (scrolling down triggers dynamic loading of more movie data)
    js = 'window.scrollTo(0, document.body.scrollHeight)'
    # execute_script() runs JS code passed in as a string
    bro.execute_script(js)
    time.sleep(2)
    bro.execute_script(js)
    time.sleep(2)
    bro.save_screenshot('2.png')
    time.sleep(2)
    # page_source holds the rendered source (HTML) of the current page
    html_source = bro.page_source
    with open('./source.html', 'w', encoding='utf-8') as fp:
        fp.write(html_source)
    bro.quit()
```
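The script above scrolls a fixed two times, which may load too little or too much. A common refinement is to keep scrolling until `document.body.scrollHeight` stops growing. Here is a sketch of that loop against a stand-in driver (the `FakeDriver` class and its behavior are invented for the example; with Selenium you would pass the real driver and add a `time.sleep` between scrolls):

```python
class FakeDriver:
    """Stand-in for a real webdriver: each scroll 'loads' more content
    until the page stops growing."""
    def __init__(self, heights):
        self.heights = iter(heights)
        self.height = next(self.heights)

    def execute_script(self, js):
        if js == 'return document.body.scrollHeight':
            return self.height
        if js == 'window.scrollTo(0, document.body.scrollHeight)':
            # Simulate dynamic loading: the page grows after each scroll
            self.height = next(self.heights, self.height)

def scroll_to_bottom(driver, max_rounds=20):
    """Scroll until document.body.scrollHeight stops increasing."""
    last = driver.execute_script('return document.body.scrollHeight')
    for _ in range(max_rounds):
        driver.execute_script('window.scrollTo(0, document.body.scrollHeight)')
        new = driver.execute_script('return document.body.scrollHeight')
        if new == last:
            break  # no more content was loaded
        last = new
    return last

driver = FakeDriver([1000, 2500, 4000, 4000])
print(scroll_to_bottom(driver))  # -> 4000
```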