Crawler notes: Detailed explanation of Python Selenium

1. Introduction
Selenium is an automated testing tool for websites. It supports a variety of browsers, including mainstream graphical browsers such as Chrome, Firefox, and Safari, as well as headless (interface-less) browsers such as PhantomJS.
It supports multiple operating systems, such as Windows, Linux, iOS, and Android.
2. Install
pip install Selenium
3. Install the browser driver
Starting with Selenium 3.0, browser drivers are standardized. To drive a particular browser with Selenium, you must download and set up that browser's driver separately.

Download address of each browser:

Firefox browser driver: https://github.com/mozilla/geckodriver/releases
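Chrome browser driver: https://chromedriver.chromium.org/downloads (link 1)
https://chromedriver.storage.googleapis.com/index.html (link 2)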

IE browser driver: http://selenium-release.storage.googleapis.com/index.html
Edge browser driver: https://developer.microsoft.com/en-us/microsoft-edge/tools/webdriver/
Opera Browser driver: https://github.com/operasoftware/operachromiumdriver/releases
PhantomJS browser driver: https://phantomjs.org/

1. Basic use

from selenium import webdriver  # browser driver entry point
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

# Raw string: the backslashes of a Windows path must not be parsed as escapes.
# NOTE: passing the driver path positionally is the Selenium 3 style; recent
# Selenium 4 releases expect webdriver.Chrome(service=Service(path)) instead.
path = r'D:\chromedriver_win32\chromedriver.exe'  # chromedriver location
browser = webdriver.Chrome(path)  # start a Chrome session
try:
    browser.get('https://www.baidu.com')  # open the page
    # Named search_box rather than `input` so the builtin is not shadowed.
    search_box = browser.find_element(By.ID, 'kw')
    search_box.send_keys('Python')  # type "Python" into the search box
    search_box.send_keys(Keys.ENTER)  # press Enter to submit
    wait = WebDriverWait(browser, 10)  # poll for at most 10 seconds
    # Block until the element with id "content_left" is present in the DOM.
    wait.until(EC.presence_of_element_located((By.ID, 'content_left')))
    print(browser.current_url)
    print(browser.get_cookies())
    print(browser.page_source)  # HTML source of the page
finally:
    # quit() rather than close(): it ends the session and also stops the
    # driver process instead of only closing the current window.
    browser.quit()

2. Declare the browser object

from selenium import webdriver

# Each line below starts a session with a different browser and rebinds
# `browser`; they are alternatives, so keep only the one you need.
browser = webdriver.Chrome()
browser = webdriver.Firefox()
browser = webdriver.Edge()
browser = webdriver.PhantomJS()
browser = webdriver.Safari()

3. Visit the page

from selenium import webdriver

# Raw string: the backslashes of a Windows path must not be parsed as escapes.
path = r'D:\chromedriver_win32\chromedriver.exe'  # chromedriver location
browser = webdriver.Chrome(path)
browser.get('https://www.taobao.com')
print(browser.page_source)  # print the HTML source of the page
# browser.close()  # uncomment to close the window when done

The browser will automatically open the Taobao page. If you want it to close afterwards, add browser.close() at the end;
in that case the Taobao page will just flash by before the window closes.

page_source

Find element
4. Find a single element

from selenium import webdriver
from selenium.webdriver.common.by import By

# Raw string: the backslashes of a Windows path must not be parsed as escapes.
path = r'D:\chromedriver_win32\chromedriver.exe'  # chromedriver location
browser = webdriver.Chrome(path)
try:
    browser.get('https://www.taobao.com')
    # Three equivalent ways to locate the same element (id "q"): by id, by
    # CSS selector and by XPath. The find_element_by_* shortcuts do the same
    # thing but are deprecated and no longer exist in recent Selenium 4.
    input_first = browser.find_element(By.ID, 'q')
    input_second = browser.find_element(By.CSS_SELECTOR, '#q')
    input_third = browser.find_element(By.XPATH, '//*[@id="q"]')
    print(input_first, input_second, input_third)
finally:
    # Always release the browser, even if a lookup raises.
    browser.quit()

Positioning element method

General method

from selenium import webdriver
from selenium.webdriver.common.by import By

# Raw string: the backslashes of a Windows path must not be parsed as escapes.
path = r'D:\chromedriver_win32\chromedriver.exe'  # chromedriver location
browser = webdriver.Chrome(path)
try:
    browser.get('https://www.taobao.com')
    input_first = browser.find_element(By.ID, 'q')  # locate by id
    print(input_first)
finally:
    # Always release the browser, even if the lookup raises.
    browser.quit()

5. Find multiple elements

from selenium import webdriver
from selenium.webdriver.common.by import By

# Raw string: the backslashes of a Windows path must not be parsed as escapes.
path = r'D:\chromedriver_win32\chromedriver.exe'  # chromedriver location
browser = webdriver.Chrome(path)
try:
    browser.get('https://www.taobao.com')
    # find_elements (plural) returns a list of every match; the deprecated
    # find_elements_by_css_selector shortcut is equivalent.
    lis = browser.find_elements(By.CSS_SELECTOR, '.service-bd li')
    print(lis)
finally:
    # Always release the browser, even if the lookup raises.
    browser.quit()

This selects the entries (li elements) of the left-hand navigation bar.

General writing

from selenium import webdriver
from selenium.webdriver.common.by import By

# Raw string: the backslashes of a Windows path must not be parsed as escapes.
path = r'D:\chromedriver_win32\chromedriver.exe'  # chromedriver location
browser = webdriver.Chrome(path)
try:
    browser.get('https://www.taobao.com')
    # Generic form: pass the locator strategy and the selector separately.
    lis = browser.find_elements(By.CSS_SELECTOR, '.service-bd li')
    print(lis)
finally:
    # Always release the browser, even if the lookup raises.
    browser.quit()

Element interaction operation
6. Call the interaction method on the acquired element

import time

from selenium import webdriver
from selenium.webdriver.common.by import By

# Raw string: the backslashes of a Windows path must not be parsed as escapes.
path = r'D:\chromedriver_win32\chromedriver.exe'  # chromedriver location
browser = webdriver.Chrome(path)
browser.get('https://www.taobao.com')
# Named search_box rather than `input` so the builtin is not shadowed.
search_box = browser.find_element(By.ID, 'q')  # grab the search input box
search_box.send_keys('iPhone')  # type "iPhone" into the box
time.sleep(1)  # pause for one second
search_box.clear()  # clear the box
search_box.send_keys('iPad')  # type "iPad" instead
button = browser.find_element(By.CLASS_NAME, 'btn-search')  # search button
button.click()  # click it

The result is shown in the figure

Inspecting the page shows that the id of the input box is q, and

the class of the search button is btn-search.

More operations: http://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.remote.webelement

Interactive operation
7. Interactive action, attach the action to the action chain for serial execution

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By

# Raw string: the backslashes of a Windows path must not be parsed as escapes.
path = r'D:\chromedriver_win32\chromedriver.exe'  # chromedriver location
browser = webdriver.Chrome(path)
url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
browser.get(url)
# The draggable demo lives inside an iframe, so switch into it first.
browser.switch_to.frame('iframeResult')
source = browser.find_element(By.CSS_SELECTOR, '#draggable')
target = browser.find_element(By.CSS_SELECTOR, '#droppable')
# Actions are queued on the chain and only executed by perform().
actions = ActionChains(browser)
actions.drag_and_drop(source, target)
actions.perform()

More actions

Execute JavaScript

from selenium import webdriver

# Raw string: the backslashes of a Windows path must not be parsed as escapes.
path = r'D:\chromedriver_win32\chromedriver.exe'  # chromedriver location

browser = webdriver.Chrome(path)
browser.get('https://www.zhihu.com/explore')
# Run JavaScript in the page: scroll to the bottom, then show an alert.
browser.execute_script('window.scrollTo(0, document.body.scrollHeight)')
browser.execute_script('alert("To Bottom")')

Get element information
9. Get attributes

from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By

# Raw string: the backslashes of a Windows path must not be parsed as escapes.
path = r'D:\chromedriver_win32\chromedriver.exe'  # chromedriver location

browser = webdriver.Chrome(path)
url = 'https://blog.csdn.net/KOBEYU652453/article/details/113743933'
browser.get(url)
article = browser.find_element(By.ID, 'article_content')  # locate by id
print(article)
print(article.get_attribute('class'))  # read the element's class attribute

result

Class in the webpage

10. Get the text value

from selenium import webdriver
from selenium.webdriver.common.by import By

# Raw string: the backslashes of a Windows path must not be parsed as escapes.
path = r'D:\chromedriver_win32\chromedriver.exe'  # chromedriver location

browser = webdriver.Chrome(path)
url = 'https://blog.csdn.net/KOBEYU652453/article/details/113743933'
browser.get(url)

# Named `article` rather than `input` so the builtin is not shadowed.
article = browser.find_element(By.CLASS_NAME, 'article_content')
print(article.text)  # text content of the element

11. Get ID, location, tag name, size

from selenium import webdriver
from selenium.webdriver.common.by import By

# Raw string: the backslashes of a Windows path must not be parsed as escapes.
path = r'D:\chromedriver_win32\chromedriver.exe'  # chromedriver location

browser = webdriver.Chrome(path)
url = 'https://blog.csdn.net/KOBEYU652453/article/details/113743933'
browser.get(url)

# Named `article` rather than `input` so the builtin is not shadowed.
article = browser.find_element(By.CLASS_NAME, 'article_content')
print(article.id)        # WebDriver's internal id for the element
print(article.location)  # position of the element on the page
print(article.tag_name)  # tag name of the element
print(article.size)      # size of the element

Frame

import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

# Raw string: the backslashes of a Windows path must not be parsed as escapes.
path = r'D:\chromedriver_win32\chromedriver.exe'  # chromedriver location

browser = webdriver.Chrome(path)
url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
browser.get(url)
# Enter the iframe: lookups now only see elements inside that frame.
browser.switch_to.frame('iframeResult')
source = browser.find_element(By.CSS_SELECTOR, '#draggable')
print(source)
try:
    # Looked up while still inside the iframe; prints 'NO LOGO' if the
    # element is not found there.
    logo = browser.find_element(By.CLASS_NAME, 'logo')
except NoSuchElementException:
    print('NO LOGO')
# Back in the parent frame, repeat the same lookup.
browser.switch_to.parent_frame()
logo = browser.find_element(By.CLASS_NAME, 'logo')
print(logo)
print(logo.text)

Implicit wait

With an implicit wait, if WebDriver does not find an element in the DOM right away, it keeps trying until the element appears or the configured time runs out; once the time is exceeded, it throws an exception saying the element cannot be found. In other words, when an element does not appear immediately, the implicit wait makes WebDriver wait for a while and search the DOM again before giving up. The default wait time is 0.

from selenium import webdriver
from selenium.webdriver.common.by import By

# Raw string: the backslashes of a Windows path must not be parsed as escapes.
path = r'D:\chromedriver_win32\chromedriver.exe'  # chromedriver location

browser = webdriver.Chrome(path)

# Element lookups on this session now wait up to 10 seconds before failing.
browser.implicitly_wait(10)
browser.get('https://www.zhihu.com/explore')
# Named `element` rather than `input` so the builtin is not shadowed.
element = browser.find_element(By.CLASS_NAME, 'zu-top-add-question')
print(element)

Explicit wait

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Raw string: the backslashes of a Windows path must not be parsed as escapes.
path = r'D:\chromedriver_win32\chromedriver.exe'  # chromedriver location

browser = webdriver.Chrome(path)
browser.get('https://www.taobao.com/')
wait = WebDriverWait(browser, 10)  # each until() call waits at most 10 s
# until() returns the element once the condition holds.
# Named search_box rather than `input` so the builtin is not shadowed.
search_box = wait.until(EC.presence_of_element_located((By.ID, 'q')))
button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn-search')))
print(search_box, button)

The waiting conditions are as follows

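* title_is — the page title equals the given text
* title_contains — the page title contains the given text
* presence_of_element_located — the element is present in the DOM; takes a locator tuple such as (By.ID, 'p')
* visibility_of_element_located — the element is visible; takes a locator tuple
* visibility_of — the element is visible; takes an element object
* presence_of_all_elements_located — the elements matching the locator are loaded
* text_to_be_present_in_element — the element's text contains the given text
* text_to_be_present_in_element_value — the element's value contains the given text
* frame_to_be_available_and_switch_to_it — the frame is loaded, and the driver switches to it
* invisibility_of_element_located — the element is not visible
* element_to_be_clickable — the element can be clicked
* staleness_of — the element is no longer attached to the DOM; useful for telling whether the page has been refreshed
* element_to_be_selected — the element is selected; takes an element object
* element_located_to_be_selected — the element is selected; takes a locator tuple
* element_selection_state_to_be — takes an element object and a state; returns True if they match, otherwise False
* element_located_selection_state_to_be — takes a locator tuple and a state; returns True if they match, otherwise False
* alert_is_present — an alert is currently shown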

Wait for details

forward, backward

import time

from selenium import webdriver

# Raw string: the backslashes of a Windows path must not be parsed as escapes.
path = r'D:\chromedriver_win32\chromedriver.exe'  # chromedriver location
browser = webdriver.Chrome(path)
try:
    # Visit three pages in a row to build up a navigation history.
    browser.get('https://www.baidu.com/')
    browser.get('https://www.taobao.com/')
    browser.get('https://www.python.org/')
    browser.back()  # go back one page in the history
    time.sleep(1)
    browser.forward()  # go forward again
finally:
    # Always release the browser, even if navigation raises.
    browser.quit()

Cookies

from selenium import webdriver

# Raw string: the backslashes of a Windows path must not be parsed as escapes.
path = r'D:\chromedriver_win32\chromedriver.exe'  # chromedriver location
browser = webdriver.Chrome(path)
browser.get('https://www.zhihu.com/explore')
print(browser.get_cookies())  # cookies set by the page
browser.add_cookie({
    'name': 'name',
    'domain': 'www.zhihu.com',
    'value': 'germey',
})  # add a cookie of our own
print(browser.get_cookies())
browser.delete_all_cookies()  # remove every cookie
print(browser.get_cookies())

Tab management

import time

from selenium import webdriver

# Raw string: the backslashes of a Windows path must not be parsed as escapes.
path = r'D:\chromedriver_win32\chromedriver.exe'  # chromedriver location
browser = webdriver.Chrome(path)
browser.get('https://www.baidu.com')
browser.execute_script('window.open()')  # open a second tab via JavaScript
print(browser.window_handles)
# switch_to.window replaces the deprecated switch_to_window method and
# matches the switch_to.frame style used for frames.
browser.switch_to.window(browser.window_handles[1])
browser.get('https://www.taobao.com')
time.sleep(1)
browser.switch_to.window(browser.window_handles[0])
browser.get('https://python.org')

Exception handling

from selenium import webdriver
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from selenium.webdriver.common.by import By

# Raw string: the backslashes of a Windows path must not be parsed as escapes.
path = r'D:\chromedriver_win32\chromedriver.exe'  # chromedriver location

browser = webdriver.Chrome(path)
# One outer try/finally so the browser is released whichever step fails;
# previously an unexpected error in the first step skipped the cleanup.
try:
    try:
        browser.get('https://www.baidu.com')
    except TimeoutException:
        print('Time Out')
    try:
        browser.find_element(By.ID, 'hello')
    except NoSuchElementException:
        print('No Element')
finally:
    # quit() rather than close(): it also stops the driver process.
    browser.quit()

Detailed documentation
http://selenium-python.readthedocs.io/api.html#module-selenium.common.exceptions

Insert picture description here

Crawler notes: Detailed explanation of Python Selenium

Guess you like