Selenium 学习《Python3网络爬虫开发实战》

仅做记录

#动态渲染页面爬取(selenium)

#1)模块导入

import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

# 2) Declare the browser object
# Every webdriver.<Browser>() call launches a real browser process, so start
# only the one you intend to drive; rebinding `browser` several times in a row
# would orphan each previously opened window.
# Other constructors that can be used instead: webdriver.Firefox(),
# webdriver.Edge(), webdriver.Safari().
# webdriver.PhantomJS() is only available in Selenium 3.x and earlier
# (PhantomJS support was dropped in Selenium 4).
browser = webdriver.Chrome()

# 3) Visit a page
# Load the JD home page in the current window, then close that window.
# NOTE(review): the sections below keep using `browser` after this close();
# if these notes are meant to run top to bottom, the close() belongs at the
# very end instead -- TODO confirm intent.
jd_home = 'https://www.jd.com'
browser.get(jd_home)
browser.close()

# 4) Locating nodes
# Single node: find_element(By.<strategy>, value) is the generic locator API.
# It works on both Selenium 3 and 4, whereas the find_element_by_* shortcuts
# were removed in Selenium 4.3.
# The same element is located three ways: by id, by CSS selector and by XPath.
input_first = browser.find_element(By.ID, 'q')
# '#q' is a CSS selector (id == "q"), so it must go through By.CSS_SELECTOR,
# not through a class-name lookup.
input_second = browser.find_element(By.CSS_SELECTOR, '#q')
input_third = browser.find_element(By.XPATH, '//*[@id="q"]')

# Multiple nodes: find_elements returns a list of every match.
lis = browser.find_elements(By.CSS_SELECTOR, '.service-bd li')

# 5) Interacting with nodes
# Type a keyword into the search box, then click the search button.
# `search_box` replaces the former name `input`, which shadowed the builtin.
search_box = browser.find_element(By.ID, 'q')
search_box.send_keys('iPhone')
button = browser.find_element(By.CLASS_NAME, 'btn-search')
button.click()

# 6) Action chains
url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
browser.get(url)
# The elements are looked up inside the 'iframeResult' frame, so switch into
# it before locating them.
browser.switch_to.frame('iframeResult')
source = browser.find_element(By.CSS_SELECTOR, '#draggable')
target = browser.find_element(By.CSS_SELECTOR, '#droppable')
# Actions are only queued on the chain; perform() actually executes them.
actions = ActionChains(browser)
actions.drag_and_drop(source, target)
actions.perform()

# 7) Executing JavaScript
# Run the snippets in order: scroll to the bottom of the page, then pop up
# an alert.
for script in (
    'window.scrollTo(0, document.body.scrollHeight)',
    'alert("To Bottom")',
):
    browser.execute_script(script)

# 8) Reading node information
# Attributes: get_attribute(name) reads one attribute of the element.
logo = browser.find_element(By.ID, 'zh-top-link-logo')
print(logo)
print(logo.get_attribute('class'))

# Text value, then id, location, tag name and size.
# A single lookup is enough: the same element serves every read below.
# (`question_button` replaces the former name `input`, which shadowed the
# builtin.)
question_button = browser.find_element(By.CLASS_NAME, 'zu-top-add-question')
print(question_button.text)
print(question_button.id)
print(question_button.location)
print(question_button.tag_name)
print(question_button.size)

# 9) Switching frames
browser = webdriver.Chrome()
url = 'https://mail.163.com/'
browser.get(url)

# Look the element up from inside the child frame first; a miss is reported
# instead of aborting the script.
browser.switch_to.frame('scoreIndexPopIfm')
try:
    logo = browser.find_element(By.ID, 'scoreIndexPop')
except NoSuchElementException:
    print('NO LOGO')
# Go back to the parent frame and repeat the lookup there. This one is not
# guarded, so a miss raises NoSuchElementException.
browser.switch_to.parent_frame()
logo = browser.find_element(By.ID, 'scoreIndexPop')
print(logo)
print(logo.text)

# 10) Waiting
# Implicit wait (mind where it is placed): configure it right after creating
# the driver, before the lookups it is supposed to cover.
browser = webdriver.Chrome()
browser.implicitly_wait(10)
url = 'https://www.zhihu.com/explore'
browser.get(url)
# `question_button` replaces the former name `input`, which shadowed the
# builtin.
question_button = browser.find_element(By.CLASS_NAME, 'zu-top-add-question')
print(question_button)

# Explicit wait
browser = webdriver.Chrome()
browser.get('https://www.jd.com')
wait = WebDriverWait(browser, 10)
# presence_of_element_located yields the single matching element;
# presence_of_all_elements_located would hand back a list instead, which is
# not what an ID lookup for one search box wants.
search_box = wait.until(EC.presence_of_element_located((By.ID, 'key')))
button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '.button')))
print(search_box, button)

# Wait conditions
# presence_of_element_located: the node has been loaded; takes a locator
#     tuple such as (By.ID, 'p').
# element_to_be_clickable: the node is clickable; also takes a locator tuple.

# 11) Back and forward
# Visit three pages in a row, step back one entry in the history, pause for a
# second, then step forward again and close the window.
browser.get('https://www.baidu.com')
browser.get('https://www.jd.com')
browser.get('https://www.python.org')
browser.back()
time.sleep(1)
browser.forward()
browser.close()

# 12) Cookies: read, add and delete cookies
browser = webdriver.Firefox()
browser.get('https://www.zhihu.com/explore')
print(browser.get_cookies())
new_cookie = {'name': 'name', 'domain': 'www.zhihu.com', 'value': 'germy'}
browser.add_cookie(new_cookie)
print(browser.get_cookies())
browser.delete_all_cookies()
print(browser.get_cookies())  # the cookies have been cleared

# 13) Tab management
# Open a new tab in the browser.
browser.execute_script('window.open()')
# The window_handles attribute lists the handles of all currently open tabs.
print(browser.window_handles)
# Switch to the second tab and open Taobao in it.
second_tab = browser.window_handles[1]
browser.switch_to.window(second_tab)
browser.get('https://www.taobao.com')
time.sleep(1)
# Switch back to the first tab and open the Python website in it.
first_tab = browser.window_handles[0]
browser.switch_to.window(first_tab)
browser.get('https://www.python.org')

# 14) Exception handling
browser = webdriver.Chrome()
# One outer try/finally guards both steps, so the window is closed even when
# the page load fails with something other than TimeoutException.
try:
    try:
        browser.get('https://www.baidu.com')
    except TimeoutException:
        print('Time out')
    try:
        browser.find_element(By.ID, 'hello')
    except NoSuchElementException:
        print('No Element')
finally:
    browser.close()

猜你喜欢

转载自blog.csdn.net/sisqzy86/article/details/84203409