selenium用法回顾

一、Selenium+PhantomJs

新版本的 Selenium 已不再支持 PhantomJS(自 3.8.1 起标记为弃用,4.0 起移除),建议改用无头模式的 Chrome 或 Firefox;详情参考 Selenium 官网。

from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

def getSource(url):
    """Load ``url`` in PhantomJS with custom request headers and return its HTML.

    Image loading is disabled for the session.

    Args:
        url: Address of the page to open.

    Returns:
        The value of ``driver.page_source`` after the page has been loaded.
    """
    # Request headers applied to every page request.
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.104 Safari/537.36 Core/1.53.4882.400 QQBrowser/9.7.13059.400',
        'referer':'http://www.taobao.com'
    }
    # Work on a copy so the shared DesiredCapabilities.PHANTOMJS dict is not modified.
    cap = DesiredCapabilities.PHANTOMJS.copy()

    for key, value in headers.items():
        cap['phantomjs.page.customHeaders.{}'.format(key)] = value

    # Do not load images.
    cap["phantomjs.page.settings.loadImages"] = False

    driver = webdriver.PhantomJS(desired_capabilities=cap)
    try:
        driver.get(url)
        return driver.page_source
    finally:
        # Always shut the browser process down, even if loading the page fails.
        driver.quit()

# Example call. `url` has to be bound before it is used, otherwise this line
# raises NameError; replace the address with the page you want to fetch.
url = "http://www.taobao.com"
getSource(url)

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
# Set the User-Agent on a private copy of the default PhantomJS capabilities.
dcap = dict(DesiredCapabilities.PHANTOMJS)
dcap["phantomjs.page.settings.userAgent"] = (
    "Mozilla/5.0 (Linux; Android 5.1.1; Nexus 6 Build/LYZ28E) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.23 Mobile Safari/537.36"
)
# Start PhantomJS exactly once, with the customised capabilities. Creating an
# extra default driver first and then rebinding the name would leave that first
# browser process running because nothing ever quits it.
driver = webdriver.PhantomJS(desired_capabilities=dcap)
try:
    driver.get("http://www.baidu.com")
    # assert u"百度" in driver.title
    elem = driver.find_element_by_name("wd")
    # elem.clear()
    # elem.send_keys(u"网络")
    # Press the Enter key
    # elem.send_keys(Keys.ENTER)
    # time.sleep(3)
    # assert u"网络爬虫." not in driver.page_source
    # driver.quit()
    # data = driver.page_source
    # Save the current page as an image
    # driver.save_screenshot("3.png")
    print(elem)
finally:
    # quit vs. close: quit() ends the whole WebDriver session and closes every
    # window, while close() only closes the current window.
    driver.quit()

二、Selenium+Chrome

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time

# Override Chrome's User-Agent.
options = webdriver.ChromeOptions()
# NOTE(review): "Accept-Language" and "Accept" do not look like recognised
# Chrome command-line switches, so these two lines probably have no effect on
# the request headers - confirm before relying on them.
options.add_argument("Accept-Language=zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3")
options.add_argument("Accept=text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
# No quotes around the value: the argument is not parsed by a shell, so quote
# characters would become part of the User-Agent string itself.
options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.103 Safari/537.36')

# Disable image loading.
# 1: allow all images; 2: block all images; 3: block third-party images
prefs = {"profile.managed_default_content_settings.images": 2}
options.add_experimental_option("prefs", prefs)

driver = webdriver.Chrome(chrome_options=options)
try:
    driver.get("http://www.taobao.com")

    # Set a cookie. add_cookie() takes a single dict holding both "name" and
    # "value", and it must be called after a page of the target domain has
    # been loaded.
    driver.add_cookie({'name': 'key-aaaaaaa', 'value': 'value-bbbbb'})
    # driver.delete_all_cookies()  # delete all cookies

    # Maximize the browser window.
    driver.maximize_window()

    # JavaScript that moves the scroll bar down.
    js = "document.documentElement.scrollTop=11800"
    # Run the JavaScript.
    driver.execute_script(js)

    # Wait for the page to settle after scrolling before taking the snapshot,
    # so the saved HTML includes whatever the scroll caused to load.
    time.sleep(10)
    data2 = driver.page_source.encode("utf-8")
    with open("taobao02.html", "wb") as f:
        f.write(data2)
    time.sleep(5)
finally:
    # Always close the browser, even if one of the steps above fails.
    driver.quit()


猜你喜欢

转载自blog.csdn.net/weixin_41601173/article/details/80044035