下列是有关PhantomJS的学习网站:
- PhantomJS官方地址:http://phantomjs.org/
PhantomJS官方API:http://phantomjs.org/api/
PhantomJS官方示例:http://phantomjs.org/examples/
PhantomJS GitHub:https://github.com/ariya/phantomjs/
Selenium-Python中文文档:https://selenium-python-zh.readthedocs.io/en/latest/index.html
'''
官方 : http://phantomjs.org/api/command-line.html
--disk-cache=[true|false] 缓存设置
--ignore-ssl-errors=[true|false] 忽略ssl错误,用于访问https网页时,需要登录
--load-images=[true|false] 加载图片
--proxy=address:port 设置代理
'''
from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy
from selenium.webdriver.common.proxy import ProxyType
from selenium.webdriver.common import desired_capabilities
# Demo script: start PhantomJS with custom request headers, an HTTP proxy,
# image loading disabled and a preset cookie, then fetch one page and print
# its source.

# Extra request headers to attach to the pages PhantomJS loads.
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "zh-CN,en,*",
    "Accept-Charset": "utf-8",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/538.1 (KHTML, like Gecko) PhantomJS/2.1.1 Safari/538.1"
}

# Manually configured HTTP proxy.
proxy = Proxy({
    'proxyType': ProxyType.MANUAL,
    'httpProxy': '212.90.168.150:52589'
})

# Work on a private copy of the PhantomJS defaults so that selenium's shared
# DesiredCapabilities.PHANTOMJS dict is never modified by this script.
# NOTE: this rebinds the imported `desired_capabilities` module name to a
# plain dict; the name is kept so code reading the dict under it still works.
desired_capabilities = desired_capabilities.DesiredCapabilities.PHANTOMJS.copy()

# One `phantomjs.page.customHeaders.<Name>` capability per header.
for key, value in headers.items():
    desired_capabilities['phantomjs.page.customHeaders.{}'.format(key)] = value

# Add the proxy settings to the capabilities.
proxy.add_to_capabilities(desired_capabilities)

# Disable image loading.
desired_capabilities["phantomjs.page.settings.loadImages"] = False

# Set the request cookie.
# NOTE(review): the value contains a literal '…', which looks like an elided
# cookie name -- confirm the intended cookie string.
desired_capabilities["phantomjs.page.customHeaders.Cookie"] = '_gauges_unique_month=1; …ear=1; _gauges_unique=1'

browser = webdriver.PhantomJS(desired_capabilities=desired_capabilities)
url = u'http://httpbin.org/get'
try:
    browser.get(url)
    print(browser.page_source)
    browser.close()
finally:
    # Always end the session, even if loading or printing the page failed,
    # so the PhantomJS process is not left running.
    browser.quit()