官方文档:Splinter — Splinter 0.10.0 documentation
from splinter import Browser # 导入包
browser = Browser() # 创建一个实例
# if you don’t provide any driver to the Browser function, firefox will be used
# 如果没有提供任何浏览器驱动默认使用火狐浏览器驱动(需安装火狐浏览器)
# 谷歌浏览器驱动地址: http://chromedriver.storage.googleapis.com/index.html
或者这样使用,退出with自动关闭浏览器
from splinter import Browser
with Browser() as b:
# stuff using the browser
使用其他驱动创建实例
browser = Browser('chrome')
browser = Browser('firefox')
browser = Browser('zope.testbrowser')
导航到某个URL
browser.visit('http://cobrateam.info')
浏览器窗口管理
# 所有打开的窗口
browser.windows # all open windows
# 第一个窗口
browser.windows[0] # the first window
# 名字为window_name值的窗口
browser.windows[window_name] # the window_name window
# 当前窗口
browser.windows.current # the current window
# 设置索引为3的窗口(即第四个窗口,索引从0开始)为当前窗口
browser.windows.current = browser.windows[3] # set current window to window 3
window = browser.windows[0]
# 返回一个布尔值表示当前这个窗口对象是否是活动窗口
window.is_current # boolean - whether window is current active window
# 设置window为当前活动窗口
window.is_current = True # set this window to be current window
# 获取window的下一个窗口
window.next # the next window
# 获取window的上一个窗口
window.prev # the previous window
# 关闭这个窗口
window.close() # close this window
# 关闭其他窗口
window.close_others() # close all windows except this one
重新加载页面
browser.reload()
跳转历史
browser.visit('http://cobrateam.info')
browser.visit('https://splinter.readthedocs.io')
# 上一个历史记录
browser.back()
# 下一个历史记录
browser.forward()
网页内容操作
# You can get the title of the visited page using the title attribute:
browser.title
# You can use the html attribute to get the html content of the visited page:
browser.html
# The visited page’s url can be accessed by the url attribute:
browser.url
# You can pass a User-Agent header on Browser instantiation.
b = Browser(user_agent="Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en)")
splinter 选择器提供了7个方法来定位页面元素:css, xpath, tag, name, text, id, value。
browser.find_by_css('h1')
browser.find_by_xpath('//h1')
browser.find_by_tag('h1')
browser.find_by_name('name')
browser.find_by_text('Hello World!')
# find_by_id 通常只匹配到一个元素,但和其他方法一样返回的是元素列表
browser.find_by_id('firstheader')
browser.find_by_value('query')
你可以通过first、last、index的方式访问选择器返回的元素列表
# Each of these methods returns a list with the found elements.
# You can get the first found element with the first shortcut:
first_found = browser.find_by_name('name').first
# There’s also the last shortcut – obviously, it returns the last found element:
last_found = browser.find_by_name('name').last
# You also can use an index to get the desired element in the list of found elements:
second_found = browser.find_by_name('name')[1]
An id should be unique within a web page, so the find_by_id method always returns a list with just one element.
寻找超链接的一些方法
#根据描述,即<a>和</a>标签中间的内容,直接做内容匹配
links_found = browser.find_link_by_text('Link for Example.com')
# 与上面不同,这个是做部分匹配(链接文本包含给定内容即可)
links_found = browser.find_link_by_partial_text('for Example')
# 根据href属性做内容匹配
links_found = browser.find_link_by_href('http://example.com')
# 根据href属性做部分匹配(href包含给定内容即可)
links_found = browser.find_link_by_partial_href('example')
If an element is not found, the find_* methods return an empty list. But if you try to access an element in this list, the method will raise the splinter.exceptions.ElementDoesNotExist exception.
对定位到的一些元素的事件触发
# 触发某个元素的点击事件
browser.find_by_tag('h1').click()
# 双击事件
browser.find_by_tag('h1').double_click()
# 右键点击
browser.find_by_tag('h1').right_click()
You can drag an element and drop it onto another element! The example below drags the <h1>…</h1> element and drops it onto a container element (identified by a CSS class).
draggable = browser.find_by_tag('h1')
target = browser.find_by_css('.container')
draggable.drag_and_drop(target)
获取元素的值
# method one
browser.find_by_css('h1').first.value
# method two
element = browser.find_by_css('h1').first
element.value
点击一个链接、按钮
# link
browser.click_link_by_href('http://www.the_site.com/my_link')
browser.click_link_by_partial_href('my_link')
browser.click_link_by_text('my link')
browser.click_link_by_partial_text('part of link text')
browser.click_link_by_id('link_id')
#button
browser.find_by_name('send').first.click()
browser.find_link_by_text('my link').first.click()
cookie操作
# 添加
browser.cookies.add({'whatever': 'and ever'})
# 获取所有cookie
browser.cookies.all()
# 删除
browser.cookies.delete('mwahahahaha') # deletes the cookie 'mwahahahaha'
# 删除多个
browser.cookies.delete('whatever', 'wherever') # deletes two cookies
# 删除所有
browser.cookies.delete() # deletes all cookies
结语:splinter很强大,还有很多api没有整理,请自行去官网查看,链接在这里