# selenium使用，xpath解析模块；四套解析数据的方式: 1.bs4 2.css_selector 3.xpath 4.re

from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options # 导入自定义配置模块

from selenium.webdriver.common.keys import  Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By


# Build the Chrome start-up configuration.  The variable must be spelled
# identically everywhere: the original mixed `chorme_options`,
# `chrome_options` and `chrome_option`, which raised NameError.
chrome_options = Options()

# chrome_options.add_argument('window-size=1920x3000')  # set the browser resolution
chrome_options.add_argument('--disable-gpu')  # Google's docs mention this flag as a bug workaround
# chrome_options.add_argument('--hide-scrollbars')  # hide scrollbars, for some special pages
chrome_options.add_argument('blink-settings=imagesEnabled=false')  # skip image loading for speed
chrome_options.add_argument('--headless')  # no visible window; needed on Linux hosts without a display
chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])  # suppress the automation banner

# If the driver's path is on the PATH environment variable, no path argument
# is needed.  `options` carries the configuration built above.
driver = Chrome(options=chrome_options)


driver.get('https://www.baidu.com')

# Locate the logo through the generic find_element(By, value) API.  The
# find_element_by_* helpers used originally were removed in Selenium 4.3;
# this form works on both Selenium 3 and 4.
tag = driver.find_element(By.CLASS_NAME, "index-logo-src")
# Equivalent lookup with a CSS selector:
# tag = driver.find_element(By.CSS_SELECTOR, ".index-logo-src")
#
# print(tag)
#
# Information available on an element:
# print(tag.text)
# print(tag.parent)  # this is the driver object, not the parent element
# print(tag.get_attribute("src"))
# print(tag.tag_name)

# Implicit wait: when a looked-up element does not exist yet, the lookup is
# retried (polled) until it is found; an error is raised after 10 seconds.
# NOTE(review): Selenium's documentation advises against combining implicit
# and explicit waits in one session; both are kept here because this script
# demonstrates each technique.
driver.implicitly_wait(10)

# Find the search box, type the query and submit it.
key_input = driver.find_element(By.ID, "kw")
key_input.send_keys("基佬")
key_input.send_keys(Keys.ENTER)

# Explicit wait: wait for one specific element to satisfy one condition.
# WebDriverWait takes the driver and a timeout; here it blocks until an
# element with id "content_left" is present, for at most 10 seconds.
WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.ID, "content_left"))
)
print("============================")

# Fetch the results container.  Reading it without the wait above could fail
# because the page may not have finished loading.
div = driver.find_element(By.ID, "content_left")

print(div)

import time
time.sleep(1)

# Run a second search: look the input up again, clear it, then type and submit.
key_input = driver.find_element(By.ID, "kw")
key_input.clear()  # empty the search box
key_input.send_keys("泰国美女")
key_input.send_keys(Keys.ENTER)

"""
动作链
指的是一系列动作的集合
例如: 滑动验证
1.点击并按住
2.移动鼠标
3.移到指定位置 松手
"""
from selenium.webdriver import Chrome
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By

# NOTE(review): passing the chromedriver path positionally is Selenium 3
# style; newer Selenium 4 releases expect a Service object instead.  Confirm
# against the installed version before changing it.
driver = Chrome(r"D:\jerry\spiderDay3\selenium模块\chromedriver.exe")
driver.get("http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable")
driver.implicitly_wait(5)


# The demo widgets live inside an iframe, so switch into it first.
driver.switch_to.frame("iframeResult")


# The draggable element.  find_element(By.ID, ...) replaces the
# find_element_by_id helper, which was removed in Selenium 4.3.
tag = driver.find_element(By.ID, "draggable")
print(tag.location)

# The element to drop onto.
tag2 = driver.find_element(By.ID, "droppable")
print(tag2.location)


# Horizontal distance to travel:
# dis = tag2.location["x"] - tag.location["x"]
# One-shot variant using a single action chain:
# asc = ActionChains(driver)
# asc.click_and_hold(tag).perform()    # perform() executes the action: press and hold
# asc.move_to_element(tag2).perform()  # move the element onto tag2
# asc.release().perform()              # let go
# That variant does not look human and is easily flagged as a bot.


# Linear movement: press and hold, then creep right one pixel at a time.
asc = ActionChains(driver)
asc.click_and_hold(tag).perform()

# Read the target's x coordinate once instead of on every iteration; each
# `.location` access is a separate query to the browser.  The drop target is
# assumed to stay put while dragging.
target_x = tag2.location["x"]
while tag.location["x"] < target_x:
    ActionChains(driver).move_by_offset(1, 0).perform()
asc.release().perform()





# To reach content outside the current frame, go back to the parent frame:
# driver.switch_to.parent_frame()
# driver.find_element(By.ID, "textareaCode")

from selenium.webdriver import Chrome


# Start Chrome with an explicit chromedriver path and load the first page.
chromedriver_path = r"D:\jerry\spiderDay3\selenium模块\chromedriver.exe"
driver = Chrome(chromedriver_path)
driver.get("https://www.baidu.com")

# Arbitrary JavaScript can be run in the page:
# driver.execute_script("alert('你是杀马特码?')")

# History navigation (back / forward):
# driver.get("https://www.baidu.com")
# driver.get("https://www.qq.com")
# driver.get("https://www.sina.com")
# driver.get("https://www.4399.com")
#
# driver.back()     # go back
# driver.forward()  # go forward

# Switching tabs: open a blank tab through JavaScript, then move the driver's
# focus to it by window handle.
driver.execute_script("window.open()")
# print(driver.window_handles)  # list every window handle
second_tab = driver.window_handles[1]
driver.switch_to.window(second_tab)

# Load a page in the new tab, then return focus to the first tab.
driver.get("https://www.qq.com")
first_tab = driver.window_handles[0]
driver.switch_to.window(first_tab)

"""
xpath 也是一种用于解析xml文档数据的方式
xml path
"""

from lxml import etree

# Sample markup that every XPath query in this walkthrough runs against.
doc = """
<?xml version="1.0" encoding="ISO-8859-1"?>
<html>
    <body>
        <bookstore id="test" class="ttt">
        
        <book id= "1" class = "2">
          <title lang="eng">Harry Potter</title>
          <price>29.99</price>
        </book>
        
        <book id = "2222222222222">11111111111111111111
          <title lang="abc">Learning XML</title>
          <price>39.95</price>
        </book>
        
        </bookstore>
    <a></a>
    </body>
</html>
"""
html = etree.HTML(doc)

# Basic expressions; each result list is printed in the order listed.
basic_expressions = (
    "/bookstore",                     # path anchored at the document root
    "//bookstore",                    # match anywhere in the document
    "//book",                         # every <book> element
    "//*",                            # wildcard: every element
    "//bookstore/@id",                # a single attribute value
    "//bookstore/@*",                 # all attribute values
    "//bookstore/book/title/text()",  # nested path ending in the text nodes
)
for expression in basic_expressions:
    print(html.xpath(expression))



# Predicates (conditions) ======================================================

# Selecting by position:
# print(html.xpath("//bookstore/book[1]/title/text()"))             # the first one
# print(html.xpath("//bookstore/book[last()-1]/title/text()"))      # last() is the last one, last()-1 the one before it
# print(html.xpath("//bookstore/book[position()>1]/title/text()"))  # position greater than 1

# Selecting by a child's value:
# print(html.xpath("//book[price > 30]"))
# Native XPath can select both attributes and elements, whereas Selenium can
# only locate elements.
#
# Take the first <book> whose price is above 30 and inspect it:
# e = html.xpath("//book[price > 30]")[0]
# print(type(e))
# from lxml.etree import _Element
# print(e.text)    # the element's own text, excluding text of child elements
# print(e.attrib)  # the element's attributes

# Filtering on attributes.
# Any element that carries a lang attribute at all:
with_lang = html.xpath("//*[@lang]")
print(with_lang)

# Elements whose lang attribute equals "abc"; show the first match's attributes:
lang_abc = html.xpath("//*[@lang='abc']")
print(lang_abc[0].attrib)

# Any element that has at least one attribute:
with_any_attribute = html.xpath("//*[@*]")
print(with_any_attribute)

# Union of several paths:
titles_and_prices = html.xpath("//title|//price")
print(titles_and_prices)




# Axes: start from one element and select others relative to it ================
# Each result list is printed in the order listed.
axis_expressions = (
    "//bookstore/ancestor::*",          # every ancestor
    "//bookstore/ancestor::body",       # only ancestors named body
    "//bookstore/ancestor-or-self::*",  # every ancestor plus the element itself
    "//bookstore/attribute::id",        # attribute access via the attribute axis
    "//bookstore/@id",                  # the same attribute in abbreviated form
    "//bookstore/child::*",             # all direct children
    "//bookstore/descendant::*",        # all descendants
    "//book[1]/following::*",           # everything after it, regardless of nesting level
    "//book[1]/following-sibling::*",   # its later siblings
    "//book[1]/preceding-sibling::*",   # its earlier siblings
    # "//book[1]/parent::*",            # its parent (left disabled)
    "//*[@id and @class]",              # elements having both an id and a class attribute
)
for axis_expression in axis_expressions:
    print(html.xpath(axis_expression))

# selenium使用，xpath解析模块；四套解析数据的方式: 1.bs4 2.css_selector 3.xpath 4.re

# 猜你喜欢