# Selenium 基本功能 (Selenium basics)

#driver.quit()
'''
from selenium import webdriver
from selenium.webdriver.chrome.service import Service

# 尝试传参
s = Service("chromedriver.exe")
driver = webdriver.Chrome(service=s)


driver.get('https://www.baidu.com/')
input()
'''
#1/导入Selenium库
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import selenium.webdriver.support.ui as ui
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# 2. Create a browser instance.
# NOTE: driver_path is defined but not passed to webdriver.Chrome below.
driver_path='/Users/yanghao31/Desktop/software/chromedriver.exe'
options = Options()
options.add_argument("--headless")
driver = webdriver.Chrome(options=options)

# Everything after the browser is created runs under try/finally so that the
# Chrome process is always shut down, even when a wait times out or a click fails.
try:
    # 3. Open the page with the browser instance and simulate user actions.
    driver.get('http://www.baidu.com')

    # Poll every 0.2 s, for at most 10 s, while the page renders.
    wait = WebDriverWait(driver, 10, 0.2)
    elem = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.s_ipt')))  # search input box
    elem.send_keys('Hello world')  # type the search term

    # Wait until the button can actually be clicked, not merely present in the DOM.
    clickEle = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '.s_btn_wr > input')))
    clickEle.click()

    # Scroll to the bottom of the page.
    driver.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    time.sleep(3)
    # Scroll back up to the top.
    driver.execute_script('window.scrollTo(0,0)')
    time.sleep(3)

    # 4. Print the page source, including dynamically loaded content.
    print(driver.page_source)
finally:
    # 5. Close the browser.
    driver.quit()

# 启动Chrome
import os

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
import requests
from parsel import  Selector
from fake_useragent     import UserAgent

def getInformation(html, cssString):
    """Return every match of a CSS selector inside an HTML string.

    Args:
        html: HTML markup to parse.
        cssString: CSS selector handed to parsel's ``Selector.css``.

    Returns:
        The list produced by ``.getall()`` for the selector.
    """
    return Selector(text=html).css(cssString).getall()


def getactle(html, saveDir):
    """Extract a chapter's title and text from reader HTML and save them to a file.

    The chapter container, its ``h1`` title and its paragraph/heading rows are
    located with fixed CSS class names. The text of each row is joined into one
    line, and the result is written to ``<saveDir>/<title>.txt`` as UTF-8.

    Args:
        html: HTML of the chapter page.
        saveDir: Directory for the output file; created if it does not exist.

    Returns:
        A ``(title, text)`` tuple, where ``text`` holds one line per row.

    Raises:
        IndexError: If the chapter container or the title is not found in ``html``.
    """
    cssactle = '.ChapterContent_bible-reader__Du4dP'
    actile = getInformation(html, cssactle)[0]

    cssTitle = '.ChapterContent_reader__UZc2K h1::text'
    title = getInformation(actile, cssTitle)[0]

    Cssrow = '.ChapterContent_p___9Q1l, .ChapterContent_heading__RjEme'
    lines = []
    for row in getInformation(actile, Cssrow):
        texts = getInformation(row, '.ChapterContent_content__dkdqo *::text')
        if not texts:
            # Rows without paragraph content are headings; read their text instead.
            texts = getInformation(row, '.ChapterContent_heading__RjEme *::text')
        lines.append(''.join(texts) + '\n')
    text = ''.join(lines)

    os.makedirs(saveDir, exist_ok=True)
    # A path separator inside the title would otherwise point into a sub-directory.
    fileName = title.replace('/', '_').replace('\\', '_') + '.txt'
    savePath = os.path.join(saveDir, fileName)
    with open(savePath, 'w', encoding='utf-8') as file:
        # Keep the title on its own line rather than glued to the first row.
        file.write(title + '\n')
        file.write(text)

    return title, text

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
def GetHtml_run(url, driver_path='/Users/yanghao31/Desktop/software/chromedriver_mac_arm64/chromedriver'):
    """Load ``url`` in Chrome with a random user agent and return the page source.

    Args:
        url: Address of the page to open.
        driver_path: Path to the chromedriver executable.

    Returns:
        The page HTML, or an empty string when loading exceeds the 30-second
        timeout.
    """
    options = webdriver.ChromeOptions()
    options.add_experimental_option("detach", True)
    options.add_argument(f'user-agent={UserAgent().random}')
    service = Service(executable_path=driver_path)
    # The options must be passed explicitly; otherwise the user-agent and
    # detach settings configured above are ignored.
    driver = webdriver.Chrome(service=service, options=options)
    print('准备完毕')

    try:
        # Page-load timeout: 30 seconds.
        driver.set_page_load_timeout(30)
        try:
            driver.get(url)
            # Explicitly wait until the document reports it has finished loading;
            # an implicit wait only affects element lookups, not page loading.
            WebDriverWait(driver, 30).until(
                lambda d: d.execute_script('return document.readyState') == 'complete'
            )
            html = driver.page_source
        except TimeoutException:
            print("页面加载超时")
            html = ''
    finally:
        # Always shut the browser down, even on unexpected WebDriver errors.
        driver.quit()
    return html
from selenium.webdriver.chrome.options import Options
def GetHtml_TitleAndDes(url):
    """Open ``url`` in headless Chrome and return its title and meta description.

    The title and description are also re-parsed from the page source with
    ``getInformation`` and printed as a dict for comparison.

    Args:
        url: Address of the page to open.

    Returns:
        A ``(title, description)`` tuple. Both are empty strings when the page
        load times out; ``description`` is an empty string when the page has no
        ``<meta name="description">`` tag.
    """
    options = Options()
    options.add_argument('--headless')
    options.add_argument(f'user-agent={UserAgent().random}')
    options.add_argument('--disable-gpu')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    driver = webdriver.Chrome(options=options)
    print('准备完毕')

    try:
        # Page-load timeout: 30 seconds.
        driver.set_page_load_timeout(30)

        title, description = '', ''
        try:
            driver.get(url)
            # Element lookups below may wait up to 30 seconds for a match.
            driver.implicitly_wait(30)

            # Page title.
            title = driver.title

            # Page description; find_elements returns an empty list instead of
            # raising when the tag is missing.
            metas = driver.find_elements(by=By.CSS_SELECTOR, value='meta[name="description"]')
            if metas:
                description = metas[0].get_attribute('content')
        except TimeoutException:
            print("页面加载超时")
            title, description = '', ''

        # Cross-check by parsing the raw page source.
        html = driver.page_source
        cssdes = 'meta[name="description"]::attr(content)'
        des1 = getInformation(html, cssdes)
        # <title> holds only text, so select its own text node rather than
        # the text of (non-existent) child elements.
        csstitle = 'title::text'
        title1 = getInformation(html, csstitle)
        print({"Title": title1, "Des": des1})
    finally:
        # Always shut the browser down, even on unexpected WebDriver errors.
        driver.quit()
    return title, description

def main():
    """Create the output directories, open a fixed page in Chrome and print its HTML.

    The function then blocks on ``input()`` until the user presses Enter. The
    driver is not quit here.
    NOTE(review): the "detach" option suggests the browser is meant to stay
    open for inspection — confirm.
    """
    textPath = '/Users/yanghao31/Desktop/test/SeleniumTest/text'
    audioPath = '/Users/yanghao31/Desktop/test/SeleniumTest/audio'
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(textPath, exist_ok=True)
    os.makedirs(audioPath, exist_ok=True)

    options = webdriver.ChromeOptions()
    options.add_experimental_option("detach", True)
    options.add_argument(f'user-agent={UserAgent().random}')
    service = Service(executable_path='/Users/yanghao31/Desktop/software/chromedriver_mac_arm64/chromedriver')
    # The options must be passed explicitly; otherwise the user-agent and
    # detach settings configured above are ignored.
    driver = webdriver.Chrome(service=service, options=options)
    print('准备完毕')
    driver.get('https://www.flaru.com/en/soundgasm.net/M4F-audio-nsfw')  # open the target page
    html = driver.page_source
    print(html)
    # Pause until the user presses Enter.
    input()
def GetInformation(html, cssString):
    """Return every match of a CSS selector inside an HTML string.

    Alias of ``getInformation``; delegates to it so the parsing logic lives in
    one place.

    Args:
        html: HTML markup to parse.
        cssString: CSS selector to evaluate against ``html``.

    Returns:
        The list of matches returned by ``getInformation``.
    """
    return getInformation(html, cssString)

if __name__ == "__main__":
    # Fetch the search-results page and dump its HTML.
    url = 'https://www.pinterest.jp/search/pins/?q=natural%20japanese%20makeup&rs=typed'
    html = GetHtml_run(url)
    print(html)

    # Collect the href of every anchor matching the class selector below.
    hrefs = 'a.Wk9.CCY.S9z.ho-.kVc.xQ4::attr(href)'
    href = GetInformation(html, hrefs)
    print(href)

# 猜你喜欢 (page footer text: "You may also like")