Python — solving slider verification with an automation module: ichrome, a package that is still actively updated.

There are already many articles about selenium on the Internet. This article introduces ichrome instead — a package that is still being actively updated, although not many people use it yet.

pip install ichrome  

1. Requirements: Solve the Ali sliding verification problem.

insert image description here

Configuring selenium on a Linux system is troublesome, and injecting JS through it does not get past the verification.

2. Without further ado, here is the code. Start it asynchronously:

# Entry point: run the get_detail_anj() coroutine (defined further below).
asyncio.run(get_detail_anj()) 

3. pyautogui — performs the sliding operation (written as a separate function because it needs to be called multiple times):

pip install pyautogui  
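# A proxy is required, but I did not know how to supply its credentials properly, so they are typed in this way. `tab` is the tab of the launched browser; the loop exits after at most ten iterations.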
async def hdyz(tab, *, max_attempts=10, settle_delay=10):
    """Get past the login prompt / slider verification and return the page HTML.

    Polls the tab title up to ``max_attempts`` times and reacts to it:

    * empty title -- clicks at (860, 230) and types the account / password
      placeholders with pyautogui (presumably the proxy's authentication
      prompt -- confirm against your own setup);
    * title containing "滑动验证页面" -- reloads with F5 and drags the
      slider with the mouse;
    * title containing "无法访问" or "无法连接" -- gives up;
    * any other title -- treats the page as loaded, waits ``settle_delay``
      seconds and returns its HTML.

    All screen coordinates are hard-coded and must be adapted to the actual
    window position.

    Args:
        tab: Tab object whose ``title`` attribute is awaitable and which
            provides an awaitable ``get_html(timeout=...)``.
        max_attempts: Maximum number of title checks before giving up.
        settle_delay: Seconds to wait before reading the HTML of a page that
            passed verification.

    Returns:
        The HTML returned by ``tab.get_html``, or ``None`` when an error page
        was detected or every attempt was used up.
    """
    for _ in range(max_attempts):
        title = await tab.title
        if title == '':
            pyautogui.moveTo(860, 230)
            pyautogui.click()
            pyautogui.typewrite('账号')
            # asyncio.sleep instead of time.sleep: do not block the event loop.
            await asyncio.sleep(1)
            pyautogui.press('tab')
            pyautogui.typewrite('密码')
            pyautogui.press('Enter')
            await asyncio.sleep(5)
        elif "滑动验证页面" in title:
            pyautogui.press('F5')
            # Seconds pyautogui pauses after each of its own calls.
            pyautogui.PAUSE = 1
            pyautogui.FAILSAFE = False
            # Move onto the slider handle; duration is the travel time.
            pyautogui.moveTo(880, 430, duration=0.5)
            # Randomised end point so consecutive drags are not identical.
            pyautogui.dragTo(random.randint(1190, 1220), random.randint(420, 440), duration=0.5, button='left')
        elif '无法访问' in title or '无法连接' in title:
            # Must be tested before the generic "loaded" case below, otherwise
            # an error page would be returned as if it were real content.
            return None
        else:
            await asyncio.sleep(settle_delay)
            return await tab.get_html(timeout=5)
    # Attempts exhausted without ever reaching a loaded page.
    return None

4. Start the browser and output the result:

async def get_detail_anj():
    """Crawl the paginated site through a proxied Chrome, resuming on errors.

    Launches Chrome via ``AsyncChromeDaemon``, visits page after page, lets
    ``hdyz`` deal with the login prompt / slider verification and parses the
    returned HTML with lxml. When anything fails, the browser is relaunched
    and crawling resumes from the page that failed. The function returns once
    the last page (taken from the pager element) has been processed or the
    page range is exhausted.

    The URL, proxy address and XPath are placeholders and must be adapted to
    the real target site.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/8"
                      "5.0.4183.102 Safari/537.36",
    }
    url = 'baidu.com'  # placeholder base URL; the page number is appended
    proxyurl = "127.0.0.1:8000"  # proxy address
    n = 1  # page to (re)start from
    while True:
        try:
            # headless controls whether the browser window is shown.
            # user_agent takes the UA string itself, not the headers dict.
            async with AsyncChromeDaemon(headless=False, user_agent=headers["User-Agent"], proxy=proxyurl) as cd:
                async with cd.connect_tab(index=0) as tab:
                    for page in range(n, 1000):
                        # Remember the current page first, so a failure
                        # anywhere below resumes from exactly this page.
                        n = page
                        # Build the URL from the base each time; the base
                        # itself must not accumulate page numbers.
                        requrl = f"{url}{page}"
                        await tab.goto(requrl, timeout=5)  # open the page in the browser
                        html = await hdyz(tab)  # pass verification, fetch HTML
                        if html is None:
                            raise RuntimeError(f"no HTML obtained for {requrl}")
                        r = etree.HTML(html)
                        pg_list = r.xpath("//span[@class='wel']/text()")
                        # All cookies can be read as well (shown for
                        # illustration; the result is not used here).
                        cookies = await tab.get_cookies()
                        # Stop once the last page number shown by the pager
                        # has been reached.
                        if page == int(pg_list[-1]):
                            break
            # Finished without an error: leave the retry loop.
            return
        except Exception as exc:
            # Proxy/IP or other problems: relaunch and retry from page n.
            print(f"request failed on page {n}, retrying: {exc!r}")
		

5. The complete, integrated code:

import random
import asyncio
import re
import time
import pyautogui
import redis
import requests
from ichrome import AsyncChromeDaemon
from lxml import etree

async def hdyz(tab, *, max_attempts=10, settle_delay=10):
    """Get past the login prompt / slider verification and return the page HTML.

    Polls the tab title up to ``max_attempts`` times and reacts to it:

    * empty title -- clicks at (860, 230) and types the account / password
      placeholders with pyautogui (presumably the proxy's authentication
      prompt -- confirm against your own setup);
    * title containing "滑动验证页面" -- reloads with F5 and drags the
      slider with the mouse;
    * title containing "无法访问" or "无法连接" -- gives up;
    * any other title -- treats the page as loaded, waits ``settle_delay``
      seconds and returns its HTML.

    All screen coordinates are hard-coded and must be adapted to the actual
    window position.

    Args:
        tab: Tab object whose ``title`` attribute is awaitable and which
            provides an awaitable ``get_html(timeout=...)``.
        max_attempts: Maximum number of title checks before giving up.
        settle_delay: Seconds to wait before reading the HTML of a page that
            passed verification.

    Returns:
        The HTML returned by ``tab.get_html``, or ``None`` when an error page
        was detected or every attempt was used up.
    """
    for _ in range(max_attempts):
        title = await tab.title
        if title == '':
            pyautogui.moveTo(860, 230)
            pyautogui.click()
            pyautogui.typewrite('账号')
            # asyncio.sleep instead of time.sleep: do not block the event loop.
            await asyncio.sleep(1)
            pyautogui.press('tab')
            pyautogui.typewrite('密码')
            pyautogui.press('Enter')
            await asyncio.sleep(5)
        elif "滑动验证页面" in title:
            pyautogui.press('F5')
            # Seconds pyautogui pauses after each of its own calls.
            pyautogui.PAUSE = 1
            pyautogui.FAILSAFE = False
            # Move onto the slider handle; duration is the travel time.
            pyautogui.moveTo(880, 430, duration=0.5)
            # Randomised end point so consecutive drags are not identical.
            pyautogui.dragTo(random.randint(1190, 1220), random.randint(420, 440), duration=0.5, button='left')
        elif '无法访问' in title or '无法连接' in title:
            # Must be tested before the generic "loaded" case below, otherwise
            # an error page would be returned as if it were real content.
            return None
        else:
            await asyncio.sleep(settle_delay)
            return await tab.get_html(timeout=5)
    # Attempts exhausted without ever reaching a loaded page.
    return None
    
async def get_detail_anj():
    """Crawl the paginated site through a proxied Chrome, resuming on errors.

    Launches Chrome via ``AsyncChromeDaemon``, visits page after page, lets
    ``hdyz`` deal with the login prompt / slider verification and parses the
    returned HTML with lxml. When anything fails (proxy/IP trouble or other
    errors), the browser is relaunched and crawling resumes from the page
    that failed. The function returns once the last page (taken from the
    pager element) has been processed or the page range is exhausted.

    The URL, proxy address and XPath are placeholders and must be adapted to
    the real target site.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/8"
                      "5.0.4183.102 Safari/537.36",
    }
    url = 'baidu.com'  # placeholder base URL; the page number is appended
    proxyurl = "127.0.0.1:8000"  # proxy address
    n = 1  # page to (re)start from
    while True:
        try:
            # headless controls whether the browser window is shown.
            # user_agent takes the UA string itself, not the headers dict.
            async with AsyncChromeDaemon(headless=False, user_agent=headers["User-Agent"], proxy=proxyurl) as cd:
                async with cd.connect_tab(index=0) as tab:
                    for page in range(n, 1000):
                        # Remember the current page first, so a failure
                        # anywhere below resumes from exactly this page.
                        n = page
                        # Build the URL from the base each time; the base
                        # itself must not accumulate page numbers.
                        requrl = f"{url}{page}"
                        await tab.goto(requrl, timeout=5)  # open the page in the browser
                        html = await hdyz(tab)  # pass verification, fetch HTML
                        if html is None:
                            raise RuntimeError(f"no HTML obtained for {requrl}")
                        r = etree.HTML(html)
                        pg_list = r.xpath("//span[@class='wel']/text()")
                        # All cookies can be read as well (shown for
                        # illustration; the result is not used here).
                        cookies = await tab.get_cookies()
                        # Stop once the last page number shown by the pager
                        # has been reached.
                        if page == int(pg_list[-1]):
                            break
            # Finished without an error: leave the retry loop.
            return
        except Exception as exc:
            # Proxy/IP or other problems: relaunch and retry from page n.
            print(f"request failed on page {n}, retrying: {exc!r}")
# Run the crawler coroutine only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(get_detail_anj()) 
		

The code needs some minor modifications and cannot be used as-is.

おすすめ

転載: blog.csdn.net/weixin_45195493/article/details/127865269