Foreword:
This program still has plenty of room for improvement, and I will keep refining it later. Please bear with me.
Overview
-
Determine the position of the slider by comparing the pixels of the complete picture with the picture without the slider
-
Edge detection algorithm to determine location
-
Avoid detection, simulate human behavior and slide the slider
Implementation
- The example below uses a "brushing" website to demonstrate sliding the verification code
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2021/1/2 18:34
# @Author: huni
# @File: Verification code 2.py
# @Software: PyCharm
"""Solve a GeeTest-style slider captcha with Selenium.

The script grabs the captcha background canvas, looks for the first pixel
column that contains a long vertical run of dark gray pixels (taken to be the
edge of the gap) and then drags the slider there with human-like pauses.
"""
import base64
import copy
import random
import time
from io import BytesIO

from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait


class VeriImageUtil():
    """Find the x position of a vertical dark-gray line in a captcha image."""

    def __init__(self):
        # grayOffset: max distance of a channel from the RGB mean for a pixel
        #             to still count as gray.
        # opaque: fraction of 255 the alpha channel must reach.
        # minVerticalLineCount: consecutive matching pixels required in one
        #                       column before it is reported.
        self.defaultConfig = {
            "grayOffset": 20,
            "opaque": 1,
            "minVerticalLineCount": 30
        }
        self.config = copy.deepcopy(self.defaultConfig)

    def updateConfig(self, config):
        """Override known configuration keys with values from ``config``.

        Keys in ``config`` that are not already present in ``self.config``
        are ignored.
        """
        for k in self.config:
            if k in config:
                self.config[k] = config[k]

    def getMaxOffset(self, *args):
        """Return the largest absolute distance of any argument from the mean."""
        av = sum(args) / len(args)
        maxOffset = 0
        for a in args:
            offset = abs(av - a)
            if offset > maxOffset:
                maxOffset = offset
        return maxOffset

    def isGrayPx(self, r, g, b):
        """Return True if the channels are within ``grayOffset`` of their mean."""
        return self.getMaxOffset(r, g, b) < self.config["grayOffset"]

    def isDarkStyle(self, r, g, b):
        """Return True if every channel is below 128."""
        return r < 128 and g < 128 and b < 128

    def isOpaque(self, px):
        """Return True if the pixel's alpha reaches the configured threshold.

        Pixels without an alpha channel (fewer than four components) are
        treated as opaque instead of raising ``IndexError``.
        """
        if len(px) < 4:
            return True
        return px[3] >= 255 * self.config["opaque"]

    def getVerticalLineOffsetX(self, bgImage):
        """Return the x of the first column containing a long dark-gray run.

        Columns are scanned left to right, and each column top to bottom.
        The run counter resets whenever a pixel is not dark, gray and opaque.

        :param bgImage: image object exposing ``size`` and ``load()``
                        (a PIL image in this script)
        :return: the column index, or None if no column qualifies
        """
        pixels = bgImage.load()
        width, height = bgImage.size
        for x in range(width):
            # Number of consecutive matching pixels seen so far in this column.
            verticalLineCount = 0
            for y in range(height):
                px = pixels[x, y]
                r, g, b = px[0], px[1], px[2]
                if (self.isDarkStyle(r, g, b) and self.isGrayPx(r, g, b)
                        and self.isOpaque(px)):
                    verticalLineCount += 1
                    if verticalLineCount >= self.config["minVerticalLineCount"]:
                        # A straight vertical line: the slider has to travel
                        # this far.
                        return x
                else:
                    verticalLineCount = 0
        return None


class DragUtil():
    """Drag an element horizontally using Selenium action chains."""

    def __init__(self, driver):
        self.driver = driver

    def __getRadomPauseScondes(self):
        """
        :return: random drag pause time in seconds, between 0.6 and 0.9
        """
        return random.uniform(0.6, 0.9)

    def simulateDragX(self, source, targetOffsetX):
        """
        Imitate a human drag: move quickly along the X axis with a deliberate
        error, pause, then correct the error in one or two further moves.
        The net horizontal movement is always ``targetOffsetX``.

        The original author's intent is to avoid being detected as a robot.

        :param source: the html element to be dragged
        :param targetOffsetX: drag target x-axis distance
        :return: None
        """
        action_chains = webdriver.ActionChains(self.driver)
        # Press and hold, ready to drag.
        action_chains.click_and_hold(source)
        # Total number of moves: two or three.
        dragCount = random.randint(2, 3)
        # Deliberate overshoot/undershoot applied to the first move.
        sumOffsetx = random.randint(-15, 15)
        action_chains.move_by_offset(targetOffsetX + sumOffsetx, 0)
        action_chains.pause(self.__getRadomPauseScondes())
        if dragCount == 2:
            # Undo the whole error in a single move.
            action_chains.move_by_offset(-sumOffsetx, 0)
        else:
            # Undo a random part of the error first (same sign as the error)...
            offsetx = random.randint(min(sumOffsetx, 0), max(sumOffsetx, 0))
            action_chains.move_by_offset(-offsetx, 0)
            action_chains.pause(self.__getRadomPauseScondes())
            # ...then whatever is left.
            action_chains.move_by_offset(-sumOffsetx + offsetx, 0)
            action_chains.pause(self.__getRadomPauseScondes())
        # See action_chains.drag_and_drop_by_offset() for reference.
        action_chains.release()
        action_chains.perform()

    def simpleSimulateDragX(self, source, targetOffsetX):
        """
        Simple drag: hold, pause briefly, move to the target position in one
        step, pause again, then release.

        According to the original author, Bilibili ("Station B") tells humans
        from machines by whether there is a pause, so this method works there.

        :param source: the html element to be dragged
        :param targetOffsetX: drag target x-axis distance
        :return: None
        """
        action_chains = webdriver.ActionChains(self.driver)
        # Press and hold, ready to drag.
        action_chains.click_and_hold(source)
        action_chains.pause(0.2)
        action_chains.move_by_offset(targetOffsetX, 0)
        action_chains.pause(0.6)
        action_chains.release()
        action_chains.perform()


def checkVeriImage(driver):
    """Read the captcha background, locate the gap and start dragging.

    If no usable offset is found (None or 0) the captcha is refreshed and
    the check starts over.
    """
    WebDriverWait(driver, 5).until(
        lambda d: d.find_element(
            By.CSS_SELECTOR, '.geetest_canvas_bg.geetest_absolute'))
    time.sleep(1)
    im_info = driver.execute_script(
        'return document.getElementsByClassName("geetest_canvas_bg geetest_absolute")[0].toDataURL("image/png");')
    # The data URL is "<header>,<base64 payload>"; keep the payload.
    im_base64 = im_info.split(',')[1]
    im_bytes = base64.b64decode(im_base64)
    with open('./temp_bg.png', 'wb') as f:
        # Keep a local copy of the picture.
        f.write(im_bytes)

    bgImage = Image.open(BytesIO(im_bytes))
    offsetX = VeriImageUtil().getVerticalLineOffsetX(bgImage)
    print("offsetX: {}".format(offsetX))
    if not isinstance(offsetX, int) or offsetX == 0:
        # Can't calculate a position: reload the captcha and try again.
        driver.find_element(By.CSS_SELECTOR, ".geetest_refresh_1").click()
        checkVeriImage(driver)
        return
    dragVeriImage(driver, offsetX)


def dragVeriImage(driver, offsetX):
    """Drag the slider towards ``offsetX``, trying several corrections.

    The detected line may be the left or the right edge of the gap, so the
    first two attempts subtract a small margin and the last two additionally
    subtract the slider width (about 40 px per the original author).
    As soon as the error panel shows up, the captcha is re-checked.
    """
    eleDrag = driver.find_element(By.CSS_SELECTOR, ".geetest_slider_button")
    dragUtil = DragUtil(driver)
    for correction in (10, 6, 56, 52):
        dragUtil.simulateDragX(eleDrag, offsetX - correction)
        # Wait before looking for the error panel; presumably the page needs
        # a moment to render the result. Applied to every attempt, including
        # the last one.
        time.sleep(2.5)
        if isNeedCheckVeriImage(driver):
            checkVeriImage(driver)
            return


def isNeedCheckVeriImage(driver):
    """Return True (after clicking the error panel) if verification failed."""
    if driver.find_element(By.CSS_SELECTOR, ".geetest_panel_error").is_displayed():
        driver.find_element(By.CSS_SELECTOR, ".geetest_panel_error_content").click()
        return True
    return False


def task():
    """Open the demo page, trigger the captcha and attempt to solve it."""
    # NOTE: the original author recommends excluding the 'enable-automation'
    # switch so that websites are less likely to recognise Selenium:
    #   options = webdriver.ChromeOptions()
    #   options.add_experimental_option('excludeSwitches', ['enable-automation'])
    driver = webdriver.Chrome()
    driver.get('https://www.ieqq.net/?cid=222&tid=5584')
    time.sleep(3)

    # Locate the input box and type into it.
    search_input = driver.find_element(By.XPATH, '//*[@id="inputvalue"]')
    time.sleep(3)
    search_input.send_keys('xxxxxx')

    # Scroll to the bottom of the page.
    driver.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    time.sleep(2)

    # Locate and click the submit button.
    btn = driver.find_element(By.XPATH, '//*[@id="submit_buy"]')
    btn.click()
    time.sleep(6)

    driver.find_element(By.XPATH, '//*[@id="captcha"]/div[3]/div[3]').click()
    time.sleep(3)
    checkVeriImage(driver)


def isElementExist(driver, css):
    """Return True if an element matching the CSS selector exists.

    Only "element not found" is treated as False; other errors propagate.
    """
    try:
        driver.find_element(By.CSS_SELECTOR, css)
        return True
    except NoSuchElementException:
        return False


if __name__ == '__main__':
    task()
Afterword
Although cracking the verification code can solve the login problem for crawlers to a certain extent, the recognition rate cannot reach 100%. For crawlers that need to log in, it is therefore recommended to simulate the login with cookies instead: you only need to solve the login verification code manually (or scan the QR code) the first time, and the session can then be reused for a period of time. Of course, each approach has its pros and cons; cookies also become invalid after a while. Which of the two to use is a matter of opinion.
For details on the verification code, please refer to https://blog.csdn.net/weixin_43881394/article/details/108360729
Postscript
Recently, many friends have asked about learning Python through private messages. To make communication easier, click the blue link to join the discussion and Q&A resource group yourself.