With the three steps in place (switching into the login frame, filling in the account and password, and recognizing the verification code), we can start simulating the login below. A caveat up front: this simulated login only works when each verification code has a fixed URL, because the verification code downloaded to the local machine must be the same as the one shown on the page when it is opened. To check, open the verification code's `src` URL twice in a browser: if the picture is the same both times, the code below should be able to log in successfully; if the two pictures differ, that case is discussed further below.
# Web crawler code: simulate a login by filling in the form and typing the
# recognized verification code.
import time
from urllib.request import urlretrieve

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys

# The verification-code recognition function lives in ShiBie.py.
from ShiBie import recognize

driver = webdriver.Firefox()

# The following proxy setup was used while debugging the application:
# profile = webdriver.FirefoxProfile()
# profile.set_preference('network.proxy.type', 1)
# profile.set_preference('network.proxy.http', '127.0.0.1')
# profile.set_preference('network.proxy.http_port', 8080)  # int
# profile.update_preferences()
# driver = webdriver.Firefox(firefox_profile=profile)

driver.get("http://xxxxxxxxx/Home.aspx")

# Switch into the login frame.
driver.switch_to_frame('frm_login')

# Enter the account and the password.
driver.find_element_by_id("txt_asmcdefsddsd").send_keys("xxxxxxx")
driver.find_element_by_id("txt_asmcdefsddsd").send_keys(Keys.TAB)
driver.find_element_by_id("txt_pewerwedsdfsdff").send_keys("xxxxxxx")

# Click the verification-code input first: this triggers the page's JS, and
# without it the code image is not displayed and cannot be found by id. If the
# image is already displayed on the page, this click can be left out.
driver.find_element_by_id("txt_sdertfgsadscxcadsads").click()
jpg = driver.find_element_by_id("imgCode")

# Read the image URL and download the picture locally.
# NOTE: urlretrieve issues a new request for the URL, so the downloaded picture
# only matches the one shown in the browser if the code has a fixed src.
image = jpg.get_attribute("src")
img_name = 'check.jpg'
path_img = './easy_code/' + img_name
urlretrieve(image, path_img)

# Remove the spaces from the recognized verification code.
code = recognize(img_name).replace(" ", "")
driver.find_element_by_id("txt_sdertfgsadscxcadsads").send_keys(code)
print(code)
time.sleep(5)
driver.find_element_by_id("btn_login").click()
With slight changes, the verification-code recognition can be adapted to recognize other kinds of codes. The processing functions used in this chapter are attached below:
# ShiBie.py - verification-code (captcha) pre-processing and recognition.
#
# cv2 and pytesseract are imported lazily inside the functions that need them,
# so the pure-array helpers stay importable without OpenCV/Tesseract installed.
from fnmatch import fnmatch
from queue import Queue
import os
import time


def clear_border(img, img_name):
    """Paint the outer frame of ``img`` white (255), in place.

    The first 4 rows/columns and the last 3 rows/columns (index > size - 4)
    are overwritten, exactly as in the original per-pixel loop.

    :param img: image array indexed as ``img[row, col]``.
    :param img_name: file name of the image (unused, kept for the interface).
    :return: the same ``img`` object, modified in place.
    """
    h, w = img.shape[:2]
    img[:, :4] = 255
    img[:, max(w - 3, 0):] = 255
    img[:4, :] = 255
    img[max(h - 3, 0):, :] = 255
    return img


def interference_line(img, img_name):
    """Remove interference lines from ``img``, in place.

    Two passes are made over the interior pixels; a pixel is set to white
    when more than two of its four direct neighbours are brighter than 245.
    Updates are applied immediately, so later pixels see earlier changes.

    Note: OpenCV arrays are indexed ``img[row, col]``, i.e. height first and
    width second.

    :param img: single-channel image array.
    :param img_name: file name of the image (unused, kept for the interface).
    :return: the same ``img`` object, modified in place.
    """
    h, w = img.shape[:2]
    for _ in range(2):
        for y in range(1, w - 1):
            for x in range(1, h - 1):
                neighbours = (
                    img[x, y - 1],
                    img[x, y + 1],
                    img[x - 1, y],
                    img[x + 1, y],
                )
                if sum(1 for value in neighbours if value > 245) > 2:
                    img[x, y] = 255
    return img


def interference_point(img, img_name, x=0, y=0):
    """Point de-noising over the neighbourhood of every pixel, in place.

    For each pixel the values of the surrounding block (the pixel itself plus
    its neighbours, clipped at the image edge) are summed: 4 pixels at a
    corner, 6 on an edge, 9 in the interior. The pixel is set to black (0)
    when the sum is at most ``(block_size // 2) * 245``, i.e. 2 * 245,
    3 * 245 and 4 * 245 respectively.

    Fixes relative to the original listing:
    * the centre value is read for each pixel instead of once before the
      loops (the original always used the value of pixel ``(x, y)``);
    * the last row and column are processed, so the edge/corner rules for
      them are actually applied;
    * images smaller than 2x2 are returned unchanged.

    :param img: single-channel image array indexed as ``img[row, col]``.
    :param img_name: file name of the image (unused, kept for the interface).
    :param x: unused, kept for backward compatibility.
    :param y: unused, kept for backward compatibility.
    :return: the same ``img`` object, modified in place.
    """
    height, width = img.shape[:2]
    if height < 2 or width < 2:
        return img

    # Column-major traversal, matching the original loop order; updates are
    # in place, so the order affects which values later pixels see.
    for col in range(width):
        for row in range(height):
            block = img[
                max(row - 1, 0):min(row + 2, height),
                max(col - 1, 0):min(col + 2, width),
            ]
            total = int(block.sum())
            if total <= (block.size // 2) * 245:
                img[row, col] = 0
    return img


def _get_dynamic_binary_image(img_name):
    """Load ``./easy_code/<img_name>`` and binarize it with an adaptive threshold.

    :param img_name: file name of the image inside ``./easy_code/``.
    :return: the thresholded single-channel image.
    :raises FileNotFoundError: if OpenCV cannot read the image.
    """
    import cv2

    img_name = './easy_code/' + img_name
    im = cv2.imread(img_name)
    if im is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(img_name)
    im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    th1 = cv2.adaptiveThreshold(
        im, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 21, 1
    )
    return th1


def recognize(img_name):
    """Recognize the verification code in ``./easy_code/<img_name>``.

    The image is binarized, its border cleared, line noise and point noise
    removed, and the result is passed to Tesseract with the ``mob`` language
    data.

    :param img_name: file name of a ``.jpg`` image inside ``./easy_code/``.
    :return: the recognized text restricted to GBK-encodable characters, or
        ``None`` when ``img_name`` does not match ``*.jpg``.
    """
    if not fnmatch(img_name, '*.jpg'):
        return None

    import pytesseract

    # Adaptive-threshold binarization.
    im = _get_dynamic_binary_image(img_name)
    # Remove the border.
    im = clear_border(im, img_name)
    # Remove interference lines.
    im = interference_line(im, img_name)
    # Remove point noise.
    im = interference_point(im, img_name)
    # Recognize the verification code.
    str_img = pytesseract.image_to_string(im, lang='mob')
    # Drop any characters that cannot be represented in GBK.
    code = str_img.encode("GBK", "ignore").decode('GBK')
    return code
My test did not succeed. The cause was neither the code itself nor the recognition rate of the verification code: the verification code downloaded to the local machine was not the same as the one displayed on the web page. Analysing the page with packet capture showed that the verification code is transmitted only once for each page load; restoring the captured data into a picture gave the same code as the one shown on the page. Opening the site again and restoring its verification code into a picture in the same way gave a second, different picture. So my guess is that the `urlretrieve` function requests the URL again when it downloads the picture, which is why the downloaded verification code differs from the one displayed on the page. This is why, as noted earlier, the approach only works if each verification code has a fixed `src` (though this is rare, ha ha).