Python web crawler notes [1]: simulating user access — simulated login (6)

Next, switch into the login form's frame and fill in the account, the password and the verification code. Once all three have been filled in, the simulated login can begin. (A caveat up front: this simulated login assumes that each verification code has a fixed URL, because the verification code downloaded to the local machine must be the same one that is shown when the page is opened. You can check this by opening the image's `src` URL twice in a browser: if the picture is the same both times, the code below should be able to log in successfully; if the two pictures differ, how to deal with that is covered in the next post.)

# Web crawler code: simulated login through a real browser
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
import time
from urllib.request import urlretrieve
# The captcha recognition function (see the second listing).
# NOTE(review): the module name was garbled in this copy; "shibie" is assumed.
from shibie import recognize

driver = webdriver.Firefox()
# Proxy settings, used to route the browser through a local proxy while
# diagnosing problems with a traffic inspector:
# profile = webdriver.FirefoxProfile()
# profile.set_preference('network.proxy.type', 1)
# profile.set_preference('network.proxy.http', '127.0.0.1')
# profile.set_preference('network.proxy.http_port', 8080)  # int
# profile.update_preferences()
# driver = webdriver.Firefox(firefox_profile=profile)

driver.get("http://xxxxxxxxx/Home.aspx")
# Switch into the frame that holds the login form.
# (switch_to.frame / find_element(By.ID, ...) replace the switch_to_frame /
# find_element_by_id helpers, which were removed in Selenium 4.)
driver.switch_to.frame('frm_login')
# Enter the account and the password.
driver.find_element(By.ID, "txt_asmcdefsddsd").send_keys("xxxxxxx")
driver.find_element(By.ID, "txt_asmcdefsddsd").send_keys(Keys.TAB)
driver.find_element(By.ID, "txt_pewerwedsdfsdff").send_keys("xxxxxxx")
# Clicking the captcha input triggers the page's JS that reveals the captcha
# image; until then the image element is not displayed and cannot be found.
# If the captcha is already shown on the page, this click can be skipped.
driver.find_element(By.ID, "txt_sdertfgsadscxcadsads").click()
jpg = driver.find_element(By.ID, "imgCode")
# Read the captcha image URL and download it for recognition.
# NOTE: urlretrieve issues a new HTTP request for that URL, so the downloaded
# picture only matches the one on the page if the URL serves a fixed image.
image = jpg.get_attribute("src")
img_name = 'check.jpg'
path_img = './easy_code/' + img_name
urlretrieve(image, path_img)
# Strip the spaces from the recognised captcha text.
code = recognize(img_name).replace(" ", "")
driver.find_element(By.ID, "txt_sdertfgsadscxcadsads").send_keys(code)
print(code)
time.sleep(5)
driver.find_element(By.ID, "btn_login").click()

The verification-code recognition function is a slightly modified version of the processing functions from the earlier chapter on verification-code recognition; the code is attached below:

from PIL import Image
from pytesseract import *
from fnmatch import fnmatch
from queue import Queue
import matplotlib.pyplot as plt
import cv2
import time
import os


def clear_border(img, img_name):
    """Whiten the border of a captcha image, in place.

    The first 4 and the last 3 rows and columns are set to 255.

    :param img: image array indexed as ``img[row, col]`` (as returned by
        OpenCV); modified in place.
    :param img_name: file name of the image; unused, kept for a uniform
        signature across the processing steps.
    :return: the same ``img`` object.
    """
    h, w = img.shape[:2]
    # Same pixels as the per-pixel test "index < 4 or index > size - 4";
    # max() keeps the start index non-negative on very small images.
    img[:4, :] = 255
    img[max(h - 3, 0):, :] = 255
    img[:, :4] = 255
    img[:, max(w - 3, 0):] = 255
    return img


def interference_line(img, img_name):
    """Remove thin interference lines from a binarised image, in place.

    Every interior pixel that has more than two of its four direct
    neighbours brighter than 245 is set to white (255). Two passes are made.

    :param img: 2-D grayscale image array; modified in place.
    :param img_name: file name of the image; unused, kept for a uniform
        signature across the processing steps.
    :return: the same ``img`` object.
    """
    h, w = img.shape[:2]
    # OpenCV arrays are indexed [row, col]: in img[1, 2] the first index
    # runs along the image height and the second along the width.
    for _ in range(2):
        # Pixels are updated while scanning, so later pixels see earlier
        # changes; the column-major scan order is therefore kept as is.
        for y in range(1, w - 1):
            for x in range(1, h - 1):
                count = 0
                if img[x, y - 1] > 245:
                    count += 1
                if img[x, y + 1] > 245:
                    count += 1
                if img[x - 1, y] > 245:
                    count += 1
                if img[x + 1, y] > 245:
                    count += 1
                if count > 2:
                    img[x, y] = 255

    return img

def interference_point(img, img_name, x=0, y=0):
    """Point denoising over the neighbourhood of every pixel, in place.

    For each pixel, the values of the pixel and its existing neighbours
    (a 2x2 block at a corner, 2x3 / 3x2 on an edge, 3x3 elsewhere) are
    summed. If the sum is at most 2, 3 or 4 times 245 respectively, the
    pixel is set to black (0).

    Pixels are updated while scanning (column by column), so later pixels
    see the changes made to earlier ones.

    :param img: 2-D grayscale image array indexed as ``img[row, col]``;
        modified in place.
    :param img_name: file name of the image; unused, kept for a uniform
        signature across the processing steps.
    :param x: unused; kept for backward compatibility.
    :param y: unused; kept for backward compatibility.
    :return: the same ``img`` object.
    """
    height, width = img.shape[:2]
    # A neighbourhood needs at least 2 pixels along each axis.
    if height < 2 or width < 2:
        return img

    # Cover the full range so the last row and column are processed too.
    for col in range(width):
        for row in range(height):
            # Slicing clamps at the far edge; max() clamps at the near edge.
            block = img[max(row - 1, 0):row + 2, max(col - 1, 0):col + 2]
            # block.size is 4, 6 or 9 -> threshold factor 2, 3 or 4.
            if int(block.sum()) <= (block.size // 2) * 245:
                img[row, col] = 0
    return img

def _get_dynamic_binary_image(img_name):
    """Load a captcha image and binarise it with an adaptive threshold.

    :param img_name: file name of the image inside ``./easy_code/``.
    :return: the binarised grayscale image produced by
        ``cv2.adaptiveThreshold`` (Gaussian method, block size 21, C = 1).
    :raises FileNotFoundError: if the image cannot be read.
    """
    img_name = './easy_code/' + img_name
    im = cv2.imread(img_name)
    # cv2.imread signals failure by returning None rather than raising.
    if im is None:
        raise FileNotFoundError('could not read image: ' + img_name)
    im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)

    th1 = cv2.adaptiveThreshold(
        im, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 21, 1)

    return th1

def recognize(img_name):
    """Recognise the text of a captcha image stored in ``./easy_code/``.

    The image is binarised, its border is whitened, interference lines and
    noise points are removed, and the result is passed to Tesseract.

    :param img_name: file name of the image; only names matching ``*.jpg``
        are processed.
    :return: the recognised text with characters that cannot be encoded as
        GBK dropped, or ``None`` if ``img_name`` does not match ``*.jpg``.
    """
    if not fnmatch(img_name, '*.jpg'):
        return None

    # Adaptive-threshold binarisation
    im = _get_dynamic_binary_image(img_name)
    # Remove the border
    im = clear_border(im, img_name)
    # Remove interference lines
    im = interference_line(im, img_name)
    # Remove noise points
    im = interference_point(im, img_name)

    # Recognise the captcha.
    # NOTE(review): lang='mob' is presumably a custom-trained Tesseract
    # language; confirm the matching traineddata file is installed.
    str_img = pytesseract.image_to_string(im, lang='mob')
    code = str_img.encode("GBK", "ignore").decode('GBK')
    return code
View Code

My test did not succeed. The cause was neither the code nor the recognition rate of the verification code: the verification code downloaded to the local machine was not the same as the one displayed on the web page. Analysing the page with a packet capture showed that, when the page is opened normally, the verification-code image is transmitted only once; I had a friend restore the captured data into a picture, and it matched the code shown on the page. I then opened the site through the script, captured the verification code and restored it into a picture in the same way, and this time found two pictures. So my guess is that when the `urlretrieve` function downloads the picture it requests the URL again, which is why the downloaded verification code differs from the one displayed on the page. That is why, as noted earlier, this approach only works if each verification code has a fixed `src` (though this is rare, ha ha).

Guess you like

Origin www.cnblogs.com/dfy-blog/p/11565992.html