Python crawler notes: simulating user access — cleaning up CAPTCHA images (4)

Cleaning up the CAPTCHA picture: binarize the image, remove the border, remove interference lines, and remove noise points.

from PIL import Image
from pytesseract import *
from fnmatch import fnmatch
from queue import Queue
import matplotlib.pyplot as plt
import cv2
import time
import os


def clear_border(img, img_name):
    """Paint the outer frame of the image white to remove its border.

    The frame covers rows/columns whose index is ``< 4`` or ``> size - 4``,
    i.e. four pixels on the top/left side and three pixels on the
    bottom/right side.

    Args:
        img: Image array indexed as ``img[row, col]``; modified in place.
        img_name: File name of the image. Unused; kept so existing callers
            keep working.

    Returns:
        The same ``img`` object, with the frame set to 255.
    """
    height, width = img.shape[:2]

    # Leading edges: indices 0..3.
    img[:4, :] = 255
    img[:, :4] = 255
    # Trailing edges: indices size-3..size-1. ``max`` keeps the slice start
    # non-negative for very small images (which are whitened entirely).
    img[max(height - 3, 0):, :] = 255
    img[:, max(width - 3, 0):] = 255

    return img


def interference_line(img, img_name):
    """Whiten pixels that are mostly surrounded by white to erase thin lines.

    For every non-edge pixel the four direct neighbours (left, right, up,
    down) are inspected; if more than two of them are brighter than 245 the
    pixel is set to 255. The scan is repeated twice and works in place, so
    pixels whitened earlier influence the pixels visited later.

    Args:
        img: Single-channel image array indexed as ``img[row, col]``;
            modified in place.
        img_name: File name of the image. Unused; kept so existing callers
            keep working.

    Returns:
        The same ``img`` object after both passes.
    """
    height, width = img.shape[:2]

    # NOTE: OpenCV images are indexed img[row, col] -- the first index runs
    # over the height, the second over the width.
    for _ in range(2):
        for col in range(1, width - 1):
            for row in range(1, height - 1):
                neighbours = (
                    img[row, col - 1],
                    img[row, col + 1],
                    img[row - 1, col],
                    img[row + 1, col],
                )
                white_count = sum(1 for value in neighbours if value > 245)
                if white_count > 2:
                    img[row, col] = 255

    return img

def interference_point(img, img_name, x=0, y=0):
    """Blacken pixels whose 3x3 neighbourhood is predominantly dark.

    For every pixel the values inside the 3x3 window centred on it are
    summed (the window is clipped at the image edges, giving 4 pixels in a
    corner, 6 on an edge and 9 elsewhere). If the sum is at most
    ``(pixels_in_window // 2) * 245`` -- i.e. 2*245, 3*245 or 4*245 -- the
    pixel is set to 0. The scan runs column by column and works in place,
    so earlier changes influence later pixels.

    NOTE(review): the rule turns isolated bright pixels black rather than
    turning isolated dark pixels white; confirm this is the intended
    direction for the pipeline.

    Args:
        img: Single-channel image array indexed as ``img[row, col]``;
            modified in place.
        img_name: File name of the image. Unused; kept so existing callers
            keep working.
        x: Unused; kept for backward compatibility. (It previously selected
            a single pixel that was wrongly reused as the centre value for
            every position.)
        y: Unused; see ``x``.

    Returns:
        The same ``img`` object after the scan.
    """
    height, width = img.shape[:2]

    # The neighbourhood rule needs at least a 2x2 image; leave anything
    # smaller untouched.
    if height < 2 or width < 2:
        return img

    for col in range(width):
        for row in range(height):
            # Slicing clips the window at the bottom/right automatically;
            # ``max`` clips it at the top/left.
            window = img[max(row - 1, 0):row + 2, max(col - 1, 0):col + 2]
            threshold = (window.size // 2) * 245
            # ndarray.sum() widens the accumulator, so uint8 pixels cannot
            # overflow here.
            if int(window.sum()) <= threshold:
                img[row, col] = 0

    return img

def _get_dynamic_binary_image(filedir, img_name):
    """Load an image and binarize it with an adaptive Gaussian threshold.

    The image is read from ``filedir + '/' + img_name``, converted from BGR
    to grayscale, and thresholded with ``cv2.adaptiveThreshold`` (Gaussian
    method, block size 21, constant 1, maximum value 255).

    Args:
        filedir: Directory containing the image.
        img_name: File name of the image inside ``filedir``.

    Returns:
        The binarized single-channel image.

    Raises:
        OSError: If OpenCV cannot read the image file.
    """
    path = filedir + '/' + img_name
    image = cv2.imread(path)
    # cv2.imread signals failure by returning None instead of raising;
    # fail here with a clear message rather than inside cvtColor.
    if image is None:
        raise OSError('cannot read image: ' + path)

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 21, 1
    )

def recognize():
    """Clean every ``*.jpg`` CAPTCHA in ``./images`` and save the result.

    Each image is binarized, has its border removed, and is then passed
    through the line and point noise filters. The cleaned image is written
    to ``./easy_code/<name>-interferencePoint.jpg``, where ``<name>`` is the
    part of the file name before its first dot.
    """
    filedir = './images'  # directory holding the CAPTCHA images
    # cv2.imwrite does not create missing directories, so make sure the
    # output directory exists before writing.
    os.makedirs('./easy_code', exist_ok=True)

    for file in os.listdir(filedir):
        if not fnmatch(file, '*.jpg'):
            continue
        img_name = file

        # Adaptive-threshold binarization.
        im = _get_dynamic_binary_image(filedir, img_name)
        # Remove the border.
        im = clear_border(im, img_name)
        # Remove interference lines.
        im = interference_line(im, img_name)
        # Remove point noise.
        im = interference_point(im, img_name)

        # easy_code is the output directory.
        filename = './easy_code/' + img_name.split('.')[0] + '-interferencePoint.jpg'
        cv2.imwrite(filename, im)  # save the cleaned picture


recognize()

The code above is adapted from the article "Python CAPTCHA recognition", which contains a detailed explanation; this is only a rough approach to recognizing CAPTCHAs, so experts, please go easy on me.

Attached link: https://www.cnblogs.com/qqandfqr/p/7866650.html

Guess you like

Origin: www.cnblogs.com/dfy-blog/p/11563331.html