Cleaning up CAPTCHA images: binarize the picture, then remove the border, the interference lines, and the noise points.
"""Clean up CAPTCHA images before OCR.

Pipeline applied to every ``*.jpg`` in the input directory:
adaptive binarization -> border removal -> interference-line removal ->
neighbourhood point filter. The cleaned image is written to the output
directory as ``<name>-interferencePoint.jpg``.
"""
# Import order is kept as in the original script: the star import may
# rebind names, so reordering could change which binding wins.
from PIL import Image
from pytesseract import *
from fnmatch import fnmatch
from queue import Queue
import matplotlib.pyplot as plt
import cv2
import time
import os

# Grey level written for "white" pixels.
_WHITE = 255
# A pixel brighter than this counts as white when inspecting neighbours.
_BRIGHT = 245


def clear_border(img, img_name):
    """Paint the outer frame of ``img`` white, in place.

    NOTE: in this file ``img[x, y]`` indexes ``x`` along ``shape[0]``
    (height) and ``y`` along ``shape[1]`` (width).

    The frame is 4 pixels thick on the top/left sides (index < 4) and
    3 pixels thick on the bottom/right sides (index > size - 4); this
    asymmetry is inherited from the original conditions and preserved.

    :param img: image array exposing ``shape`` and slice assignment.
    :param img_name: unused; kept for interface compatibility.
    :return: the same ``img`` object, modified in place.
    """
    h, w = img.shape[:2]
    # Slice assignment is equivalent to the original per-pixel loop.
    img[:4, :] = _WHITE
    img[max(h - 3, 0):, :] = _WHITE
    img[:, :4] = _WHITE
    img[:, max(w - 3, 0):] = _WHITE
    return img


def interference_line(img, img_name):
    """Erase thin interference lines, in place.

    Two passes are made over the interior pixels. A pixel is painted white
    when more than two of its four direct neighbours are brighter than 245.
    The scan is sequential and in place, so a pixel whitened earlier in a
    pass influences the pixels visited after it.

    :param img: single-channel image array indexed as ``img[x, y]`` with
        ``x`` along the height and ``y`` along the width.
    :param img_name: unused; kept for interface compatibility.
    :return: the same ``img`` object, modified in place.
    """
    h, w = img.shape[:2]
    for _ in range(2):
        for y in range(1, w - 1):
            for x in range(1, h - 1):
                neighbours = (
                    img[x, y - 1],
                    img[x, y + 1],
                    img[x - 1, y],
                    img[x + 1, y],
                )
                bright = sum(1 for value in neighbours if value > _BRIGHT)
                if bright > 2:
                    img[x, y] = _WHITE
    return img


def interference_point(img, img_name, x=0, y=0):
    """Blacken pixels whose 3x3 neighbourhood is mostly dark, in place.

    For every pixel, the grey values of the pixel and its in-bounds
    neighbours are summed: 4 cells at a corner, 6 on an edge, 9 in the
    interior. The pixel is set to 0 when the sum is at most
    ``(cells // 2) * 245``, i.e. 2, 3 or 4 times 245 respectively.

    Fixes relative to the original implementation:

    * the centre value is read for each pixel; the original read it once
      from ``img[x, y]`` before the loops and reused it everywhere;
    * the last row and column are scanned; the original loops stopped one
      short, which made its dedicated edge branches unreachable.

    The scan is sequential and in place (outer loop over ``y``, inner loop
    over ``x``), so earlier changes influence later sums.

    :param img: single-channel image array indexed as ``img[x, y]`` with
        ``x`` along the height and ``y`` along the width.
    :param img_name: unused; kept for interface compatibility.
    :param x: ignored; kept for interface compatibility.
    :param y: ignored; kept for interface compatibility.
    :return: the same ``img`` object, modified in place.
    """
    height, width = img.shape[:2]
    if height < 2 or width < 2:
        # No usable neighbourhood; the original also left such images as-is.
        return img

    for col in range(width):
        for row in range(height):
            # Slicing clips the 3x3 window at the image boundary.
            window = img[max(row - 1, 0):row + 2, max(col - 1, 0):col + 2]
            limit = (window.size // 2) * _BRIGHT
            if int(window.sum()) <= limit:
                img[row, col] = 0
    return img


def _get_dynamic_binary_image(filedir, img_name):
    """Load an image and binarize it with an adaptive Gaussian threshold.

    :param filedir: directory containing the image.
    :param img_name: file name of the image inside ``filedir``.
    :return: result of ``cv2.adaptiveThreshold`` (block size 21, C = 1,
        ``THRESH_BINARY``, max value 255) on the grayscale image.
    :raises OSError: if OpenCV cannot read the image.
    """
    path = os.path.join(filedir, img_name)
    im = cv2.imread(path)
    if im is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise OSError('cannot read image: ' + path)
    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    return cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 21, 1
    )


def recognize(filedir='./images', outdir='./easy_code'):
    """Clean every ``*.jpg`` in ``filedir`` and save the results.

    :param filedir: directory holding the CAPTCHA images.
    :param outdir: directory for the cleaned images; created if missing.
    :raises OSError: if an image cannot be read or a result cannot be saved.
    """
    # cv2.imwrite does not create missing directories.
    os.makedirs(outdir, exist_ok=True)
    for file in os.listdir(filedir):
        if not fnmatch(file, '*.jpg'):
            continue
        img_name = file

        # Adaptive-threshold binarization.
        im = _get_dynamic_binary_image(filedir, img_name)
        # Remove the border.
        im = clear_border(im, img_name)
        # Remove interference lines.
        im = interference_line(im, img_name)
        # Apply the neighbourhood point filter.
        im = interference_point(im, img_name)

        # splitext keeps dotted stems intact ("a.b.jpg" -> "a.b").
        stem = os.path.splitext(img_name)[0]
        filename = os.path.join(outdir, stem + '-interferencePoint.jpg')
        if not cv2.imwrite(filename, im):
            raise OSError('cannot write image: ' + filename)


if __name__ == '__main__':
    recognize()
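The code above is adapted from the article "Python CAPTCHA recognition", which contains a detailed explanation, and from the CAPTCHA-recognition write-up by another blogger (the name is rendered here as "boss Dian fish rough", apparently a machine-translated handle).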
Attached link: https://www.cnblogs.com/qqandfqr/p/7866650.html