Python:验证码识别

版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
本文链接: https://blog.csdn.net/u013419318/article/details/102545735

1 介绍

本文介绍利用 pytesseract 的 image_to_string() 函数识别验证码的方法。在调用该函数之前,需要先对图像依次进行灰度化、去噪、二值化和反色处理。完整代码如下。

2 代码 

import cv2
import numpy as np
import matplotlib.pylab as plt
import pytesseract
from PIL import Image

# 噪点处理
def interference_point(img_path):
    """Load an image, convert it to grayscale and whiten isolated noise pixels.

    The outermost one-pixel border is set to white (255). Every interior
    pixel is set to white when more than two of its four direct neighbours
    (left, right, up, down) are exactly 255. The image is modified in place
    while it is scanned column by column, so pixels whitened earlier in the
    scan count as white neighbours for pixels visited later.

    Args:
        img_path: Path of the image file, passed to ``cv2.imread``.

    Returns:
        The grayscale image produced by ``cv2.cvtColor`` with the noise
        removal applied.

    Raises:
        OSError: If ``cv2.imread`` could not load ``img_path``.
    """
    img_source = cv2.imread(img_path)
    if img_source is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError(f"could not read image: {img_path!r}")

    height, width = img_source.shape[:2]
    # Convert to grayscale
    gray = cv2.cvtColor(img_source, cv2.COLOR_BGR2GRAY)

    last_row = height - 1
    last_col = width - 1
    # Scan order (columns outer, rows inner) matters because of the in-place
    # updates; keep it unchanged.
    for col in range(width):
        for row in range(height):
            # Clear the points lying on the image border
            if col == 0 or col == last_col or row == 0 or row == last_row:
                gray[row, col] = 255
                continue

            neighbours = (
                gray[row, col - 1],
                gray[row, col + 1],
                gray[row - 1, col],
                gray[row + 1, col],
            )
            white_count = sum(1 for value in neighbours if value == 255)
            if white_count > 2:
                gray[row, col] = 255
    return gray

def binary(image):
    """Binarize *image* with ``cv2.threshold``.

    The call combines ``cv2.THRESH_BINARY`` with ``cv2.THRESH_TRIANGLE`` and
    uses 255 as the maximum value.

    Args:
        image: Image array passed straight to ``cv2.threshold``.

    Returns:
        The thresholded image (the second value returned by
        ``cv2.threshold``).
    """
    threshold_flags = cv2.THRESH_BINARY | cv2.THRESH_TRIANGLE
    # Only the thresholded image is needed; the computed threshold is dropped.
    _, thresholded = cv2.threshold(image, 0, 255, threshold_flags)
    return thresholded

def pixel_invert(image):
    """Return an inverted black/white copy of *image*.

    Values strictly greater than 200 become 0; all other values become 255.

    Args:
        image: Array-like of pixel intensities.

    Returns:
        A ``numpy.ndarray`` of dtype ``uint8`` with the same shape as
        *image*, containing only 0 and 255.
    """
    pixels = np.asarray(image)
    # Build the result as uint8 explicitly: the integer dtype NumPy infers
    # from Python ints is platform dependent, and 8-bit is the usual
    # grayscale layout expected by imaging libraries.
    return np.where(pixels > 200, 0, 255).astype(np.uint8)

def image_binary(image_path):
    """Run the preprocessing pipeline on an image and return the OCR text.

    The image is passed through ``interference_point``, ``binary`` and
    ``pixel_invert`` in that order, and the result is handed to
    ``pytesseract.image_to_string``.

    Args:
        image_path: Path of the image file to recognize.

    Returns:
        Whatever ``pytesseract.image_to_string`` returns for the
        preprocessed image.
    """
    denoised = interference_point(image_path)
    inverted = pixel_invert(binary(denoised))
    return pytesseract.image_to_string(inverted)

if __name__ == '__main__':
    # Recognize the sample image and print the text that was read from it.
    print(image_binary('1.jpg'))

3 效果展示

 

识别结果:

 

猜你喜欢

转载自blog.csdn.net/u013419318/article/details/102545735
今日推荐