Python: Super simple character segmentation algorithm (license plate recognition, instrument recognition, etc.)

background

In problems such as license plate recognition and digital instrument recognition, the most critical thing is to separate individual characters and then identify them separately, as shown in the figure below. I just happened to use it recently, so I wrote a simple algorithm for character segmentation to record it.

Insert image description here

Image preprocessing

The color image is read as grayscale and binarized to reduce the amount of data, and then eroded and dilated (morphological operations) to remove noise.

image_path = 'F://demo.jpg'
image = cv2.imread(image_path, 0)  # flag 0: load directly as a single-channel grayscale image
if image is None:
    # cv2.imread does not raise for a missing or unreadable file; it returns None,
    # which would otherwise only surface as an obscure error inside cv2.threshold.
    raise FileNotFoundError('Could not read image: ' + image_path)
_, image = cv2.threshold(image, 50, 255, cv2.THRESH_BINARY)  # binarize: pixels above 50 become 255, the rest 0
kernel1 = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))  # 7x7 rectangular kernel used for erosion
kernel2 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))  # 5x5 rectangular kernel used for dilation
image = cv2.erode(image, kernel=kernel1)  # erode first to wipe out small noise specks
image = cv2.dilate(image, kernel=kernel2)  # then dilate to thicken the strokes that survived

Insert image description here

Determine character area

Consider the ideal case, in which the characters in the picture are upright, with no tilt or distortion. Sum the pixel matrix along each column and along each row; in the resulting column-sum and row-sum arrays, the interval from the index of the first non-zero element to the index of the last non-zero element is the character area.

h, w = image.shape  # height and width of the preprocessed image

# Projection flags: 1 where a column / row contains at least one non-zero pixel, else 0.
list1 = [1 if image[:, i].sum() != 0 else 0 for i in range(w)]  # one flag per column
list2 = [1 if image[i, :].sum() != 0 else 0 for i in range(h)]  # one flag per row

# Crop bounds of the character area: the first and last flagged index on each axis.
# Both ends are INCLUSIVE indexes. Collecting the flagged indexes (instead of
# tracking a "first seen" flag) also handles the edge cases correctly: a single
# flagged column/row gives start == end, and an image with no foreground at all
# falls back to 0 so the names are always defined.
cols = [i for i, e in enumerate(list1) if e != 0]  # indexes of non-empty columns
rows = [i for i, e in enumerate(list2) if e != 0]  # indexes of non-empty rows
start_w, end_w = (cols[0], cols[-1]) if cols else (0, 0)  # horizontal (column) range
start_h, end_h = (rows[0], rows[-1]) if rows else (0, 0)  # vertical (row) range

print(start_w, end_w)  # column index range of the character area
print(start_h, end_h)  # row index range of the character area

Insert image description here

Split a single character

In the same way as locating the whole character area, each run of consecutive non-zero indexes in the column-sum array gives the horizontal extent of a single character.

col_indexes = [i for i, e in enumerate(list1) if e != 0]  # indexes of non-empty columns
col_lookup = set(col_indexes)  # O(1) membership test instead of scanning the list on every step
img_list = []  # cropped image of each segmented character
temp = []  # column indexes of the character currently being collected
n = 0  # number of characters found

for x in col_indexes:
    temp.append(x)
    if x + 1 not in col_lookup:  # the next column is empty, so the current run ends here
        # Runs that are only one column wide are skipped, as in the original
        # algorithm (presumably they are leftover noise rather than a character).
        if len(temp) != 1:
            left, right = temp[0], temp[-1]  # temp is ascending, so these are its min and max
            # left/right and start_h/end_h are inclusive indexes, while slice stops
            # are exclusive: add 1 so the last row and last column are not cut off.
            img_list.append(image[start_h:end_h + 1, left:right + 1])
            n += 1
        temp = []

print(n)  # number of characters

Insert image description here

Complete source code

import cv2

def projection_flags(image):
    """Return ``(column_flags, row_flags)`` for a 2-D image array.

    Each list holds one entry per column / row: 1 if the sum of that
    column / row is non-zero (it contains foreground pixels), otherwise 0.
    """
    h, w = image.shape
    column_flags = [1 if image[:, i].sum() != 0 else 0 for i in range(w)]
    row_flags = [1 if image[i, :].sum() != 0 else 0 for i in range(h)]
    return column_flags, row_flags


def nonzero_span(flags):
    """Return the inclusive ``(first, last)`` indexes of the non-zero entries.

    A single non-zero entry yields ``first == last``. When nothing is
    non-zero the result is ``(0, 0)``, matching the script's old defaults.
    """
    hits = [i for i, e in enumerate(flags) if e != 0]
    if not hits:
        return 0, 0
    return hits[0], hits[-1]


def split_runs(flags):
    """Return inclusive ``(start, end)`` bounds of each run of non-zero entries.

    Runs consisting of a single entry are dropped, as in the original
    algorithm (presumably they are leftover noise rather than a character).
    The scan is a single pass, replacing the quadratic ``x + 1 not in list``
    membership test.
    """
    runs = []
    run_start = None
    for i, e in enumerate(flags):
        if e != 0:
            if run_start is None:
                run_start = i
        elif run_start is not None:
            if i - 1 > run_start:  # keep only runs at least two entries long
                runs.append((run_start, i - 1))
            run_start = None
    # A run that reaches the last entry has no trailing zero to close it.
    if run_start is not None and len(flags) - 1 > run_start:
        runs.append((run_start, len(flags) - 1))
    return runs


def segment_characters(image):
    """Locate the character area of a binarized image and cut out each character.

    Returns ``((start_w, end_w, start_h, end_h), img_list)``: the inclusive
    column and row bounds of the whole character area, and one cropped
    sub-image per character, ordered left to right.
    """
    column_flags, row_flags = projection_flags(image)
    start_w, end_w = nonzero_span(column_flags)
    start_h, end_h = nonzero_span(row_flags)
    # Bounds are inclusive but slice stops are exclusive: add 1 so the last
    # row and the last column of every character are kept.
    img_list = [
        image[start_h:end_h + 1, left:right + 1]
        for left, right in split_runs(column_flags)
    ]
    return (start_w, end_w, start_h, end_h), img_list


def main():
    """Load the image, denoise it, then segment, display and save each character."""
    image_path = 'F://001_1.jpg'
    image = cv2.imread(image_path, 0)  # flag 0: load directly as grayscale
    if image is None:
        # cv2.imread returns None instead of raising when the file cannot be read.
        raise FileNotFoundError('Could not read image: ' + image_path)
    cv2.imshow('img_GRAY', image)

    _, image = cv2.threshold(image, 50, 255, cv2.THRESH_BINARY)  # binarize
    cv2.imshow('img_BINARY', image)

    # Denoise: erode with a 7x7 kernel, then dilate with a 5x5 kernel.
    kernel1 = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
    kernel2 = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    image = cv2.erode(image, kernel=kernel1)
    image = cv2.dilate(image, kernel=kernel2)
    cv2.imshow('img_denoise', image)

    (start_w, end_w, start_h, end_h), img_list = segment_characters(image)
    print(start_w, end_w)  # column index range of the character area
    print(start_h, end_h)  # row index range of the character area

    cv2.imshow('img_number', image[start_h:end_h + 1, start_w:end_w + 1])

    n = len(img_list)
    print(n)  # number of characters

    for i in range(n):  # display and save every character
        cv2.imshow('number' + str(i), img_list[i])
        cv2.imwrite('F://demo' + str(i + 1).zfill(2) + '.jpg', img_list[i])

    cv2.waitKey(0)  # keep the windows open until a key is pressed


if __name__ == "__main__":
    main()

Conclusion

The method of simply dividing characters by adding columns and rows is not suitable for more complex segmentation requirements. In addition, the decimal point segmentation problem is not considered in the algorithm. It is only used as a learning reference. Welcome to discuss and exchange any questions.

Guess you like

Origin blog.csdn.net/Wenyuanbo/article/details/120432356