Message recognition with a Python/Keras convolutional neural network

Project requirement: use a convolutional neural network to recognize the digits in telegram messages. Because the message dataset is small, the MNIST dataset is used as the training set.

Implementation steps:

1. Data preprocessing: split each message image into single digits

2. Network training

3. Network parameter fine-tuning

4. Classification and prediction

I. Dataset

 As shown below, each message consists of four digits.

[sample message image: four digits]

To improve the recognition rate, the images are preprocessed to make them friendlier to the classifier.

The raw message images are processed as follows (the code below walks through these steps):

  1. Read the original image
  2. Binarize the colour image into a black-and-white image
  3. Remove background noise
from img_tools import get_clear_bin_image, get_crop_imgs,save_crop_imgs
import matplotlib.pyplot as plt
from PIL import Image

image = Image.open('test_image/1.png')  # read the image
image.show()
imgry = image.convert('L')  # convert to a grayscale image kept in memory

bin_clear_img = get_clear_bin_image(imgry)  # binarize and denoise the image

child_img_list = get_crop_imgs(bin_clear_img)  # split the image into single digits, e.g. a 4-digit message yields 4 child images



for i in range(4):
    new_image = child_img_list[i]  # take the i-th child image

    new_img = new_image.resize((28, 28), Image.BILINEAR)  # resize every digit to 28*28
    new_img.show()  # display the image
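To reuse these crops as the message test set loaded later (the ./new_test folder and name.txt file referenced in the training scripts below), a minimal sketch along these lines can save each resized digit and record its file name. The folder layout and the file-naming scheme are assumptions; only the folder and list-file names come from the later code.

import os

os.makedirs('new_test', exist_ok=True)      # folder assumed by the training scripts below
with open('name.txt', 'w') as name_file:    # one file name per line, read back later
    for i, child in enumerate(child_img_list):
        digit = child.resize((28, 28), Image.BILINEAR)
        file_name = '1-%d.png' % i           # hypothetical naming: <message id>-<digit index>.png
        digit.save(os.path.join('new_test', file_name))
        name_file.write(file_name + '\n')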

Image preprocessing functions:

import os
from PIL import Image
from cfg import img_path, bin_clear_folder, origin_pic_folder, cut_pic_folder, data_root
from os.path import join


def get_bin_table(threshold=155):  # 155 is a hand-picked threshold that keeps the digit outlines clear after binarization
    """
    Build the grayscale-to-binary mapping table.
    :param threshold:
    :return:
    """
    table = []
    for i in range(256):
        if i < threshold:
            table.append(1)
        else:
            table.append(0)

    return table

def sum_9_region(img, x, y):
    """
    3x3 neighbourhood centred on the current pixel: count of dark pixels,
    used to decide whether an isolated noise point should be removed.
    :param img: Image
    :param x:
    :param y:
    :return:
    """
    cur_pixel = img.getpixel((x, y))  # value of the current pixel
    width = img.width
    height = img.height

    if cur_pixel == 1:  # pixels with value 1 are skipped and contribute nothing
        return 0

    if y == 0:  # first row
        if x == 0:  # top-left corner, 4-neighbourhood
            # 3 pixels adjacent to the centre
            sum = cur_pixel \
                  + img.getpixel((x, y + 1)) \
                  + img.getpixel((x + 1, y)) \
                  + img.getpixel((x + 1, y + 1))
            return 4 - sum
        elif x == width - 1:  # top-right corner
            sum = cur_pixel \
                  + img.getpixel((x, y + 1)) \
                  + img.getpixel((x - 1, y)) \
                  + img.getpixel((x - 1, y + 1))

            return 4 - sum
        else:  # top edge, not a corner: 6-neighbourhood
            sum = img.getpixel((x - 1, y)) \
                  + img.getpixel((x - 1, y + 1)) \
                  + cur_pixel \
                  + img.getpixel((x, y + 1)) \
                  + img.getpixel((x + 1, y)) \
                  + img.getpixel((x + 1, y + 1))
            return 6 - sum
    elif y == height - 1:  # last row
        if x == 0:  # bottom-left corner
            # 3 pixels adjacent to the centre
            sum = cur_pixel \
                  + img.getpixel((x + 1, y)) \
                  + img.getpixel((x + 1, y - 1)) \
                  + img.getpixel((x, y - 1))
            return 4 - sum
        elif x == width - 1:  # bottom-right corner
            sum = cur_pixel \
                  + img.getpixel((x, y - 1)) \
                  + img.getpixel((x - 1, y)) \
                  + img.getpixel((x - 1, y - 1))

            return 4 - sum
        else:  # bottom edge, not a corner: 6-neighbourhood
            sum = cur_pixel \
                  + img.getpixel((x - 1, y)) \
                  + img.getpixel((x + 1, y)) \
                  + img.getpixel((x, y - 1)) \
                  + img.getpixel((x - 1, y - 1)) \
                  + img.getpixel((x + 1, y - 1))
            return 6 - sum
    else:  # y is not on a boundary
        if x == 0:  # left edge, not a corner
            sum = img.getpixel((x, y - 1)) \
                  + cur_pixel \
                  + img.getpixel((x, y + 1)) \
                  + img.getpixel((x + 1, y - 1)) \
                  + img.getpixel((x + 1, y)) \
                  + img.getpixel((x + 1, y + 1))

            return 6 - sum
        elif x == width - 1:  # right edge, not a corner
            # print('%s,%s' % (x, y))
            sum = img.getpixel((x, y - 1)) \
                  + cur_pixel \
                  + img.getpixel((x, y + 1)) \
                  + img.getpixel((x - 1, y - 1)) \
                  + img.getpixel((x - 1, y)) \
                  + img.getpixel((x - 1, y + 1))

            return 6 - sum
        else:  # interior pixel with a full 3x3 neighbourhood
            sum = img.getpixel((x - 1, y - 1)) \
                  + img.getpixel((x - 1, y)) \
                  + img.getpixel((x - 1, y + 1)) \
                  + img.getpixel((x, y - 1)) \
                  + cur_pixel \
                  + img.getpixel((x, y + 1)) \
                  + img.getpixel((x + 1, y - 1)) \
                  + img.getpixel((x + 1, y)) \
                  + img.getpixel((x + 1, y + 1))
            return 9 - sum


def remove_noise_pixel(img, noise_point_list):
    """
    Remove noise from the binary image by overwriting the listed pixel positions.
    :type img: Image
    :param img:
    :param noise_point_list:
    :return:
    """
    for item in noise_point_list:
        img.putpixel((item[0], item[1]), 1)


def get_clear_bin_image(image):
    """
    Get a clean binarized image.
    Preprocessing steps:
    1. convert to grayscale
    2. binarize
    3. remove noise points
    Reference: http://python.jobbole.com/84625/
    :type image: Image
    :return:
    """
    imgry = image.convert('L')  # convert to grayscale

    table = get_bin_table()
    out = imgry.point(table, '1')  # binarize: with the table above, pixels below the threshold become 1, the rest 0

#    noise_point_list = []  # find noise points; this first pass is fairly strict and may remove some valid pixels
#    for x in range(out.width):
#        for y in range(out.height):
#            res_9 = sum_9_region(out, x, y)
#            if (0 < res_9 < 3) and out.getpixel((x, y)) == 0:  # isolated point found
#                pos = (x, y)
#                noise_point_list.append(pos)
#    remove_noise_pixel(out, noise_point_list)
    return out


def get_crop_imgs(img):
    """
    Split the image according to its layout; the exact coordinates depend on the specific message format (see the example layout).
    Splitting is the hard part of recognizing multi-digit images with traditional machine learning: once it works,
    a multi-digit message reduces to several single-digit recognition problems.
    :param img:
    :return:
    """
    child_img_list = []
    for i in range(4):
#        x = 2 + i * (6 + 4)  # see the layout diagram
#        y = 0
#        child_img = img.crop((x, y, x + 6, y + 10))
        x1 = [6, 26, 54, 69]   # left boundary of each digit
        x2 = [25, 46, 67, 91]  # right boundary of each digit
        child_img = img.crop((x1[i], 10, x2[i], 40))  # digit region; 10 is the top boundary, 40 the bottom boundary

        child_img_list.append(child_img)

    return child_img_list


def print_line_x(img, x):
    """
    Print row x of an Image to the console, for debugging.
    :param img:
    :type img: Image
    :param x:
    :return:
    """
    print("line:%s" % x)
    for w in range(img.width):
        print(img.getpixel((w, x)), end='')
    print('')


def print_bin(img):
    """
    Print the binarized image to the console, for debugging.
    :param img:
    :type img: Image
    :return:
    """
    print('current binary output, width:%s height:%s\n' % (img.width, img.height))
    for h in range(img.height):
        for w in range(img.width):
            print(img.getpixel((w, h)), end='')
        print('')


def save_crop_imgs(bin_clear_image_path, child_img_list):
    """
    Input: the path of a clean binarized image.
    Output: the four cropped sub-images saved to disk.

    For example: A.png ---> A-0.png, A-1.png, ... A-3.png; the saved crops are labelled afterwards.
    :param bin_clear_image_path: xxxx/xxxxx/xxxxx.png, used only to derive the file names of the saved crops
    :param child_img_list:
    :return:
    """
    full_file_name = os.path.basename(bin_clear_image_path)  # file name
    full_file_name_split = full_file_name.split('.')
    file_name = full_file_name_split[0]
    # file_ext = full_file_name_split[1]

    i = 0
    for child_img in child_img_list:
        cut_img_file_name = file_name + '-' + ("%s.png" % i)
        child_img.save(join(cut_pic_folder, cut_img_file_name))
        i += 1


# Training-data preparation: batch operations over the images in a directory

def batch_get_all_bin_clear():
    """
    Training-data preparation.
    Batch operation: binarize and denoise every original image.
    :return:
    """

    file_list = os.listdir(origin_pic_folder)
    for file_name in file_list:
        file_full_path = os.path.join(origin_pic_folder, file_name)
        image = Image.open(file_full_path)
        out = get_clear_bin_image(image)
        out.save(os.path.join(bin_clear_folder, file_name))  # save the result so batch_cut_images can read it


def batch_cut_images():
    """
    Training-data preparation.
    Batch operation: split every binarized-and-denoised image into single-character images,
    then save them to the corresponding directory so they can be labelled.
    """

    file_list = os.listdir(bin_clear_folder)
    for file_name in file_list:
        bin_clear_img_path = os.path.join(bin_clear_folder, file_name)
        img = Image.open(bin_clear_img_path)

        child_img_list = get_crop_imgs(img)
        save_crop_imgs(bin_clear_img_path, child_img_list)  # save the crops; they are labelled later


# Intermediate demo


def demo_cut_pic():
    """
    Demo code used while experimenting.
    :return:
    """
    img_path = join(data_root, 'demo-6937/ocr-simple-char-captcha-bin-clear-6937.png')
    img = Image.open(img_path)
    cut_save = data_root + '/demo-6937'
    child_img_list = get_crop_imgs(img)

    index = 0
    for child_img in child_img_list:
        child_img.save(cut_save + '/cut-%d.png' % index)
        index += 1


def get_bin_img_name(img_path):
    """
    Given the path of an original image, return the path of its binarized, denoised counterpart.
    :param img_path:
    :type img_path: str
    :return:
    """
    path_split = img_path.split('/')
    file_name_split = path_split[-1].split('.')
    file_name = file_name_split[0]  # file name
    # file_ext = file_name_split[1]  # extension

    new_file = '/'.join(item for item in path_split[:-2]) + '/bin_clear/' + file_name + '.png'
    return new_file


def demo_handle_save_bin_clear_pic(image):
    """
    Demo of the image-processing pipeline.
    Used during the training/analysis phase to save the binarized image.
    :type image: Image
    :return:
    """
    out = get_clear_bin_image(image)
    new_file_path = get_bin_img_name(img_path)
    print(new_file_path)
    out.save(new_file_path)


if __name__ == "__main__":
    print(get_bin_table())
    # batch_get_all_bin_clear()  # binarize and roughly denoise all images
    # cut_all_pic()  # split the images into single characters
    # save_train_txt()
    # save_test_txt()
    # crack_captcha()
    # img = Image.open(img_path)
    # handle_save_bin_clear_pic(img)
    # demo_cut_pic()
    pass
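The cfg module imported at the top of this file is not included in the post. A minimal sketch of what it might define is shown below; all paths are placeholder assumptions and should be adapted to the actual directory layout.

# cfg.py - a minimal sketch of the configuration module assumed by img_tools (paths are placeholders)
from os.path import join

data_root = './data'                              # root folder for all image data
origin_pic_folder = join(data_root, 'origin')     # raw message images
bin_clear_folder = join(data_root, 'bin_clear')   # binarized, denoised images
cut_pic_folder = join(data_root, 'cut')           # single-digit crops
img_path = join(origin_pic_folder, '1.png')       # a sample image used by the demo helpers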

  After preprocessing, each message image is reduced to four clean single-digit images.

[processed single-digit images]

After the image preprocessing, the network is trained on MNIST. The first model is LeNet-5: the training set is the whole MNIST dataset and the test set is the message dataset.

The MNIST dataset can be downloaded from its official website.

import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
import os
from PIL import Image

import scipy.io as sio


from keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D
from keras.layers.normalization import BatchNormalization
from keras import backend as K
from keras.models import Model
from keras.layers.core import Flatten

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
x_train, y_train = mnist.train.images,mnist.train.labels  
x_var, y_var = mnist.validation.images,mnist.validation.labels 
x_test, y_test = mnist.test.images, mnist.test.labels  

train = np.concatenate((x_train, x_var, x_test))  ## concatenate all MNIST splits into one training set
train = np.uint8(train*255)
train = train.reshape(train.shape[0], 28, 28).astype('float32')

train_label = np.concatenate((y_train,y_var, y_test))

train_data = train.reshape(train.shape[0], 28, 28, 1).astype('float32')

## Load the message test set
test_dir = "./new_test"   # directory containing the message digit images
# name_list = os.listdir(test_dir)  # alternatively, list every file in the directory

with open('name.txt') as name_file:   # one image file name per line
    name_list = [line.strip('\n') for line in name_file]

result = []   # list collecting the processed digit images
for filename in name_list:
    img = Image.open(os.path.join(test_dir, filename))

    new_image = img.resize((28, 28), Image.BILINEAR)
    imgry_new = np.array(new_image.convert('L'))
    result.append(imgry_new)
test = np.array(result)  # stack into an array of shape (num_images, 28, 28)

# Load the test-set labels
test_labels = np.loadtxt('test_label.txt')
test_label = (np.arange(10) == test_labels[:, None]).astype(int)  # one-hot encode the labels


## Dataset shaping
## reshape to [samples][width][height][channels]
test_data = test.reshape(test.shape[0], 28, 28, 1).astype('float32')


# normalize from 0-255 to 0-1
train_data = train_data / 255
test_data = test_data / 255



# Build the model (LeNet-5 style)
input_img = Input(shape=(28, 28, 1))  # input layer: 28*28 single-channel images
x1 = Conv2D(10, (5, 5), activation='relu', padding='same')(input_img)  # convolution: 10 filters of size 5*5, ReLU activation
x1 = BatchNormalization()(x1)  # batch normalization: speeds up convergence and helps against overfitting
x1 = MaxPooling2D(2)(x1)  # pooling layer

x1 = Conv2D(16, (5, 5), activation='relu')(x1)  # convolution: 16 filters of size 5*5
x1 = BatchNormalization()(x1)
encoded = MaxPooling2D(2)(x1)

encoded = Flatten()(encoded)  # flatten
decoded = Dense(120, activation='relu')(encoded)  # fully connected hidden layer, ReLU
decoded = Dense(84, activation='relu')(decoded)   # fully connected hidden layer, ReLU
decoded = Dense(10, activation='softmax')(decoded)  # output layer, softmax over the 10 digit classes

autoencoder = Model(input_img, decoded)  # assemble the model (the variable name is kept from the original template)
autoencoder.compile(optimizer='adam', loss='categorical_crossentropy')  # Adam optimizer, categorical cross-entropy loss
autoencoder.fit(train_data, train_label, epochs=50,
                batch_size=512, shuffle=True)  # train; Keras prints the training loss per epoch

pr = autoencoder.predict(test_data)  # predictions on the message test set
predict = np.argmax(pr, axis=1)

acc = sum(predict == test_labels) / len(test_labels)
print("The predict accuracy is:", acc)

The LeNet-5 network reaches roughly 0.85 accuracy on the message digits.

Next, a custom convolutional neural network is defined:

import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
import os
from PIL import Image

import scipy.io as sio


from keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D
from keras.layers.normalization import BatchNormalization
from keras import backend as K
from keras.models import Model
from keras.layers.core import Flatten

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
x_train, y_train = mnist.train.images,mnist.train.labels  
x_test, y_test = mnist.test.images, mnist.test.labels  
train = np.concatenate((x_train, x_test))  ## concatenate the MNIST train and test splits
train_label = np.concatenate((y_train, y_test))

train_data = train.reshape(-1, 28, 28,1).astype('float32')  

## Load the message test set
test_dir = "./new_test"   # directory containing the message digit images
# name_list = os.listdir(test_dir)  # alternatively, list every file in the directory

with open('name.txt') as name_file:   # one image file name per line
    name_list = [line.strip('\n') for line in name_file]

result = []   # list collecting the processed digit images
for filename in name_list:
    img = Image.open(os.path.join(test_dir, filename))

    new_image = img.resize((28, 28), Image.BILINEAR)
    imgry_new = np.array(new_image.convert('L'))
    result.append(imgry_new)
test = np.array(result)  # stack into an array of shape (num_images, 28, 28)

# Load the test-set labels
test_labels = np.loadtxt('test_label.txt')
test_label = (np.arange(10) == test_labels[:, None]).astype(int)  # one-hot encode the labels


## Dataset shaping
## reshape to [samples][width][height][channels]
test_data = test.reshape(test.shape[0], 28, 28, 1).astype('float32')


# normalize from 0-255 to 0-1
train_data = train_data / 255
test_data = test_data / 255



# Build the model (custom network)
input_img = Input(shape=(28, 28, 1))  # input layer: 28*28 single-channel images
x1 = Conv2D(128, (5, 5), activation='relu', padding='same')(input_img)  # convolution: 128 filters of size 5*5, ReLU activation
x1 = BatchNormalization()(x1)  # batch normalization: speeds up convergence and helps against overfitting
x1 = MaxPooling2D(2)(x1)  # pooling layer

x1 = Conv2D(64, (5, 5), activation='relu', padding='same')(x1)  # convolution: 64 filters of size 5*5 (chained to x1 so the first block is used)
x1 = BatchNormalization()(x1)
x1 = MaxPooling2D(2)(x1)

#x1 = Conv2D(32, (5, 5), activation='relu', padding='same')(x1)  # optional extra convolution block
#x1 = BatchNormalization()(x1)
#x1 = MaxPooling2D(2)(x1)

x1 = Conv2D(32, (5, 5), activation='relu')(x1)  # convolution: 32 filters of size 5*5
x1 = BatchNormalization()(x1)
encoded = MaxPooling2D(2)(x1)

encoded = Flatten()(encoded)  # flatten
decoded = Dense(120, activation='relu')(encoded)  # fully connected hidden layer, ReLU
decoded = Dense(84, activation='relu')(decoded)   # fully connected hidden layer, ReLU
decoded = Dense(10, activation='softmax')(decoded)  # output layer, softmax over the 10 digit classes

autoencoder = Model(input_img, decoded)  # assemble the model (the variable name is kept from the original template)
autoencoder.compile(optimizer='adam', loss='categorical_crossentropy')  # Adam optimizer, categorical cross-entropy loss
autoencoder.fit(train_data, train_label, epochs=50,
                batch_size=512, shuffle=True)  # train; Keras prints the training loss per epoch

pr = autoencoder.predict(test_data)  # predictions on the message test set
predict = np.argmax(pr, axis=1)

acc = sum(predict == test_labels) / len(test_labels)
print("The predict accuracy is:", acc)

The custom network reaches about 0.95 accuracy.

Next steps: train the model on MNIST, fine-tune it with part of the message data, and then evaluate on the remaining message test set; a minimal sketch of the fine-tuning step follows.
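The sketch below assumes message_x and message_y are hypothetical arrays holding a small labelled portion of the message digits (shaped like train_data and train_label) and reuses the trained model from above: the convolutional layers are frozen and the dense layers keep training at a low learning rate.

from keras.optimizers import Adam

for layer in autoencoder.layers:
    if isinstance(layer, Conv2D):          # freeze the convolutional feature extractor
        layer.trainable = False

autoencoder.compile(optimizer=Adam(lr=1e-4),            # small learning rate for fine-tuning
                    loss='categorical_crossentropy')    # recompile so the frozen layers take effect
autoencoder.fit(message_x, message_y,                   # hypothetical labelled message digits
                epochs=10, batch_size=64, shuffle=True)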


Reposted from blog.csdn.net/zc20161202005/article/details/79961942