Simple CAPTCHA Recognition (Part 3): Code Implementation

The CAPTCHAs themselves are fairly simple, but they contain a fair amount of interference lines, as shown below:

Network structure:

Here, a two-layer LSTM is used as the hidden layer. The outputs of the last four time steps are kept, each is passed through a fully connected layer, and the four results are concatenated to form the final output.
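
As a rough sketch of the data flow through this network (shapes follow the constants defined in config.py below; this is only an annotated outline, not runnable code):

# input image           : (batch, 26, 80)  -> 26 pixel rows used as time steps, 80 pixels per row
# 2-layer LSTM          : unrolled for 26 steps, 128 hidden units per layer
# kept outputs          : outputs[-4] .. outputs[-1], each of shape (batch, 128)
# fully connected layer : weights (128, 62) + softmax, applied to each kept output -> (batch, 62)
# concat + reshape      : 4 x (batch, 62) -> (batch, 4, 62), one class distribution per character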


 

1. First look at the code structure

  

model: model checkpoints, saved every 1,000 iterations

result: the txt file with the final test results

train_data, test_data: the CAPTCHA training set and test set

validation_data: the validation set used to check accuracy after training
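
Putting this together, the project layout looks roughly as follows (directory names come from config.py; the repository name is taken from the GitHub link at the end of this post):

LSTM_captcha/
├── config.py                     # constants: paths, character set, hyper-parameters
├── util.py                       # image loading and label encoding
├── computational_graph_lstm.py   # LSTM computation graph
├── train.py                      # training script
├── predict.py                    # prediction on the test set
├── model/                        # checkpoints saved during training
├── result/                       # result.txt with the predictions
├── train_data/                   # training CAPTCHAs
├── validation_data/              # validation CAPTCHAs
└── test_data/                    # test CAPTCHAs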

2. Generating the CAPTCHA test set and validation set

config.py defines the constants:

These include the project paths, the characters that can appear in a CAPTCHA, and the hyper-parameters (hidden units, number of layers, number of iterations, etc.) that are used later when building the LSTM network.

# -*- coding: utf-8 -*-
# !/usr/bin/env python
# @Time    : 2018/9/26 14:24
# @Author  : xhh
# @Desc    : Define constants
# @File    : config.py
# @Software: PyCharm
import os

path = os.getcwd()  # project root path
captcha_path = path + '/train_data'  # training set: CAPTCHA images
validation_path = path + '/validation_data'  # validation set: CAPTCHA images
test_data_path = path + '/test_data'  # test set: CAPTCHA images
output_path = path + '/result/result.txt'  # where test results are written
model_path = path + '/model/model.ckpt'  # where model checkpoints are saved

# characters to recognize
number = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
ALPHABET = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']
alphabet = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']

batch_size = 64  # number of samples per training iteration
time_steps = 26  # the LSTM is unrolled over 26 time steps; each time step is one row of pixels (image height)
n_input = 80  # pixels per row (image width)
image_channels = 1  # number of image channels
captcha_num = 4  # number of characters per CAPTCHA
n_classes = len(number) + len(ALPHABET) + len(alphabet)  # number of character classes (10 + 26 + 26 = 62)

learning_rate = 0.001  # learning rate for Adam, a first-order gradient-based optimizer of stochastic objectives
num_units = 128  # hidden LSTM units per layer
layer_num = 2  # number of LSTM layers
iteration = 10000  # number of training iterations

util.py preprocesses the images and converts each CAPTCHA into a fixed format:

The resulting format is as follows:

# -*- coding: utf-8 -*-
# !/usr/bin/env python
# @Time    : 2018/9/26 14:24
# @Author  : xhh
# @Desc    : CAPTCHA image processing
# @File    : util.py
# @Software: PyCharm
import random
import numpy as np
from PIL import Image
from config import *


def get_batch(data_path=captcha_path, is_training=True):
    target_file_list = os.listdir(data_path)    # list all file names under the path
    batch = batch_size if is_training else len(target_file_list)   # determine the batch size
    batch_x = np.zeros([batch, time_steps, n_input])   # batch data
    batch_y = np.zeros([batch, captcha_num, n_classes])   # batch labels

    for i in range(batch):
        file_name = random.choice(target_file_list) if is_training else target_file_list[i]  # pick the file to open
        img = Image.open(data_path + '/' + file_name)  # open the image
        img = np.array(img)
        if len(img.shape) > 2:
            img = np.mean(img, -1)  # convert to grayscale: (26, 80, 3) => (26, 80)
        img = img / 255  # normalize to [0, 1], so a large variance in the training set does not slow convergence
        img = np.reshape(img, [time_steps, n_input])  # ensure the shape is (26, 80)
        batch_x[i] = img

        label = np.zeros(captcha_num * n_classes)
        for num, char in enumerate(file_name.split('.')[0]):  # the file name (without extension) is the label
            index = num * n_classes + char2index(char)
            label[index] = 1
        label = np.reshape(label, [captcha_num, n_classes])  # one one-hot vector per character
        batch_y[i] = label
    return batch_x, batch_y


def char2index(c):
    # map a character to an index: '0'-'9' => 0-9, 'A'-'Z' => 10-35, 'a'-'z' => 36-61
    k = ord(c)
    index = -1
    if 48 <= k <= 57:  # digits
        index = k - 48
    if 65 <= k <= 90:  # upper-case letters
        index = k - 55
    if 97 <= k <= 122:  # lower-case letters
        index = k - 61
    if index == -1:
        raise ValueError('No Map')
    return index


def index2char(k):
    # inverse of char2index: map an index back to its character
    index = -1
    if 0 <= k < 10:  # digits
        index = k + 48
    if 10 <= k < 36:  # upper-case letters
        index = k + 55
    if 36 <= k < 62:  # lower-case letters
        index = k + 61
    if index == -1:
        raise ValueError('No Map')
    return chr(index)

# test print
# print(index2char(61))
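
A quick sanity check of these helpers (a sketch only; it assumes train_data already contains CAPTCHA images named after their four-character label, e.g. a hypothetical aB3x.png):

from util import get_batch, char2index, index2char

# character <-> index mapping: digits 0-9, upper-case letters 10-35, lower-case letters 36-61
assert char2index('0') == 0
assert char2index('A') == 10
assert char2index('a') == 36
assert index2char(char2index('z')) == 'z'

# one training batch: images of shape (64, 26, 80), one-hot labels of shape (64, 4, 62)
batch_x, batch_y = get_batch()
print(batch_x.shape, batch_y.shape)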

3. Building the model with an RNN (recurrent neural network)

Training process:

The Adam optimizer is used instead of plain gradient descent. By around 3,000 iterations the accuracy reaches 0.65 and the loss falls below 0.03; continuing to iterate improves the accuracy further.

# -*- coding: utf-8 -*-
# !/usr/bin/env python
# @Time    : 2018/9/26 14:24
# @Author  : xhh
# @Desc    : Train the model with an RNN (recurrent neural network)
# @File    : computational_graph_lstm.py
# @Software: PyCharm
import tensorflow as tf
from config import *


def computational_graph_lstm(x, y, batch_size=batch_size):
    # weights and biases of the output layer, initialized from a Gaussian with random_normal;
    # num_units is the hidden layer size, n_classes the number of character classes
    out_weights = tf.Variable(tf.random_normal([num_units, n_classes]), name='out_weight')
    out_bias = tf.Variable(tf.random_normal([n_classes]), name='out_bias')

    # build the network: layer_num stacked LSTM layers
    lstm_layer = [tf.nn.rnn_cell.LSTMCell(num_units, state_is_tuple=True) for _ in range(layer_num)]    # create the two LSTM layers
    mlstm_cell = tf.nn.rnn_cell.MultiRNNCell(lstm_layer, state_is_tuple=True)   # stack them into one multi-layer cell
    init_state = mlstm_cell.zero_state(batch_size, tf.float32)  # initial cell state

    # output layer
    outputs = list()    # output of every time step
    state = init_state

    # unroll the RNN over the time steps
    with tf.variable_scope('RNN'):
        for timestep in range(time_steps):
            if timestep > 0:
                tf.get_variable_scope().reuse_variables()
            (cell_output, state) = mlstm_cell(x[:, timestep, :], state)  # state holds the state of every LSTM layer
            outputs.append(cell_output)

    # h_state = outputs[-1]  # output of the last time step
    # first character: take the fourth-from-last time-step output, apply the fully connected layer and softmax
    prediction_1 = tf.nn.softmax(tf.matmul(outputs[-4], out_weights) + out_bias)
    # second character prediction
    prediction_2 = tf.nn.softmax(tf.matmul(outputs[-3], out_weights) + out_bias)
    # third character prediction
    prediction_3 = tf.nn.softmax(tf.matmul(outputs[-2], out_weights) + out_bias)
    # fourth character prediction, size: [batch, n_classes]
    prediction_4 = tf.nn.softmax(tf.matmul(outputs[-1], out_weights) + out_bias)
    # concatenate the four predictions
    prediction_all = tf.concat([prediction_1, prediction_2, prediction_3, prediction_4], 1)   # 4 * [batch, n_classes] => [batch, 4 * n_classes]
    prediction_all = tf.reshape(prediction_all, [batch_size, captcha_num, n_classes], name='prediction_merge')  # [batch, 4 * n_classes] => [batch, 4, n_classes]

    # cross-entropy loss averaged over the batch: loss = -log p
    loss = -tf.reduce_mean(y * tf.log(prediction_all), name='loss')
    # loss = tf.reduce_mean(-tf.reduce_sum(y * tf.log(prediction_all), reduction_indices=1))
    # loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction_all, labels=y))
    # optimize with Adam
    opt = tf.train.AdamOptimizer(learning_rate=learning_rate, name='opt').minimize(loss)

    # model evaluation
    pre_arg = tf.argmax(prediction_all, 2, name='predict')
    y_arg = tf.argmax(y, 2)
    correct_prediction = tf.equal(pre_arg, y_arg)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy')

    return opt, loss, accuracy, pre_arg, y_arg

4. Train on the training set and validate to obtain the final model

# -*- coding: utf-8 -*-
# !/usr/bin/env python
# @Time    : 2018/9/26 14:24
# @Author  : xhh
# @Desc    : Model training
# @File    : train.py
# @Software: PyCharm

from util import *
from computational_graph_lstm import *


# training routine
def train():
    # x and y are placeholders: they hold no specific value until fed at run time
    x = tf.placeholder("float", [None, time_steps, n_input], name="x")  # input images
    y = tf.placeholder("float", [None, captcha_num, n_classes], name="y")  # labels of the input images

    # computation graph
    opt, loss, accuracy, pre_arg, y_arg = computational_graph_lstm(x, y)
    saver = tf.train.Saver()  # saver for model checkpoints
    init = tf.global_variables_initializer()    # variable initializer

    # create the TensorFlow session; its resources must be released when done,
    # so instead of calling close() explicitly, a with block closes it automatically
    with tf.Session() as sess:
        sess.run(init)
        iter = 1
        while iter < iteration:
            batch_x, batch_y = get_batch()
            sess.run(opt, feed_dict={x: batch_x, y: batch_y})   # run only the optimization step
            # every 100 iterations, evaluate the loss and accuracy
            if iter % 100 == 0:
                los, acc, parg, yarg = sess.run([loss, accuracy, pre_arg, y_arg], feed_dict={x: batch_x, y: batch_y})
                print("For iter ", iter)
                print("Accuracy ", acc)
                print("Loss ", los)
                if iter % 1000 == 0:
                    print("predict arg:", parg[0:10])
                    print("yarg:", yarg[0:10])
                print("__________________")
                if acc > 0.95:
                    print("training complete, accuracy:", acc)
                    break
            if iter % 1000 == 0:   # save a model checkpoint every 1000 iterations
                saver.save(sess, model_path, global_step=iter)
            iter += 1
        # compute the accuracy on the validation set
        valid_x, valid_y = get_batch(data_path=validation_path, is_training=False)
        print("Validation Accuracy:", sess.run(accuracy, feed_dict={x: valid_x, y: valid_y}))


if __name__ == '__main__':
    train()

A checkpoint is saved every 1,000 iterations, as shown below:
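
With tf.train.Saver and global_step, the model/ directory ends up holding files along these lines (the exact step numbers depend on when training stops; predict.py below assumes a checkpoint at step 5000 exists):

checkpoint
model.ckpt-1000.data-00000-of-00001
model.ckpt-1000.index
model.ckpt-1000.meta
model.ckpt-2000.data-00000-of-00001
model.ckpt-2000.index
model.ckpt-2000.meta
...
model.ckpt-5000.data-00000-of-00001
model.ckpt-5000.index
model.ckpt-5000.meta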

5. Testing with the test set

# -*- coding: utf-8 -*-
# !/usr/bin/env python
# @Time    : 2018/9/26 14:24
# @Author  : xhh
# @Desc    : Test the trained model on the test set
# @File    : predict.py
# @Software: PyCharm
from computational_graph_lstm import *
from util import *


def get_test_set():
    target_file_list = os.listdir(test_data_path)   # list all files in the test set directory
    print("CAPTCHA files to predict:", len(target_file_list))

    # work out how many batches are needed
    flag = len(target_file_list) // batch_size  # how many full batches the test files fill
    batch_len = flag if flag > 0 else 1  # total number of batches
    flag2 = len(target_file_list) % batch_size  # remainder after dividing by the batch size
    batch_len = batch_len if flag2 == 0 else batch_len + 1  # if it does not divide evenly, add one more batch

    print("number of batches:", batch_len)
    print("remainder after batching:", flag2)

    batch = np.zeros([batch_len * batch_size, time_steps, n_input])
    for i, file in enumerate(target_file_list):
        batch[i] = open_image(file)
    batch = batch.reshape([batch_len, batch_size, time_steps, n_input])
    return batch, target_file_list


def open_image(file):
    img = Image.open(test_data_path + '/' + file)  # open the image
    img = np.array(img)
    if len(img.shape) > 2:
        img = np.mean(img, -1)  # convert the CAPTCHA to grayscale: (26, 80, 3) => (26, 80)
    img = img / 255  # normalize to [0, 1]
    return img


def predict():
    with tf.Session() as sess:
        saver = tf.train.import_meta_graph(path + "/model/" + "model.ckpt-5000.meta")
        saver.restore(sess, tf.train.latest_checkpoint(path + "/model/"))  # restore the trained model

        graph = tf.get_default_graph()  # get the original computation graph and look up its tensors
        x = graph.get_tensor_by_name("x:0")
        y = graph.get_tensor_by_name("y:0")
        pre_arg = graph.get_tensor_by_name("predict:0")

        test_x, file_list = get_test_set()  # load the test set
        predict_result = []
        for i in range(len(test_x)):
            batch_test_x = test_x[i]
            batch_test_y = np.zeros([batch_size, captcha_num, n_classes])    # empty y input
            test_predict = sess.run([pre_arg], feed_dict={x: batch_test_x, y: batch_test_y})
            print(test_predict)

            for line in test_predict[0]:    # convert the predicted indices to characters
                character = ""
                for each in line:
                    character += index2char(each)
                predict_result.append(character)

        predict_result = predict_result[:len(file_list)]    # keep one prediction per test file
        write_to_file(predict_result, file_list)    # save to file


def write_to_file(predict_list, file_list):
    with open(output_path, 'a') as f:
        for i, res in enumerate(predict_list):
            if i == 0:
                f.write("id\tfile\tresult\n")
            f.write(str(i) + "\t" + file_list[i] + "\t" + res + "\n")
    print("Predictions saved to:", output_path)


if __name__ == '__main__':
    predict()

The final prediction result:
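
result.txt is tab-separated, in the format written by write_to_file above; each row pairs a test image with the predicted string (the rows below are placeholders, not real output):

id	file	result
0	<first test image>	<predicted 4 characters>
1	<second test image>	<predicted 4 characters>
...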

Testing the CAPTCHAs in the validation_data folder:

file: the CAPTCHA image; its file name is the correct answer

result: the CAPTCHA recognized by the final model. The error rate is still fairly high; you can tune the model further yourself.

 

The approach above recognizes the CAPTCHA without segmenting it into individual characters. For approaches that segment the characters first, there is plenty of material available online.

Code address: https://github.com/XHHz/LSTM_captcha

 

You can also follow the official account that my friend and I run, where we post Python material from time to time. Your support is appreciated, thank you!

 

 

 


Origin blog.csdn.net/weixin_39121325/article/details/88726858