OpenCV + TensorFlow handwritten digit recognition (with source code)

When you first pick up TensorFlow, training a handwritten digit recognizer is the standard introductory exercise, and there are plenty of training tutorials online. Most of them, however, only test the model on the official sample material. Can't we simply write out a string of digits ourselves and have the machine recognize it? What you learn on paper always feels shallow, so with that question in mind I spent last night experimenting, and this article records some of my own ideas!

The image below is a random string of digits I wrote by hand. My goal is to use the trained model to recognize the handwritten digits inside this picture. Let's get started!

[Figure: the handwritten digit string to be recognized]

Training the two-layer convolutional neural network:

from tensorflow.examples.tutorials.mnist import input_data
# imports needed to save the model
from tensorflow.python.framework.graph_util import convert_variables_to_constants
from tensorflow.python.framework import graph_util
# other imports
import tensorflow as tf
import cv2
import numpy as np

# fetch the MNIST data
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
# create a session
sess = tf.InteractiveSession()
# placeholders
x = tf.placeholder("float", shape=[None, 784], name="Mul")
y_ = tf.placeholder("float", shape=[None, 10], name="y_")
# variables
W = tf.Variable(tf.zeros([784, 10]), name='x')
b = tf.Variable(tf.zeros([10]), 'y_')

# weight initialization
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

# bias initialization
def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

# convolution
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

# 2x2 max pooling
def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')

# build the network
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
x_image = tf.reshape(x, [-1, 28, 28, 1])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
# second convolution layer with ReLU activation
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
keep_prob = tf.placeholder("float", name='rob')
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
# softmax output used during training (with dropout)
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2, name='res')
# softmax output used for testing after training (no dropout)
y_conv2 = tf.nn.softmax(tf.matmul(h_fc1, W_fc2) + b_fc2, name="final_result")
# cross-entropy; returns the loss tensor
cross_entropy = -tf.reduce_sum(y_ * tf.log(y_conv))
# optimizer responsible for minimizing the cross-entropy
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
# compute the accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
# initialize the variables
sess.run(tf.global_variables_initializer())
# save the input and output tensors so they can be retrieved later
tf.add_to_collection('res', y_conv)
tf.add_to_collection('output', y_conv2)
tf.add_to_collection('x', x)
# start training
for i in range(10000):
    batch = mnist.train.next_batch(50)
    if i % 100 == 0:
        train_accuracy = accuracy.eval(feed_dict={
            x: batch[0], y_: batch[1], keep_prob: 1.0})
        print("step %d, training accuracy %g" % (i, train_accuracy))
    # run() feeds the given values into the placeholders and evaluates the
    # graph: batch[0] goes to x, batch[1] goes to y_
    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})
# take the current default graph
graph_def = tf.get_default_graph().as_graph_def()
# convert the variables to constants so the graph can be saved as a .pb model;
# note that 'final_result' here must match the name given to y_conv2 above, or
# the conversion raises an error; any other tensor you want to keep must
# likewise be listed here by name
output_graph_def = tf.graph_util.convert_variables_to_constants(sess, graph_def, ['final_result'])
# save the model with a Saver
saver = tf.train.Saver()
saver.save(sess, "model_data/model")
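One caveat: the listing computes output_graph_def but never actually writes it to disk; only the Saver checkpoint is persisted. If you also want the frozen .pb model that the comments describe, a minimal sketch (assuming the model_data directory already exists; the file name frozen_model.pb is my own choice):

# sketch: write the frozen graph (variables baked in as constants) to a .pb file
with tf.gfile.GFile("model_data/frozen_model.pb", "wb") as f:
    f.write(output_graph_def.SerializeToString())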

After training succeeds, the following four files appear in the model_data folder:

[Screenshot: the model_data folder containing checkpoint, model.data-00000-of-00001, model.index, and model.meta]
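Before moving on, you can sanity-check that the checkpoint was written and is discoverable (a small sketch; tf.train.latest_checkpoint reads the checkpoint index file in the directory):

import tensorflow as tf

# sketch: confirm the Saver actually wrote a discoverable checkpoint
print(tf.train.latest_checkpoint("model_data"))  # expected: model_data/model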

Verifying the network model breaks down into roughly three parts:

1. Image preprocessing. This step is critical: the digit regions must be extracted accurately from the image above and segmented from the background. A single global threshold struggles with this, so the image is segmented with a modified, Niblack-style local thresholding method (sketched after this list); interested readers can consult the literature on Niblack's method.
2. Label the connected regions after segmentation and discard the small spot regions. Find each remaining region's bounding rectangle and treat that rectangle as a region of interest.
3. Downsample each region of interest to 28 x 28 for recognition.
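For reference, the classic Niblack rule picks a per-window threshold T from the local mean m and local standard deviation s:

    T = m + k * s,  with k typically around -0.2 for dark text on a light background.

The listing below instead computes T = m * (1 - 0.2 * (125 - s) / 125) over 19 x 19 windows; that is my reading of the code, not a formula the author states explicitly.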
The code for this part is as follows:

"""
基于TensorFlow的手写数字识别
Author_Zjh
2018/12/3
"""
import numpy as np
import cv2
import matplotlib.pyplot as plt
import imutils
import matplotlib.patches as mpatches
from skimage import data,segmentation,measure,morphology,color
import tensorflow as tf
class Number_recognition():
""" 模型恢复初始化"""
def __init__(self,img):
self.sess = tf.InteractiveSession()
saver = tf.train.import_meta_graph('model_data / model.meta ' ) 
saver.restore (self.sess, ' model_data / Model ' ) # model restoration 
# Graph = tf.get_default_graph () 
# get input tensor ,, obtain an output Tensor 
self.input_x = self.sess.graph .get_tensor_by_name ( " adder Mul: 0 " ) 
self.y_conv2 = self.sess.graph.get_tensor_by_name ( " final_result: 0 " ) 
self.Preprocessing (IMG) # image preprocessing 
DEF Recognition (Self, IM): 
IM = cv2.resize (IM, (28, 28), interpolation = cv2.INTER_CUBIC) 
x_imgNp.reshape = (IM, [-1, 784 ]) 
Output = self.sess.run (self.y_conv2, feed_dict = {self.input_x: x_img})
 Print ( ' The number you entered is D% ' % (NP .argmax (Output)))
 return np.argmax (Output) # returns the result of the recognition 

DEF Preprocessing These (Self, Image):
 IF image.shape [0]> 800 : 
Image = imutils.resize (Image, height = 800) # If the image is too large local threshold segmentation slightly slower speed, so too downsampling image 

IMG = cv2.cvtColor (image, cv2.COLOR_BGR2GRAY) # Convert to Gray picture 
M1, N1 = img.shape 
K = int (M1 /. 19) +. 1 
LInt = (N1 /. 19) +. 1 
IMG = cv2.GaussianBlur (IMG, (. 3,. 3), 0) # Gaussian 
IMM = img.copy ()
 # based Niblack local Thresholding Method for Image Segmentation extracting text the results were better 
for X in Range (K):
 for Y in Range (L): 
S = IMM [. 19 * X:. 19 * (X +. 1),. 19 * Y:. 19 * (Y +. 1 )] 
Me = S .mean () # mean 
var = np.std (S) # variance 
T = Me * (. 1 - 0.2 * ((125 - var) / 125 )) 
RET, IMM [ . 19 X *: *. 19 (X +. 1) , Y *. 19:. 19 * (Y +. 1)] = cv2.threshold ( 
IMM [ . 19 X *: *. 19 (X +. 1), Y *. 19:. 19 * (Y +. 1)], T, 255, Cv2.THRESH_BINARY_INV) 
label_image = measure.label (IMM) # connected component labeling 
for Region in measure.regionprops (label_image): # circulating the communication area of each of a set of attributes 
# ignore small area 
IF region.area <100 :
 Continue 
MINR, MiNC, MAXR, MAXC = region.bbox # obtain parameters of the bounding rectangle 
cv2.rectangle (Image, (MiNC, MINR), (MAXC, MAXR), (0, 255, 0), 2) # drawing area communication 
im2 = imm [ MINR -. 5: MAXR +. 5, MiNC -. 5: MAXC +. 5] # obtaining the region of interest, i.e., each number area 
Number = self.recognition (IM2) # identification
cv2.putText (Image, STR (Number), (MiNC, MINR - 10), 0, 2, (0, 0, 255), 2) # recognition result written on the original 
cv2.imshow ( " Nizi " , IMM ) 
cv2.imshow ( " Annie " , Image) 
cv2.waitKey (0) 
IF  the __name__ == ' __main__ ' : 
IMG = cv2.imread ( " num.jpg " ) 
X = Number_recognition (IMG)

Segmentation results are shown below:

[Figure: segmentation result with bounding boxes]

Recognition results are as follows:

[Figures: recognition results with the predicted digits overlaid]

The 9 and the 4 were misrecognized; the rest were identified correctly. This is probably because the network was trained on a fairly small amount of data and for a limited number of iterations!
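One more possible cause, and this is an assumption on my part rather than something the post verifies: input_data.read_data_sets scales MNIST pixels to the [0, 1] range, while the thresholded patches fed to the network at test time contain raw 0/255 values. Normalizing the test input to match the training distribution might help, e.g. in recognition():

# sketch (assumption): scale the 0/255 patch to [0, 1] to match MNIST training data
x_img = np.reshape(im, [-1, 784]) / 255.0
output = self.sess.run(self.y_conv2, feed_dict={self.input_x: x_img})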


Disclaimer: this is an original article by the CSDN blogger "zzzzjh", published under the CC 4.0 BY-SA license; when reproducing it, please attach the original source link and this statement.
Original link: https://blog.csdn.net/zzzzjh/article/details/84783277
