Handwritten digit recognition is the most basic introductory tutorial for anyone first getting started with TensorFlow. There are plenty of online tutorials about training, but most of them only test the model on the sample data provided officially. Can the trained model actually recognize a string of digits that I simply write down myself? Knowledge gained from reading alone always feels shallow, so I spent last night studying this question, and this article records some of my own ideas.
Below is an image of a random string of digits that I wrote by hand. My goal is to use the trained model to recognize the handwritten digits in this picture. Let's get started!
Training a two-layer convolutional neural network:
# Train a two-layer convolutional network on MNIST (TensorFlow 1.x graph API)
# and save the trained model as a checkpoint under model_data/.
from tensorflow.examples.tutorials.mnist import input_data
# Libraries required to save the model
from tensorflow.python.framework.graph_util import convert_variables_to_constants
from tensorflow.python.framework import graph_util
# Other libraries
import tensorflow as tf
import cv2
import numpy as np

# Load the MNIST data with one-hot encoded labels
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)

# Create the session
sess = tf.InteractiveSession()

# Placeholders: flattened 28x28 input images and one-hot labels.
# The input is named "Mul" so it can be looked up by name after restoring.
x = tf.placeholder("float", shape=[None, 784], name="Mul")
y_ = tf.placeholder("float", shape=[None, 10], name="y_")

# Variables
# NOTE(review): W and b are never used by the network below. The second
# positional argument of tf.Variable is `trainable`, so 'y_' is presumably
# meant to be `name='y_'` -- left untouched so the saved graph is unchanged.
W = tf.Variable(tf.zeros([784, 10]), name='x')
b = tf.Variable(tf.zeros([10]), 'y_')


def weight_variable(shape):
    """Return a weight Variable of `shape` drawn from a truncated normal (stddev 0.1)."""
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_variable(shape):
    """Return a bias Variable of `shape` with every element initialised to 0.1."""
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)


def conv2d(x, W):
    """Convolve `x` with filter `W` using stride 1 and 'SAME' padding."""
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')


def max_pool_2x2(x):
    """Apply 2x2 max pooling with stride 2 and 'SAME' padding to `x`."""
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')


# First convolutional layer: 5x5 kernels, 1 input channel, 32 feature maps
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
x_image = tf.reshape(x, [-1, 28, 28, 1])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

# Second convolutional layer: 5x5 kernels, 32 -> 64 feature maps
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
# Activation function
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

# Fully connected layers (two 2x2 poolings reduce 28x28 to 7x7)
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
keep_prob = tf.placeholder("float", name='rob')
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Softmax output used during training (goes through the dropout layer)
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2, name='res')
# Softmax output used for testing after training; it bypasses dropout, so
# keep_prob does not have to be fed at inference time.
y_conv2 = tf.nn.softmax(tf.matmul(h_fc1, W_fc2) + b_fc2, name="final_result")

# Cross entropy; returns a Tensor holding the loss value. The probabilities
# are clipped away from zero so tf.log can never produce -inf / NaN.
cross_entropy = -tf.reduce_sum(
    y_ * tf.log(tf.clip_by_value(y_conv, 1e-10, 1.0)))
# Optimizer responsible for minimising the cross entropy
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
# Accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

# Initialise all variables
sess.run(tf.global_variables_initializer())

# Store the input and outputs in collections so they can be retrieved later
tf.add_to_collection('res', y_conv)
tf.add_to_collection('output', y_conv2)
tf.add_to_collection('x', x)

# Training loop
for i in range(10000):
    batch = mnist.train.next_batch(50)
    if i % 100 == 0:
        # Evaluate with dropout disabled (keep_prob = 1.0)
        train_accuracy = accuracy.eval(
            feed_dict={x: batch[0], y_: batch[1], keep_prob: 1.0})
        print("step %d, training accuracy %g" % (i, train_accuracy))
    # run() feeds the placeholders and executes one optimisation step:
    # batch[0] goes to x and batch[1] goes to y_.
    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

# Take the graph definition of the current default graph
graph_def = tf.get_default_graph().as_graph_def()
# Converting variables to constants is needed when exporting a .pb model.
# The node name passed here must match the name given to y_conv2 above
# ("final_result"), otherwise the conversion raises an error; any other tensor
# that has to be kept must be listed by its name in the same way.
# NOTE: output_graph_def is only built here; this script does not write it
# to disk.
output_graph_def = tf.graph_util.convert_variables_to_constants(
    sess, graph_def, ['final_result'])

# Save the model as a checkpoint with a Saver
saver = tf.train.Saver()
saver.save(sess, "model_data/model")
After the network has been trained successfully, the following four files appear in the model_data folder:
Verifying the network model can roughly be divided into the following three parts:
The next step is to use the image above to test our model. The image pre-processing stage is critical: the digit regions in the image must be extracted accurately by threshold segmentation, which is difficult to achieve with a traditional single global threshold. This article therefore uses a segmentation method modified from Niblack's local thresholding; interested readers can look up the relevant literature.
After segmentation, label the connected regions and discard the small speckle regions. Find the bounding rectangle of each remaining region; that rectangular area is treated as the region of interest.
Downsample each region of interest to 28 × 28 and pass it to the model for recognition.
The code is as follows:
"""Handwritten digit recognition based on TensorFlow.

Author_Zjh
2018/12/3
"""
import numpy as np
import cv2
import matplotlib.pyplot as plt
import imutils
import matplotlib.patches as mpatches
from skimage import data, segmentation, measure, morphology, color
import tensorflow as tf


class Number_recognition():
    """Restore the trained model and recognise the digits found in an image.

    Constructing an instance restores the checkpoint from ``model_data/`` and
    immediately runs the full pipeline (segmentation, recognition, display)
    on the image passed in.
    """

    _TILE_SIZE = 19   # side length of the square tiles used for local thresholding
    _MIN_AREA = 100   # connected regions smaller than this are ignored
    _ROI_MARGIN = 5   # pixels added around each bounding box before cropping

    def __init__(self, img):
        """Restore the model and process ``img``.

        Args:
            img: colour image as loaded by ``cv2.imread`` (it is converted
                with ``COLOR_BGR2GRAY`` during preprocessing).
        """
        self.sess = tf.InteractiveSession()
        saver = tf.train.import_meta_graph('model_data/model.meta')
        saver.restore(self.sess, 'model_data/model')  # restore the model
        # Look up the input and output tensors by name.
        self.input_x = self.sess.graph.get_tensor_by_name("Mul:0")
        self.y_conv2 = self.sess.graph.get_tensor_by_name("final_result:0")
        self.Preprocessing(img)  # image preprocessing

    def recognition(self, im):
        """Classify one cropped digit image and return the predicted digit.

        Args:
            im: single-channel image of one digit; it is resized to 28x28.

        Returns:
            Index of the largest network output (``np.argmax``).
        """
        im = cv2.resize(im, (28, 28), interpolation=cv2.INTER_CUBIC)
        # NOTE(review): the crop is fed with raw 0-255 intensities. If the
        # network was trained on inputs scaled to [0, 1] (presumably the case
        # for the MNIST loader -- confirm), dividing by 255.0 here may
        # improve accuracy.
        x_img = np.reshape(im, [-1, 784])
        output = self.sess.run(self.y_conv2,
                               feed_dict={self.input_x: x_img})
        number = np.argmax(output)
        print('The number you entered is %d' % number)
        return number  # the recognition result

    def _niblack_binarize(self, gray):
        """Return an inverted binary copy of ``gray`` using per-tile thresholds.

        The image is split into ``_TILE_SIZE`` squares and each tile is
        thresholded with a value derived from its own mean and standard
        deviation (a modified Niblack local threshold).
        """
        imm = gray.copy()
        rows, cols = imm.shape
        step = self._TILE_SIZE
        # Stepping through the actual extent avoids the empty trailing tile
        # that int(size / step) + 1 produces when size is a multiple of step.
        for r in range(0, rows, step):
            for c in range(0, cols, step):
                tile = imm[r:r + step, c:c + step]
                mean = tile.mean()   # mean
                std = np.std(tile)   # standard deviation
                thresh = mean * (1 - 0.2 * ((125 - std) / 125))
                _, imm[r:r + step, c:c + step] = cv2.threshold(
                    tile, thresh, 255, cv2.THRESH_BINARY_INV)
        return imm

    def Preprocessing(self, image):
        """Segment the digits in ``image``, recognise each one and show the result.

        The bounding box and predicted digit of every region are drawn onto
        ``image`` in place; two windows are then shown and the call blocks
        until a key is pressed.
        """
        if image.shape[0] > 800:
            # Local thresholding is slower on large images, so downsample
            # images that are too large.
            image = imutils.resize(image, height=800)
        img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # convert to grayscale
        img = cv2.GaussianBlur(img, (3, 3), 0)  # Gaussian filtering
        # Niblack-based local thresholding works well for extracting
        # text-like content.
        imm = self._niblack_binarize(img)

        label_image = measure.label(imm)  # connected-component labelling
        margin = self._ROI_MARGIN
        # Loop over the property set of every connected region.
        for region in measure.regionprops(label_image):
            # Ignore small regions.
            if region.area < self._MIN_AREA:
                continue
            minr, minc, maxr, maxc = region.bbox  # bounding rectangle
            cv2.rectangle(image, (minc, minr), (maxc, maxr),
                          (0, 255, 0), 2)  # draw the connected region
            # Region of interest, i.e. the area of one digit. The start
            # indices are clamped at 0: a negative start would wrap around
            # and yield an empty or wrong crop for digits near the border.
            top = max(minr - margin, 0)
            left = max(minc - margin, 0)
            im2 = imm[top:maxr + margin, left:maxc + margin]
            number = self.recognition(im2)  # recognise the digit
            # Write the recognition result onto the original image.
            cv2.putText(image, str(number), (minc, minr - 10),
                        0, 2, (0, 0, 255), 2)
        cv2.imshow("Nizi", imm)
        cv2.imshow("Annie", image)
        cv2.waitKey(0)


if __name__ == '__main__':
    img = cv2.imread("num.jpg")
    if img is None:
        # cv2.imread returns None instead of raising when the file cannot
        # be read; fail with a clear message instead of an AttributeError.
        raise FileNotFoundError("could not read image 'num.jpg'")
    x = Number_recognition(img)
Segmentation results are shown below:
Recognition results are as follows:
The 9 and the 4 were misrecognized, while the rest were identified correctly; the likely cause is the relatively small amount of training data and the limited number of training iterations.
Disclaimer: This is an original article by CSDN blogger "zzzzjh", licensed under the CC 4.0 BY-SA copyright agreement. When reproducing it, please attach the original source link and this statement.
Original link: https://blog.csdn.net/zzzzjh/article/details/84783277
[OpenCV depth attention and learning AI]
Long press or scan the following QR code to follow