tensorflow卷积网络训练mnist数据集以及测试自己的图片

本篇博文是综合其它博文的改写,这是原博文作者链接https://blog.csdn.net/sparta_117/article/details/66965760

 train.py:两个卷积层,两个池化层,两个全连接层

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data


# Load the MNIST dataset (labels one-hot encoded).
# NOTE(review): the path is a local Windows directory -- adjust for your environment.
mnist=input_data.read_data_sets("D:/anicode/spyderworkspace/mnist test_new/MNIST_data",one_hot=True)

# Mini-batch size used for training.
batch_size=50
# Number of batches per epoch (integer division).
n_batch=mnist.train.num_examples//batch_size
# Side length of each input image (MNIST images are 28x28).
img_size=28
# Number of color channels (grayscale).
num_channels=1
# Number of output classes (digits 0-9).
num_classes=10

# Placeholders: x holds flattened images, y_true the one-hot labels.
x=tf.placeholder(tf.float32,shape=[None,img_size*img_size],name='x')
y_true=tf.placeholder(tf.float32,shape=[None,num_classes],name='y_true')
# Integer class index of each true label.
y_true_cls=tf.argmax(y_true,dimension=1)
#keep_prob=tf.placeholder(tf.float32)
# Conv layer 1: 5x5 filters, 32 output channels.
filter_size_conv1=5
num_filters_conv1=32

# Conv layer 2: 5x5 filters, 64 output channels.
filter_size_conv2=5
num_filters_conv2=64

# Width of the first fully-connected layer.
fc_layer_size=1024

def create_weights(shape):
    """Return a trainable weight tensor drawn from a truncated normal (stddev 0.1)."""
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

def create_biases(size):
    """Return a trainable bias vector of length `size`, initialized to 0.1."""
    initial = tf.constant(0.1, shape=[size])
    return tf.Variable(initial)

def create_convolution_layer(input, num_input_channels, conv_filter_size, num_filters):
    """Conv (SAME padding, stride 1) + bias + ReLU + 2x2 max-pool (stride 2).

    Halves the spatial resolution and maps `num_input_channels` feature maps
    to `num_filters` feature maps.
    """
    filter_shape = [conv_filter_size, conv_filter_size,
                    num_input_channels, num_filters]
    weights = create_weights(shape=filter_shape)
    biases = create_biases(num_filters)

    conv = tf.nn.conv2d(input=input, filter=weights,
                        strides=[1, 1, 1, 1], padding='SAME')
    activated = tf.nn.relu(conv + biases)

    pooled = tf.nn.max_pool(value=activated, ksize=[1, 2, 2, 1],
                            strides=[1, 2, 2, 1], padding='SAME')
    return pooled

def create_flatten_layer(layer):
    """Flatten a 4-D conv output to shape [batch, height*width*channels]."""
    num_features = layer.get_shape()[1:4].num_elements()
    return tf.reshape(layer, [-1, num_features])
    
def create_fc_layer(input, num_inputs, num_outputs, use_relu=True, keep_prob=1.0):
    """Fully-connected layer: input @ W + b, with optional dropout and ReLU.

    Args:
        input: 2-D tensor of shape [batch, num_inputs].
        num_inputs: width of the incoming tensor.
        num_outputs: number of units in this layer.
        use_relu: apply ReLU after the affine transform (and dropout).
        keep_prob: dropout keep probability (float or tf.placeholder).
            GENERALIZATION: this was hard-coded to 1.0, which made the
            dropout a no-op; it is now a parameter so callers can enable
            real dropout during training. The default preserves the
            original (no-dropout) behavior.
    """
    weights = create_weights(shape=[num_inputs, num_outputs])
    biases = create_biases(num_outputs)

    layer = tf.matmul(input, weights) + biases

    # Dropout is applied before the activation, matching the original order.
    layer = tf.nn.dropout(layer, keep_prob=keep_prob)
    if use_relu:
        layer = tf.nn.relu(layer)
    return layer

x_image = tf.reshape(x, [-1,28,28,1])  # flat 784-vector -> NHWC 28x28x1 image

# Conv block 1: 5x5 kernels, 1 -> 32 channels; 14x14x32 after pooling.
layer_conv1=create_convolution_layer(input=x_image,
                                     num_input_channels=num_channels,#1
                                     conv_filter_size=filter_size_conv1,#5
                                     num_filters=num_filters_conv1)#32

# Conv block 2: 5x5 kernels, 32 -> 64 channels; 7x7x64 after pooling.
layer_conv2=create_convolution_layer(input=layer_conv1,
                                     num_input_channels=num_filters_conv1,
                                     conv_filter_size=filter_size_conv2,
                                     num_filters=num_filters_conv2)

# Flatten the conv output to [batch, 7*7*64] for the dense layers.
layer_flat=create_flatten_layer(layer_conv2)

# Hidden dense layer with ReLU.
layer_fc1=create_fc_layer(input=layer_flat,
                          num_inputs=layer_flat.get_shape()[1:4].num_elements(),
                          num_outputs=fc_layer_size,
                          use_relu=True)

# Output layer: raw logits over the 10 classes (no activation here).
layer_fc2=create_fc_layer(input=layer_fc1,
                          num_inputs=fc_layer_size,
                          num_outputs=num_classes,
                          use_relu=False)

y_pred=tf.nn.softmax(layer_fc2,name='y_pred')  # class probabilities (named for restore)
y_pred_cls=tf.argmax(y_pred,dimension=1)       # predicted class index

sess=tf.Session()
# Loss: softmax cross-entropy computed directly on the logits
# (numerically stable -- do not feed y_pred here).
cross_entropy=tf.nn.softmax_cross_entropy_with_logits(logits=layer_fc2,labels=y_true)
cost=tf.reduce_mean(cross_entropy)
optimizer=tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost)
correct_prediction=tf.equal(y_pred_cls,y_true_cls)
accuracy=tf.reduce_mean(tf.cast(correct_prediction,tf.float32))

saver=tf.train.Saver()
# FIX: initialize variables exactly once, *after* the Adam optimizer has
# created its slot variables. The original ran global_variables_initializer
# a first time before the optimizer even existed, which was redundant and
# would have left the slot variables uninitialized had the second run been
# dropped.
sess.run(tf.global_variables_initializer())


total_iterations=0
def train(num_iteration):
    """Run `num_iteration` optimizer steps over MNIST mini-batches.

    Every 100 steps, print the batch / test-set accuracy and checkpoint the
    model to './mnist model/model.ckpt'.
    """
    # The test feed dict is loop-invariant; build it once instead of
    # reconstructing it on every iteration (FIX: hoisted out of the loop).
    feed_dict_test = {x: mnist.test.images, y_true: mnist.test.labels}
    for i in range(total_iterations, total_iterations + num_iteration):
        x_batch, y_true_batch = mnist.train.next_batch(batch_size)
        feed_dict_tr = {x: x_batch, y_true: y_true_batch}
        sess.run(optimizer, feed_dict_tr)
        if i % 100 == 0:
            train_acc = sess.run(accuracy, feed_dict_tr)
            test_acc = sess.run(accuracy, feed_dict_test)
            # FIX: corrected "tarining" -> "training" in the progress message.
            print('step %d,training acc %g,testing acc %g' % (i, train_acc, test_acc))
            saver.save(sess, './mnist model/model.ckpt')

train(num_iteration=2000)

 test.py:测试图像是用 Windows 自带的画图工具画的。测试了自己写的 0-9 共 10 个数字,识别率只有 60%。不知道是不是迭代次数太少了——训练时精度都达到了 98% 及以上——也可能是对图片的处理还有待改进(原博主的图片处理方式,我是照搬的)。

from PIL import Image
import tensorflow as tf
import matplotlib.pyplot as plt


def loadimage(file_name='D:/2015project/mnist data processing/afterimages/temp8.png',
              save_name="D:/anicode/spyderworkspace/mnist test_new/image/sample8.png"):
    """Load an image, convert it to grayscale, and return MNIST-style pixels.

    GENERALIZATION: the input and output paths were hard-coded; they are now
    parameters whose defaults reproduce the original behavior exactly.

    Args:
        file_name: path of the 28x28 digit image to load.
        save_name: path where the grayscale copy is saved for inspection.

    Returns:
        list[float]: 784 values in [0, 1], inverted so ink is bright on a
        dark background, matching MNIST's training data convention.
    """
    # Convert to 8-bit grayscale ('L' mode).
    im = Image.open(file_name).convert('L')
    # Save a copy of the grayscale image and show it for visual inspection.
    im.save(save_name)
    plt.imshow(im)
    plt.show()
    # Raw pixel values, 0-255, row-major order.
    tv = list(im.getdata())
    # Normalize and invert: paint-drawn digits are dark-on-light, MNIST is
    # light-on-dark.
    tva = [(255 - p) * 1.0 / 255.0 for p in tv]

    return tva

result=loadimage()

# FIX: removed the dead `x`/`keep_prob` placeholders that were created here
# before the meta-graph import -- `x` was immediately shadowed by the tensor
# fetched from the restored graph below, and `keep_prob` was never used;
# both only cluttered the default graph.
sess=tf.Session()
# Step 1: rebuild the graph structure saved by train.py.
saver=tf.train.import_meta_graph('./mnist model/model.ckpt.meta')

# Step 2: restore the trained weights into the session.
saver.restore(sess,'./mnist model/model.ckpt')

# Look up the tensors we need by the names assigned at training time.
graph=tf.get_default_graph()

y_pred=graph.get_tensor_by_name("y_pred:0")
x=graph.get_tensor_by_name("x:0")
y_true=graph.get_tensor_by_name("y_true:0")

prediction=tf.argmax(y_pred,1)

# eval() is like run(), but computes a single tensor; run() can fetch many.
predint=prediction.eval(feed_dict={x: [result]},session=sess)
print('recognize result:')
print(predint[0])

对测试图片的预处理是用 VS2015 + OpenCV2 实现的,代码如下:

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <stdio.h>

using namespace cv;
using namespace std;

cv::Mat org, dst, img, tmp;

// Mouse callback for the "img" window: shows the live cursor position,
// lets the user drag out a rectangle, and on button release crops the
// selection, resizes it to 28x28, binarizes it and writes it to disk.
// event = mouse event code, (x, y) = cursor coordinates,
// flags = drag / keyboard modifier state, ustc = unused user data.
void on_mouse(int event, int x, int y, int flags, void *ustc)
{
	static Point pre_pt = cv::Point(-1, -1);  // anchor point (left-button press)
	static Point cur_pt = cv::Point(-1, -1);  // current cursor position
	char temp[16];
	if (event == CV_EVENT_LBUTTONDOWN)  // button pressed: record anchor and mark it
	{
		org.copyTo(img);  // restart from a clean copy of the original image
		// FIX: snprintf bounds the text to the buffer; the original sprintf
		// could overflow the 16-byte buffer for extreme coordinate values.
		snprintf(temp, sizeof(temp), "(%d,%d)", x, y);
		pre_pt = Point(x, y);
		putText(img, temp, pre_pt, FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0, 255), 1, 8);  // show coordinates in the window
		circle(img, pre_pt, 2, Scalar(255, 0, 0, 0), CV_FILLED, CV_AA, 0);  // mark the anchor point
		imshow("img", img);
	}
	else if (event == CV_EVENT_MOUSEMOVE && !(flags & CV_EVENT_FLAG_LBUTTON))  // moving with no button held
	{
		img.copyTo(tmp);  // draw on a scratch copy so the overlay is not permanent
		snprintf(temp, sizeof(temp), "(%d,%d)", x, y);
		cur_pt = Point(x, y);
		putText(tmp, temp, cur_pt, FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0, 255));  // live coordinate readout
		imshow("img", tmp);
	}
	else if (event == CV_EVENT_MOUSEMOVE && (flags & CV_EVENT_FLAG_LBUTTON))  // dragging: preview the rectangle
	{
		img.copyTo(tmp);
		snprintf(temp, sizeof(temp), "(%d,%d)", x, y);
		cur_pt = Point(x, y);
		putText(tmp, temp, cur_pt, FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0, 255));
		rectangle(tmp, pre_pt, cur_pt, Scalar(0, 255, 0, 0), 1, 8, 0);  // rubber-band rectangle while dragging
		imshow("img", tmp);
	}
	else if (event == CV_EVENT_LBUTTONUP)  // button released: finalize the selection
	{
		org.copyTo(img);
		snprintf(temp, sizeof(temp), "(%d,%d)", x, y);
		cur_pt = Point(x, y);
		putText(img, temp, cur_pt, FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 0, 255));
		circle(img, pre_pt, 2, Scalar(255, 0, 0, 0), CV_FILLED, CV_AA, 0);
		rectangle(img, pre_pt, cur_pt, Scalar(0, 255, 0, 0), 1, 8, 0);  // draw the final rectangle from anchor to release point
		imshow("img", img);
		img.copyTo(tmp);
		// Crop the rectangle-bounded region into dst.
		int width = abs(pre_pt.x - cur_pt.x);
		int height = abs(pre_pt.y - cur_pt.y);
		if (width == 0 || height == 0)  // degenerate selection: nothing to crop
		{
			printf("width == 0 || height == 0");
			return;
		}
		dst = org(Rect(min(cur_pt.x, pre_pt.x), min(cur_pt.y, pre_pt.y), width, height));
		cv::resize(dst, dst, Size(28, 28));               // MNIST input size
		cvtColor(dst, dst, CV_BGR2GRAY);                  // to grayscale
		threshold(dst, dst, 170, 255, CV_THRESH_BINARY);  // binarize
		imwrite("D:/2015project/mnist data processing/afterimages/temp9.png", dst);  // change this to your own output path
		namedWindow("dst");
		imshow("dst", dst);
		waitKey(0);
	}
}
int main()
{
	// Read the source image that contains the handwritten digit.
	org = cv::imread("D:/2015project/mnist data processing/images/num_9.png");
	org.copyTo(img);
	org.copyTo(tmp);
	cv::namedWindow("img");                     // display window
	cv::setMouseCallback("img", on_mouse, 0);   // register the selection handler
	cv::imshow("img", img);
	cv::waitKey(0);
	return 0;
}

猜你喜欢

转载自blog.csdn.net/sinat_38998284/article/details/81154514