卷积神经网络训练mnist数据集实现手写数字识别

神经网络:一个神经元Xi(比如图片中的一个像素点)对应一个参数,最后构建成一个线性函数,Y=X1*A1+X2*A2+……+Xn*An+b。然后根据softmax函数对Y进行打分,选择分值最大对应的类别为其决策类型。

深度神经网络:很多时候数据集不是线性分布的,这时候就不能用一个线性函数将其分开,那么就可以多次用线性函数进行分类,所以对上述构建的Yi先不对其进行决策,再对Yi构建一次线性函数,然后再对其结果构建线性函数。。。。。重复几次后得以完全分类,再对其做决策打分。

卷积神经网络:类似神经网络,只不过为了减少参数,引入了局部关联。每个神经元看做一个滤波器(filter),神经元对图片进行滑动卷积操作得出一张新的图片(即一个神经元通过不断地看图片的局部而得出整张图片的信息),每个神经元看完后生成一张图片,深度+1,以至于这一层的图片深度不断加深。下一层卷积操作的滤波器会在长宽的基础上再加一个深度,即上一层输出的深度。

下面就是用 TensorFlow 和 MNIST 数据集训练手写数字识别

mytest.py

from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
# Load the MNIST dataset (downloaded to ./MNIST_data on first run);
# one_hot=True encodes each digit label as a length-10 one-hot vector.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# x: batch of flattened 28x28 grayscale images (784 floats per image).
# y: corresponding one-hot labels (10 classes).
x=tf.placeholder(tf.float32,shape=[None,784])
y=tf.placeholder(tf.float32,shape=[None,10])

def weigth_variable(shape):
    """Create a weight Variable initialized from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Create a bias Variable filled with the constant 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x,w):
    """2-D convolution with stride 1 and SAME padding (output keeps the spatial size)."""
    unit_stride = [1, 1, 1, 1]
    return tf.nn.conv2d(x, w, strides=unit_stride, padding="SAME")

def max_pool_2x2(x):
    """2x2 max pooling with stride 2 (halves height and width)."""
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding="SAME")

# --- Network graph -------------------------------------------------------
# Reshape the flat 784-vector into a 28x28 single-channel image batch.
x1 = tf.reshape(x, [-1, 28, 28, 1])

# Conv layer 1: 5x5 kernels, 1 input channel -> 32 feature maps, 2x2 pool.
w1 = weigth_variable([5, 5, 1, 32])
b1 = bias_variable([32])
c1 = conv2d(x1, w1) + b1
r1 = tf.nn.relu(c1)
p1 = max_pool_2x2(r1)  # 28x28 -> 14x14

# Conv layer 2: 5x5 kernels, 32 -> 64 feature maps, 2x2 pool.
w2 = weigth_variable([5, 5, 32, 64])
b2 = bias_variable([64])
c2 = conv2d(p1, w2) + b2
r2 = tf.nn.relu(c2)
p2 = max_pool_2x2(r2)  # 14x14 -> 7x7

# Fully connected layer: flatten 7x7x64 features -> 1024 units.
x2 = tf.reshape(p2, [-1, 7 * 7 * 64])
w3 = weigth_variable([7 * 7 * 64, 1024])
b3 = bias_variable([1024])
c3 = tf.matmul(x2, w3) + b3
# NOTE(review): keep_prob is hard-coded to 1.0, so this dropout is a no-op
# during training; feed a keep_prob placeholder (<1.0) for dropout to help.
r3 = tf.nn.dropout(tf.nn.relu(c3), 1.0)

# Output layer: 1024 -> 10 logits (one per digit class).
w4 = weigth_variable([1024, 10])
b4 = bias_variable([10])
c4 = tf.matmul(r3, w4) + b4

# Training loss: mean softmax cross-entropy over the batch.
# (Bug fix: this tensor was previously named `accuracy`, which is misleading
# -- it is the loss being minimized, not a classification accuracy.)
l = tf.nn.softmax_cross_entropy_with_logits(logits=c4, labels=y)
loss = tf.reduce_mean(l)
step = tf.train.AdamOptimizer(0.0004).minimize(loss)

# Accuracy as a percentage: fraction of samples whose argmax prediction
# matches the one-hot label.
rate = tf.reduce_mean(
    tf.cast(tf.equal(tf.argmax(c4, 1), tf.argmax(y, 1)), dtype=tf.float32)) * 100


# Train for 10001 mini-batches of 50 images, reporting the accuracy on the
# current batch every 100 steps, then save the learned parameters to disk.
sess = tf.Session()
sess.run(tf.global_variables_initializer())
for i in range(10001):
    images, labels = mnist.train.next_batch(50)
    sess.run(step, feed_dict={x: images, y: labels})
    if i % 100 == 0:
        print(str(i) + ":" + str(sess.run(rate, {x: images, y: labels})) + "%")

saver = tf.train.Saver()
saver.save(sess, 'E:/python/nummodel/mymodel.ckpt')

mytestimg.py

from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
import numpy as np
from PIL import Image
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)


def getTestPicArray(filename):
    """Load an image and convert it to a normalized 1x784 MNIST-style array.

    The image is resized to 28x28 and converted to grayscale; if the image
    is mostly light it is inverted (MNIST digits are light-on-dark) and
    near-background pixels are zeroed; values are then scaled to [0, 1].
    A 0/1 ASCII preview is printed for visual inspection.

    Returns a numpy float32 array of shape (1, 784).
    """
    im = Image.open(filename)
    x_s = 28
    y_s = 28
    # NOTE(review): Image.ANTIALIAS is deprecated (removed in Pillow 10);
    # on newer Pillow versions use Image.LANCZOS instead -- confirm the
    # installed Pillow version.
    out = im.resize((x_s, y_s), Image.ANTIALIAS)

    im_arr = np.array(out.convert('L'))

    # Count light vs. dark pixels to decide whether inversion is needed.
    # (Vectorized replacement for the original per-pixel Python loops.)
    threshold = 100
    num255 = int(np.count_nonzero(im_arr > threshold))
    num0 = im_arr.size - num255

    if num255 > num0:
        # Mostly-light image: invert so the digit is light on dark, then
        # zero out everything below the threshold (background noise).
        print("convert!")
        im_arr = 255 - im_arr
        im_arr[im_arr < threshold] = 0

    # Scale pixel values to [0, 1].
    im_arr = im_arr.astype(np.float32)
    im_arr = np.multiply(im_arr, 1.0 / 255.0)

    # Print a coarse binary preview of the processed image.
    for row in im_arr:
        print("".join("1" if v >= 0.5 else "0" for v in row))

    return im_arr.reshape((1, 784))


x=tf.placeholder(tf.float32,shape=[None,784])

def weigth_variable(shape):
    """Create a weight Variable initialized from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Create a bias Variable filled with the constant 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x,w):
    """2-D convolution with stride 1 and SAME padding (output keeps the spatial size)."""
    unit_stride = [1, 1, 1, 1]
    return tf.nn.conv2d(x, w, strides=unit_stride, padding="SAME")

def max_pool_2x2(x):
    """2x2 max pooling with stride 2 (halves height and width)."""
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding="SAME")

# Rebuild the same network as the training script (mytest.py).  The
# tf.Variables must be created in exactly this order so that tf.train.Saver
# can match them to the checkpoint (TF 1.x names unnamed variables by
# creation order: "Variable", "Variable_1", ...), which is why this block
# duplicates the training graph verbatim.
x1=tf.reshape(x,[-1,28,28,1])
# Conv layer 1: 5x5 kernels, 1 channel -> 32 feature maps, 2x2 max pool.
w1=weigth_variable([5,5,1,32])
b1=bias_variable([32])
c1=conv2d(x1,w1)+b1
r1=tf.nn.relu(c1)
p1=max_pool_2x2(r1)

# Conv layer 2: 5x5 kernels, 32 -> 64 feature maps, 2x2 max pool.
w2=weigth_variable([5,5,32,64])
b2=bias_variable([64])
c2=conv2d(p1,w2)+b2
r2=tf.nn.relu(c2)
p2=max_pool_2x2(r2)

# Fully connected layer: flatten 7x7x64 -> 1024 units.
x2=tf.reshape(p2,[-1,7*7*64])
w3=weigth_variable([7*7*64,1024])
b3=bias_variable([1024])
c3=tf.matmul(x2,w3)+b3
# keep_prob=1.0 disables dropout, which is correct at inference time.
r3=tf.nn.dropout(tf.nn.relu(c3),1.0)

# Output layer: 1024 -> 10 logits; maxarrg is the predicted digit index.
w4=weigth_variable([1024,10])
b4=bias_variable([10])
c4=tf.matmul(r3,w4)+b4
maxarrg=tf.argmax(c4,1)

# Restore the trained parameters and classify one image file.
saver = tf.train.Saver()
sess = tf.Session()
# No global_variables_initializer() call is needed before restore:
# Saver.restore assigns every variable its saved value from the checkpoint,
# so initializing first was redundant work.
saver.restore(sess, "E:/python/nummodel/mymodel.ckpt")

# Bug fix: the path literal previously ended with a stray trailing space
# ("...test10.png "), which makes Image.open look for a non-existent file.
myimg = getTestPicArray("E:/python/img/test10.png")
print(sess.run(maxarrg, feed_dict={x: myimg}))

猜你喜欢

转载自blog.csdn.net/cjc000/article/details/86625799