利用TensorFlow手写数字识别(MNIST)

MNIST数据集

MNIST数据集主要由一些手写的数字和相应的标签组成，图片一共有10类，分别对应0~9
包含以下四个文件

train-images-idx3-ubyte.gz 9M 训练图像数据
train-labels-idx1-ubyte.gz 0.03M 训练图像的标签
t10k-images-idx3-ubyte.gz 1.57M 测试图像数据
t10k-labels-idx1-ubyte.gz 4.4K 测试图像标签

图像标签：每一张图片实际写下的数字

下载MNIST

# coding: utf-8
from tensorflow.examples.tutorials.mnist import input_data

# Read MNIST from the MNIST_data directory; per this tutorial the files are
# downloaded automatically when they are not already there.
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)

执行完成后，项目目录下会自动生成MNIST_data文件夹

这里写图片描述

查看MNIST

# Print the shapes of the training images and of the training labels.
train_set = mnist.train
print(train_set.images.shape)
print(train_set.labels.shape)

# Print the vector representation of image 0.
first_image = train_set.images[0, :]
print(first_image)

# Print the label of image 0.
first_label = train_set.labels[0, :]
print(first_label)

将MNIST数据集保存为图片
每张图片由28*28的矩阵表示
这里写图片描述

#coding:utf-8
from tensorflow.examples.tutorials.mnist import input_data
import scipy.misc
import os

# Read the data set.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# Raw images are written to MNIST_data/raw; the directory is created when it
# does not exist yet.
save_dir = 'MNIST_data/raw'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

# Save the first 20 training images.
for i in range(20):
    # mnist.train.images[i, :] is the i-th image (indices start at 0).
    image_array = mnist.train.images[i, :]
    # Each image is stored as a flat 784-element vector; restore it to 28x28.
    image_array = image_array.reshape(28, 28)
    # Build the path with os.path.join so the file lands INSIDE save_dir.
    # Plain string concatenation produced 'MNIST_data/rawmnist_train_0.jpg'.
    filename = os.path.join(save_dir, 'mnist_train_%d.jpg' % i)
    # Convert the array to an image with scipy.misc.toimage, then save it.
    # NOTE(review): scipy.misc.toimage was removed in SciPy 1.2; this line
    # needs an older SciPy (with Pillow installed) to run.
    scipy.misc.toimage(image_array, cmin=0.0, cmax=1.0).save(filename)

print('please check: %s' % save_dir)

这里写图片描述

图像标签的独热one-hot表示
用一个10维向量表示0~9,10维向量是原先类别号的one-hot表示
one-hot：用N维向量表示N个类别，每个类别独占一位，one-hot中只有一个1 其他为0

0 （1,0,0,0,0,0,0,0,0,0）
1 （0,1,0,0,0,0,0,0,0,0）
2 （0,0,1,0,0,0,0,0,0,0）

识别MNIST

Softmax回归
线性的多分类模型，从逻辑回归转换而来
区别：逻辑分两类 Softmax分多类

原理：预测结果就是概率最大的那个类

$y = \mathrm{Softmax}(W^{T}x + b)$

占位符和变量
占位符（placeholder）和变量（variable）都是Tensor。Tensor不是具体的值，只是一些我们“希望”TensorFlow去计算的“节点”。
占位符不依赖于其他的Tensor，它的值由用户自行传递给TensorFlow，通常用来存储样本数据和标签。

# Placeholder for the input images: any number of rows, each a flattened
# 28*28 = 784-element vector (the width must match W's first dimension).
x = tf.placeholder(tf.float32, [None, 784])
用来存储训练图片数据的占位符，
形状为[None,784]，None表示第一维的大小是任意的，
每张图片用一个784维的向量表示。

# Placeholder for the true labels: any number of rows, 10 columns each.
y_ = tf.placeholder(tf.float32, shape=[None, 10])
用来储存训练图的实际标签

变量

# Weight matrix of shape 784x10, initialized to all zeros.
W = tf.Variable(tf.zeros(shape=[784, 10]))
初始值为784*10的全零矩阵

# Bias vector of length 10, initialized to all zeros. tf.zeros must be
# called with the shape; subscripting it (tf.zeros[10]) raises a TypeError.
b = tf.Variable(tf.zeros([10]))
初始值为10维的0向量

在TF中实现

# y is a Tensor that depends on x, W and b: the softmax of xW + b.
logits = tf.matmul(x, W) + b
y = tf.nn.softmax(logits)
先获取x、w、b的值，再计算y

y实际上定义了一个Softmax回归模型
假设x为（N,784），N代表输入的训练图像的数目；W为（784,10），b为（10，）。那么xW+b的形状为（N,10），也就是说y的每一行是一个10维的向量，表示该图像属于各个类别的概率

会话
会话是TF的又一核心概念。Tensor是“希望”TF进行计算的节点，会话可以看成对这些节点进行计算的上下文。变量是在计算过程中可以改变的Tensor，变量的值就保存在会话中。操作前需要初始化：tf.global_variables_initializer().run() #初始化所有的变量


# coding:utf-8
# Import TensorFlow.
# "import tensorflow as tf" is the conventional way to import TensorFlow.
import tensorflow as tf
# Import the MNIST tutorial module.
from tensorflow.examples.tutorials.mnist import input_data
# As before, read in the MNIST data.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Create x, a placeholder that stands for the images to be recognized.
x = tf.placeholder(tf.float32, [None, 784])

# W is a parameter of the Softmax model; it maps a 784-dimensional input to a
# 10-dimensional output. Model parameters are represented with tf.Variable.
W = tf.Variable(tf.zeros([784, 10]))
# b is the other parameter of the Softmax model, usually called the bias.
b = tf.Variable(tf.zeros([10]))

# y = softmax(xW + b); y is the output of the model.
y = tf.nn.softmax(tf.matmul(x, W) + b)

# y_ holds the actual image labels and is likewise a placeholder.
y_ = tf.placeholder(tf.float32, [None, 10])

# At this point we have the two important Tensors: y and y_.
# y is the model output, y_ is the actual label (one-hot encoded, because the
# data was read with one_hot=True).
# The loss below is built from y and y_.

# Build the cross-entropy loss from y and y_.
# NOTE(review): tf.reduce_sum is called without an axis, so it already sums
# over every element and yields a scalar; the outer tf.reduce_mean therefore
# leaves the value unchanged. The loss is the SUM over the batch, not the mean.
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y)))

# With the loss defined, optimize the model parameters (W and b) with
# stochastic gradient descent.
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
# Only two variables, W and b, are defined in this script, so gradient descent
# computes gradients for W and b and updates their values.
# 0.01 is the learning rate used by the gradient-descent optimizer.

# Create a Session. The optimization step train_step can only run in a Session.
sess = tf.InteractiveSession()
# All variables must be initialized before anything is run.
tf.global_variables_initializer().run()
print('start training...')

# Run 1000 steps of gradient descent.
for _ in range(1000):
    # Take 100 training examples from mnist.train per step, 1000 steps in all.
    # batch_xs is image data of shape (100, 784); batch_ys holds the matching
    # labels of shape (100, 10).
    # batch_xs and batch_ys are fed to the two placeholders x and y_.
    batch_xs, batch_ys = mnist.train.next_batch(100)
    # Run train_step in the Session, supplying values for the placeholders.
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

# Element-wise check of whether the prediction is correct.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
# tf.argmax(y, 1) gives the index of the largest entry along axis 1.
# Compute the prediction accuracy; these are all Tensors.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Running a Tensor in the Session yields its value.
# Here this is the accuracy of the final model on the test set.
print(sess.run(accuracy, feed_dict={x: mnist.test.images, y_: mnist.test.labels}))  # 0.9185 (value reported by the author)

这里写图片描述

使用卷积神经网络提高准确率

开头依旧是导入TF

# coding: utf-8
import tensorflow as tf
# The module path is tensorflow.examples (plural), as in the other listings.
from tensorflow.examples.tutorials.mnist import input_data

载入MNIST数据，建立占位符，
x：训练图像
y_:对应训练图像的标签

# Read in the data.
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)
# x: placeholder for the training images (flattened to 784 values each).
x = tf.placeholder(tf.float32, [None, 784])
# y_: placeholder for the labels of those images. dtype and shape are two
# separate arguments; the original "tf.float32.[None,10]" was a syntax error.
y_ = tf.placeholder(tf.float32, [None, 10])

由于采用神经网络对图像进行分类，所以不能使用784维的向量表示x，而应还原为28*28的图片。
[-1,28,28,1]中的-1：形状第一维的大小是根据x自动确定的

# Restore each flat 784-vector to a 28x28 image with one channel; -1 lets the
# first dimension be determined from x.
x_image = tf.reshape(x, shape=[-1, 28, 28, 1])

x_image 就是训练图像
第一层卷积
定义四个函数：weight_variable返回一个给定形状的变量，并自动以截断正态分布初始化；
bias_variable返回一个给定形状的变量，初始化所有值为0.1；conv2d和max_pool_2x2分别封装卷积和池化操作。
用前两个函数创建卷积的核(kernel)与偏置(bias)

def weight_variable(shape):
    """Return a tf.Variable of `shape` initialized from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Return a tf.Variable of `shape` with every element initialized to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, W):
    """Convolve `x` with the kernel `W` using stride 1 in every dimension and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
    """Max-pool `x` with window [1, 2, 2, 1], strides [1, 2, 2, 1] and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

第一层卷积

卷积
激活函数
(池化)

# Kernel and bias of the first convolutional layer.
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
# Convolve the image, add the bias, then apply the ReLU activation.
conv1_pre_activation = conv2d(x_image, W_conv1) + b_conv1
h_conv1 = tf.nn.relu(conv1_pre_activation)
# Max-pool the activated feature maps.
h_pool1 = max_pool_2x2(h_conv1)

第二层卷积

# Second convolutional layer: convolve the first layer's pooled output h_pool1.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
conv2_pre_activation = conv2d(h_pool1, W_conv2) + b_conv2
h_conv2 = tf.nn.relu(conv2_pre_activation)
# Max-pool the activated feature maps.
h_pool2 = max_pool_2x2(h_conv2)

两层卷积之后是全连接层
使用Dropout防止全连接时过拟合
在训练时，以一定的概率去掉某些连接(只是在当前步骤中去除)，每一步去除都是随机的。
例子中Dropout概率为0.5：每一步训练中，每个连接都有50%的概率被去除

# Fully connected layer whose output is a 1024-dimensional vector.
W_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
# Flatten the pooled feature maps so they can be multiplied by W_fc1.
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
fc1_pre_activation = tf.matmul(h_pool2_flat, W_fc1) + b_fc1
h_fc1 = tf.nn.relu(fc1_pre_activation)
# Dropout guards the fully connected layer against overfitting. keep_prob is
# a placeholder fed at run time: 0.5 while training, 1 while testing.
keep_prob = tf.placeholder(tf.float32)
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

再加入一层全连接，把h_fc1_drop转化为10个类别的打分

# Map the 1024-dimensional vector to 10 dimensions, one score per class.
W_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
class_scores = tf.matmul(h_fc1_drop, W_fc2)
y_conv = class_scores + b_fc2

y_conv相当于Softmax模型中的Logit，
TF提供了tf.nn.softmax_cross_entropy_with_logits函数，可以直接对Logit定义交叉熵损失

# Compute the cross-entropy directly from the logits with
# tf.nn.softmax_cross_entropy_with_logits, then average the result.
# The call must be the argument of tf.reduce_mean, and the keyword arguments
# are spelled labels= / logits=.
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
# Define train_step as before. Adam with learning rate 1e-4 is what the
# complete listing in this tutorial uses.
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

定义准确率

# Compare the index of the highest score with the index of the largest entry
# of the label, row by row.
predicted_class = tf.argmax(y_conv, 1)
true_class = tf.argmax(y_, 1)
correct_prediction = tf.equal(predicted_class, true_class)
# Accuracy is the mean of the matches cast to float32.
hits = tf.cast(correct_prediction, tf.float32)
accuracy = tf.reduce_mean(hits)

额外在验证集上计算模型准确率，方便监控训练进度，调整模型参数

# Create the session and initialize all variables.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
# Train for 20000 steps.
for i in range(20000):
    batch = mnist.train.next_batch(50)
    # Every 100 steps, report the accuracy on the current training batch,
    # with dropout disabled by feeding keep_prob=1.0.
    if i % 100 == 0:
        train_accuracy = accuracy.eval(feed_dict={
            x: batch[0], y_: batch[1], keep_prob: 1.0
        })
        print("step %d,traing accuracy %g" % (i, train_accuracy))
    # The optimization step runs on EVERY iteration, so it sits outside the
    # `if`; nested inside it, the model would train on only 1 step in 100.
    train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

训练结束，打印测试集上的准确率

# Report the accuracy on the test set, with dropout disabled (keep_prob=1.0).
# The format string needs a %g conversion; without one the % operator raises
# "TypeError: not all arguments converted during string formatting".
print("测试集准确率 %g" % accuracy.eval(feed_dict={
    x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0
}))

完整代码*

# coding: utf-8
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data


def weight_variable(shape):
    """Return a tf.Variable of `shape` initialized from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))


def bias_variable(shape):
    """Return a tf.Variable of `shape` with every element initialized to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))


def conv2d(x, W):
    """Convolve `x` with the kernel `W` using stride 1 in every dimension and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')


def max_pool_2x2(x):
    """Max-pool `x` with window [1, 2, 2, 1], strides [1, 2, 2, 1] and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')


if __name__ == '__main__':
    # Read in the data.
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
    # x is the placeholder for the training images, y_ the placeholder for
    # their labels.
    x = tf.placeholder(tf.float32, [None, 784])
    y_ = tf.placeholder(tf.float32, [None, 10])

    # Restore each image from a 784-dimensional vector to a 28x28 matrix
    # with a single channel.
    x_image = tf.reshape(x, [-1, 28, 28, 1])

    # First convolutional layer: convolution + bias, ReLU, then max pooling.
    W_conv1 = weight_variable([5, 5, 1, 32])
    b_conv1 = bias_variable([32])
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)

    # Second convolutional layer, applied to the first layer's pooled output.
    W_conv2 = weight_variable([5, 5, 32, 64])
    b_conv2 = bias_variable([64])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)

    # Fully connected layer whose output is a 1024-dimensional vector.
    W_fc1 = weight_variable([7 * 7 * 64, 1024])
    b_fc1 = bias_variable([1024])
    h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
    # Dropout; keep_prob is a placeholder fed with 0.5 while training and
    # 1.0 while evaluating.
    keep_prob = tf.placeholder(tf.float32)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # Map the 1024-dimensional vector to 10 dimensions, one per class.
    W_fc2 = weight_variable([1024, 10])
    b_fc2 = bias_variable([10])
    y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

    # Instead of applying Softmax and then computing the cross-entropy, compute
    # it directly from the logits with tf.nn.softmax_cross_entropy_with_logits.
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
    # Define train_step as before (here: Adam with learning rate 1e-4).
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

    # Define the accuracy: the fraction of rows where the highest-scoring
    # class index equals the label's argmax.
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Create the Session and initialize the variables.
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())

    # Train for 20000 steps.
    for i in range(20000):
        batch = mnist.train.next_batch(50)
        # Every 100 steps, report the accuracy on the current training batch
        # (fed with keep_prob=1.0, i.e. dropout disabled).
        if i % 100 == 0:
            train_accuracy = accuracy.eval(feed_dict={
                x: batch[0], y_: batch[1], keep_prob: 1.0})
            print("step %d,  训练集准确率%g" % (i, train_accuracy))
        # One optimization step per iteration, with dropout active.
        train_step.run(feed_dict={x: batch[0], y_: batch[1], keep_prob: 0.5})

    # After training, report the accuracy on the test set.
    print("测试集准确率 %g" % accuracy.eval(feed_dict={
        x: mnist.test.images, y_: mnist.test.labels, keep_prob:1.0}))

经过漫长的训练，测试集准确率0.9924
这里写图片描述