第一个项目
第一个项目是使用简单的多层神经网络(不涉及CNN)对手写数字进行识别,准确率达到98%以上。这个项目主要是初步认识如何进行参数调优的过程。首先贴出实现的程序,然后再来谈谈调优过程中遇到的问题。
程序
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import sys
from tensorflow.examples.tutorials.mnist import input_data
import tensorflow as tf
FLAGS=None
# Where the MNIST data is cached/downloaded to.
data_dir='/tmp/tensorflow/mnist/input_data'
# one_hot=True: labels come back as length-10 one-hot vectors.
mnist= input_data.read_data_sets(data_dir,one_hot=True)
# Layer sizes: 784 (28x28 pixels) -> 500 -> 300 -> 10 classes.
hd1input=784
hd1output=500
hd2output=300
hd3output=10
# Input images, flattened to 784 floats; None = variable batch size.
x=tf.placeholder(tf.float32,[None,hd1input])
# Ground-truth labels (one-hot, 10 classes).
y_=tf.placeholder(tf.float32,[None,10])
#定义权重w和偏差项b
def init_weight(shape, st_dev):
    """Create a weight Variable drawn from a normal distribution.

    tf.random_normal samples N(mean=0, stddev=st_dev); `shape` is the
    tensor shape of the resulting weight matrix.
    """
    return tf.Variable(tf.random_normal(shape, stddev=st_dev))
def init_bias(shape, st_dev):
    """Create a bias Variable drawn from N(0, st_dev) with shape `shape`."""
    return tf.Variable(tf.random_normal(shape, stddev=st_dev))
#定义全连接层
def full_connect(input_layer, weight, bias):
    """Fully-connected layer: relu(input_layer @ weight + bias)."""
    pre_activation = tf.add(tf.matmul(input_layer, weight), bias)
    return tf.nn.relu(pre_activation)
#dropout
# Dropout keep-probability, fed per run via feed_dict.
keep_prob = tf.placeholder(tf.float32)
# Hidden layer 1: 784 -> 500, ReLU, then dropout.
w1 = init_weight([hd1input, hd1output], 0.1)
b1 = init_bias([hd1output], 0.1)
hidden_1 = full_connect(x, w1, b1)
L1_drop = tf.nn.dropout(hidden_1, keep_prob)
# Hidden layer 2: 500 -> 300, ReLU, then dropout.
w2 = init_weight([hd1output, hd2output], 0.1)
b2 = init_bias([hd2output], 0.1)
hidden_2 = full_connect(L1_drop, w2, b2)
L2_drop = tf.nn.dropout(hidden_2, keep_prob)
# Output layer: 300 -> 10 raw logits. No activation here — the
# softmax is applied inside the cross-entropy op below.
w3 = init_weight([hd2output, hd3output], 0.1)
# Fixed: the original called init_weight here; use init_bias for the
# bias term. Both sample the same N(0, 0.1) distribution, so behavior
# is unchanged — this is a consistency/intent fix.
b3 = init_bias([hd3output], 0.1)
y = tf.add(tf.matmul(L2_drop, w3), b3)
# Mean cross-entropy over the batch. softmax_cross_entropy_with_logits
# expects UNNORMALIZED logits and applies softmax internally; do not
# pass tf.nn.softmax(y) here (that would softmax twice).
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
)
# Learning rate is a Variable so it can be decayed between epochs.
learn_rate = tf.Variable(0.001)
train_step = tf.train.AdamOptimizer(learn_rate).minimize(cross_entropy)
# Accuracy: fraction of samples whose argmax prediction matches the label.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Op that initializes all Variables; run once inside the session.
init_op = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init_op)
    for epoch in range(50):
        # Exponential learning-rate decay: 0.001 * 0.95^epoch.
        sess.run(tf.assign(learn_rate, 0.001 * (0.95 ** epoch)))
        # 5000 mini-batches of 100 samples per epoch.
        # NOTE(review): keep_prob is fed as 1.0 during training, so the
        # dropout layers are effectively no-ops — confirm this is intended.
        for _ in range(5000):
            batch_xs, batch_ys = mnist.train.next_batch(100)
            # Placeholders receive their values through feed_dict.
            feed = {x: batch_xs, y_: batch_ys, keep_prob: 1.0}
            sess.run(train_step, feed_dict=feed)
        # Evaluate once per epoch on the full test set.
        lr = sess.run(learn_rate)
        acc = sess.run(accuracy,
                       feed_dict={x: mnist.test.images, y_: mnist.test.labels, keep_prob: 1.0})
        print(" Iter "+ str(epoch) + ",Test Accuracy= "+ str(acc) + ",Learning_Rate ="+str(lr)+" ")
输出
Iter 0,Test Accuracy= 0.9799,Learning_Rate =0.001
Iter 1,Test Accuracy= 0.9827,Learning_Rate =0.00095
Iter 2,Test Accuracy= 0.9782,Learning_Rate =0.0009025
Iter 3,Test Accuracy= 0.9818,Learning_Rate =0.000857375
Iter 4,Test Accuracy= 0.981,Learning_Rate =0.000814506
Iter 5,Test Accuracy= 0.9831,Learning_Rate =0.000773781
Iter 6,Test Accuracy= 0.9848,Learning_Rate =0.000735092
Iter 7,Test Accuracy= 0.9852,Learning_Rate =0.000698337
Iter 8,Test Accuracy= 0.9851,Learning_Rate =0.00066342
Iter 9,Test Accuracy= 0.9856,Learning_Rate =0.000630249
Iter 10,Test Accuracy= 0.9856,Learning_Rate =0.000598737
Iter 11,Test Accuracy= 0.9858,Learning_Rate =0.0005688
Iter 12,Test Accuracy= 0.9855,Learning_Rate =0.00054036
Iter 13,Test Accuracy= 0.984,Learning_Rate =0.000513342
Iter 14,Test Accuracy= 0.9844,Learning_Rate =0.000487675
Iter 15,Test Accuracy= 0.9849,Learning_Rate =0.000463291
Iter 16,Test Accuracy= 0.9849,Learning_Rate =0.000440127
Iter 17,Test Accuracy= 0.985,Learning_Rate =0.00041812
Iter 18,Test Accuracy= 0.9847,Learning_Rate =0.000397214
Iter 19,Test Accuracy= 0.9847,Learning_Rate =0.000377354
Iter 20,Test Accuracy= 0.9848,Learning_Rate =0.000358486
Iter 21,Test Accuracy= 0.985,Learning_Rate =0.000340562
Iter 22,Test Accuracy= 0.9852,Learning_Rate =0.000323534
Iter 23,Test Accuracy= 0.9854,Learning_Rate =0.000307357
Iter 24,Test Accuracy= 0.9852,Learning_Rate =0.000291989
Iter 25,Test Accuracy= 0.9852,Learning_Rate =0.00027739
Iter 26,Test Accuracy= 0.985,Learning_Rate =0.00026352
Iter 27,Test Accuracy= 0.9846,Learning_Rate =0.000250344
Iter 28,Test Accuracy= 0.9846,Learning_Rate =0.000237827
Iter 29,Test Accuracy= 0.9838,Learning_Rate =0.000225936
Iter 30,Test Accuracy= 0.9838,Learning_Rate =0.000214639
Iter 31,Test Accuracy= 0.9849,Learning_Rate =0.000203907
Iter 32,Test Accuracy= 0.9851,Learning_Rate =0.000193711
Iter 33,Test Accuracy= 0.985,Learning_Rate =0.000184026
Iter 34,Test Accuracy= 0.985,Learning_Rate =0.000174825
Iter 35,Test Accuracy= 0.9852,Learning_Rate =0.000166083
Iter 36,Test Accuracy= 0.9849,Learning_Rate =0.000157779
Iter 37,Test Accuracy= 0.9845,Learning_Rate =0.00014989
Iter 38,Test Accuracy= 0.9843,Learning_Rate =0.000142396
Iter 39,Test Accuracy= 0.9843,Learning_Rate =0.000135276
Iter 40,Test Accuracy= 0.9848,Learning_Rate =0.000128512
Iter 41,Test Accuracy= 0.9845,Learning_Rate =0.000122087
Iter 42,Test Accuracy= 0.9843,Learning_Rate =0.000115982
Iter 43,Test Accuracy= 0.9844,Learning_Rate =0.000110183
Iter 44,Test Accuracy= 0.9846,Learning_Rate =0.000104674
Iter 45,Test Accuracy= 0.9845,Learning_Rate =9.94403e-05
Iter 46,Test Accuracy= 0.9845,Learning_Rate =9.44682e-05
Iter 47,Test Accuracy= 0.9844,Learning_Rate =8.97448e-05
Iter 48,Test Accuracy= 0.9841,Learning_Rate =8.52576e-05
Iter 49,Test Accuracy= 0.9834,Learning_Rate =8.09947e-05
问题
优化器
一开始我使用的是tf.train.GradientDescentOptimizer这个优化器对w进行梯度下降计算,发现准确率最高也就达到93%多(未对w,b进行优化),而采用tf.train.AdamOptimizer这个优化器对w进行梯度下降计算,发现准确率最高达到97%多(未对w,b进行优化),感觉在这个项目中似乎AdamOptimizer会更好一点。
w,b的初始化
采用了AdamOptimizer,最高也就达到了97%,没有满足要求。因此考虑到变量参数初始值的问题。一开始的时候,我选择的是w,b均为均值为0,标准差为1的正态分布。考虑到有可能是初始值过大的问题,便改为均值为0,标准差为0.1的正态分布。最终结果如上面所述。
交叉熵的计算
交叉熵的计算我的是
y=(tf.add(tf.matmul(L2_drop,w3),b3))
cross_entropy = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(labels=y_,logits=y)
)
而在其他的一些博客上我看到的一般是
prediction = tf.nn.softmax(tf.matmul(L2_drop, W3) + b3)
loss = tf.reduce_mean(
tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=prediction)
)
我尝试加入tf.nn.softmax函数,但是发现准确率变得非常差,完全不能用。这个现象其实是有原因的:softmax_cross_entropy_with_logits 的 logits 参数要求传入未经归一化的 wx+b(该函数内部会自己做 softmax 再计算交叉熵),如果先手动做一次 softmax 再传进去,就相当于对结果做了两次 softmax,交叉熵的梯度就不对了,准确率自然变差。所以我的理解是正确的:logits 就应该是 wx+b 的原始输出,不需要再做其他处理。
最后
这是我第一个尝试的项目,可能存在一些错误,希望大家指教!