先附上数据集的链接:https://github.com/caicloud/tensorflow-tutorial/tree/master/datasets/flowers
然后附上分类模型的代码:https://github.com/jcfszxc/tempfile
先导入函数库:
from skimage import io,transform
import glob
import os
import tensorflow as tf
import numpy as np
import time
import cv2
数据集地址 && 模型保存地址:
# Root directory of the dataset; every immediate sub-folder holds the images of one class.
path = "/home/z/flower_photos/"
# Checkpoint file the trained model is written to (kept inside the dataset directory).
model_path = path + "model.ckpt"
将所有的图片resize成100*100的大小:
# Width and height every image is resized to, and the number of colour channels
# used for the input placeholder.
w = 100
h = 100
c = 3
读取图片:
def read_img(path):
    """Load every ``*.jpg`` image found in the class sub-folders of ``path``.

    Each immediate sub-directory of ``path`` is treated as one class. The
    sub-directories are visited in sorted order so that the integer label of a
    class stays the same between runs (``os.listdir`` gives no ordering
    guarantee, which would silently change the label mapping).

    Args:
        path: Dataset root directory. A trailing path separator is optional.

    Returns:
        A tuple ``(images, labels)``. ``images`` is a float32 array holding the
        resized images and ``labels`` is an int32 array holding, for each
        image, the index of the class folder it came from.
    """
    class_dirs = sorted(
        os.path.join(path, name)
        for name in os.listdir(path)
        if os.path.isdir(os.path.join(path, name))
    )
    imgs = []
    labels = []
    for idx, folder in enumerate(class_dirs):
        for im in glob.glob(os.path.join(folder, '*.jpg')):
            print('reading the images:%s' % (im))
            img = io.imread(im)
            # transform.resize() would also work; the author measured
            # cv2.resize() to be faster.
            img = cv2.resize(img, (w, h))
            # Bring every image to three channels so the list stacks into one
            # array: grayscale images are 2-D, images with alpha have 4 channels.
            if img.ndim == 2:
                img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
            elif img.shape[2] == 4:
                img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
            imgs.append(img)
            labels.append(idx)
    return np.asarray(imgs, np.float32), np.asarray(labels, np.int32)
# Load the whole dataset into memory: the image array and the matching integer labels.
data, label = read_img(path)
打乱顺序:
def disorder(data, label):
    """Shuffle samples and labels with one shared random permutation.

    The global NumPy random generator is re-seeded from the current
    wall-clock second on every call.

    Args:
        data: Array of samples, indexed along its first axis.
        label: Array of labels aligned with ``data``.

    Returns:
        A tuple ``(data, label)`` with both arrays reordered identically.
    """
    np.random.seed(int(time.time()))
    order = np.arange(data.shape[0])
    np.random.shuffle(order)
    return data[order], label[order]
# Shuffle images and labels together so the split below does not depend on load order.
data, label = disorder(data, label)
将所有数据按比例分成训练集和测试集:
# Fraction of the samples that goes into the training set; the rest is used for validation.
ratio = 0.382
num_example = data.shape[0]
# Built-in int() replaces np.int, which was only an alias for it and was
# removed in NumPy 1.24 (using it raises AttributeError there).
s = int(num_example * ratio)
x_train, x_val = data[:s], data[s:]
y_train, y_val = label[:s], label[s:]
下面这一大块就是神经网络的构建:
各参数名称:
layer ==> 层数
conv ==> Convolution ==> 卷积函数
pool ==> Pooling ==> 池化函数
relu ==> Relu激活函数
fc ==> Fully Connected Layer ==> 全连接层
# Graph inputs: a batch of images of shape [batch, w, h, c] and one integer
# class label per image. The batch dimension is left open (None).
x = tf.placeholder(dtype=tf.float32, shape=[None, w, h, c], name='x')
y_ = tf.placeholder(dtype=tf.int32, shape=[None], name='y_')
def _conv_relu(scope, inputs, kernel_size, in_channels, out_channels):
    """Stride-1 SAME convolution followed by bias-add and ReLU, inside variable scope ``scope``."""
    with tf.variable_scope(scope):
        weights = tf.get_variable(
            "weight", [kernel_size, kernel_size, in_channels, out_channels],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        biases = tf.get_variable(
            "bias", [out_channels], initializer=tf.constant_initializer(0.0))
        conv = tf.nn.conv2d(inputs, weights, strides=[1, 1, 1, 1], padding='SAME')
        return tf.nn.relu(tf.nn.bias_add(conv, biases))


def _max_pool_2x2(scope, inputs):
    """2x2 max pooling with stride 2 and VALID padding, inside name scope ``scope``."""
    with tf.name_scope(scope):
        return tf.nn.max_pool(inputs, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')


def _dense(inputs, in_dim, out_dim, regularizer):
    """Affine layer ``inputs @ weight + bias``; must be called inside the layer's variable scope.

    When ``regularizer`` is given, its penalty on the weight matrix is added to
    the ``'losses'`` collection.
    """
    weights = tf.get_variable(
        "weight", [in_dim, out_dim],
        initializer=tf.truncated_normal_initializer(stddev=0.1))
    if regularizer is not None:
        tf.add_to_collection('losses', regularizer(weights))
    biases = tf.get_variable("bias", [out_dim], initializer=tf.constant_initializer(0.1))
    return tf.matmul(inputs, weights) + biases


def inference(input_tensor, train, regularizer, num_classes=5):
    """Build the CNN: four conv+pool stages followed by three fully connected layers.

    Args:
        input_tensor: Image batch; the first convolution expects 3 input channels.
            The height and width must be statically known because the size of
            the flattened feature map is derived from the static shape.
        train: When true, dropout (argument 0.5) is applied after the first two
            fully connected layers.
        regularizer: Callable applied to each fully connected weight matrix, with
            the result stored in the ``'losses'`` collection; ``None`` disables it.
        num_classes: Number of output classes. Defaults to 5, the number of
            flower categories in the dataset this code was written for.

    Returns:
        The un-normalised class scores (logits) of shape ``[batch, num_classes]``.
    """
    relu1 = _conv_relu('layer1-conv1', input_tensor, 5, 3, 32)
    pool1 = _max_pool_2x2("layer2-pool1", relu1)
    relu2 = _conv_relu("layer3-conv2", pool1, 5, 32, 64)
    pool2 = _max_pool_2x2("layer4-pool2", relu2)
    relu3 = _conv_relu("layer5-conv3", pool2, 3, 64, 128)
    pool3 = _max_pool_2x2("layer6-pool3", relu3)
    relu4 = _conv_relu("layer7-conv4", pool3, 3, 128, 128)
    pool4 = _max_pool_2x2("layer8-pool4", relu4)

    # Flatten the last feature map. The size is read from the static shape
    # instead of being hard-coded; for 100x100 inputs it is 6*6*128.
    pool_shape = pool4.get_shape().as_list()
    nodes = pool_shape[1] * pool_shape[2] * pool_shape[3]
    reshaped = tf.reshape(pool4, [-1, nodes])

    with tf.variable_scope('layer9-fc1'):
        fc1 = tf.nn.relu(_dense(reshaped, nodes, 1024, regularizer))
        if train:
            fc1 = tf.nn.dropout(fc1, 0.5)

    with tf.variable_scope('layer10-fc2'):
        fc2 = tf.nn.relu(_dense(fc1, 1024, 512, regularizer))
        if train:
            fc2 = tf.nn.dropout(fc2, 0.5)

    # The width of the last layer is the number of classes; pass a different
    # num_classes when training on a dataset with another number of categories.
    with tf.variable_scope('layer11-fc3'):
        logit = _dense(fc2, 512, num_classes, regularizer)

    return logit
Regularizer 正则化,正则化可以避免神经网络过度拟合
# L2 weight penalty with scale 1e-6, handed to inference() for the fully connected layers.
regularizer = tf.contrib.layers.l2_regularizer(scale=1e-6)
# train=False: the dropout branches inside inference() are not added to this graph.
logits = inference(x, train=False, regularizer=regularizer)
(小处理)将logits乘以1赋值给logits_eval,定义name,方便在后续调用模型时通过tensor名字调用输出tensor
# Multiplying by a constant 1 leaves the values unchanged; the op exists only
# to give the network output the explicit name 'logits_eval'.
b = tf.constant(value=1, dtype=tf.float32)
logits_eval = tf.multiply(logits, b, name='logits_eval')
# Per-example cross-entropy, averaged over the batch into a scalar objective.
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y_)
loss = tf.reduce_mean(cross_entropy)
# The L2 penalties of the fully connected weights are stored in the 'losses'
# collection; they only influence training if they are added to the objective.
regularization_losses = tf.get_collection('losses')
if regularization_losses:
    loss = loss + tf.add_n(regularization_losses)
Learning_rate 学习率,即神经网络每次梯度下降的步长大小。学习率越大,收敛所需的时间通常越短,但过大可能导致训练震荡甚至无法收敛;学习率越小,训练越稳定,但收敛速度越慢。
# One Adam optimisation step on the loss, with a fixed learning rate of 0.001.
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
train_op = optimizer.minimize(loss)
# Accuracy: fraction of samples whose highest-scoring class equals the label.
predicted_class = tf.cast(tf.argmax(logits, 1), tf.int32)
correct_prediction = tf.equal(predicted_class, y_)
acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
学习率和正则化的值不是越大越好也不是越小越好,选一个恰当的值可以极大地提高训练精度和训练速度
比如可以从0.1开始调试。
定义一个函数,按批次取数据
def minibatches(inputs=None, targets=None, batch_size=None, shuffle=False):
    """Yield ``(inputs, targets)`` mini-batches of exactly ``batch_size`` samples.

    Trailing samples that do not fill a whole batch are dropped, so nothing is
    yielded when there are fewer than ``batch_size`` samples.

    Args:
        inputs: Indexable array of samples.
        targets: Indexable array of targets with the same length as ``inputs``.
        batch_size: Positive number of samples per batch.
        shuffle: When true, samples are drawn in a random order (one new
            permutation per call); otherwise batches are consecutive slices.

    Yields:
        Tuples ``(inputs_batch, targets_batch)``.

    Raises:
        ValueError: If ``inputs`` and ``targets`` differ in length, or if
            ``batch_size`` is missing or not positive. The check runs when the
            first batch is requested.
    """
    # Explicit checks instead of assert, which is stripped under ``python -O``.
    if len(inputs) != len(targets):
        raise ValueError(
            "inputs and targets must have the same length, got %d and %d"
            % (len(inputs), len(targets)))
    if batch_size is None or batch_size <= 0:
        raise ValueError("batch_size must be a positive integer, got %r" % (batch_size,))

    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batch_size + 1, batch_size):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batch_size]
        else:
            excerpt = slice(start_idx, start_idx + batch_size)
        yield inputs[excerpt], targets[excerpt]
注释:batch_size是批尺寸,在合理范围内增大Batch_Size可以提高内存利用率,提高大矩阵乘法的并行效率。
跑完一次 epoch(全数据集)所需的迭代次数减少,对于相同数据量的处理速度进一步加快。
在一定范围内,一般来说 Batch_Size 越大,其确定的下降方向越准,引起训练震荡越小。
但是盲目增大Batch_Size可能会让内存容量撑不住。
而且 Batch_Size 过大时,跑完一次 epoch(全数据集)所需的迭代次数减少,参数更新的次数也随之减少,要想达到相同的精度,所花费的时间会大大增加,对参数的修正也就显得更加缓慢。
对于新手来说,一般建议设置为64。
# Train and validate the model.
# n_epoch is the number of passes over the training set; it can be set larger.
n_epoch = 100
batch_size = 64
saver = tf.train.Saver()
# The context manager closes the session even if training raises part-way through.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(n_epoch):
        start_time = time.time()  # recorded per epoch; not reported anywhere yet

        # training
        train_loss, train_acc, n_batch = 0, 0, 0
        for x_train_a, y_train_a in minibatches(x_train, y_train, batch_size, shuffle=True):
            _, err, ac = sess.run([train_op, loss, acc],
                                  feed_dict={x: x_train_a, y_: y_train_a})
            train_loss += err
            train_acc += ac
            n_batch += 1
        # np.sum() makes the average work whether the fetched loss is a scalar
        # or a per-example vector.
        print(" train loss: %f" % (np.sum(train_loss) / n_batch))
        print(" train acc: %f" % (np.sum(train_acc) / n_batch))

        # validation (no train_op, so the weights are not updated)
        val_loss, val_acc, n_batch = 0, 0, 0
        for x_val_a, y_val_a in minibatches(x_val, y_val, batch_size, shuffle=False):
            err, ac = sess.run([loss, acc], feed_dict={x: x_val_a, y_: y_val_a})
            val_loss += err
            val_acc += ac
            n_batch += 1
        print(" validation loss: %f" % (np.sum(val_loss) / n_batch))
        print(" validation acc: %f" % (np.sum(val_acc) / n_batch))

    # Write the trained variables to the checkpoint once all epochs are done.
    saver.save(sess, model_path)
就这样,一个完整的分类模型就写好啦。
完整的代码来这里下载哦。
这个分类模型在将三千张花卉图片分成五类的时候,表现得很好;但是当我把数据集换成60种类别的三万张图片时,表现得就不是那么好了。这说明这个模型可能并不适合类别太多的分类任务,如果需要的话,就要对这个神经网络进行针对性的修改了。