使用的数据集是 Kaggle 猫狗大战(Dogs vs. Cats)比赛的数据集。
下面是网络构建、训练和测试的代码。
数据集的放置方式为:以“cat”为文件夹名称放置猫的图片,以“dog”为文件夹名称放置狗的图片。
需要导入的 data_align 模块的代码也一并附在本文后面。
import tensorflow as tf
import data_align
import time
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
# Build the AlexNet-style graph (TensorFlow 1.x graph mode) on the CPU.
with tf.device('/cpu:0'):
    # Hyper-parameters. `display_step` is not referenced by the code in this
    # file and `training_iters` only appears in the title of the loss plot.
    learning_rate = 1e-4
    training_iters = 200
    batch_size = 50
    display_step = 5
    n_classes = 2
    n_fc1 = 4096
    n_fc2 = 2048

    # Graph inputs: a batch of 227x227 RGB images and one-hot labels.
    x = tf.placeholder(tf.float32, [None, 227, 227, 3])
    y = tf.placeholder(tf.float32, [None, n_classes])
    # Dropout keep-probability. It defaults to 0.5, so code that does not feed
    # it behaves as before; feed 1.0 to run the network without dropout.
    keep_prob = tf.placeholder_with_default(0.5, shape=[], name='keep_prob')

    # Weights. NOTE: variables get their checkpoint names from creation order,
    # so the order of the entries below must not change.
    W_conv = {
        'conv1': tf.Variable(tf.truncated_normal([11, 11, 3, 96],
                                                 stddev=0.0001)),
        'conv2': tf.Variable(tf.truncated_normal([5, 5, 96, 256],
                                                 stddev=0.01)),
        'conv3': tf.Variable(tf.truncated_normal([3, 3, 256, 384],
                                                 stddev=0.01)),
        'conv4': tf.Variable(tf.truncated_normal([3, 3, 384, 384],
                                                 stddev=0.01)),
        'conv5': tf.Variable(tf.truncated_normal([3, 3, 384, 256],
                                                 stddev=0.01)),
        'fc1': tf.Variable(tf.truncated_normal([6 * 6 * 256, n_fc1],
                                               stddev=0.1)),
        'fc2': tf.Variable(tf.truncated_normal([n_fc1, n_fc2], stddev=0.1)),
        'fc3': tf.Variable(tf.truncated_normal([n_fc2, n_classes], stddev=0.1)),
    }
    # Biases must be initialised explicitly, otherwise training may not converge.
    b_conv = {
        'conv1': tf.Variable(tf.constant(0.0, dtype=tf.float32, shape=[96])),
        'conv2': tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[256])),
        'conv3': tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[384])),
        'conv4': tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[384])),
        'conv5': tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[256])),
        'fc1': tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[n_fc1])),
        'fc2': tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[n_fc2])),
        'fc3': tf.Variable(tf.constant(0.0, dtype=tf.float32, shape=[n_classes])),
    }

    # Layer 1: 11x11 convolution, stride 4 (227 -> 55), ReLU.
    conv1 = tf.nn.conv2d(x, W_conv['conv1'], strides=[1, 4, 4, 1], padding='VALID')
    conv1 = tf.nn.bias_add(conv1, b_conv['conv1'])
    conv1 = tf.nn.relu(conv1)
    # 3x3 average pooling, stride 2 (55 -> 27).
    pool1 = tf.nn.avg_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID')
    # Local response normalisation.
    norml = tf.nn.lrn(pool1, 5, bias=1.0, alpha=0.001 / 9.0, beta=0.75)

    # Layer 2: 5x5 convolution, ReLU.
    conv2 = tf.nn.conv2d(norml, W_conv['conv2'], strides=[1, 1, 1, 1], padding='SAME')
    conv2 = tf.nn.bias_add(conv2, b_conv['conv2'])
    conv2 = tf.nn.relu(conv2)
    # 3x3 average pooling, stride 2 (27 -> 13).
    pool2 = tf.nn.avg_pool(conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID')
    # Local response normalisation.
    norm2 = tf.nn.lrn(pool2, 5, bias=1.0, alpha=0.001 / 9.0, beta=0.75)

    # Layer 3: 3x3 convolution, ReLU.
    conv3 = tf.nn.conv2d(norm2, W_conv['conv3'], strides=[1, 1, 1, 1], padding='SAME')
    conv3 = tf.nn.bias_add(conv3, b_conv['conv3'])
    conv3 = tf.nn.relu(conv3)

    # Layer 4: 3x3 convolution, ReLU.
    conv4 = tf.nn.conv2d(conv3, W_conv['conv4'], strides=[1, 1, 1, 1], padding='SAME')
    conv4 = tf.nn.bias_add(conv4, b_conv['conv4'])
    conv4 = tf.nn.relu(conv4)

    # Layer 5: 3x3 convolution, ReLU.
    conv5 = tf.nn.conv2d(conv4, W_conv['conv5'], strides=[1, 1, 1, 1], padding='SAME')
    conv5 = tf.nn.bias_add(conv5, b_conv['conv5'])
    conv5 = tf.nn.relu(conv5)
    # 3x3 average pooling, stride 2 (13 -> 6).
    pool5 = tf.nn.avg_pool(conv5, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID')

    # Layer 6: fully connected + ReLU + dropout on the flattened 6x6x256 map.
    reshape = tf.reshape(pool5, [-1, 6 * 6 * 256])
    fc1 = tf.add(tf.matmul(reshape, W_conv['fc1']), b_conv['fc1'])
    fc1 = tf.nn.relu(fc1)
    fc1 = tf.nn.dropout(fc1, keep_prob)

    # Layer 7: fully connected + ReLU + dropout.
    fc2 = tf.add(tf.matmul(fc1, W_conv['fc2']), b_conv['fc2'])
    fc2 = tf.nn.relu(fc2)
    fc2 = tf.nn.dropout(fc2, keep_prob)

    # Layer 8: fully connected output layer producing the class logits.
    fc3 = tf.add(tf.matmul(fc2, W_conv['fc3']), b_conv['fc3'])

    # Loss: softmax over the logits, cross-entropy against the one-hot labels,
    # averaged over the batch.
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=fc3))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(loss)

    # Evaluation: fraction of samples whose arg-max logit matches the label.
    correct_pred = tf.equal(tf.argmax(fc3, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    init = tf.global_variables_initializer()

# Checkpoint prefix. Forward slashes are used so the file ends up inside the
# `model` directory on every platform (the prediction code looks there).
save_model = "./model/AlexNet.ckpt"
def train(opech):
    """Run `opech` optimisation steps, save a checkpoint and plot the loss.

    Relies on module-level names: the graph tensors/ops (`x`, `y`, `loss`,
    `optimizer`, `init`), `n_classes`, `save_model`, and the input tensors
    `image_batch` / `label_batch`, which the ``__main__`` section creates
    before calling this function.

    Args:
        opech: number of training iterations; one batch is consumed per
            iteration.
    """
    import os  # local import: the script's import header does not provide `os`

    c = []  # loss value of every iteration, used for the plot below
    with tf.Session() as sess:
        sess.run(init)
        train_writer = tf.summary.FileWriter(r'.//log', sess.graph)  # graph log
        saver = tf.train.Saver()
        start_time = time.time()
        coord = tf.train.Coordinator()  # coordinates the input-queue threads
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            for i in range(opech):
                image, label = sess.run([image_batch, label_batch])
                # One-hot encode with a fixed width of n_classes so that a
                # batch containing a single class still matches placeholder y.
                labels = np.eye(n_classes, dtype=np.float32)[label]
                # Fetch the loss in the same run as the update step; this
                # avoids a second forward pass over the batch.
                _, loss_record = sess.run([optimizer, loss],
                                          feed_dict={x: image, y: labels})
                print('now the loss is %f' % loss_record)
                c.append(loss_record)
                end_time = time.time()
                print('time: ', end_time - start_time)
                start_time = end_time
                print('------------------%d onpech is finished------------------' % i)
            print('Optimization Finished!')
            # Make sure the checkpoint directory exists before saving.
            ckpt_dir = os.path.dirname(save_model)
            if ckpt_dir:
                os.makedirs(ckpt_dir, exist_ok=True)
            saver.save(sess, save_model)
            print('Model Save Finished!')
        finally:
            # Always stop the queue threads and flush the log, even on error.
            coord.request_stop()
            coord.join(threads)
            train_writer.close()

    plt.plot(c)
    plt.xlabel('iter')
    plt.ylabel('loss')
    plt.title('lr=%f, ti=%d, bs=%d' % (learning_rate, training_iters, batch_size))
    plt.tight_layout()
    plt.savefig(r'cnn-tf-AlexNet.png', dpi=200)
    plt.show()
def per_class(imagefile):
    """Classify one image file with the most recent checkpoint in ``.//model``.

    The image is converted to RGB, resized to 227x227 and standardised per
    image (zero mean, unit variance) before being fed to the module-level
    graph (`x` -> `fc3`).

    Args:
        imagefile: path of the image to classify.

    Returns:
        "cat" if the largest logit is at index 0, otherwise "dog".

    Raises:
        ValueError: if no checkpoint can be found in ``.//model``.
    """
    # Preprocess in NumPy so that repeated calls do not keep adding new
    # operations to the TensorFlow graph.
    with Image.open(imagefile) as img:
        # Force three channels: grayscale/RGBA files would not fit the input.
        pixels = np.asarray(img.convert('RGB').resize((227, 227)), dtype=np.float32)
    # Same formula as tf.image.per_image_standardization: the divisor is
    # bounded below to avoid dividing by zero on uniform images.
    adjusted_std = max(float(pixels.std()), 1.0 / np.sqrt(pixels.size))
    batch = ((pixels - pixels.mean()) / adjusted_std).reshape(1, 227, 227, 3)

    checkpoint = tf.train.latest_checkpoint('.//model')
    if checkpoint is None:
        raise ValueError("no checkpoint found in './/model'; train the model first")

    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, checkpoint)
        # NOTE(review): nothing is fed here to disable the dropout layers of
        # the graph, so repeated calls on the same image may disagree.
        prediction = sess.run(fc3, feed_dict={x: batch})

    return "cat" if np.argmax(prediction) == 0 else "dog"
if __name__ == '__main__':
    # Change this value to 'test' to classify a single image instead of
    # training the network.
    run_mode = 'train'
    if run_mode == 'test':
        sample_path = r'.//9.jpg'
        predicted = per_class(sample_path)
        print(predicted)
    elif run_mode == 'train':
        data_root = r'.//data//catsdogs'
        image_paths, image_labels = data_align.get_file(data_root)
        # train() reads these two module-level tensors.
        image_batch, label_batch = data_align.get_batch(
            image_paths, image_labels, 227, 227, 50, 900)
        train(90)
以下是需要用到的函数文件 data_align.py:
import cv2
import os
import numpy as np
import tensorflow as tf
from skimage import io
def rebuild(dir, out_dir=r'E:\TensorFlow\AlexNet_raw\kaggledogscats'):
    """Resize every image under `dir` to 227x227 and write it to `out_dir`.

    Files that cannot be decoded or resized are reported (their path is
    printed) and deleted from the source tree. Output files keep only their
    base name, so the directory structure below `dir` is flattened.

    Args:
        dir: root directory that is walked recursively.
        out_dir: directory receiving the resized images; it is created when
            missing.
    """
    dim = (227, 227)
    os.makedirs(out_dir, exist_ok=True)
    for root, dirs, files in os.walk(dir):
        print(root, dirs, files)
        for file in files:
            filepath = os.path.join(root, file)
            # cv2.imread signals an unreadable file by returning None.
            image = cv2.imread(filepath)
            resized = None
            if image is not None:
                try:
                    resized = cv2.resize(image, dim)
                except cv2.error:
                    resized = None
            if resized is None:
                # Corrupt or non-image file: report it and drop it.
                print(filepath)
                os.remove(filepath)
                continue

            path = os.path.join(out_dir, file)
            try:
                written = cv2.imwrite(path, resized)
            except cv2.error:
                written = False
            if not written:
                # A failed write says nothing about the source file: keep it.
                print('Could not write:', path)
def get_file(file_dir):
    """Collect image paths and labels from a directory of class folders.

    Every file found below ``file_dir/<class folder>/`` is returned with the
    label 0 when the class folder is named ``cat`` and 1 otherwise. Files that
    sit directly in `file_dir` belong to no class folder and are skipped.

    Args:
        file_dir: root directory containing one sub-folder per class.

    Returns:
        A tuple ``(image_list, label_list)``: file paths and the matching
        integer labels, shuffled together with ``np.random``.
    """
    root_dir = os.path.normpath(file_dir)
    images = []
    labels = []
    for root, _sub_folders, files in os.walk(file_dir):
        relative = os.path.relpath(os.path.normpath(root), root_dir)
        if relative == os.curdir:
            continue  # files directly in file_dir have no class folder
        # Derive the label from the top-level class folder of each file, using
        # os.sep so that this works on every platform.
        class_name = relative.split(os.sep)[0]
        label = 0 if class_name == 'cat' else 1
        for name in files:
            images.append(os.path.join(root, name))
            labels.append(label)

    # Shuffle paths and labels with one shared permutation to keep them aligned.
    order = np.random.permutation(len(images))
    print((len(images), 2))
    image_list = [images[i] for i in order]
    label_list = [labels[i] for i in order]
    return image_list, label_list
def int64_feature(value):
    """Return a `tf.train.Feature` holding `value` as a one-element int64 list."""
    int_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int_list)
def bytes_feature(value):
    """Return a `tf.train.Feature` holding `value` as a one-element bytes list."""
    # The value is wrapped in a list because BytesList expects a sequence.
    byte_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=byte_list)
def convert_to_tfrecord(images_list, labels_list, save_dir, name):
    """Serialise images and labels into ``<save_dir>/<name>.tfrecords``.

    Each record stores the raw bytes of the decoded image under the key
    ``image_raw`` and the integer label under ``label``. Images that cannot be
    read are reported and skipped.

    Args:
        images_list: paths of the image files.
        labels_list: integer labels, aligned with `images_list`.
        save_dir: directory that receives the TFRecords file.
        name: file name without the ``.tfrecords`` extension.
    """
    filename = os.path.join(save_dir, name + '.tfrecords')
    print('\nTransform start......')
    # The context manager closes the writer even if an unexpected error occurs.
    with tf.python_io.TFRecordWriter(filename) as writer:
        for image_path, raw_label in zip(images_list, labels_list):
            try:
                image = io.imread(image_path)
            except IOError:
                print('Could not read:', image_path)
                continue
            # NOTE(review): the pixel buffer is stored as-is; a reader that
            # reshapes to a fixed size presumably expects the images to have
            # been resized already — confirm.
            example = tf.train.Example(features=tf.train.Features(feature={
                'label': int64_feature(int(raw_label)),
                'image_raw': bytes_feature(image.tobytes()),
            }))
            writer.write(example.SerializeToString())
    print('Transform done!')
def read_and_decode(tfrecords_file, batch_size):
    """Build a shuffled batch pipeline that reads records from a TFRecords file.

    Args:
        tfrecords_file: path of the TFRecords file to read.
        batch_size: number of examples per batch.

    Returns:
        A tuple ``(image_batch, label_batch)`` of tensors; the label batch is
        reshaped to ``[batch_size]``.
    """
    # Layout of one serialised example.
    feature_spec = {
        'label': tf.FixedLenFeature([], tf.int64),
        'image_raw': tf.FixedLenFeature([], tf.string),
    }

    # Queue of file names; its queue runner is added to the current graph.
    filename_queue = tf.train.string_input_producer([tfrecords_file])
    reader = tf.TFRecordReader()
    # read() yields the next (key, value) pair from the queue.
    _, serialized_example = reader.read(filename_queue)
    img_features = tf.parse_single_example(serialized_example, features=feature_spec)

    # Decode the raw byte string into uint8 pixels of a fixed-size image.
    image = tf.decode_raw(img_features['image_raw'], tf.uint8)
    image = tf.reshape(image, [227, 227, 3])
    label = tf.cast(img_features['label'], tf.int32)

    # Image and label are batched together so that they stay in sync.
    # shuffle_batch arguments used here:
    #   batch_size        - number of elements pulled from the queue per batch
    #   capacity          - maximum number of elements held in the queue
    #   min_after_dequeue - minimum number of elements left after a dequeue,
    #                       which guarantees a level of mixing
    #   num_threads       - number of threads enqueuing the tensors
    image_batch, label_batch = tf.train.shuffle_batch(
        [image, label],
        batch_size=batch_size,
        min_after_dequeue=100,
        num_threads=64,
        capacity=200)
    flat_labels = tf.reshape(label_batch, [batch_size])
    return image_batch, flat_labels
def onehot(labels, n_class=None):
    """One-hot encode a 1-D sequence of integer class labels.

    Args:
        labels: sequence (list or array) of non-negative integer labels.
        n_class: number of columns of the result. When omitted it is inferred
            as ``max(labels) + 1``, so a batch that lacks the highest class
            yields a narrower matrix; pass the real class count to get a fixed
            width.

    Returns:
        A float array of shape ``(len(labels), n_class)`` with a single 1 per
        row. An empty input gives an array with zero rows.
    """
    indices = np.asarray(labels, dtype=np.int64)
    n_sample = len(indices)
    if n_class is None:
        # max() of an empty sequence would raise, so treat it as zero classes.
        n_class = int(indices.max()) + 1 if n_sample else 0
    onehot_labels = np.zeros((n_sample, n_class))
    onehot_labels[np.arange(n_sample), indices] = 1
    return onehot_labels
def get_batch(image_list, label_list, img_width, img_height, batch_size, capacity):
    """Build an input pipeline that yields batches of standardised JPEG images.

    Args:
        image_list: paths of the JPEG files.
        label_list: integer labels, aligned with `image_list`.
        img_width: width of the output images in pixels.
        img_height: height of the output images in pixels.
        batch_size: number of examples per batch.
        capacity: maximum number of elements held in the batching queue.

    Returns:
        A tuple ``(image_batch, label_batch)`` of tensors; the label batch is
        reshaped to ``[batch_size]``.
    """
    image = tf.cast(image_list, tf.string)
    label = tf.cast(label_list, tf.int32)
    # Slice paths and labels together so each image stays with its label.
    input_queue = tf.train.slice_input_producer([image, label])
    label = input_queue[1]
    image_contents = tf.read_file(input_queue[0])
    image = tf.image.decode_jpeg(image_contents, channels=3)
    # The API takes (target_height, target_width), in that order.
    image = tf.image.resize_image_with_crop_or_pad(image, img_height, img_width)
    image = tf.image.per_image_standardization(image)
    image_batch, label_batch = tf.train.batch(
        [image, label], batch_size=batch_size, num_threads=64, capacity=capacity)
    label_batch = tf.reshape(label_batch, [batch_size])
    return image_batch, label_batch