残差网络 ResNet 的 tensorflow 简单实现
前言
和前面几篇一样,这个也是没有使用到 slim 的朴素 tensorflow 实现,重复造轮子,大概是因为懒。
图片及代码参考来源于此。
首先是 ResNet 的整体架构,由普通的 CNN 网络 + 一些残差路径而已。
上图中,把残差块分为了 CONV BLOCK 和 ID BLOCK 两种,区别如下。
ID BLOCK 的残余项就是 X,直接短路即可,如下图。
而CONV BLOCK 要对 X 进行一个卷积操作,再连接残余项,如下图。
准确率不太高,但也懒得调了。算了吧。
那接下来就可以直接上代码了。
ID_block 定义代码
def ID_block(X, channels_in, kernel_channels, is_training, name = 'ID_block'):
    """Identity residual block: 1x1 -> 3x3 -> 1x1 bottleneck, shortcut is X itself."""
    out = conv_layer(X, 1, 1, channels_in, kernel_channels, is_training, name + '/conv1')
    out = conv_layer(out, 3, 1, kernel_channels, kernel_channels, is_training, name + '/conv2')
    # Final 1x1 conv restores channels_in and skips ReLU so the residual
    # addition happens on the pre-activation.
    out = conv_layer(out, 1, 1, kernel_channels, channels_in, is_training, name + '/conv3', False)
    return tf.nn.relu(tf.add(out, X))
CONV_block 定义代码
def CONV_block(X, channels_in, channels_out, is_training, name = 'CONV_block'):
    """Residual block whose shortcut is itself a conv (used when channel count changes)."""
    out = conv_layer(X, 1, 1, channels_in, channels_out, is_training, name + '/conv1')
    out = conv_layer(out, 3, 1, channels_out, channels_out, is_training, name + '/conv2')
    # No ReLU on the last conv or on the shortcut: both feed the residual add.
    out = conv_layer(out, 1, 1, channels_out, channels_out, is_training, name + '/conv3', False)
    short_cut = conv_layer(X, 3, 1, channels_in, channels_out, is_training, name + '/short_cut', False)
    return tf.nn.relu(tf.add(out, short_cut))
完整代码
由于我的数据相对简单,所以就跑了一下浅层的 ResNet,可以对应自己的数据,增加几层。
#!/usr/bin/env python
# coding: utf-8
# In[1]:
import numpy as np
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.system("rm -r logs")
import tensorflow as tf
get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt
from PIL import Image
# import multiprocessing
from multiprocessing import Process
import threading
import time
# In[2]:
TrainPath = '/home/winsoul/disk/MyML/data/tfrecord/train.tfrecords'
ValPath = '/home/winsoul/disk/MyML/data/tfrecord/val.tfrecords'
# In[3]:
def read_tfrecord(TFRecordPath):
    """Build the graph ops that read one (image, label) example from a TFRecord file.

    This only *constructs* graph ops; no session is required here. The
    original wrapped the body in `with tf.Session()`, which created and
    immediately discarded a session on every call — removed.

    Args:
        TFRecordPath: path to the .tfrecords file to read.

    Returns:
        (image, label) tensors: image is a [299, 299, 3] float32 tensor,
        label an int32 scalar.
    """
    feature = {
        'image': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64)
    }
    # filename_queue = tf.train.string_input_producer([TFRecordPath], num_epochs = 1)
    filename_queue = tf.train.string_input_producer([TFRecordPath])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(serialized_example, features = feature)
    # NOTE(review): assumes the writer serialized images as raw float32
    # bytes of shape 299x299x3 — confirm against the TFRecord producer.
    image = tf.decode_raw(features['image'], tf.float32)
    image = tf.reshape(image, [299, 299, 3])
    label = tf.cast(features['label'], tf.int32)
    return image, label
# In[4]:
def conv_layer(X, k, s, channels_in, channels_out, is_training, name = 'CONV', should_relu = True):
    """k x k convolution (stride s, SAME padding) with bias and optional ReLU.

    Args:
        X: input tensor, NHWC layout.
        k: square kernel size.
        s: stride in both spatial dimensions.
        channels_in: number of input channels (must match X's last dim).
        channels_out: number of output filters.
        is_training: currently unused (batch normalization is commented
            out below); kept so the signature stays compatible with callers.
        name: name scope for the layer's ops and summaries.
        should_relu: apply ReLU when True; when False return the raw
            pre-activation (used for the conv feeding a residual add).

    Returns:
        The layer's output tensor.
    """
    with tf.name_scope(name):
        # Fixed stddev/bias init; removed a stray trailing semicolon.
        W = tf.Variable(tf.truncated_normal([k, k, channels_in, channels_out], stddev = 0.01))
        b = tf.Variable(tf.constant(0.01, shape = [channels_out]))
        conv = tf.nn.conv2d(X, W, strides = [1, s, s, 1], padding = 'SAME')
        conv_b = tf.nn.bias_add(conv, b)
        # bn = tf.layers.batch_normalization(conv_b, training = is_training)
        result = tf.nn.relu(conv_b) if should_relu else conv_b
        tf.summary.histogram('weights', W)
        tf.summary.histogram('biases', b)
        tf.summary.histogram('activations', result)
        return result
def pool_layer(X, k, s, strr = 'SAME', pool_type = 'MAX'):
    """k x k pooling with stride s; max pooling for pool_type 'MAX', else average."""
    pool_fn = tf.nn.max_pool if pool_type == 'MAX' else tf.nn.avg_pool
    return pool_fn(X,
                   ksize = [1, k, k, 1],
                   strides = [1, s, s, 1],
                   padding = strr)
def fc_layer(X, neurons_in, neurons_out, last = False, name = 'FC'):
    """Fully connected layer: X @ W + b, with ReLU unless it is the last (logits) layer.

    Args:
        X: 2-D input tensor of shape [batch, neurons_in].
        neurons_in: input feature count.
        neurons_out: output feature count.
        last: when True, skip the ReLU and return raw logits.
        name: name scope for the layer's ops and summaries.

    Returns:
        The layer's output tensor of shape [batch, neurons_out].
    """
    with tf.name_scope(name):
        W = tf.Variable(tf.truncated_normal([neurons_in, neurons_out], stddev = 0.01))
        b = tf.Variable(tf.constant(0.01, shape = [neurons_out]))
        tf.summary.histogram('weights', W)
        tf.summary.histogram('biases', b)
        # Compute the affine transform once instead of duplicating it per branch.
        logits = tf.matmul(X, W) + b
        result = logits if last else tf.nn.relu(logits)
        tf.summary.histogram('activations', result)
        return result
# In[5]:
def ID_block(X, channels_in, kernel_channels, is_training, name = 'ID_block'):
    """Identity residual block.

    Bottleneck path 1x1 -> 3x3 -> 1x1 (back to channels_in) plus a direct
    shortcut from the input, followed by ReLU.
    """
    bottleneck = conv_layer(X, 1, 1, channels_in, kernel_channels, is_training, name + '/conv1')
    bottleneck = conv_layer(bottleneck, 3, 1, kernel_channels, kernel_channels, is_training, name + '/conv2')
    # The last conv omits its ReLU so activation is applied after the add.
    bottleneck = conv_layer(bottleneck, 1, 1, kernel_channels, channels_in, is_training, name + '/conv3', False)
    return tf.nn.relu(tf.add(bottleneck, X))
# In[6]:
def CONV_block(X, channels_in, channels_out, is_training, name = 'CONV_block'):
    """Projection residual block.

    Main path 1x1 -> 3x3 -> 1x1 at channels_out; the shortcut is a 3x3 conv
    projecting channels_in -> channels_out so the residual add is well-typed.
    """
    main_path = conv_layer(X, 1, 1, channels_in, channels_out, is_training, name + '/conv1')
    main_path = conv_layer(main_path, 3, 1, channels_out, channels_out, is_training, name + '/conv2')
    # Both branches skip ReLU before the add; activation comes afterwards.
    main_path = conv_layer(main_path, 1, 1, channels_out, channels_out, is_training, name + '/conv3', False)
    short_cut = conv_layer(X, 3, 1, channels_in, channels_out, is_training, name + '/short_cut', False)
    return tf.nn.relu(tf.add(main_path, short_cut))
# In[7]:
def Network(BatchSize, learning_rate):
    """Build and train the shallow ResNet end-to-end.

    Constructs the input queues, network graph, loss/optimizer and summary
    writers, then trains until the input queue raises OutOfRangeError or the
    process is interrupted.

    NOTE(review): the original indentation was lost in extraction; the block
    nesting below (name scopes, try/finally) is a reconstruction — confirm
    against the original notebook.

    Args:
        BatchSize: examples per training/validation batch.
        learning_rate: Adam learning rate (also used to tag the log dirs).
    """
    tf.reset_default_graph()
    with tf.Session() as sess:
        # is_training selects between the train and validation input queues.
        is_training = tf.placeholder(dtype = tf.bool, shape=())
        keep_prob = tf.placeholder('float32', name = 'keep_prob')
        judge = tf.Print(is_training, ['is_training:', is_training])
        image_train, label_train = read_tfrecord(TrainPath)
        image_val, label_val = read_tfrecord(ValPath)
        image_train_Batch, label_train_Batch = tf.train.shuffle_batch([image_train, label_train],
                                                                      batch_size = BatchSize,
                                                                      capacity = BatchSize*3 + 200,
                                                                      min_after_dequeue = BatchSize)
        image_val_Batch, label_val_Batch = tf.train.shuffle_batch([image_val, label_val],
                                                                  batch_size = BatchSize,
                                                                  capacity = BatchSize*3 + 200,
                                                                  min_after_dequeue = BatchSize)
        # Route the appropriate batch through the graph based on is_training.
        image_Batch = tf.cond(is_training, lambda: image_train_Batch, lambda: image_val_Batch)
        label_Batch = tf.cond(is_training, lambda: label_train_Batch, lambda: label_val_Batch)
        # 5-way classification.
        label_Batch = tf.one_hot(label_Batch, depth = 5)
        X = tf.identity(image_Batch)
        y = tf.identity(label_Batch)
        with tf.name_scope('input_reshape'):
            tf.summary.image('input', X, 32)
        # Stem: 7x7/2 conv then 3x3/2 max-pool.
        conv1 = conv_layer(X, 7, 2, 3, 12, is_training, "conv1")
        max_pool1 = pool_layer(conv1, 3, 2)
        res1 = CONV_block(max_pool1, 12, 24, is_training, 'CONV_block_1')
        # Deeper variants, disabled for this small dataset:
        # res2 = ID_block(res1, 24, 32, is_training, 'ID_block_1')
        # res3 = CONV_block(res2, 24, 36, is_training, 'CONV_block_2')
        # res4 = ID_block(res3, 36, 48, is_training, 'ID_block_2')
        # res4 = CONV_block(res3, 36, 48, is_training, 'CONV_block_3')
        # res5 = ID_block(res4, 48, 56, is_training, 'ID_block_3')
        mean_pool1 = pool_layer(res1, 3, 2, pool_type = 'MEAN')
        print(mean_pool1.shape)
        # Classifier head: flatten the 38*38*24 feature map, two hidden FC
        # layers with dropout, then the 5-class logits layer.
        drop1 = tf.nn.dropout(mean_pool1, keep_prob)
        fc1 = fc_layer(tf.reshape(drop1, [-1, 38 * 38 * 24]), 38 * 38 * 24, 5182, name = 'fc1')
        drop2 = tf.nn.dropout(fc1, keep_prob)
        fc2 = fc_layer(drop2, 5182, 512, name = 'fc2')
        drop3 = tf.nn.dropout(fc2, keep_prob)
        y_result = fc_layer(drop3, 512, 5, name = 'y_result')
        with tf.name_scope('summaries'):
            # UPDATE_OPS would hold batch-norm moving-average updates; it is
            # empty here since BN is commented out in conv_layer.
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = y_result, labels = y))
                train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)
                #train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy)
            corrent_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_result, 1))
            accuracy = tf.reduce_mean(tf.cast(corrent_prediction, 'float', name = 'accuracy'))
            tf.summary.scalar("loss", cross_entropy)
            tf.summary.scalar("accuracy", accuracy)
        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
        sess.run(init_op)
        # Start the queue-runner threads that feed the shuffle_batch queues.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord = coord)
        merge_summary = tf.summary.merge_all()
        summary__train_writer = tf.summary.FileWriter("./logs/train" + '_rate:' + str(learning_rate), sess.graph)
        summary_val_writer = tf.summary.FileWriter("./logs/test" + '_rate:' + str(learning_rate))
        try:
            batch_index = 0
            while not coord.should_stop():
                # One optimizer step with dropout active.
                sess.run([train_step], feed_dict = {keep_prob: 0.5, is_training: True})
                if batch_index % 10 == 0:
                    # Periodic evaluation; note this run also executes
                    # train_step, so it performs an extra update step.
                    summary_train, _, acc_train, loss_train = sess.run([merge_summary, train_step, accuracy, cross_entropy], feed_dict = {keep_prob: 1.0, is_training: True})
                    summary__train_writer.add_summary(summary_train, batch_index)
                    print(str(batch_index) + ' train:' + ' ' + str(acc_train) + ' ' + str(loss_train))
                    summary_val, acc_val, loss_val = sess.run([merge_summary, accuracy, cross_entropy], feed_dict = {keep_prob: 1.0, is_training: False})
                    summary_val_writer.add_summary(summary_val, batch_index)
                    print(str(batch_index) + ' val: ' + ' ' + str(acc_val) + ' ' + str(loss_val))
                batch_index += 1;
                # if batch_index > 100:
                #     break
        except tf.errors.OutOfRangeError:
            print("OutofRangeError!")
        finally:
            # Always stop and join the queue threads before closing the session.
            print("Finish")
            coord.request_stop()
            coord.join(threads)
            sess.close()
# In[8]:
def main():
    """Run training repeatedly, shrinking the learning rate each round.

    Starts at 1e-5 and divides by 3 after every run. A KeyboardInterrupt
    aborts the current run only and moves on to the next, smaller rate.
    """
    rate = 0.00001
    while True:
        print(rate)
        try:
            Network(16, rate)
        except KeyboardInterrupt:
            # Ctrl+C: skip to the next learning rate instead of exiting.
            pass
        finally:
            rate /= 3
# In[ ]:
# Script entry point: train indefinitely with a decaying learning rate.
if __name__ == '__main__':
    main()