在迁移学习中,微调(fine-tuning)是其中一类方法:将预训练模型的参数导入目标模型作为初始化参数,然后对整个网络进行训练,新增网络层的参数则采用随机初始化。该方法适用于目标任务数据集充足的情况,此时过拟合的风险较低。
那么如何加载部分参数作为初始化参数?
我们采用模型复现的方式(用代码重新构建计算图)来得到图结构,而非使用 tf.train.import_meta_graph() 导入图。
一般实验中保存模型时,都是使用 tf.train.Saver 类来保存,如下:
# Create a Saver with default arguments and write a checkpoint of `sess`
# to "model.ckpt".
saver = tf.train.Saver()
saver.save(sess, save_path="model.ckpt")
全部参数加载时的代码
# Create a Saver with default arguments and load the values stored in
# "model.ckpt" into `sess`.
saver = tf.train.Saver()
saver.restore(sess, save_path="model.ckpt")
根据变量的名字,选择加载部分变量
# Collect every variable of the current graph that could be restored.
variables = tf.contrib.framework.get_variables_to_restore()
# Drop the parameters whose top-level scope is 'output'.
variables_to_restore = [v for v in variables if v.name.split('/')[0] != 'output']
# Build a Saver limited to the remaining variables and load only those.
saver = tf.train.Saver(variables_to_restore)
saver.restore(sess, 'model.ckpt')
下面我们以双流的四层卷积的回归任务为例,原始模型如下:
def net():
    """Build the two-stream, four-conv-layer regression graph (TF1 graph mode).

    Each eye image is processed by its own stream: four conv layers, one
    fully connected layer of 100 units, and a 1-unit sigmoid head. The model
    output is the mean of the two stream outputs and is trained with a
    squared-error loss using Adam and an exponentially decayed learning rate.

    Relies on helpers defined elsewhere in the file: ``weight_variable``,
    ``bias_variable``, ``conv2d`` and ``max_pool_2x2``.

    Side effects:
        Binds the module-level name ``global_step`` to the step counter
        variable.

    Returns:
        A 6-tuple ``((left_eye, right_eye, y), train_step, loss, y_predict,
        saver, is_training)`` where the first element holds the input/label
        placeholders and ``saver`` is a ``tf.train.Saver`` created after the
        whole graph (optimizer included) has been built.
    """
    # The declaration has to precede the first assignment in the function;
    # placing it after the assignment is a SyntaxError on Python 3.
    global global_step

    left_eye = tf.placeholder(tf.float32, shape=[None, 80, 80, 3], name="left_img")
    right_eye = tf.placeholder(tf.float32, shape=[None, 80, 80, 3], name="right_img")
    y = tf.placeholder(tf.float32, shape=[None, 1], name="label")
    is_training = tf.placeholder(tf.bool, name="is_training")

    def _bn_act(pre_activation, name=None):
        # Batch norm -> ReLU -> abs, the activation pattern shared by every layer.
        normed = tf.layers.batch_normalization(pre_activation, training=is_training)
        return tf.abs(tf.nn.relu(normed), name=name)

    def _stream(image, scope, side):
        # Build one eye stream under `scope` and return its sigmoid prediction.
        # NOTE: the creation order of variables is kept identical to the
        # original unrolled code; auto-generated variable names (presumably
        # including the numbered batch-norm ones) depend on it, and so do
        # saved checkpoints - verify before reordering.
        with tf.name_scope(scope):
            # layer 1
            with tf.name_scope('conv-layer-1'):
                w_conv1 = weight_variable([5, 5, 3, 16])
                h_conv1 = _bn_act(conv2d(image, w_conv1))
                h_pool1 = max_pool_2x2(h_conv1)
            # layer 2
            with tf.name_scope('conv-layer-2'):
                w_conv2 = weight_variable([3, 3, 16, 48])
                h_conv2 = _bn_act(conv2d(h_pool1, w_conv2))
                h_pool2 = max_pool_2x2(h_conv2)
            # layer 3
            with tf.name_scope('conv-layer-3'):
                w_conv3 = weight_variable([3, 3, 48, 64])
                h_conv3 = _bn_act(conv2d(h_pool2, w_conv3))
                h_pool3 = max_pool_2x2(h_conv3)
            # layer 4 (no pooling)
            with tf.name_scope('conv-layer-4'):
                w_conv4 = weight_variable([2, 2, 64, 64])
                h_conv4 = _bn_act(conv2d(h_pool3, w_conv4))
            # layer 5: fully connected
            with tf.name_scope('nn-layer-1'):
                w_fc1 = weight_variable([7 * 7 * 64, 100])
                flat = tf.reshape(h_conv4, [-1, 7 * 7 * 64])
                h_fc1 = _bn_act(tf.matmul(flat, w_fc1), name="lstm_" + side)
            # layer 6: 1-unit output head
            with tf.name_scope('out'):
                w_out = weight_variable([100, 1])
                # The bias is created but never used (batch norm follows the
                # matmul); it is still created so the set of variables in the
                # graph matches the original code.
                bias_variable([1])
                normed_out = tf.layers.batch_normalization(
                    tf.matmul(h_fc1, w_out), training=is_training)
                return tf.nn.sigmoid(normed_out, name="op_to_restore_" + side)

    y_predict_left = _stream(left_eye, 'left_stream', 'left')
    y_predict_right = _stream(right_eye, 'right_stream', 'right')
    y_predict = (y_predict_left + y_predict_right) * 0.5

    with tf.name_scope('accuracy'):
        loss = tf.reduce_mean(
            tf.reduce_sum(tf.square(y - y_predict), reduction_indices=[1]))

    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(0.0003, global_step, 100, 0.94, staircase=True)
    # Batch norm registers its moving-average updates in UPDATE_OPS; make the
    # train step depend on them so they run on every training step.
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step)

    return (left_eye, right_eye, y), train_step, loss, y_predict, tf.train.Saver(), is_training
我们将回归任务迁移至二分类任务:
def net():
    """Build the two-stream graph adapted from regression to 2-class output.

    The feature extractor (four conv layers plus one 100-unit fully connected
    layer per eye) and the old 1-unit heads are rebuilt exactly as in the
    regression model so that their variables can be restored from its
    checkpoint. Two new 2-unit softmax heads, under the name scopes
    ``new_left_out`` and ``new_right_out``, are attached to the fully
    connected features; their mean is the prediction, trained with a
    class-weighted cross-entropy (weight 1 for class 0, 10 for class 1).

    Relies on helpers defined elsewhere in the file: ``weight_variable``,
    ``bias_variable``, ``conv2d`` and ``max_pool_2x2``.

    Side effects:
        Binds the module-level name ``global_step`` to the step counter
        variable.

    Returns:
        A 6-tuple ``((left_eye, right_eye, y_new), train_step, accuracy,
        y_predict, is_training, w_conv1)`` where ``w_conv1`` is the
        conv-layer-1 weight variable of the *right* stream.
    """
    # The declaration has to precede the first assignment in the function;
    # placing it after the assignment is a SyntaxError on Python 3.
    global global_step

    left_eye = tf.placeholder(tf.float32, shape=[None, 80, 80, 3], name="left_img")
    right_eye = tf.placeholder(tf.float32, shape=[None, 80, 80, 3], name="right_img")
    # Unused by the new task, but still created as in the original code.
    # NOTE(review): it shares name="label" with `y_new` below, so TF will
    # presumably uniquify the second one - confirm if tensors are looked up
    # by name anywhere.
    y = tf.placeholder(tf.float32, shape=[None, 1], name="label")
    is_training = tf.placeholder(tf.bool, name="is_training")

    def _bn_act(pre_activation, name=None):
        # Batch norm -> ReLU -> abs, the activation pattern shared by every layer.
        normed = tf.layers.batch_normalization(pre_activation, training=is_training)
        return tf.abs(tf.nn.relu(normed), name=name)

    def _stream(image, scope, side):
        # Build one pretrained-layout stream under `scope`.
        # Returns (fully connected features, conv-layer-1 weights).
        # NOTE: the creation order of variables is kept identical to the
        # original unrolled code; auto-generated variable names (presumably
        # including the numbered batch-norm ones) depend on it, and must line
        # up with the checkpoint being restored - verify before reordering.
        with tf.name_scope(scope):
            # layer 1
            with tf.name_scope('conv-layer-1'):
                w_conv1 = weight_variable([5, 5, 3, 16])
                h_conv1 = _bn_act(conv2d(image, w_conv1))
                h_pool1 = max_pool_2x2(h_conv1)
            # layer 2
            with tf.name_scope('conv-layer-2'):
                w_conv2 = weight_variable([3, 3, 16, 48])
                h_conv2 = _bn_act(conv2d(h_pool1, w_conv2))
                h_pool2 = max_pool_2x2(h_conv2)
            # layer 3
            with tf.name_scope('conv-layer-3'):
                w_conv3 = weight_variable([3, 3, 48, 64])
                h_conv3 = _bn_act(conv2d(h_pool2, w_conv3))
                h_pool3 = max_pool_2x2(h_conv3)
            # layer 4 (no pooling)
            with tf.name_scope('conv-layer-4'):
                w_conv4 = weight_variable([2, 2, 64, 64])
                h_conv4 = _bn_act(conv2d(h_pool3, w_conv4))
            # layer 5: fully connected
            with tf.name_scope('nn-layer-1'):
                w_fc1 = weight_variable([7 * 7 * 64, 100])
                flat = tf.reshape(h_conv4, [-1, 7 * 7 * 64])
                h_fc1 = _bn_act(tf.matmul(flat, w_fc1), name="lstm_" + side)
            # layer 6: old 1-unit regression head. Its output is discarded,
            # but the layer is still built, as in the original code, so the
            # variables created after it keep the same auto-generated names.
            with tf.name_scope('out'):
                w_out = weight_variable([100, 1])
                bias_variable([1])
                normed_out = tf.layers.batch_normalization(
                    tf.matmul(h_fc1, w_out), training=is_training)
                tf.nn.sigmoid(normed_out, name="op_to_restore_" + side)
        return h_fc1, w_conv1

    h_fc1_l, _ = _stream(left_eye, 'left_stream', 'left')
    h_fc1_r, w_conv1_right = _stream(right_eye, 'right_stream', 'right')

    # New structure for the binary classification task.
    y_new = tf.placeholder(tf.float32, shape=[None, 2], name="label")
    with tf.name_scope('new_left_out'):
        w_new_left = weight_variable([100, 2])
        b_new_left = bias_variable([2])
        y_predict_left = tf.nn.softmax(tf.matmul(h_fc1_l, w_new_left) + b_new_left,
                                       name="op_to_restore_left")
    with tf.name_scope('new_right_out'):
        w_new_right = weight_variable([100, 2])
        b_new_right = bias_variable([2])
        y_predict_right = tf.nn.softmax(tf.matmul(h_fc1_r, w_new_right) + b_new_right,
                                        name="op_to_restore_right")
    y_predict = (y_predict_left + y_predict_right) * 0.5

    with tf.name_scope('accuracy'):
        # Weighted cross-entropy summed over the batch; 1e-10 guards log(0).
        loss = tf.reduce_mean(
            -1 * tf.reduce_sum(y_new[:, 0] * tf.log(y_predict[:, 0] + 1e-10))
            - 10 * tf.reduce_sum(y_new[:, 1] * tf.log(y_predict[:, 1] + 1e-10)))

    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(0.0003, global_step, 100, 0.94, staircase=True)
    # Batch norm registers its moving-average updates in UPDATE_OPS; make the
    # train step depend on them so they run on every training step.
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step)

    # accuracy
    correct_prediction = tf.equal(tf.argmax(y_predict, 1), tf.argmax(y_new, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # The original returned the name W_conv1_l, which by this point had been
    # rebound to the right-stream conv-layer-1 weights; that value is kept.
    return (left_eye, right_eye, y_new), train_step, accuracy, y_predict, is_training, w_conv1_right
部分参数读取:
# Initialise all variables first; the restore below then overwrites the
# selected ones with the values stored in the checkpoint.
sess.run(tf.global_variables_initializer())
variables = tf.contrib.framework.get_variables_to_restore()
# Keep only variables whose name does not contain "new_", i.e. skip the
# layers added for the new task.
variables_to_restore = [var for var in variables if "new_" not in var.name]
# Load the parameters.
saver_restore = tf.train.Saver(var_list=variables_to_restore)
saver_restore.restore(sess, save_path=tf.train.latest_checkpoint("model_path"))
注意:variables 指新任务模型中存在的全部变量,其中一部分是我们新增的,在原始模型(及其 checkpoint)中并不存在,因此需要剔除。只需读取原始模型中存在、且新模型仍要使用的参数即可。