TensorFlow 迁移学习之 fine-tune

在迁移学习中，fine tune是其中一类：将预训练模型参数导入目标模型中作为初始化参数，整体进行训练，新增的网络层参数采用随机初始化。该方法适用于目标任务的数据集充足的情况下，此时过拟合程度低。

那么如何加载部分参数作为初始化参数？

我们采用模型复现的方式（用代码重新构建网络）来得到计算图，而不是通过 tf.train.import_meta_graph() 从 meta 文件中导入图。

一般实验情况下保存的时候，都是用的saver类来保存，如下

# Checkpoint the variables of the current graph with a default Saver.
saver = tf.train.Saver()
saver.save(sess, save_path="model.ckpt")

全部参数加载时的代码

# Load every variable handled by a default Saver back from the checkpoint.
saver = tf.train.Saver()
saver.restore(sess, save_path="model.ckpt")

根据变量的名字，选择加载部分变量

# Collect every variable of the current network that could be restored.
variables = tf.contrib.framework.get_variables_to_restore()
# Drop the parameters whose top-level scope is 'output'.
variables_to_restore = [v for v in variables if v.name.split('/')[0] != 'output']
# Build a Saver restricted to the remaining variables and load them.
saver = tf.train.Saver(variables_to_restore)
saver.restore(sess, 'model.ckpt')

下面我们以双流的四层卷积的回归任务为例，原始模型如下：

def net():
    """Build the original two-stream, four-conv-layer regression graph.

    Each stream (left eye / right eye) takes an 80x80x3 image and applies
    four convolution layers (each followed by batch normalization and ReLU),
    one 100-unit fully connected layer and a one-unit sigmoid output. The two
    stream outputs are averaged to give the final prediction.

    ``weight_variable``, ``bias_variable``, ``conv2d`` and ``max_pool_2x2``
    are helpers defined outside this snippet.

    Side effect: binds the module-level name ``global_step`` to the step
    counter variable.

    Returns:
        A 6-tuple ``((left_eye, right_eye, y), train_step, loss, y_predict,
        saver, is_training)`` where the first element holds the input and
        label placeholders, ``saver`` is a ``tf.train.Saver`` built after the
        whole graph was constructed, and ``is_training`` is the boolean
        placeholder that switches batch normalization between modes.
    """
    # Must come before the assignment further down: Python 3 rejects a
    # ``global`` statement that follows an assignment to the same name.
    global global_step

    left_eye = tf.placeholder(tf.float32, shape=[None, 80, 80, 3],name="left_img")
    right_eye = tf.placeholder(tf.float32, shape=[None, 80, 80, 3],name="right_img")
    y = tf.placeholder(tf.float32, shape=[None, 1],name="label")
    is_training = tf.placeholder(tf.bool,name="is_training")

    with tf.name_scope('left_stream'):
        # layer 1
        with tf.name_scope('conv-layer-1'):
            W_conv1_l = weight_variable([5, 5, 3, 16])
            h_conv1_l = tf.abs(tf.nn.relu(tf.layers.batch_normalization(conv2d(left_eye, W_conv1_l), training=is_training)))
            h_pool1_l = max_pool_2x2(h_conv1_l)

        # layer2
        with tf.name_scope('conv-layer-2'):
            W_conv2_l = weight_variable([3, 3, 16, 48])
            h_conv2_l = tf.abs(tf.nn.relu(tf.layers.batch_normalization(conv2d(h_pool1_l, W_conv2_l), training=is_training)))
            h_pool2_l = max_pool_2x2(h_conv2_l)

        # layer3
        with tf.name_scope('conv-layer-3'):
            W_conv3_l = weight_variable([3, 3, 48, 64])
            h_conv3_l = tf.abs(tf.nn.relu(tf.layers.batch_normalization(conv2d(h_pool2_l, W_conv3_l),training=is_training)))
            h_pool3_l = max_pool_2x2(h_conv3_l)

        # layer4
        with tf.name_scope('conv-layer-4'):
            W_conv4_l = weight_variable([2, 2, 64, 64])
            h_conv4_l = tf.abs(tf.nn.relu(tf.layers.batch_normalization(conv2d(h_pool3_l, W_conv4_l),training=is_training)))

        #  layer5
        with tf.name_scope('nn-layer-1'):
            # NOTE(review): 7 * 7 * 64 presumes the conv/pool helpers shrink
            # 80x80 down to 7x7 (e.g. VALID padding, stride-2 pooling) - confirm.
            W_fc1_l = weight_variable([7 * 7 * 64, 100])
            h_pool5_flat_l = tf.reshape(h_conv4_l, [-1, 7 * 7 * 64])
            h_fc1_l = tf.abs(tf.nn.relu(tf.layers.batch_normalization(tf.matmul(h_pool5_flat_l, W_fc1_l),training=is_training)),name="lstm_left")

        # layer6
        with tf.name_scope('out'):
            W_fc2_landmark_l = weight_variable([100, 1])
            # The bias is created but never added to the logits; it is kept so
            # the set of variables in the graph (and checkpoint) is unchanged.
            b_fc2_landmark_l = bias_variable([1])
        y_predict_left = tf.nn.sigmoid(tf.layers.batch_normalization(tf.matmul(h_fc1_l, W_fc2_landmark_l),training=is_training), name="op_to_restore_left")

    with tf.name_scope('right_stream'):
        # layer 1
        with tf.name_scope('conv-layer-1'):
            W_conv1_r = weight_variable([5, 5, 3, 16])
            h_conv1_r = tf.abs(tf.nn.relu(tf.layers.batch_normalization(conv2d(right_eye, W_conv1_r), training=is_training)))
            h_pool1_r = max_pool_2x2(h_conv1_r)

        # layer2
        with tf.name_scope('conv-layer-2'):
            W_conv2_r = weight_variable([3, 3, 16, 48])
            h_conv2_r = tf.abs(tf.nn.relu(tf.layers.batch_normalization(conv2d(h_pool1_r, W_conv2_r), training=is_training)))
            h_pool2_r = max_pool_2x2(h_conv2_r)

        # layer3
        with tf.name_scope('conv-layer-3'):
            W_conv3_r = weight_variable([3, 3, 48, 64])
            h_conv3_r = tf.abs(tf.nn.relu(tf.layers.batch_normalization(conv2d(h_pool2_r, W_conv3_r),training=is_training)))
            h_pool3_r = max_pool_2x2(h_conv3_r)

        # layer4
        with tf.name_scope('conv-layer-4'):
            W_conv4_r = weight_variable([2, 2, 64, 64])
            h_conv4_r = tf.abs(tf.nn.relu(tf.layers.batch_normalization(conv2d(h_pool3_r, W_conv4_r),training=is_training)))

        #  layer5
        with tf.name_scope('nn-layer-1'):
            W_fc1_r = weight_variable([7 * 7 * 64, 100])
            h_pool5_flat_r = tf.reshape(h_conv4_r, [-1, 7 * 7 * 64])
            h_fc1_r = tf.abs(tf.nn.relu(tf.layers.batch_normalization(tf.matmul(h_pool5_flat_r, W_fc1_r),training=is_training)),name="lstm_right")

        # layer6
        with tf.name_scope('out'):
            W_fc2_landmark_r = weight_variable([100, 1])
            # Unused bias, kept for the same reason as in the left stream.
            b_fc2_landmark_r = bias_variable([1])
        y_predict_right = tf.nn.sigmoid(tf.layers.batch_normalization(tf.matmul(h_fc1_r, W_fc2_landmark_r),training=is_training), name="op_to_restore_right")
    # Final prediction: the mean of the two stream outputs.
    y_predict = (y_predict_left+y_predict_right)*0.5

    with tf.name_scope('accuracy'):
        # Squared error summed over the label axis, averaged over the batch.
        loss = tf.reduce_mean(tf.reduce_sum(tf.square(y - y_predict), reduction_indices=[1]))
        global_step = tf.Variable(0,trainable=False)
        # Staircase decay: the rate is multiplied by 0.94 every 100 steps.
        learning_rate = tf.train.exponential_decay(0.0003, global_step, 100, 0.94, staircase=True)
        # Batch normalization registers its moving-statistics updates in
        # UPDATE_OPS; they have to run together with every training step.
        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step)
        return (left_eye,right_eye,y), train_step, loss, y_predict,tf.train.Saver(),is_training

我们将回归任务迁移至二分类任务：

def net():
    """Build the two-stream graph re-targeted at binary classification.

    The feature extractor (four conv layers plus one 100-unit fully connected
    layer per stream) and the old one-unit heads are rebuilt exactly as in the
    regression model. Two new 2-way softmax heads, placed under the
    ``new_left_out`` / ``new_right_out`` name scopes, are then attached to the
    100-unit features and averaged to give the final prediction.

    ``weight_variable``, ``bias_variable``, ``conv2d`` and ``max_pool_2x2``
    are helpers defined outside this snippet.

    Side effect: binds the module-level name ``global_step`` to the step
    counter variable.

    Returns:
        A 6-tuple ``((left_eye, right_eye, y_new), train_step, accuracy,
        y_predict, is_training, W_conv1)`` where ``y_new`` is the
        ``[None, 2]`` label placeholder and ``W_conv1`` is the first-layer
        convolution weight of the right stream.
    """
    # Must come before the assignment further down: Python 3 rejects a
    # ``global`` statement that follows an assignment to the same name.
    global global_step

    left_eye = tf.placeholder(tf.float32, shape=[None, 80, 80, 3],name="left_img")
    right_eye = tf.placeholder(tf.float32, shape=[None, 80, 80, 3],name="right_img")
    # NOTE(review): this old regression label placeholder is never used below.
    # It is kept because it claims the name "label" first, which affects the
    # graph name given to y_new - confirm before removing it.
    y = tf.placeholder(tf.float32, shape=[None, 1],name="label")
    is_training = tf.placeholder(tf.bool,name="is_training")

    with tf.name_scope('left_stream'):
        # layer 1
        with tf.name_scope('conv-layer-1'):
            W_conv1_l = weight_variable([5, 5, 3, 16])
            h_conv1_l = tf.abs(tf.nn.relu(tf.layers.batch_normalization(conv2d(left_eye, W_conv1_l), training=is_training)))
            h_pool1_l = max_pool_2x2(h_conv1_l)

        # layer2
        with tf.name_scope('conv-layer-2'):
            W_conv2_l = weight_variable([3, 3, 16, 48])
            h_conv2_l = tf.abs(tf.nn.relu(tf.layers.batch_normalization(conv2d(h_pool1_l, W_conv2_l), training=is_training)))
            h_pool2_l = max_pool_2x2(h_conv2_l)

        # layer3
        with tf.name_scope('conv-layer-3'):
            W_conv3_l = weight_variable([3, 3, 48, 64])
            h_conv3_l = tf.abs(tf.nn.relu(tf.layers.batch_normalization(conv2d(h_pool2_l, W_conv3_l),training=is_training)))
            h_pool3_l = max_pool_2x2(h_conv3_l)

        # layer4
        with tf.name_scope('conv-layer-4'):
            W_conv4_l = weight_variable([2, 2, 64, 64])
            h_conv4_l = tf.abs(tf.nn.relu(tf.layers.batch_normalization(conv2d(h_pool3_l, W_conv4_l),training=is_training)))

        #  layer5
        with tf.name_scope('nn-layer-1'):
            W_fc1_l = weight_variable([7 * 7 * 64, 100])
            h_pool5_flat_l = tf.reshape(h_conv4_l, [-1, 7 * 7 * 64])
            h_fc1_l = tf.abs(tf.nn.relu(tf.layers.batch_normalization(tf.matmul(h_pool5_flat_l, W_fc1_l),training=is_training)),name="lstm_left")

        # layer6 (old regression head)
        # NOTE(review): the old head is not used by the new loss, but it is
        # still built. Its variables and its unnamed batch-normalization layer
        # take part in TensorFlow's automatic naming, so removing it would
        # presumably shift the names of later variables and break restoring
        # from the old checkpoint - verify before pruning.
        with tf.name_scope('out'):
            W_fc2_landmark_l = weight_variable([100, 1])
            b_fc2_landmark_l = bias_variable([1])
        y_predict_left = tf.nn.sigmoid(tf.layers.batch_normalization(tf.matmul(h_fc1_l, W_fc2_landmark_l),training=is_training), name="op_to_restore_left")

    with tf.name_scope('right_stream'):
        # layer 1
        with tf.name_scope('conv-layer-1'):
            W_conv1_r = weight_variable([5, 5, 3, 16])
            h_conv1_r = tf.abs(tf.nn.relu(tf.layers.batch_normalization(conv2d(right_eye, W_conv1_r), training=is_training)))
            h_pool1_r = max_pool_2x2(h_conv1_r)

        # layer2
        with tf.name_scope('conv-layer-2'):
            W_conv2_r = weight_variable([3, 3, 16, 48])
            h_conv2_r = tf.abs(tf.nn.relu(tf.layers.batch_normalization(conv2d(h_pool1_r, W_conv2_r), training=is_training)))
            h_pool2_r = max_pool_2x2(h_conv2_r)

        # layer3
        with tf.name_scope('conv-layer-3'):
            W_conv3_r = weight_variable([3, 3, 48, 64])
            h_conv3_r = tf.abs(tf.nn.relu(tf.layers.batch_normalization(conv2d(h_pool2_r, W_conv3_r),training=is_training)))
            h_pool3_r = max_pool_2x2(h_conv3_r)

        # layer4
        with tf.name_scope('conv-layer-4'):
            W_conv4_r = weight_variable([2, 2, 64, 64])
            h_conv4_r = tf.abs(tf.nn.relu(tf.layers.batch_normalization(conv2d(h_pool3_r, W_conv4_r),training=is_training)))

        #  layer5
        with tf.name_scope('nn-layer-1'):
            W_fc1_r = weight_variable([7 * 7 * 64, 100])
            h_pool5_flat_r = tf.reshape(h_conv4_r, [-1, 7 * 7 * 64])
            h_fc1_r = tf.abs(tf.nn.relu(tf.layers.batch_normalization(tf.matmul(h_pool5_flat_r, W_fc1_r),training=is_training)),name="lstm_right")

        # layer6 (old regression head, kept for the same reason as on the left)
        with tf.name_scope('out'):
            W_fc2_landmark_r = weight_variable([100, 1])
            b_fc2_landmark_r = bias_variable([1])
        y_predict_right = tf.nn.sigmoid(tf.layers.batch_normalization(tf.matmul(h_fc1_r, W_fc2_landmark_r),training=is_training), name="op_to_restore_right")

    # New structure: 2-way softmax heads on top of the 100-unit features.
    # The 'new_' scope prefix is what the restore code filters on.
    y_new = tf.placeholder(tf.float32, shape=[None, 2], name="label")
    with tf.name_scope('new_left_out'):
        W_new_l = weight_variable([100, 2])
        b_new_l = bias_variable([2])
        y_predict_left = tf.nn.softmax(tf.matmul(h_fc1_l, W_new_l) + b_new_l,
                                       name="op_to_restore_left")
    with tf.name_scope('new_right_out'):
        W_new_r = weight_variable([100, 2])
        b_new_r = bias_variable([2])
        y_predict_right = tf.nn.softmax(tf.matmul(h_fc1_r, W_new_r) + b_new_r,
                                        name="op_to_restore_right")
        # Final prediction: the mean of the two softmax outputs.
        y_predict = (y_predict_left + y_predict_right) * 0.5

    with tf.name_scope('accuracy'):
        # Class-weighted cross entropy: class 1 is weighted 10x relative to
        # class 0. Both terms are sums over the batch, so the outer
        # reduce_mean acts on a scalar. 1e-10 guards against log(0).
        loss = tf.reduce_mean(-1 * tf.reduce_sum(y_new[:, 0] * tf.log(y_predict[:, 0] + 1e-10)) -
                              10 * tf.reduce_sum(y_new[:, 1] * tf.log(y_predict[:, 1] + 1e-10)))
        global_step = tf.Variable(0, trainable=False)
        # Staircase decay: the rate is multiplied by 0.94 every 100 steps.
        learning_rate = tf.train.exponential_decay(0.0003, global_step, 100, 0.94, staircase=True)
        # Batch normalization registers its moving-statistics updates in
        # UPDATE_OPS; they have to run together with every training step.
        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step)
        # accuracy: fraction of samples whose arg-max class matches the label
        correct_prediction = tf.equal(tf.argmax(y_predict, 1), tf.argmax(y_new, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    return (left_eye,right_eye,y_new), train_step, accuracy, y_predict,is_training,W_conv1_r

部分参数读取：

# Initialise everything first; the restore below then overwrites the subset
# of variables that is loaded from the checkpoint.
sess.run(tf.global_variables_initializer())

# Keep only the variables whose name does not contain "new_".
variables = tf.contrib.framework.get_variables_to_restore()
variables_to_restore = list(filter(lambda var: "new_" not in var.name, variables))

# Import the parameters from the most recent checkpoint in "model_path".
saver_restore = tf.train.Saver(var_list=variables_to_restore)
saver_restore.restore(sess, save_path=tf.train.latest_checkpoint("model_path"))

注意：variables 指新任务模型中存在的全部变量，其中一部分是我们新增的（名称中含 "new_"），在原始模型的 checkpoint 中并不存在，故需要剔除；仅读取原始模型中存在且我们仍需使用的参数即可。

TensorFlow 迁移学习之 fine-tune

猜你喜欢