caffe学习实践之finetuning

本文所述内容参考caffe官网教程：http://nbviewer.jupyter.org/github/BVLC/caffe/blob/master/examples/02-fine-tuning.ipynb。由于官网教程为英文，且略显深奥，我将学习教程后自己总结的资料和代码记录下来，供日后查看，也方便新手入门caffe这一机器学习框架。
本文主要按以下几个步骤讲述fine tune的意义和实现：
一、fine tune原理；二、fine tune程序实现的大致思路；三、代码实现和实验效果。

一、fine tune原理。
未在教材和网络上找到fine tune的权威定义，我的理解为——对于已经针对某一数据集A训练好的网络参数集合，使用另一数据集B的数据和标签重新训练，以使神经网络在B上达到较好分类性能的训练方法。fine tune的主要意义在于用较少数据达到好的分类效果。

二、fine tune程序实现大致思路。
官网教程实现了以下功能——对已经由IMAGENET数据集训练完成的参数，将之用另一个仅有5个标签、2000张图像的数据集fine tune ，实现了较好的分类效果（与随机参数的网络迭代训练相同次数相比较结果完胜）。
程序主要分为以下几个步骤：1.准备图像还原函数，获取fine tune的数据集的准备工作。2.编写网络结构代码，载入训练数据集和标签的数据。3.编写solver，设定solver的学习参数。4.编写迭代训练函数。5.相关效果的测试程序。

三、代码实现和实验效果。
为方便自己和新手理解，本文代码经过重构，与官网代码有所不同，本代码复制至jupyter notebook可直接运行（修改caffe_root即可）。
3.1.获取数据集、图像还原、设置路径等预备工作
这部分代码无技术可言，直接从官网复制。
获取数据集

caffe_root = '/your/caffe/root' # 这里是caffe的根目录 
# Download just a small subset of the data for this exercise.
# (2000 of 80K images, 5 of 20 labels.)
# To download the entire dataset, set `full_dataset = True`.
full_dataset = False
if full_dataset:
    NUM_STYLE_IMAGES = NUM_STYLE_LABELS = -1
else:
    NUM_STYLE_IMAGES = 2000
    NUM_STYLE_LABELS = 5

# This downloads the ilsvrc auxiliary data (mean file, etc),
# and a subset of 2000 images for the style recognition task.
import os
os.chdir(caffe_root)  # run scripts from caffe root
!data/ilsvrc12/get_ilsvrc_aux.sh
!scripts/download_model_binary.py models/bvlc_reference_caffenet
!python examples/finetune_flickr_style/assemble_data.py \
    --workers=-1  --seed=1701 \
    --images=$NUM_STYLE_IMAGES  --label=$NUM_STYLE_LABELS
# back to examples
os.chdir('examples')

图像还原

import sys
# Put <caffe_root>/python first on the module search path before importing
# caffe.
# NOTE(review): this is plain string concatenation, so it only names the
# intended directory when caffe_root ends with a path separator.
sys.path.insert(0, caffe_root + 'python')
import caffe

# Select Caffe's CPU mode.
caffe.set_mode_cpu()

import numpy as np
from pylab import *
# Jupyter magic: draw matplotlib figures inline in the notebook.
%matplotlib inline
import tempfile

# Helper for displaying network inputs (preprocessing changes the data
# layout, so a preprocessed image cannot be shown directly).
def deprocess_net_image(image):
    """Return a displayable uint8 version of a preprocessed image.

    The input is indexed (channel, height, width) with the channel axis in
    BGR order.  The result is indexed (height, width, channel) in RGB order
    with the per-channel offsets [123, 117, 104] added back, values clamped
    to [0, 255] and rounded.  The caller's array is not modified.
    """
    # Private copy, viewed with the channel axis reversed (BGR -> RGB) and
    # moved last (CHW -> HWC).
    restored = image.copy()[::-1].transpose(1, 2, 0)

    # (Approximately) undo the mean subtraction, in place on the copy.
    restored += [123, 117, 104]

    # Keep every value inside the valid pixel range.
    np.clip(restored, 0, 255, out=restored)

    # Round first so the integer conversion does not simply truncate.
    return np.require(np.round(restored), dtype=np.uint8)

设置路径加载模型

import os
# Path of the pretrained CaffeNet weight file; finetune_training() copies
# these weights into the solver's net before training.
# NOTE(review): the file is looked up under <caffe_root>/data/weight/ --
# confirm the downloaded .caffemodel has actually been placed there.
net_weights = os.path.join(caffe_root, 'data/weight/bvlc_reference_caffenet.caffemodel')
# Stop right here if the weight file is missing.
assert os.path.exists(net_weights)

3.2.编写神经网络结构代码、获取数据集和标签数据
这部分内容不灵活，个别代码解释见注释。
编写神经网络结构代码

from caffe import layers as L
from caffe import params as P
# Per-layer learning parameters, one entry for the weights and one for the
# biases.  caffenet() always uses learned_param for the classifier layer and
# uses it for the remaining layers only when learn_all is true.
weight_param = {'lr_mult': 1, 'decay_mult': 1}
bias_param = {'lr_mult': 2, 'decay_mult': 0}
learned_param = [weight_param, bias_param]

# Learning-rate multiplier 0 for both weights and biases; both list entries
# are the same dict object.
frozen_param = 2 * [{'lr_mult': 0}]

def conv_relu(bottom,ks,nout,stride=1,pad=0,group=1,
             param=learned_param,
             weight_filler=dict(type='gaussian'),std=0.01,
             bias_filler=dict(type='constant'),value=0.1):
    """Build a convolution layer followed by an in-place ReLU.

    Args:
        bottom: layer whose output feeds the convolution.
        ks: kernel size.
        nout: number of output channels.
        stride, pad, group: forwarded unchanged to L.Convolution.
        param: per-blob learning parameters (weights entry, biases entry).
        weight_filler: filler spec for the weights.
        std: standard deviation applied to a 'gaussian' weight filler that
            does not already carry its own 'std'.
        bias_filler: filler spec for the biases.
        value: constant applied to a 'constant' bias filler that does not
            already carry its own 'value'.

    Returns:
        (conv, relu): the convolution layer and the ReLU applied to it.
    """
    # `std` and `value` are separate parameters of this function, so they
    # have to be merged into the filler specs explicitly; otherwise the
    # fillers would be emitted without them.  Work on copies so the shared
    # default dicts (and any dict passed by the caller) are never mutated.
    weight_filler = dict(weight_filler)
    if weight_filler.get('type') == 'gaussian':
        weight_filler.setdefault('std', std)
    bias_filler = dict(bias_filler)
    if bias_filler.get('type') == 'constant':
        bias_filler.setdefault('value', value)

    conv = L.Convolution(bottom,kernel_size=ks,stride=stride,
                        num_output=nout,pad=pad,group=group,
                        param=param,weight_filler=weight_filler,
                        bias_filler=bias_filler)
    return conv, L.ReLU(conv,in_place=True)

def fc_relu(bottom,nout,param=learned_param,
            weight_filler=dict(type='gaussian',std=0.005),
           bias_filler=dict(type='constant',value=0.1)):
    """Build an inner-product (fully connected) layer plus in-place ReLU.

    Args:
        bottom: layer whose output feeds the inner product.
        nout: number of outputs of the inner-product layer.
        param: per-blob learning parameters (weights entry, biases entry).
        weight_filler: filler spec for the weights.
        bias_filler: filler spec for the biases.

    Returns:
        (fc, relu): the inner-product layer and the ReLU applied to it.
    """
    layer_kwargs = dict(num_output=nout, param=param,
                        weight_filler=weight_filler,
                        bias_filler=bias_filler)
    inner = L.InnerProduct(bottom, **layer_kwargs)
    activation = L.ReLU(inner, in_place=True)
    return inner, activation

def max_pool(bottom,ks,stride=1):
    """Return a MAX pooling layer over `bottom`.

    Args:
        bottom: layer whose output is pooled.
        ks: pooling kernel size.
        stride: pooling stride.
    """
    pooling_kwargs = {
        'pool': P.Pooling.MAX,
        'kernel_size': ks,
        'stride': stride,
    }
    return L.Pooling(bottom, **pooling_kwargs)

def caffenet(data,label=None,train=True,num_classes=1000,
            classifier_name='fc8',learn_all=False):
    """Write a CaffeNet definition to a temporary file and return its path.

    Args:
        data: layer providing the input images.
        label: optional layer providing the labels; when given, loss and
            accuracy layers are attached.
        train: when true, Dropout layers follow fc6 and fc7; when false, a
            Softmax layer named `probs` is attached to the classifier.
        num_classes: number of outputs of the classifier layer.
        classifier_name: name under which the classifier layer is stored.
        learn_all: when true every parameterised layer uses learned_param;
            otherwise every layer except the classifier uses frozen_param.

    Returns:
        Path of the temporary file holding the network definition.
    """
    spec = caffe.NetSpec()
    spec.data = data

    # learn_all selects between training all parameterised layers and
    # training the final classifier only.
    if learn_all:
        param = learned_param
    else:
        param = frozen_param
    lrn_kwargs = dict(local_size=5, alpha=1e-4, beta=0.75)

    spec.conv1, spec.relu1 = conv_relu(spec.data, 11, 96, stride=4, param=param)
    spec.pool1 = max_pool(spec.relu1, 3, stride=2)
    spec.norm1 = L.LRN(spec.pool1, **lrn_kwargs)

    spec.conv2, spec.relu2 = conv_relu(spec.norm1, 5, 256, pad=2, group=2, param=param)
    spec.pool2 = max_pool(spec.relu2, 3, stride=2)
    spec.norm2 = L.LRN(spec.pool2, **lrn_kwargs)

    spec.conv3, spec.relu3 = conv_relu(spec.norm2, 3, 384, pad=1, param=param)
    spec.conv4, spec.relu4 = conv_relu(spec.relu3, 3, 384, pad=1, group=2, param=param)
    spec.conv5, spec.relu5 = conv_relu(spec.relu4, 3, 256, pad=1, group=2, param=param)
    spec.pool5 = max_pool(spec.relu5, 3, stride=2)

    spec.fc6, spec.relu6 = fc_relu(spec.pool5, 4096, param=param)
    fc7input = spec.relu6
    if train:
        spec.drop6 = fc7input = L.Dropout(spec.relu6, in_place=True)

    spec.fc7, spec.relu7 = fc_relu(fc7input, 4096, param=param)
    fc8input = spec.relu7
    if train:
        spec.drop7 = fc8input = L.Dropout(spec.relu7, in_place=True)

    # The classifier always gets learned_param, independent of learn_all:
    # fine-tuning sometimes trains only this last layer to speed things up.
    classifier = L.InnerProduct(fc8input, num_output=num_classes,
                                param=learned_param)
    setattr(spec, classifier_name, classifier)

    # Outside training, normalise the classifier output with a softmax.
    if not train:
        spec.probs = L.Softmax(classifier)

    # With labels available, attach the loss and accuracy layers.
    if label is not None:
        spec.label = label
        spec.loss = L.SoftmaxWithLoss(classifier, spec.label)
        spec.acc = L.Accuracy(classifier, spec.label)

    # delete=False keeps the file on disk after it is closed; the caller
    # receives its path.
    with tempfile.NamedTemporaryFile(delete=False) as proto_file:
        proto_file.write(str(spec.to_proto()))
    return proto_file.name

获取数据集和标签数据

# Text file from which get_file_content() loads the style label names.
style_type_file = caffe_root + 'examples/finetune_flickr_style/style_names.txt'

def style_caffenet(data_input,labels_input,num_classes,train=False,learn_all=False):
    """Build the CaffeNet variant whose classifier layer is 'fc8_flickr'.

    All arguments are forwarded to caffenet(); the return value is the path
    of the generated network definition file.
    """
    net_kwargs = dict(
        data=data_input,
        label=labels_input,
        train=train,
        num_classes=num_classes,
        learn_all=learn_all,
        classifier_name='fc8_flickr',
    )
    return caffenet(**net_kwargs)

def get_data_batch(split='test', mirror=True, batch_size=50):
    """Create the ImageData layer that supplies images and labels.

    Args:
        split: which list file to read; the source is
            <caffe_root>data/flickr_style/<split>.txt.  The default 'test'
            keeps the previously hard-coded file; pass 'train' to read
            train.txt instead.
        mirror: value of the `mirror` entry in the layer's transform_param.
        batch_size: number of images per batch.

    Returns:
        (data_input, labels_input): the two tops of the ImageData layer.
    """
    source = caffe_root + 'data/flickr_style/%s.txt' % split
    transform_param = dict(mirror=mirror, crop_size=227,
        mean_file=caffe_root + 'data/ilsvrc12/imagenet_mean.binaryproto')
    # ntop=2 makes the layer return two tops: the images and their labels.
    data_input,labels_input = L.ImageData(
        transform_param=transform_param, source=source,
        batch_size=batch_size, new_height=256, new_width=256, ntop=2)
    return data_input,labels_input

def get_file_content(path=None):
    """Return the style label names, one per line of the label file.

    Args:
        path: file to read; defaults to the module-level style_type_file.

    Returns:
        A list of strings.  Blank lines and anything after a '#' are
        skipped, and surrounding whitespace is stripped.  A file with a
        single name yields a one-element list and an empty file yields [].
    """
    if path is None:
        path = style_type_file
    style_list = []
    with open(path) as label_file:
        for raw_line in label_file:
            # Drop '#' comments and blank lines.
            name = raw_line.split('#', 1)[0].strip()
            if name:
                style_list.append(name)
    return style_list

3.3.编写solver，设定solver参数
这部分我从官网直接复制过来了（仅将用GPU训练处改成了CPU训练）。设定参数需要对神经网络的运行机制有一定了解，本文不多叙述。
设定神经网络参数

from caffe.proto import caffe_pb2

def solver(train_net_path, test_net_path=None, base_lr=0.001):
    """Write a solver definition to a temporary file and return its path.

    Args:
        train_net_path: path of the training network definition file.
        test_net_path: optional path of a test network definition file.
        base_lr: initial learning rate.

    Returns:
        Path of the temporary file holding the solver definition.
    """
    s = caffe_pb2.SolverParameter()

    # Locations of the train and (maybe) test networks.
    s.train_net = train_net_path
    if test_net_path is not None:
        s.test_net.append(test_net_path)
        s.test_interval = 1000   # test after every 1000 training iterations
        s.test_iter.append(100)  # test on 100 batches each time

    scalar_settings = (
        # Number of iterations over which the gradient is averaged.
        ('iter_size', 1),
        # Number of times to update the net (training iterations).
        ('max_iter', 100000),
        # Stochastic gradient descent; 'Adam' and 'RMSProp' are other choices.
        ('type', 'SGD'),
        # Initial learning rate.
        ('base_lr', base_lr),
        # 'step' policy: multiply the learning rate by `gamma` every
        # `stepsize` iterations.
        ('lr_policy', 'step'),
        ('gamma', 0.1),
        ('stepsize', 20000),
        # Momentum and L2 weight decay.
        ('momentum', 0.9),
        ('weight_decay', 5e-4),
        # Display the training loss and accuracy every 1000 iterations.
        ('display', 1000),
        # Snapshot every 10K iterations.
        ('snapshot', 10000),
        ('snapshot_prefix',
         caffe_root + 'models/finetune_flickr_style/finetune_flickr_style'),
        # Train on the CPU (very slow for large networks).
        ('solver_mode', caffe_pb2.SolverParameter.CPU),
    )
    for field_name, field_value in scalar_settings:
        setattr(s, field_name, field_value)

    # delete=False keeps the file on disk after it is closed; the caller
    # receives its path.
    with tempfile.NamedTemporaryFile(delete=False) as solver_file:
        solver_file.write(str(s))
    return solver_file.name

3.4.运行solver，迭代训练
运行solver的函数，进行特定次数的SGD过程，对参数进行调整；保存下每次的Loss和Accuracy数值；最后将所有迭代过solver的参数都以文件形式保存。

def run_solvers(solvers,niter):
    """Step every solver `niter` times, then save each solver's weights.

    Args:
        solvers: iterable of (name, solver) pairs.
        niter: number of single-iteration steps to run per solver.

    Returns:
        (loss, acc, weights): `loss` and `acc` map each solver name to an
        array of length `niter` holding the 'loss' / 'acc' blob value after
        every step; `weights` maps each name to the path of the file its
        net was saved to.
    """
    # The pairs are walked more than once below, so materialise them in
    # case a one-shot iterator was passed in.
    solvers = list(solvers)
    blobs = ('loss','acc')
    # One zero-filled array per recorded blob and solver.
    history = {blob: {name: np.zeros(niter) for name, _ in solvers}
               for blob in blobs}
    for name,s in solvers:
        for it in range(niter):
            # Run a single SGD iteration, then record loss and accuracy.
            s.step(1)
            for blob in blobs:
                history[blob][name][it] = s.net.blobs[blob].data.copy()
            if it % 10 == 0:
                # Single parenthesised argument: prints the same text under
                # Python 2 and Python 3.
                print('(%d) loss is %5f,acc is %6f%%'%(
                    it+1,history['loss'][name][it],history['acc'][name][it]*100))

    weights = {}
    for name,s in solvers:
        # After training, save every solver's net parameters to a file.
        weights[name] = caffe_root + 'data/weight/%s-pretrained_model.caffemodel'%name
        s.net.save(weights[name])
    return history['loss'],history['acc'],weights

运行

def finetune_training(niter=200):
    """Fine-tune the style network and return its training history.

    Args:
        niter: number of solver iterations to run.

    Returns:
        (train_loss, train_acc, style_weights): per-iteration loss and
        accuracy arrays and the path of the saved weight file, as produced
        by run_solvers() for the 'finetuned' solver.
    """
    # NOTE(review): get_data_batch() as called here reads
    # data/flickr_style/test.txt, so the net is trained on that list --
    # confirm this is intended.
    data_input,labels_input = get_data_batch()
    train_net_path = style_caffenet(data_input=data_input,
                                    labels_input=labels_input,
                                    num_classes=5,train=True)
    style_solver_filename = solver(train_net_path)
    style_solver = caffe.get_solver(style_solver_filename)
    # Load the pretrained weights into the solver's net before training.
    style_solver.net.copy_from(net_weights)
    # run_solvers() takes a list so that several solvers can be run together.
    print('Running solvers for %d iterations...' % niter)
    solvers = [('finetuned', style_solver)]
    loss, acc, weights = run_solvers(solvers,niter)
    print('Done.')

    train_loss = loss['finetuned']
    train_acc = acc['finetuned']
    style_weights = weights['finetuned']
    return train_loss,train_acc,style_weights

train_loss,train_acc,style_weights = finetune_training()

3.5.编写相关的测试程序
将训练好的权重数据输入网络，测试网络对于目前数据集的分类效果。

# Path of the saved weight file to evaluate (placeholder; point it at a real
# .caffemodel file).
pretrained_weights = os.path.join(caffe_root,'data/weight/your_weight')
def eval_style_net(niter=50):
    """Return the mean 'acc' output of the style net over `niter` batches.

    Args:
        niter: number of forward passes to average over.
    """
    # data_input / labels_input exist only as locals of finetune_training(),
    # so the input layers are built here instead of being read as globals.
    data_input,labels_input = get_data_batch()
    net_stru = style_caffenet(data_input=data_input,labels_input=labels_input,
                        num_classes=5)
    net = caffe.Net(net_stru,pretrained_weights,caffe.TEST)
    accuracy = 0
    for i in range(niter):
        accuracy += net.forward()['acc']
        if i % 10 == 0:
            # Single parenthesised argument: prints the same text under
            # Python 2 and Python 3.
            print('iterate number is %d current sum is %f'%(i,accuracy))
    return accuracy/niter
general_accuracy = eval_style_net()
print('general accuracy is %f%%'%(general_accuracy*100))

caffe学习实践之finetuning

猜你喜欢