本文主要是在caffe中实现yolo,需要在caffe中添加新的网络层:LeakyLayer和DetectLayer。
一、添加LeakyLayer层
1.创建hpp头文件leaky_layer.hpp
不同功能类型的层所引的头文件也不同,具体可以到“caffe/include/caffe/layers”目录下找相似的现成的文件参考,新添加的hpp文件也要放到该目录下。
注意:文件名、类名以及层的type名称应严格保持一致,并注意大小写。
#ifndef CAFFE_LEAKY_LAYER_HPP_
#define CAFFE_LEAKY_LAYER_HPP_
//*****************************************
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
//*****************************************
#include "caffe/layers/neuron_layer.hpp"
namespace caffe {
/**
 * @brief Leaky rectified-linear activation with a fixed negative slope of 0.1:
 *        y = x for x > 0, y = 0.1 * x otherwise.
 *
 * Registered under the layer type string "Leaky". Takes exactly one bottom
 * blob and produces exactly one top blob.
 */
template <typename Dtype>
class LeakyLayer : public NeuronLayer<Dtype> {
 public:
  explicit LeakyLayer(const LayerParameter& param)
      : NeuronLayer<Dtype>(param) {}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  // Type string used for this layer in a prototxt: "Leaky".
  virtual inline const char* type() const { return "Leaky"; }
  // Exactly one bottom blob and one top blob.
  virtual inline int ExactNumBottomBlobs() const { return 1; }
  virtual inline int ExactNumTopBlobs() const { return 1; }

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  /**
   * @brief CPU backward pass: bottom_diff = top_diff where the input was
   *        positive, 0.1 * top_diff elsewhere.
   *
   * Previously this was an empty stub, so CPU-mode training silently got no
   * gradient from this layer. The arithmetic mirrors the GPU kernel
   * (multiplication by the double literal 0.1).
   */
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down,
      const vector<Blob<Dtype>*>& bottom) {
    if (!propagate_down[0]) {
      return;
    }
    const Dtype* bottom_data = bottom[0]->cpu_data();
    const Dtype* top_diff = top[0]->cpu_diff();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const int count = bottom[0]->count();
    for (int i = 0; i < count; ++i) {
      bottom_diff[i] = bottom_data[i] > 0 ? top_diff[i] : top_diff[i] * 0.1;
    }
  }
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down,
      const vector<Blob<Dtype>*>& bottom);
};
}
#endif
2.创建leaky_layer.cpp文件
cpp文件位于src/caffe/layers下。
#include <algorithm>
#include <vector>
//*****************************************
#include "caffe/layers/leaky_layer.hpp"
//*****************************************
#include "caffe/util/math_functions.hpp"
namespace caffe {
/**
 * @brief Runs the NeuronLayer setup, then aborts if the top blob is the very
 *        same object as the bottom blob (no in-place computation).
 */
template <typename Dtype>
void LeakyLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  NeuronLayer<Dtype>::LayerSetUp(bottom, top);
  // Pointer comparison: top[0] and bottom[0] must be distinct blobs.
  CHECK_NE(top[0], bottom[0])
      << this->type()
      << " Layer does not " "allow in-place computation.";
}
/**
 * @brief CPU forward pass: copies positive inputs unchanged and scales the
 *        remaining inputs by 0.1.
 *
 * @param bottom bottom[0] supplies the input values.
 * @param top    top[0] receives one output value per element of top[0].
 */
template <typename Dtype>
void LeakyLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int n = top[0]->count();
  Dtype* dst = top[0]->mutable_cpu_data();
  const Dtype* src = bottom[0]->cpu_data();
  for (int idx = 0; idx < n; ++idx) {
    const Dtype x = src[idx];
    // The product is formed in double (0.1 is a double literal) and then
    // narrowed back to Dtype.
    dst[idx] = (x > 0) ? x : static_cast<Dtype>(0.1 * x);
  }
}
//template <typename Dtype>
//void LeakyLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
// const vector<bool>& propagate_down,
// const vector<Blob<Dtype>*>& bottom) {
// if (propagate_down[0]) {
// const Dtype* bottom_data = bottom[0]->cpu_data();
// const Dtype* top_diff = top[0]->cpu_diff();
// Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
// const int count = bottom[0]->count();
// for (int i = 0; i < count; ++i) {
// if(bottom_diff[i] > 0)
// bottom_diff[i] = top_diff[i];
// else
// bottom_diff[i] = top_diff[i]*0.1;
//bottom_diff[i] = bottom_data[i] > 0 ? top_diff[i] : top_diff[i]*0.1;
// }
// }
//}
// CPU_ONLY builds: STUB_GPU is expected to supply the Forward_gpu/Backward_gpu
// definitions that leaky_layer.cu would otherwise provide
// (NOTE(review): macro is defined by Caffe, not in this file - confirm).
#ifdef CPU_ONLY
STUB_GPU(LeakyLayer);
#endif
// Instantiate the class template and register it so that a layer with
// type "Leaky" can be created by name.
INSTANTIATE_CLASS(LeakyLayer);
REGISTER_LAYER_CLASS(Leaky);
}
3.创建leaky_layer.cu文件
#include <algorithm>
#include <vector>
#include "caffe/layers/neuron_layer.hpp"
#include "caffe/layers/leaky_layer.hpp"
namespace caffe {
// CUDA kernel for the forward pass: out = in where in > 0, otherwise in * 0.1.
// n is the number of elements to process.
template <typename Dtype>
__global__ void LeakyForward(const int n, const Dtype* in, Dtype* out) {
  CUDA_KERNEL_LOOP(i, n) {
    const Dtype x = in[i];
    if (x > 0) {
      out[i] = x;
    } else {
      // 0.1 is a double literal, so the product is formed in double and
      // narrowed on assignment.
      out[i] = x * 0.1;
    }
  }
}
/**
 * @brief GPU forward pass: launches LeakyForward over every element of
 *        bottom[0] and writes the result into top[0].
 */
template <typename Dtype>
void LeakyLayer<Dtype>::Forward_gpu(
    const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int num_elements = bottom[0]->count();
  const Dtype* input = bottom[0]->gpu_data();
  Dtype* output = top[0]->mutable_gpu_data();
  // Launch configuration comes from Caffe's CAFFE_GET_BLOCKS /
  // CAFFE_CUDA_NUM_THREADS helpers.
  LeakyForward<Dtype>
      <<<CAFFE_GET_BLOCKS(num_elements), CAFFE_CUDA_NUM_THREADS>>>(
          num_elements, input, output);
  CUDA_POST_KERNEL_CHECK;
}
// CUDA kernel for the backward pass: the incoming gradient is passed through
// unchanged where the forward input was positive and scaled by 0.1 elsewhere.
template<typename Dtype>
__global__ void LeakyBackward(const int n, const Dtype* bottom_data, Dtype* bottom_diff, const Dtype* top_diff){
  CUDA_KERNEL_LOOP(i, n) {
    const Dtype grad = top_diff[i];
    if (bottom_data[i] > 0) {
      bottom_diff[i] = grad;
    } else {
      // Product formed in double (0.1 is a double literal), then narrowed.
      bottom_diff[i] = grad * 0.1;
    }
  }
}
/**
 * @brief GPU backward pass: fills bottom[0]'s diff from top[0]'s diff using
 *        the LeakyBackward kernel.
 *
 * @param top            top[0]'s diff holds the incoming gradient (read only).
 * @param propagate_down nothing is computed unless propagate_down[0] is set.
 * @param bottom         bottom[0]'s data is the forward input; its diff is
 *                       overwritten with the outgoing gradient.
 */
template<typename Dtype>
void LeakyLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom){
  // Skip the kernel launch when the net does not need this gradient.
  if (!propagate_down[0]) {
    return;
  }
  const int count = bottom[0]->count();
  const Dtype* bottom_data = bottom[0]->gpu_data();
  // The top diff is only read here, so use the const accessor rather than
  // mutable_gpu_diff().
  const Dtype* top_diff = top[0]->gpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
  LeakyBackward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
      count, bottom_data, bottom_diff, top_diff);
  // Surface launch errors here, as Forward_gpu already does.
  CUDA_POST_KERNEL_CHECK;
}
// Instantiate LeakyLayer's GPU forward/backward member functions
// (NOTE(review): the set of instantiated Dtypes is decided by the Caffe
// macro, which is not shown in this file).
INSTANTIATE_LAYER_GPU_FUNCS(LeakyLayer);
} // namespace caffe
由于LeakyLayer没有参数,故不需要在caffe.proto文件中添加参数和消息函数,也即写好hpp、cpp、cu文件就已添加完成;
二、创建DetectLayer层
1.创建detect_layer.hpp文件
#ifndef CAFFE_DETECT_LAYER_HPP_
#define CAFFE_DETECT_LAYER_HPP_
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/layers/detect_layer.hpp"
namespace caffe {
/**
 * @brief YOLO-style detection loss layer, registered under the type "Detect".
 *
 * Takes exactly two bottom blobs (bottom[0]: network predictions,
 * bottom[1]: ground truth) and produces exactly one top blob holding the
 * loss value. Only CPU forward/backward are declared; no GPU methods are
 * declared by this class.
 */
template<typename Dtype>
class DetectLayer : public Layer<Dtype>{
 public:
  explicit DetectLayer(const LayerParameter& param);
  virtual ~DetectLayer(){}
  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,const vector<Blob<Dtype>*>& top);
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
  virtual inline const char* type() const { return "Detect";}
  virtual inline int ExactNumBottomBlobs() const {return 2;}
  virtual inline int ExactNumTopBlobs() const { return 1;}

 protected:
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top);
  // Parameter order follows the usual Caffe convention for backward passes:
  // (top, propagate_down, bottom). The names were previously swapped.
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,const vector<bool>& propagate_down,const vector<Blob<Dtype>*>& bottom);

  // All members below are copied from DetectParameter in the constructor.
  int classes;           // number of class scores per grid cell
  int coords;            // number of values stored per box
  int rescore;           // read from the parameter; currently guards no active code
  int side;              // the grid has side * side cells
  int num;               // number of boxes predicted per grid cell
  bool softmax;          // read from the parameter; not used by Forward_cpu
  bool sqrt;             // if set, predicted width/height are squared before use
  float jiter;           // holds DetectParameter::jitter (name keeps the original spelling)
  float object_scale;    // weight of the objectness term for the responsible box
  float noobject_scale;  // weight of the objectness term for all other boxes
  float class_scale;     // weight of the class-score gradient
  float coord_scale;     // weight of the box-coordinate gradient
};
}
#endif  // CAFFE_DETECT_LAYER_HPP_
2.创建detect_layer.cpp文件
#include <algorithm>
#include <cfloat>
#include <vector>
#include "caffe/layers/detect_layer.hpp"
#include "caffe/util/math_functions.hpp"
using namespace std;
namespace caffe {
/**
 * @brief Length of the overlap between the 1-D intervals
 *        [x1_min, x1_max] and [x2_min, x2_max].
 *
 * For well-formed intervals (min <= max) the result is identical to the
 * previous branch-based implementation. Unlike that version, the result is
 * never negative: an inverted interval (max < min, e.g. a box with negative
 * width) or a NaN input yields 0, so two negative overlaps can no longer
 * multiply into a spurious positive intersection area.
 *
 * @return the overlap length, or 0 when the intervals do not overlap.
 */
template<typename Dtype>
Dtype lap(Dtype x1_min,Dtype x1_max,Dtype x2_min,Dtype x2_max){
  // The overlap spans from the larger lower bound to the smaller upper bound.
  const Dtype left = x1_min > x2_min ? x1_min : x2_min;
  const Dtype right = x1_max < x2_max ? x1_max : x2_max;
  const Dtype overlap = right - left;
  // "overlap > 0" is false for NaN as well, which maps it to 0.
  return overlap > 0 ? overlap : Dtype(0);
}
/**
 * @brief Intersection-over-union of two axis-aligned boxes.
 *
 * Each box is a vector {center_x, center_y, width, height}; only the first
 * four elements are read.
 *
 * @return intersection / union, or 0 when the boxes do not overlap or the
 *         union area is smaller than 1e-5.
 */
template<typename Dtype>
Dtype box_iou(const vector<Dtype> box1, const vector<Dtype> box2){
  // Fixed: the upper bound of the second box used the undeclared name "box".
  const Dtype lap_x = lap(box1[0]-box1[2]/2, box1[0]+box1[2]/2,
                          box2[0]-box2[2]/2, box2[0]+box2[2]/2);
  const Dtype lap_y = lap(box1[1]-box1[3]/2, box1[1]+box1[3]/2,
                          box2[1]-box2[3]/2, box2[1]+box2[3]/2);
  // No overlap along either axis means no intersection. This also rejects
  // negative or NaN overlaps, so they cannot form a positive product.
  if (!(lap_x > 0) || !(lap_y > 0)) {
    return Dtype(0);
  }
  const Dtype inter = lap_x*lap_y;
  const Dtype are = box1[2]*box1[3]+box2[2]*box2[3]-inter;
  if (are < 0.00001) {
    return Dtype(0);
  }
  return inter/are;
}
/**
 * @brief Builds the layer and caches every DetectParameter field in a member.
 *
 * Two propagate_down entries (true, then false) are appended to
 * this->layer_param_ before the detect parameters are read.
 */
template <typename Dtype>
DetectLayer<Dtype>::DetectLayer(const LayerParameter& param)
    : Layer<Dtype>(param) {
  this->layer_param_.add_propagate_down(true);
  this->layer_param_.add_propagate_down(false);

  const DetectParameter& cfg = this->layer_param_.detect_param();

  // Output layout.
  side = cfg.side();
  num = cfg.num();
  classes = cfg.classes();
  coords = cfg.coords();

  // Behaviour switches.
  rescore = cfg.rescore();
  softmax = cfg.softmax();
  sqrt = cfg.sqrt();
  jiter = cfg.jitter();

  // Loss-term weights.
  coord_scale = cfg.coord_scale();
  class_scale = cfg.class_scale();
  object_scale = cfg.object_scale();
  noobject_scale = cfg.noobject_scale();
}
/**
 * @brief Validates blob sizes and gives the loss output a default weight of 1.
 *
 * Checks that each sample of bottom[0] holds
 * side*side*((1+coords)*num + classes) values, that there is one top blob,
 * and that bottom[1] is large enough for the
 * (1 + coords + classes) values per grid cell that Forward_cpu reads.
 */
template <typename Dtype>
void DetectLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,const vector<Blob<Dtype>*>& top){
  Layer<Dtype>::LayerSetUp(bottom, top);
  // Only add the default loss weight when the net definition did not set one;
  // appending unconditionally would leave two weights for a single top blob.
  if (this->layer_param_.loss_weight_size() == 0) {
    this->layer_param_.add_loss_weight(Dtype(1));
  }
  int inputs = (side*side*(((1+coords)*num)+classes));
  CHECK_EQ(inputs, bottom[0]->count(1)) << "input dimensions error";
  CHECK_EQ(top.size(), 1) << "top size must be 1";
  // Forward_cpu indexes the truth blob as
  // (b*side*side + l)*(1+coords+classes) + k, so it needs at least this many
  // values in total; anything smaller would be read out of bounds.
  const int truths_per_sample = side*side*(1+coords+classes);
  CHECK_GE(bottom[1]->count(), bottom[0]->num()*truths_per_sample)
      << "truth dimensions error";
}
/**
 * @brief Reshapes top[0] with an empty shape vector (no axes); Forward_cpu
 *        writes the loss into its element 0.
 */
template <typename Dtype>
void DetectLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const vector<int> scalar_shape;  // zero entries
  top[0]->Reshape(scalar_shape);
}
/**
 * @brief Computes the detection loss into top[0] and, in the same pass,
 *        writes the gradient for bottom[0] into bottom[0]'s diff.
 *
 * Layout of one sample of bottom[0] (locations = side*side grid cells):
 *   - class scores : [l*classes + j]
 *   - objectness   : [locations*classes + l*num + n]
 *   - boxes        : [locations*(classes+num) + (l*num+n)*coords + 0..3]
 * Layout of bottom[1] per grid cell ((1+coords+classes) values):
 *   [is_object, class_0 .. class_{classes-1}, x, y, w, h]
 *
 * The scalar cost accumulates only the objectness terms and
 * (1 - best_iou)^2; class and coordinate errors contribute to the gradient
 * but not to the reported cost. The gradient is stored negated at the end
 * (delta holds truth - prediction while it is being built).
 *
 * A one-line statistics summary is printed to stdout on every call.
 */
template <typename Dtype>
void DetectLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,const vector<Blob<Dtype>*>& top){
  int input_num_each = side*side*(((1+coords)*num)+classes);
  int batch = bottom[0]->num();
  int locations = side*side;
  const Dtype* truth = bottom[1]->cpu_data();
  const Dtype* input = bottom[0]->cpu_data();
  Dtype* delta = bottom[0]->mutable_cpu_diff();
  Dtype& cost = top[0]->mutable_cpu_data()[0];
  cost = Dtype(0.0);
  const int total = bottom[0]->count();
  for(int i = 0; i < total; ++i){
    delta[i] = Dtype(0.0);
  }
  // Running sums for the diagnostic line printed at the end.
  float avg_iou = 0;
  float avg_cat = 0;
  float avg_allcat = 0;
  float avg_obj = 0;
  float avg_anyobj = 0;
  int count = 0;  // number of grid cells that contain an object
  for(int b = 0; b < batch; ++b){
    int input_index = b*input_num_each;
    for(int l = 0; l < locations; ++l){
      int truth_index = (b*locations+l)*(1+coords+classes);
      Dtype is_obj = truth[truth_index];
      // Every box starts out penalised as "no object"; the responsible box
      // of an object cell is corrected further down.
      for(int n = 0; n < num;++n){
        int delta_index = input_index + locations*classes + l*num + n;
        delta[delta_index] = noobject_scale*(0 - input[delta_index]);
        cost += noobject_scale*pow(input[delta_index],2);
        avg_anyobj += input[delta_index];
      }
      if(is_obj < 0.0001) continue;
      // Class-score gradient for cells that contain an object.
      int class_index = input_index + l*classes;
      for(int j = 0; j < classes; ++j){
        delta[class_index+j]= class_scale * (truth[truth_index+1+j] - input[class_index+j]);
        if(truth[truth_index+1+j]) avg_cat += input[class_index+j];
        avg_allcat += input[class_index+j];
      }//classes
      // Ground-truth box; x and y are divided by side before comparison.
      vector<float> truth_box;
      truth_box.push_back(float(truth[truth_index+1+classes]/side));
      truth_box.push_back(float(truth[truth_index+1+classes+1]/side));
      truth_box.push_back(float(truth[truth_index+1+classes+2]));
      truth_box.push_back(float(truth[truth_index+1+classes+3]));
      // Pick the responsible box: highest IoU if any box overlaps the truth,
      // otherwise the smallest squared coordinate error.
      int best_index = 0;
      float best_iou = 0;
      float best_rmse = 400;
      for(int n = 0; n < num; ++n){
        int box_index = input_index + locations*(classes+num)+(l*num+n)*coords;
        vector<float> out_box;
        out_box.push_back(float(input[box_index]/side));
        out_box.push_back(float(input[box_index+1]/side));
        if(sqrt){
          out_box.push_back(float(input[box_index+2]*input[box_index+2]));
          out_box.push_back(float(input[box_index+3]*input[box_index+3]));
        }else{
          out_box.push_back(float(input[box_index+2]));
          out_box.push_back(float(input[box_index+3]));
        }
        float iou = box_iou(truth_box, out_box);
        // Sum of squared differences (no square root is taken).
        float rmse = (pow(truth_box[0]-out_box[0],2)+pow(truth_box[1]-out_box[1],2)+pow(truth_box[2]-out_box[2],2)+pow(truth_box[3]-out_box[3],2));
        if(best_iou > 0 || iou > 0){
          if(iou > best_iou){
            best_iou = iou;
            best_index = n;
          }
        }else{
          if(rmse < best_rmse){
            best_rmse = rmse;
            best_index = n;
          }
        }
      }//for num
      int box_index = input_index + locations*(classes+num)+(l*num+best_index)*coords;
      int tbox_index = truth_index+1+classes;
      // Replace the "no object" contribution of the responsible box with the
      // "object" one.
      int p_index = input_index + locations*classes + l*num + best_index;
      cost -= noobject_scale*pow(input[p_index],2);
      cost += object_scale*pow(1-input[p_index],2);
      avg_obj += input[p_index];
      delta[p_index] = object_scale*(1. - input[p_index]);
      // Rescoring against best_iou is disabled:
      //   if(rescore) delta[p_index] = object_scale*(best_iou - input[p_index]);
      delta[box_index] = coord_scale*(truth[tbox_index]-input[box_index]);
      delta[box_index+1] = coord_scale*(truth[tbox_index+1]-input[box_index+1]);
      delta[box_index+2] = coord_scale*(truth[tbox_index+2]-input[box_index+2]);
      delta[box_index+3] = coord_scale*(truth[tbox_index+3]-input[box_index+3]);
      if(sqrt) {
        // In sqrt mode the network output is compared to sqrt(w), sqrt(h).
        delta[box_index+2] = coord_scale*(std::sqrt(truth[tbox_index+2])-input[box_index+2]);
        delta[box_index+3] = coord_scale*(std::sqrt(truth[tbox_index+3])-input[box_index+3]);
      }
      cost += pow(1-best_iou, 2);
      avg_iou += best_iou;
      ++count;
    }//locations
  }//batch
  // Flip the sign so the diff holds prediction - truth.
  for(int i = 0; i < total; ++i){
    delta[i] = -delta[i];
  }
  // Avoid dividing by zero in the statistics when the batch contains no
  // object (or is empty); the sums are 0 in that case, so 0 is printed.
  const int obj_div = count > 0 ? count : 1;
  const int cat_div = classes * count > 0 ? classes * count : 1;
  const int any_div = locations * batch * num > 0 ? locations * batch * num : 1;
  printf("Detection Avg IOU: %f, Pos Cat: %f, All Cat: %f, Pos Obj: %f, Any Obj: %f, count: %d\n", avg_iou/obj_div, avg_cat/obj_div, avg_allcat/cat_div, avg_obj/obj_div, avg_anyobj/any_div, count);
}
/**
 * @brief Intentionally empty: Forward_cpu already writes the gradient for
 *        bottom[0] into bottom[0]'s diff, so there is nothing left to do.
 *
 * Parameter names now follow the (top, propagate_down, bottom) order used by
 * the other backward functions in this tutorial; they were previously
 * swapped. None of the arguments is read.
 */
template <typename Dtype>
void DetectLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,const vector<Blob<Dtype>*>& bottom){
}
// Explicit instantiations of the overlap/IoU helpers for float and double.
template float lap(float x1_min,float x1_max,float x2_min,float x2_max);
template double lap(double x1_min,double x1_max,double x2_min,double x2_max);
template float box_iou(const vector<float> box1, const vector<float> box2);
template double box_iou(const vector<double> box1, const vector<double> box2);
// No STUB_GPU(DetectLayer) here: DetectLayer declares neither Forward_gpu nor
// Backward_gpu, so the out-of-class definitions that macro generates would
// not compile in a CPU_ONLY build.
INSTANTIATE_CLASS(DetectLayer);
REGISTER_LAYER_CLASS(Detect);
}//namespace caffe
3.修改src\caffe\proto\caffe.proto文件
这里为新写的层添加参数和消息函数。
(1)首先应该在message LayerParameter {}中指定一个唯一ID,这个ID的可选值可以由下面这段注释看出(顺着往下加):
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available layer-specific ID: 147 (last added: recurrent_param)
message LayerParameter {
由上面的注释可以看出,可选的ID为147。
于是在LayerParameter消息体内就可以添加这样一行:
optional DetectParameter detect_param = 147;
为了下次添加新层方便,最好修改注释信息:
// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available layer-specific ID: 148 (last added: detect_param)
message LayerParameter {
(2)添加Detect layer参数的消息函数
// Parameters of the "Detect" layer; every field is read once in the
// DetectLayer constructor (detect_layer.cpp).
message DetectParameter{
// Number of class scores per grid cell.
optional uint32 classes = 1 [default = 1];
// Number of values stored per box (the layer reads four: x, y, w, h).
optional uint32 coords = 2 [default = 4];
// Read by the layer; the code it would enable is currently commented out.
optional uint32 rescore = 3 [default = 1];
// The prediction grid has side * side cells.
optional uint32 side = 4 [default = 11];
// Read by the layer but not used in its forward pass.
optional bool softmax = 5 [default = false];
// If true, predicted width/height are squared before the IoU comparison and
// the gradient targets sqrt(width), sqrt(height) of the truth box.
optional bool sqrt = 6 [default = true];
// Read by the layer but not used in its forward pass.
optional float jitter = 7 [default = 0.2];
// Weight of the objectness term for the box responsible for an object.
optional float object_scale = 8 [default = 1.0];
// Weight of the objectness term for all other boxes.
optional float noobject_scale = 9 [default = 0.5];
// Weight of the class-score gradient.
optional float class_scale = 10 [default = 1.0];
// Weight of the box-coordinate gradient.
optional float coord_scale = 11 [default = 5];
// Number of boxes predicted per grid cell.
optional uint32 num = 12 [default = 2];
}
注意:每个字段“=”后面的数字是字段编号(tag),而不是参数值;在同一个message内,字段编号必须是互不重复的正整数,否则编译proto时会报错。参数的默认值应写在[default = ...]中。如下设置会报错:
message DetectParameter{
optional uint32 classes = 1 [default = 1];
optional uint32 coords = 4 [default = 4];
optional uint32 rescore = 1 [default = 1];
optional uint32 side = 11 [default = 11];
optional uint32 num = 5 [default = 2];
optional bool softmax = false [default = false];
optional bool sqrt = true [default = true];
optional float jiter = 0 [default = 0.2];
optional float object_scale = 1.0 [default = 1.0];
optional float noobject_scale = 0.5 [default = 0.5];
optional float class_scale = 1.0 [default = 1.0];
optional float coord_scale = 5 [default = 5];
}
注意:某些参考资料中还有在V0LayerParameter和V1LayerParameter中添加内容的步骤,该步骤是针对旧版caffe的,现已经废弃,只需要在LayerParameter中指定ID,并在任意位置添加消息定义即可。
(3)重新编译caffe
make clean
make all
make test
make runtest