Caffe fully connected layer: inner_product_layer

  In Caffe, the fully connected layer is called "InnerProduct" (implemented in inner_product_layer), in contrast to TensorFlow, where it is called a fully connected layer.

 

1、Definition in prototxt

layer {
  bottom: "fc7"
  top: "fc8"
  name: "fc8"
  type: "InnerProduct"
  param {              # weight learning parameters
    lr_mult: 10        # learning rate multiplier
    decay_mult: 1      # weight decay multiplier
  }
  param {              # bias learning parameters
    lr_mult: 20        # in general, the bias learning rate is twice the weight learning rate
    decay_mult: 0
  }
  inner_product_param {
    num_output: 1000   # number of output units
    weight_filler {    # weight initializer
      type: "gaussian"
      std: 0.005
    }
    bias_filler {      # bias initializer
      type: "constant"
      value: 0.1
    }
  }
}


2、Definition in caffe.proto
message LayerParameter {
  optional InnerProductParameter inner_product_param = 117;
}

message InnerProductParameter {
  optional uint32 num_output = 1;                // number of outputs of the layer
  optional bool bias_term = 2 [default = true];  // whether there is a bias term
  optional FillerParameter weight_filler = 3;    // filler for the weights
  optional FillerParameter bias_filler = 4;      // filler for the bias
  // The first axis to be lumped into a single inner product computation;
  // may be negative to index from the end (e.g., -1 for the last axis).
  optional int32 axis = 5 [default = 1];
  // Whether the weight matrix is transposed.
  optional bool transpose = 6 [default = false];
}
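To make the axis parameter concrete, here is a small sketch of how the layer derives M_ (the number of independent inner products) and K_ (the flattened input dimension) from a bottom shape. The count() helper and the example shape are simplified stand-ins for Blob::count() and CanonicalAxisIndex(), not Caffe's actual classes:

#include <cstdio>
#include <vector>

// Simplified stand-in for Blob::count(start, end): the product of the
// shape entries over [start, end).
static int count(const std::vector<int>& shape, int start, int end) {
  int c = 1;
  for (int i = start; i < end; ++i) c *= shape[i];
  return c;
}

int main() {
  // Example bottom shape (N, C, H, W) = (32, 256, 7, 7); values are made up.
  std::vector<int> shape = {32, 256, 7, 7};
  int axis = 1;  // the default in InnerProductParameter
  if (axis < 0) axis += static_cast<int>(shape.size());  // CanonicalAxisIndex
  const int M = count(shape, 0, axis);                          // 32 samples
  const int K = count(shape, axis, static_cast<int>(shape.size()));  // 256*7*7 = 12544
  std::printf("M_ = %d, K_ = %d\n", M, K);
  return 0;
}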

3、inner_product_layer.hpp
#ifndef CAFFE_INNER_PRODUCT_LAYER_HPP_
#define CAFFE_INNER_PRODUCT_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp" #include "caffe/layer.hpp" #include "caffe/proto/caffe.pb.h" namespace caffe { /** * @brief Also known as a "fully-connected" layer, computes an inner product * with a set of learned weights, and (optionally) adds biases. * * TODO(dox): thorough documentation for Forward, Backward, and proto params. */ template <typename Dtype> class InnerProductLayer : public Layer<Dtype> { public: explicit InnerProductLayer(const LayerParameter& param) : Layer<Dtype>(param) {} virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); virtual void Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); virtual inline const char* type() const { return "InnerProduct"; } virtual inline int ExactNumBottomBlobs() const { return 1; } virtual inline int ExactNumTopBlobs() const { return 1; } protected: virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top); virtual void Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); virtual void Backward_gpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom); int M_; # 样本个数 int K_; # 特征维度 int N_; # 输出神经元个数 bool bias_term_; Blob<Dtype> bias_multiplier_; bool transpose_; ///< if true, assume transposed weights }; } // namespace caffe #endif // CAFFE_INNER_PRODUCT_LAYER_HPP_

4、inner_product_layer.cpp
#include <vector>

#include "caffe/filler.hpp"
#include "caffe/layers/inner_product_layer.hpp" #include "caffe/util/math_functions.hpp" namespace caffe { template <typename Dtype> void InnerProductLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { const int num_output = this->layer_param_.inner_product_param().num_output(); // 输出单元个数 bias_term_ = this->layer_param_.inner_product_param().bias_term(); transpose_ = this->layer_param_.inner_product_param().transpose(); N_ = num_output; const int bottom Axis = [ (- 0]> CanonicalAxisIndex . this-> layer_param_.inner_product_param () Axis ()); // Axis-dimensional data vector of length are flattened to the K_ // e.g., bottom [0] - (N , C, H, W), axis = 1, starts from the C dimension, the dimension of the inner product by N CHW. // output: N X (a C1 + C2 + ... + CK) W is X H X @ the number of samples x number of output units. 1 x. 1 x (M x N x x. 1. 1) bottom = K_ [ 0] -> COUNT (Axis); // IF WE need to SET the Check The weights up IF ( this-> blobs_.size ()> 0) {the LOG (the INFO) << "Parameter Initialization Skipping";} the else { IF (bias_term_) { this-> blobs_.resize ( 2);} the else { this-> blobs_.resize ( . 1 );} // initializes weights Vector <int > weight_shape ( 2); IF (transpose_) { // the weight matrix is transposed weight_shape [ 0] = K_; weight_shape [ . 1] = of N_;} the else {weight_shape [ 0] = of N_; weight_shape [ . 1] = K_ ;} this-> blobs_ [ 0] .reset ( new new Blob <to Dtype> (weight_shape)); // initialize heavy weight // blobs_ [0], of N_. 1 X X X K_. 1 the shared_ptr <Filler <to Dtype>> weight_filler (GetFiller <to Dtype> ( this-> layer_param_.inner_product_param () weight_filler ().)); weight_filler-> the Fill ( this-> blobs_ [ 0] .get ()); // if bias term, initializing // blobs_ [ 1], each output corresponding to a BIAS, N_ total number. IF (bias_term_) { Vector < int > bias_shape (1, N_); this->blobs_[1].reset(new Blob<Dtype>(bias_shape)); shared_ptr<Filler<Dtype> > bias_filler(GetFiller<Dtype>( this->layer_param_.inner_product_param().bias_filler())); bias_filler->Fill(this->blobs_[1].get()); } } // 参数初始化 this->param_propagate_down_.resize(this->blobs_.size(), true); } template <typename Dtype> void InnerProductLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { // Figure out the dimensions const int axis = bottom[0]->CanonicalAxisIndex( this->layer_param_.inner_product_param().axis()); const int new_K = bottom[0]->count(axis); CHECK_EQ(K_, new_K) << "Input size incompatible with inner product parameters."; // The first "axis" dimensions are independent inner products; the total // number of these is M_, the product over these dimensions. M_ = bottom[0]->count(0, axis); // 样本数,batchsize // The top shape will be the bottom shape with the flattened axes dropped, // and replaced by a single axis with dimension num_output (N_). vector<int> top_shape = bottom[0]->shape(); top_shape.resize(axis + 1); top_shape[axis] = N_; top[0]->Reshape(top_shape); // Set up the bias multiplier if (bias_term_) { vector<int> bias_shape(1, M_); bias_multiplier_.Reshape(bias_shape); caffe_set(M_, Dtype(1), bias_multiplier_.mutable_cpu_data()); // 均设为 1 } } // 前向计算 // Y = W * x + b template <typename Dtype> void InnerProductLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { const Dtype* bottom_data = bottom[0]->cpu_data(); Dtype* top_data = top[0]->mutable_cpu_data(); const Dtype* weight = this->blobs_[0]->cpu_data(); caffe_cpu_gemm<Dtype>(CblasNoTrans, transpose_ ? 
CblasNoTrans : CblasTrans, M_, N_, K_, (Dtype)1., bottom_data, weight, (Dtype)0., top_data); if (bias_term_) { caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1., bias_multiplier_.cpu_data(), this->blobs_[1]->cpu_data(), (Dtype)1., top_data); } } // 反向计算 template <typename Dtype> void InnerProductLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { if (this->param_propagate_down_[0]) { const Dtype* top_diff = top[0]->cpu_diff(); // top_diff: N x M, 每一列为一个样本的 error. const Dtype* bottom_data = bottom[0]->cpu_data(); // Gradient with respect to weight // 关于 weight 的梯度 if (transpose_) { caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_, (Dtype)1., bottom_data, top_diff, (Dtype)1., this->blobs_[0]->mutable_cpu_diff()); } else { caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, N_, K_, M_, (Dtype)1., top_diff, bottom_data, (Dtype)1., this->blobs_[0]->mutable_cpu_diff()); } } if (bias_term_ && this->param_propagate_down_[1]) { const Dtype* top_diff = top[0]->cpu_diff(); // Gradient with respect to bias caffe_cpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff, bias_multiplier_.cpu_data(), (Dtype)1., this->blobs_[1]->mutable_cpu_diff()); } if (propagate_down[0]) { const Dtype* top_diff = top[0]->cpu_diff(); // Gradient with respect to bottom data if (transpose_) { caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_, (Dtype)1., top_diff, this->blobs_[0]->cpu_data(), (Dtype)0., bottom[0]->mutable_cpu_diff()); } else { caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, K_, N_, (Dtype)1., top_diff, this->blobs_[0]->cpu_data(), (Dtype)0., bottom[0]->mutable_cpu_diff()); } } } #ifdef CPU_ONLY STUB_GPU(InnerProductLayer); #endif INSTANTIATE_CLASS(InnerProductLayer); REGISTER_LAYER_CLASS(InnerProduct); } // namespace caffe
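The three caffe_cpu_gemm/caffe_cpu_gemv calls in Backward_cpu implement the standard fully connected gradients: dW = dY^T * X, db = dY^T * 1 (a sum over samples), and dX = dY * W. Written out as naive loops for the non-transposed case, as a sketch for clarity rather than the actual implementation:

#include <vector>

// Naive reference for the InnerProduct backward pass (non-transposed
// N x K weight layout, row-major). top_diff is M x N. The GEMM/GEMV calls
// accumulate into weight_diff and bias_diff (beta = 1), mirrored here with
// +=, while bottom_diff is overwritten (beta = 0).
void inner_product_backward(const std::vector<float>& bottom,    // M x K
                            const std::vector<float>& weight,    // N x K
                            const std::vector<float>& top_diff,  // M x N
                            std::vector<float>& weight_diff,     // N x K
                            std::vector<float>& bias_diff,       // N
                            std::vector<float>& bottom_diff,     // M x K
                            int M, int K, int N) {
  for (int n = 0; n < N; ++n) {
    // dW = dY^T * X: accumulate each sample's outer-product contribution
    for (int k = 0; k < K; ++k) {
      for (int m = 0; m < M; ++m)
        weight_diff[n * K + k] += top_diff[m * N + n] * bottom[m * K + k];
    }
    // db = dY^T * 1: sum the error over samples
    for (int m = 0; m < M; ++m)
      bias_diff[n] += top_diff[m * N + n];
  }
  // dX = dY * W: overwrite the bottom gradient (beta = 0)
  for (int m = 0; m < M; ++m) {
    for (int k = 0; k < K; ++k) {
      float acc = 0.f;
      for (int n = 0; n < N; ++n)
        acc += top_diff[m * N + n] * weight[n * K + k];
      bottom_diff[m * K + k] = acc;
    }
  }
}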
 
 

 
