Triplet hard mining (Xiaolong Wang)

1.
Forward pass

/**
 * Forward pass (CPU): averaged triplet ranking loss over the mined triplets.
 *
 * The batch in bottom[0] is laid out in groups of pair_size samples; within a
 * group, sample i is the anchor and sample i + 1 is its positive partner.
 * set_mask(bottom) is called first and is expected to fill dis_ (a num x num
 * table indexed as dis_[a * num + b]) and mask_ (same layout).
 * NOTE(review): set_mask() is defined elsewhere -- confirm that it is what
 * populates both tables.
 *
 * For every anchor i and every j with mask_[i * num + j] != 0, two hinge terms
 * are accumulated:
 *   max(0, d(i, i+1) - d(i,   j) + margin)
 *   max(0, d(i, i+1) - d(i+1, j) + margin)
 * The sum is divided by cnt = neg_num * num / pair_size * 2 (integer
 * arithmetic) and written to top[0].
 *
 * Preconditions (not checked here): pair_size > 0, and cnt != 0 for a finite
 * result.
 *
 * @param bottom bottom[0] holds the features handed to set_mask(); bottom[1]
 *               (labels) is not read directly by this function.
 * @param top    top[0] receives the scalar loss at element 0.
 */
template <typename Dtype>
void RankHardLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {

    const int num = bottom[0]->num();                  // samples in the batch

    const RankParameter& rank_param = this->layer_param_.rank_param();
    const int neg_num = rank_param.neg_num();
    const int pair_size = rank_param.pair_size();
    const float margin = rank_param.margin();

    set_mask(bottom);                                  // triplet selection

    // Fetch the tables only after set_mask() has run, and read-only: nothing
    // in this function writes to them.
    const Dtype* dis_data = dis_.cpu_data();
    const Dtype* mask_data = mask_.cpu_data();

    // Normalizer: number of hinge terms the loss is averaged over.
    const int cnt = neg_num * num / pair_size * 2;

    Dtype loss = 0;
    // `i + 1 < num` skips a trailing anchor that has no positive partner, which
    // would otherwise index past the last row of the distance table.
    for (int i = 0; i + 1 < num; i += pair_size)
    {
        const Dtype pos_dis = dis_data[i * num + i + 1];   // anchor <-> positive
        for (int j = 0; j < num; j++)
        {
            // Only samples flagged for this anchor act as negatives.
            if (mask_data[i * num + j] == 0) continue;
            const Dtype tloss1 = max(Dtype(0), pos_dis - dis_data[i * num + j] + Dtype(margin));
            const Dtype tloss2 = max(Dtype(0), pos_dis - dis_data[(i + 1) * num + j] + Dtype(margin));
            // One term anchored on each member of the positive pair.
            loss += tloss1 + tloss2;
        }
    }

    loss = loss / cnt;
    top[0]->mutable_cpu_data()[0] = loss;
}

Backward pass: the loss layer has no learnable parameters, so only the gradient with respect to the input needs to be computed.
1) cnt: the total number of samples (triplet terms) that the loss is averaged over. Suppose the batch consists of N positive pairs, i.e. num = 2N samples. For each pair, negatives are searched for among the samples of the other pairs, and four of them are selected (neg_num = 4). Each selected negative yields two loss terms, one for each member of the pair, so the total is (num/2)*4*2, which is consistent with the expression `neg_num * num / pair_size * 2` in the code below.

/**
 * Backward pass (CPU): gradient of the triplet ranking loss w.r.t. bottom[0].
 *
 * Re-evaluates the two hinge terms of the forward pass from the cached dis_
 * and mask_ tables. For each term that is strictly positive it accumulates
 * updates into the anchor (i), positive (i + 1) and negative (j) rows of
 * bottom[0]'s diff. The accumulated diff is then divided by
 * cnt = neg_num * num / pair_size * 2.
 *
 * NOTE(review): the update terms match the gradient of a distance of the form
 * d(a, b) = -<f_a, f_b>; dis_ is filled elsewhere, so confirm against
 * set_mask().
 * NOTE(review): top[0]'s diff is not read, so the gradient is not scaled by
 * any upstream value (e.g. a loss weight) -- confirm this is intended.
 *
 * Preconditions (not checked here): num > 0, pair_size > 0, cnt != 0.
 *
 * @param top            unused.
 * @param propagate_down if propagate_down[0] is false, nothing is computed
 *                       and bottom[0]'s diff is left untouched.
 * @param bottom         bottom[0]: features (data is read, diff is written).
 */
template <typename Dtype>
void RankHardLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {

    // The caller does not need a gradient for the features: skip all work.
    if (!propagate_down[0]) return;

    const Dtype* bottom_data = bottom[0]->cpu_data();
    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
    const int count = bottom[0]->count();
    const int num = bottom[0]->num();
    const int dim = count / num;                       // elements per sample

    const RankParameter& rank_param = this->layer_param_.rank_param();
    const int neg_num = rank_param.neg_num();
    const int pair_size = rank_param.pair_size();
    const float margin = rank_param.margin();

    // Cached tables; only read here.
    const Dtype* dis_data = dis_.cpu_data();           // pairwise distances
    const Dtype* mask_data = mask_.cpu_data();         // selected negatives per anchor

    // Gradients are accumulated below, so start from zero.
    for (int i = 0; i < count; i++)
        bottom_diff[i] = 0;

    // Same normalizer as in the forward pass.
    const int cnt = neg_num * num / pair_size * 2;

    // Positive pairs are stored back to back: (i, i + 1). `i + 1 < num` skips a
    // trailing anchor without a partner, whose positive row would lie outside
    // the feature and distance buffers.
    for (int i = 0; i + 1 < num; i += pair_size)
    {
        const Dtype* fori = bottom_data + i * dim;
        const Dtype* fpos = bottom_data + (i + 1) * dim;

        Dtype* fori_diff = bottom_diff + i * dim;
        Dtype* fpos_diff = bottom_diff + (i + 1) * dim;

        const Dtype pos_dis = dis_data[i * num + i + 1];   // anchor <-> positive

        for (int j = 0; j < num; j++)
        {
            // Samples not selected as a negative for this anchor contribute nothing.
            if (mask_data[i * num + j] == 0) continue;

            const Dtype tloss1 = max(Dtype(0), pos_dis - dis_data[i * num + j] + Dtype(margin));
            const Dtype tloss2 = max(Dtype(0), pos_dis - dis_data[(i + 1) * num + j] + Dtype(margin));

            const Dtype* fneg = bottom_data + j * dim;
            Dtype* fneg_diff = bottom_diff + j * dim;

            // Term anchored on sample i: active only when the hinge is positive.
            if (tloss1 > 0)
            {
                for (int k = 0; k < dim; k++)
                {
                    fori_diff[k] += (fneg[k] - fpos[k]);
                    fpos_diff[k] += -fori[k];
                    fneg_diff[k] += fori[k];
                }
            }

            // Term anchored on sample i + 1.
            if (tloss2 > 0)
            {
                for (int k = 0; k < dim; k++)
                {
                    fori_diff[k] += -fpos[k];
                    fpos_diff[k] += fneg[k] - fori[k];
                    fneg_diff[k] += fpos[k];
                }
            }
        }
    }

    // Average over the same count the forward loss was divided by.
    for (int i = 0; i < count; i++)
    {
        bottom_diff[i] = bottom_diff[i] / cnt;
    }
}

write picture description here

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325741979&siteId=291194637