darknet route layer

In darknet, the route layer merges the outputs of several earlier layers into a new layer; in essence it is just a copy operation.

Configuration

[route]
layers = -1, 8

A route layer is declared in the cfg file as above; this one merges the outputs of layer CURRENT_LAYER_INDEX-1 and layer 8, where CURRENT_LAYER_INDEX is the index of the route layer itself.
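
For illustration (a made-up fragment, not taken from any shipped cfg), suppose the route layer ends up at index 10; layers = -1, 8 then resolves to layers 9 and 8:

[convolutional]
# ... options of layer 8 ...

[convolutional]
# ... options of layer 9 ...

# layer 10: -1 resolves to 10 - 1 = 9, so layers 9 and 8 are merged
[route]
layers = -1, 8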

Loading

route_layer parse_route(list *options, size_params params, network net)
{
    char *l = option_find(options, "layers");
    if(!l) error("Route Layer must specify input layers");
    int len = strlen(l);   //the NULL check must come before strlen
    int n = 1;
    int i;
    for(i = 0; i < len; ++i){
        //entries are comma separated
        if (l[i] == ',') ++n; 
    }

    int *layers = calloc(n, sizeof(int));
    int *sizes = calloc(n, sizeof(int));
    for(i = 0; i < n; ++i){
        int index = atoi(l);
        l = strchr(l, ',')+1;
        //a negative index is an offset relative to the current layer's index
        if(index < 0) index = params.index + index; 
        layers[i] = index;
        sizes[i] = net.layers[index].outputs;
    }
    int batch = params.batch;

    //build the route layer
    route_layer layer = make_route_layer(batch, n, layers, sizes); 

    //the route layer requires the merged layers to share the same w and h; the merge happens only along the channel dimension c
    convolutional_layer first = net.layers[layers[0]];
    layer.out_w = first.out_w;
    layer.out_h = first.out_h;
    layer.out_c = first.out_c;
    for(i = 1; i < n; ++i){
        int index = layers[i];
        convolutional_layer next = net.layers[index];
        if(next.out_w == first.out_w && next.out_h == first.out_h){
            layer.out_c += next.out_c; 
        }else{
            layer.out_h = layer.out_w = layer.out_c = 0; 
        }
    }

    return layer;
}
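
As a worked example (shapes are made up): merging a 26x26x256 layer with a 26x26x512 layer gives a 26x26x768 route output; if w or h differ, the shape is zeroed instead. A minimal standalone sketch of that logic, not darknet code:

#include <stdio.h>

//illustration only: derive the route output shape from two assumed
//input shapes, mirroring the shape loop in parse_route above
int main(void)
{
    int w1 = 26, h1 = 26, c1 = 256;   //first input layer (assumed)
    int w2 = 26, h2 = 26, c2 = 512;   //second input layer (assumed)

    if(w1 == w2 && h1 == h2){
        //spatial sizes match: channels are summed
        printf("route output: %dx%dx%d (%d floats per sample)\n",
               w1, h1, c1 + c2, w1 * h1 * (c1 + c2));
    }else{
        //spatial mismatch: parse_route zeroes the output shape
        printf("route output: 0x0x0\n");
    }
    return 0;
}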

Forward function

Feature maps are copied from the specified layers; the result is stored sample by sample.

void forward_route_layer(const route_layer l, network_state state)
{
    int i, j;
    int offset = 0;
    for(i = 0; i < l.n; ++i){
        int index = l.input_layers[i];  //get the source layer index
        float *input = state.net.layers[index].output;  
        int input_size = l.input_sizes[i];
        for(j = 0; j < l.batch; ++j){
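            //copy sample j of this input into sample j's slice of the route output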
            copy_cpu(input_size, input + j*input_size, 1, l.output + offset + j*l.outputs, 1); 
        }
        offset += input_size;
    }
}
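
To make "stored sample by sample" concrete, here is a self-contained sketch with made-up sizes, where plain memcpy stands in for copy_cpu; it reproduces the same indexing as forward_route_layer:

#include <stdio.h>
#include <string.h>

//illustration only: two fake inputs (per-sample sizes 3 and 2), batch = 2;
//the route output keeps each sample's concatenated features contiguous
int main(void)
{
    int batch = 2;
    int sizes[2] = {3, 2};
    int outputs = sizes[0] + sizes[1];           //5 floats per sample
    float in0[6] = {1, 2, 3, 4, 5, 6};           //input A: 2 samples x 3
    float in1[4] = {7, 8, 9, 10};                //input B: 2 samples x 2
    float *inputs[2] = {in0, in1};
    float out[10];

    int offset = 0;
    for(int i = 0; i < 2; ++i){
        for(int j = 0; j < batch; ++j){
            //same indexing as forward_route_layer, memcpy in place of copy_cpu
            memcpy(out + offset + j*outputs, inputs[i] + j*sizes[i],
                   sizes[i]*sizeof(float));
        }
        offset += sizes[i];
    }
    for(int k = 0; k < batch*outputs; ++k) printf("%g ", out[k]);
    printf("\n");   //prints: 1 2 3 7 8 4 5 6 9 10
    return 0;
}

The j*l.outputs stride is what keeps each sample's concatenated features contiguous even though the inputs are copied one source layer at a time.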

Backward function

The route layer's delta is added back into the deltas of the specified layers.

void backward_route_layer(const route_layer l, network_state state)
{
    int i, j;
    int offset = 0;
    for(i = 0; i < l.n; ++i){
        int index = l.input_layers[i];
        float *delta = state.net.layers[index].delta;
        int input_size = l.input_sizes[i];
        for(j = 0; j < l.batch; ++j){
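            //accumulate (delta += route delta): the source layer may already hold gradients from other consumers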
            axpy_cpu(input_size, 1, l.delta + offset + j*l.outputs, 1, delta + j*input_size, 1);
        }
        offset += input_size;
    }
}
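
Note the use of axpy_cpu rather than a plain copy: in darknet's blas.c, axpy_cpu(N, ALPHA, X, INCX, Y, INCY) computes Y += ALPHA*X, so the gradient is accumulated into the source layer's delta, which may already hold contributions from other layers consuming the same output. A minimal stand-in (illustration only; axpy_cpu_sketch is a hypothetical name):

#include <stdio.h>

//minimal stand-in for darknet's axpy_cpu: y += alpha * x
void axpy_cpu_sketch(int n, float alpha, float *x, int incx, float *y, int incy)
{
    int i;
    for(i = 0; i < n; ++i) y[i*incy] += alpha*x[i*incx];
}

int main(void)
{
    float route_delta[3] = {0.1f, 0.2f, 0.3f};   //slice of l.delta (made up)
    float layer_delta[3] = {1, 1, 1};            //may already hold other gradients
    axpy_cpu_sketch(3, 1, route_delta, 1, layer_delta, 1);
    printf("%g %g %g\n", layer_delta[0], layer_delta[1], layer_delta[2]);
    //prints: 1.1 1.2 1.3
    return 0;
}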

Reposted from blog.csdn.net/z0n1l2/article/details/80740083