Analysis of the dropout implementation in YOLO

1. In parser.c, when the dropout layer is constructed, its output buffer is pointed directly at the memory address of the previous layer's output (and its delta at the previous layer's delta), as the following code shows:

#ifndef pca
    fprintf(stderr, "layer     filters    size              input                output\n");
#else
	fprintf(stderr, "加载数据!!");
#endif
    while(n){
        params.index = count;
       // fprintf(stderr, "%5d ", count);
        s = (section *)n->val;
        options = s->options;
        layer l = {0};
        LAYER_TYPE lt = string_to_layer_type(s->type);
        if(lt == CONVOLUTIONAL){
            l = parse_convolutional(options, params);
        }else if(lt == LOCAL){
            l = parse_local(options, params);
        }else if(lt == ACTIVE){
            l = parse_activation(options, params);
        }else if(lt == RNN){
            l = parse_rnn(options, params);
        }else if(lt == GRU){
            l = parse_gru(options, params);
        }else if(lt == CRNN){
            l = parse_crnn(options, params);
        }else if(lt == CONNECTED){
            l = parse_connected(options, params);
        }else if(lt == CROP){
            l = parse_crop(options, params);
        }else if(lt == COST){
            l = parse_cost(options, params);
        }else if(lt == REGION){
            l = parse_region(options, params);
        }else if(lt == DETECTION){
            l = parse_detection(options, params);
        }else if(lt == SOFTMAX){
            l = parse_softmax(options, params);
            net.hierarchy = l.softmax_tree;
        }else if(lt == NORMALIZATION){
            l = parse_normalization(options, params);
        }else if(lt == BATCHNORM){
            l = parse_batchnorm(options, params);
        }else if(lt == MAXPOOL){
            l = parse_maxpool(options, params);
        }else if(lt == REORG){
            l = parse_reorg(options, params);
        }else if(lt == AVGPOOL){
            l = parse_avgpool(options, params);
        }else if(lt == ROUTE){
            l = parse_route(options, params, net);
        }else if(lt == SHORTCUT){
            l = parse_shortcut(options, params, net);
        }else if(lt == DROPOUT) {
            l = parse_dropout(options, params);
            l.output = net.layers[count-1].output;
            l.delta = net.layers[count-1].delta;
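
For context, parse_dropout (not shown above) reads the drop probability from the cfg file and builds the layer. The sketch below is a simplified approximation of that constructor, not the verbatim darknet source: the only buffer the layer allocates for itself is the random mask l.rand, while l.output and l.delta are aliased to the previous layer by the parser code above; the rescale factor l.scale is assumed here to be the usual inverted-dropout factor 1/(1 - probability).

/* Simplified sketch of the dropout layer constructor (an approximation, not the
   verbatim darknet source; assumes the darknet layer struct and headers). */
dropout_layer make_dropout_layer(int batch, int inputs, float probability)
{
    dropout_layer l = {0};
    l.type = DROPOUT;
    l.probability = probability;
    l.inputs = inputs;
    l.outputs = inputs;                              /* dropout keeps the layer size */
    l.batch = batch;
    l.rand = calloc(inputs * batch, sizeof(float));  /* per-element random draws */
    l.scale = 1.f / (1.f - probability);             /* inverted-dropout rescale factor (assumed) */
    l.forward = forward_dropout_layer;               /* called by forward_network below */
    l.backward = backward_dropout_layer;
    return l;                                        /* l.output / l.delta stay unset: the parser aliases them */
}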

The forward pass for the dropout layer is in dropout_layer.c:

void forward_dropout_layer(dropout_layer l, network_state state)
{
    int i;
    if (!state.train) return;
    for(i = 0; i < l.batch * l.inputs; ++i){
        float r = rand_uniform(0, 1);
        l.rand[i] = r;
        if(r < l.probability)
            // setting the activation to 0 is exactly what "dropping" the unit means
            state.input[i] = 0;
        else
            // rescale the surviving activations by l.scale so the expected value
            // of each activation stays the same as without dropout
            state.input[i] *= l.scale;
    }
}
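
The rescaling in the else branch is what preserves the layer's statistics ("inverted dropout"): if each unit survives with probability 1 - l.probability and the survivors are multiplied by scale = 1/(1 - probability), the expected value of every activation is unchanged, which is why nothing special has to happen at test time (forward_dropout_layer simply returns when !state.train). The standalone toy program below is not darknet code; it just averages the result over many trials to make that point concrete.

/* Toy demo (not darknet code): inverted dropout keeps the expected activation
   unchanged when survivors are scaled by 1/(1 - p). */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    float p = 0.5f;                  /* drop probability */
    float scale = 1.f / (1.f - p);   /* inverted-dropout rescale factor */
    float x = 1.0f;                  /* original activation */
    int trials = 100000;
    double sum = 0.0;
    int t;

    srand(1234);
    for (t = 0; t < trials; ++t) {
        float r = (float)rand() / RAND_MAX;       /* uniform draw in [0, 1] */
        float out = (r < p) ? 0.f : x * scale;    /* drop or rescale, as in the loop above */
        sum += out;
    }
    printf("mean activation after dropout: %f (original x = %f)\n", sum / trials, x);
    return 0;
}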

Here state.input and l.output point to the same memory address: in the network-wide forward pass, state.input is set to the address of the previous layer's output, as forward_network in network.c shows:

void forward_network(network net, network_state state)
{
	state.workspace = net.workspace;
	int i;
	for (i = 0; i < net.n; ++i){
		state.index = i;
		layer l = net.layers[i]; // fetch the layer to run in this iteration
		if (l.delta){
			scal_cpu(l.outputs * l.batch, 0, l.delta, 1);
		}
		l.forward(l, state);
		// this layer's output becomes the next layer's input: only the pointer is copied,
		// so state.input and l.output refer to the same memory address
		state.input = l.output;
	}
}
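
Because state.input = l.output is a plain pointer assignment, the dropout layer ends up writing through the very buffer that the parser registered as its own output: zeroing state.input[i] in place is, at the same time, producing l.output for the next layer. The tiny demo below (not darknet code) shows the aliasing pattern with ordinary C pointers.

/* Tiny demo (not darknet code) of the pointer aliasing used by the dropout layer:
   l.output and state.input are just different names for the previous layer's
   buffer, so an in-place write is visible through all of them. */
#include <stdio.h>

int main(void)
{
    float prev_output[4] = {1.f, 2.f, 3.f, 4.f};

    float *dropout_output = prev_output;   /* l.output = net.layers[count-1].output */
    float *state_input    = prev_output;   /* state.input = l.output */

    state_input[1] = 0.f;                  /* "drop" one unit in place */

    printf("prev_output[1] = %g, dropout_output[1] = %g\n",
           prev_output[1], dropout_output[1]);   /* both print 0 */
    return 0;
}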



Reposted from blog.csdn.net/yangdashi888/article/details/79522004