// Flat offset into the layer's output/delta buffers.
//   batch    - sample index; each sample spans l.outputs elements.
//   location - anchor-major position: anchor * (l.w*l.h) + cell.
//   entry    - channel plane within the anchor (the callers in this file use
//              0 for the first box channel, 4 for objectness, 4 + 1 for the
//              first class score).
// Each anchor occupies (4 + l.classes + 1) planes of l.w*l.h elements.
static int entry_index(layer l, int batch, int location, int entry)
{
    const int plane = l.w * l.h;
    const int anchor = location / plane;
    const int cell = location % plane;
    const int planes_per_anchor = 4 + l.classes + 1;

    int offset = batch * l.outputs;
    offset += anchor * plane * planes_per_anchor;
    offset += entry * plane;
    offset += cell;
    return offset;
}
// Decode one predicted box from the raw layer output.
//   x       - output buffer; the four box channels start at x[index] and are
//             `stride` elements apart (x offset, y offset, w, h).
//   biases  - anchor sizes stored as (width, height) pairs; pair `n` is used.
//   i, j    - cell column / row, added to the x / y offsets.
//   lw, lh  - size of the current feature map.
//   w, h    - size of the network input image.
// Returns the box with x, y divided by the feature-map size and w, h divided
// by the input image size.
box get_yolo_box(float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, int stride)
{
    box b;
    const float *chan = x + index;
    const float off_x = chan[0];
    const float off_y = chan[stride];
    const float raw_w = chan[2 * stride];
    const float raw_h = chan[3 * stride];
    const float anchor_w = biases[2 * n];
    const float anchor_h = biases[2 * n + 1];

    b.x = (i + off_x) / lw;
    b.y = (j + off_y) / lh;
    // Width/height are the anchor size scaled by exp() of the raw value.
    b.w = exp(raw_w) * anchor_w / w;
    b.h = exp(raw_h) * anchor_h / h;
    return b;
}
// Forward pass of the YOLO layer.
// Copies state.input into l.output, applies the logistic activation to the
// x/y offset planes and to the objectness + class planes (only when GPU is
// not defined), and clears l.delta. If state.train is not set it returns at
// that point. Otherwise it fills l.delta from the ground truth in state.truth,
// stores the squared magnitude of l.delta in *(l.cost) and prints running
// statistics for the layer.
//
// NOTE(review): the text of this function appears to have been truncated:
// statement terminators are missing and every `for` header stops after its
// initialiser. The loop bounds are therefore not visible here; restore the
// function from an authoritative copy before compiling.
void forward_yolo_layer(const layer l, network_state state)
{
int i,j,b,t,n
memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float))
#ifndef GPU
// CPU build only: squash the two x/y offset planes (entries 0-1) and the
// objectness + class planes (entries 4 onwards) of every anchor.
for (b = 0
for(n = 0
int index = entry_index(l, b, n*l.w*l.h, 0)
activate_array(l.output + index, 2*l.w*l.h, LOGISTIC)
index = entry_index(l, b, n*l.w*l.h, 4)
activate_array(l.output + index, (1+l.classes)*l.w*l.h, LOGISTIC)
}
}
#endif
memset(l.delta, 0, l.outputs * l.batch * sizeof(float))
// Everything below only runs in training mode.
if(!state.train) return
// Accumulators for the statistics printed at the end of the function.
float avg_iou = 0
float recall = 0
float recall75 = 0
float avg_cat = 0
float avg_obj = 0
float avg_anyobj = 0
int count = 0
int class_count = 0
*(l.cost) = 0
// Pass 1: for every predicted box, find the best-matching ground-truth box.
for (b = 0
for (j = 0
for (i = 0
for (n = 0
// Memory layout: batch-anchor-xoffset-yoffset-w-h-objectness-classid
// Each of xoffset, yoffset, bw, bh, objectness, classid spans l.w * l.h elements
int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0)
// Prediction at cell (i,j) for anchor l.mask[n]
box pred = get_yolo_box(l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.w*l.h)
float best_iou = 0
int best_t = 0
// Walk all ground-truth objects of the image and keep the one that overlaps pred the most
for(t = 0
// Each truth record is (4 + 1) floats: a box followed by the class id.
box truth = float_to_box_stride(state.truth + t*(4 + 1) + b*l.truths, 1)
int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]
// Out-of-range class id: warn, wait for a key press, and skip this record.
// NOTE(review): this `continue` runs before the `!truth.x` check below.
if (class_id >= l.classes) {
printf(" Warning: in txt-labels class_id=%d >= classes=%d in cfg-file. In txt-labels class_id should be [from 0 to %d] \n", class_id, l.classes, l.classes - 1)
getchar()
continue
}
// A record with x == 0 stops the scan (presumably the end-of-list marker — confirm).
if(!truth.x) break
float iou = box_iou(pred, truth)
if (iou > best_iou) {
best_iou = iou
best_t = t
}
}
int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4)
avg_anyobj += l.output[obj_index]
// Default objectness target is 0; the delta is zeroed when the best IoU
// exceeds l.ignore_thresh.
l.delta[obj_index] = 0 - l.output[obj_index]
if (best_iou > l.ignore_thresh) {
l.delta[obj_index] = 0
}
// Above l.truth_thresh the prediction is treated as a positive: objectness
// target 1, plus class and box deltas against the best-matching truth.
if (best_iou > l.truth_thresh) {
l.delta[obj_index] = 1 - l.output[obj_index]
int class_id = state.truth[best_t*(4 + 1) + b*l.truths + 4]
if (l.map) class_id = l.map[class_id]
int class_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4 + 1)
// Compute the class-id error (one-hot encoding; computed like objectness, but focal_loss is also supported here)
delta_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, 0, l.focal_loss)
box truth = float_to_box_stride(state.truth + best_t*(4 + 1) + b*l.truths, 1)
// Compute the box error (open question from the original author: what does the second-to-last argument mean?)
// NOTE(review): (2 - truth.w*truth.h) is larger for smaller truth boxes;
// presumably a scale applied by delta_yolo_box — confirm there.
delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h)
}
}
}
} //end of "for (j = 0; j < l.h; ++j)"
// Pass 2: iterate over every ground-truth object and find the anchor with the highest IoU.
// This serves the same purpose as the pass above (every grid anchor looking for its best
// ground-truth object), but perhaps it covers the case where badly chosen anchors fail to
// overlap a ground-truth object? (open question from the original author)
for(t = 0
box truth = float_to_box_stride(state.truth + t*(4 + 1) + b*l.truths, 1)
int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]
if (class_id >= l.classes) continue
if(!truth.x) break
float best_iou = 0
int best_n = 0
// Grid cell containing the truth centre (truncated to int).
i = (truth.x * l.w)
j = (truth.y * l.h)
// Compare sizes only: both the truth copy and each anchor box sit at x = y = 0.
box truth_shift = truth
truth_shift.x = truth_shift.y = 0
for(n = 0
box pred = {0}
pred.w = l.biases[2*n]/ state.net.w
pred.h = l.biases[2*n+1]/ state.net.h
float iou = box_iou(pred, truth_shift)
if (iou > best_iou){
best_iou = iou
best_n = n
}
}
// Only act when the lookup of best_n in l.mask yields a non-negative index
// (presumably "this layer owns that anchor" — confirm against int_index).
int mask_n = int_index(l.mask, best_n, l.n)
if(mask_n >= 0){
int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0)
// Same computation as in pass 1, but the returned iou is used here
float iou = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h)
int obj_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4)
avg_obj += l.output[obj_index]
l.delta[obj_index] = 1 - l.output[obj_index]
int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]
if (l.map) class_id = l.map[class_id]
int class_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4 + 1)
// Same computation as in pass 1, but &avg_cat is passed so the avg_cat statistic can be collected
delta_yolo_class(l.output, l.delta, class_index, class_id, l.classes, l.w*l.h, &avg_cat, l.focal_loss)
++count
++class_count
if(iou > .5) recall += 1
if(iou > .75) recall75 += 1
avg_iou += iou
}
}
}
// Open question from the original author: if avg_iou and avg_cat differ too much, does that point to a configuration problem?
// Cost is the square of mag_array over l.delta (presumably the L2 norm — confirm).
*(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2)
// NOTE(review): count and class_count stay 0 when no truth was assigned in
// pass 2, so the averages below divide by zero in that case.
printf("Region %d Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f, count: %d\n", state.index, avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, recall75/count, count)
}