Tensorflow版Faster RCNN源码解析(TFFRCNN) (13) gt_data_layer/roidb.py

本blog为github上CharlesShang/TFFRCNN版源码解析系列代码笔记

---------------个人学习笔记---------------

----------------本文作者吴疆--------------

------点击此处链接至博客园原文------

定义函数与roi_data_layer/roidb.py类似

1.prepare_roidb(imdb)

给imdb.roidb增加'info_boxes'字段信息,shape为N*18,18表示(cx, cy, scale_ind, box, scale_ind_map, box_map, gt_label, gt_sublabel, target),未见调用

get_boxes_grid(image_height, image_width)(由C编译),该函数的具体含义未知

为何imdb.roidb中既有gt roi又有一般的roi???ex_rois和gt_rois???

默认cfg中没有TRAIN.SCALE_MAPPING这一项,执行到该处将报错;该函数实际并未被执行

扫描二维码关注公众号,回复: 7005273 查看本文章
# 为imdb.roidb增加'info_boxes'字段信息
# 包含(cx, cy, scale_ind, box, scale_ind_map, box_map, gt_label, gt_sublabel, target)共18列
def prepare_roidb(imdb):
    """
    Add the image path and an 'info_boxes' array to every entry of imdb.roidb.

    If the prepared-roidb cache file already exists, imdb._roidb is replaced
    by its unpickled content and the function returns immediately.

    Otherwise, for each image the grid boxes returned by get_boxes_grid are
    compared, at every scale in cfg.TRAIN.SCALES, against roidb[i]['boxes']
    multiplied by that scale. A grid box is kept as a positive when its
    best overlap is >= cfg.TRAIN.FG_THRESH and the box it overlaps best has a
    class label in 1 .. imdb.num_classes - 1.

    Each positive contributes one float32 row of 18 columns to 'info_boxes':

        0       cx (as returned by get_boxes_grid)
        1       cy (as returned by get_boxes_grid)
        2       scale_ind
        3-6     grid box
        7       scale_ind_map
        8-11    grid box * mapped scale / scale
        12      gt label
        13      gt sublabel (never written here, stays 0)
        14-17   regression targets from _compute_targets

    Images without boxes get an empty (0, 18) array.

    Args:
        imdb: dataset object; this function reads cache_path, name, roidb,
            image_index, num_classes and image_path_at(i), and may assign
            _roidb.

    Returns:
        None. imdb.roidb entries are modified in place.
    """
    cache_file = os.path.join(imdb.cache_path, imdb.name + '_gt_roidb_prepared.pkl')
    if os.path.exists(cache_file):
        # NOTE(review): unpickling can execute arbitrary code; this is only
        # safe as long as the cache directory is trusted.
        with open(cache_file, 'rb') as fid:
            imdb._roidb = cPickle.load(fid)
        # Parenthesised single-argument form prints identically on Python 2 and 3.
        print('{} gt roidb prepared loaded from {}'.format(imdb.name, cache_file))
        return
    # NOTE(review): nothing in this function writes cache_file, so the branch
    # above only triggers if the file is produced elsewhere -- confirm.
    roidb = imdb.roidb
    # Visit every image of the dataset.
    for i in range(len(imdb.image_index)):
        image_path = imdb.image_path_at(i)
        roidb[i]['image'] = image_path
        boxes = roidb[i]['boxes']
        labels = roidb[i]['gt_classes']
        info_boxes = np.zeros((0, 18), dtype=np.float32)
        if boxes.shape[0] == 0:
            roidb[i]['info_boxes'] = info_boxes
            continue
        # Compute grid boxes; PIL reports size as (width, height).
        image_width, image_height = PIL.Image.open(image_path).size
        boxes_grid, cx, cy = get_boxes_grid(image_height, image_width)
        # Accumulate positives for each training scale.
        for scale_ind, scale in enumerate(cfg.TRAIN.SCALES):
            boxes_rescaled = boxes * scale
            # Overlap of every grid box with every rescaled box.
            # np.float64 is what the removed np.float alias resolved to.
            overlaps = bbox_overlaps(boxes_grid.astype(np.float64),
                                     boxes_rescaled.astype(np.float64))
            # Best overlap per grid box, and which box produced it.
            max_overlaps = overlaps.max(axis=1)
            argmax_overlaps = overlaps.argmax(axis=1)
            # Class label of the best-overlapping box.
            max_classes = labels[argmax_overlaps]
            # Select positive boxes, grouped class by class (class 0 excluded);
            # this grouping fixes the row order of info_box below.
            fg_inds = []
            for k in range(1, imdb.num_classes):
                fg_inds.extend(
                    np.where((max_classes == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH))[0])
            if len(fg_inds) > 0:
                gt_inds = argmax_overlaps[fg_inds]
                # Bounding-box regression targets.
                gt_targets = _compute_targets(boxes_grid[fg_inds, :], boxes_rescaled[gt_inds, :])
                # Scale mapping.
                # NOTE(review): the notes accompanying this code state that the
                # default config has no TRAIN.SCALE_MAPPING, in which case this
                # line raises -- confirm against the config in use.
                scale_ind_map = cfg.TRAIN.SCALE_MAPPING[scale_ind]
                scale_map = cfg.TRAIN.SCALES[scale_ind_map]
                # Construct the rows for the positive boxes:
                # (cx, cy, scale_ind, box, scale_ind_map, box_map,
                #  gt_label, gt_sublabel, target)
                info_box = np.zeros((len(fg_inds), 18), dtype=np.float32)
                info_box[:, 0] = cx[fg_inds]
                info_box[:, 1] = cy[fg_inds]
                info_box[:, 2] = scale_ind
                info_box[:, 3:7] = boxes_grid[fg_inds, :]
                info_box[:, 7] = scale_ind_map
                info_box[:, 8:12] = boxes_grid[fg_inds, :] * scale_map / scale
                info_box[:, 12] = labels[gt_inds]
                # Column 13 (gt_sublabel) is intentionally left untouched (0).
                info_box[:, 14:] = gt_targets
                info_boxes = np.vstack((info_boxes, info_box))
        roidb[i]['info_boxes'] = info_boxes

2.add_bbox_regression_targets(roidb)

规范化目标回归值,并返回目标回归值的均值和标准差供测试阶段使用,未见调用

# 规范化目标回归值,并返回目标回归值的均值和标准差供测试阶段使用
def add_bbox_regression_targets(roidb):
    """
    Normalize the regression targets stored in each entry's 'info_boxes'.

    Columns 14-17 of 'info_boxes' hold the regression targets and column 12
    holds the class label. Per-class means and standard deviations of the
    targets are computed over all images (class 0 is skipped), then every
    target row has its class mean subtracted and is divided by its class
    standard deviation. The arrays in roidb are modified in place.

    A target column whose standard deviation is 0 is only centered, not
    scaled, so no division by zero occurs.

    Args:
        roidb: non-empty sequence of dicts, each with an 'info_boxes' array;
            roidb[0]['gt_overlaps'] supplies the number of classes through
            its second dimension.

    Returns:
        (means, stds): two flattened arrays of length num_classes * 4 holding
        the per-class target means and standard deviations. They are needed
        to un-normalize predictions.

    Raises:
        AssertionError: if roidb is empty or lacks 'info_boxes'.
    """
    assert len(roidb) > 0
    assert 'info_boxes' in roidb[0], 'Did you call prepare_roidb first?'
    # Infer number of classes from the number of columns in gt_overlaps.
    num_classes = roidb[0]['gt_overlaps'].shape[1]
    # Accumulate what is needed for var(x) = E(x^2) - E(x)^2.
    # Counts start at cfg.EPS so classes without samples do not divide by zero.
    class_counts = np.zeros((num_classes, 1)) + cfg.EPS
    sums = np.zeros((num_classes, 4))
    squared_sums = np.zeros((num_classes, 4))
    for entry in roidb:
        targets = entry['info_boxes']
        for cls in range(1, num_classes):
            cls_inds = np.where(targets[:, 12] == cls)[0]
            if cls_inds.size > 0:
                cls_targets = targets[cls_inds, 14:]
                class_counts[cls] += cls_inds.size
                sums[cls, :] += cls_targets.sum(axis=0)
                squared_sums[cls, :] += (cls_targets ** 2).sum(axis=0)
    means = sums / class_counts
    # Rounding can push E(x^2) - E(x)^2 slightly below zero; clamp so that
    # sqrt never yields nan (which would then poison every target).
    variances = squared_sums / class_counts - means ** 2
    stds = np.sqrt(np.maximum(variances, 0.0))
    # Divisor used for scaling: 1 wherever the std is 0, decided per column.
    # (Checking only the first column could divide the others by zero.)
    divisors = np.where(stds != 0, stds, 1.0)
    # Normalize targets in place.
    for entry in roidb:
        targets = entry['info_boxes']
        for cls in range(1, num_classes):
            cls_inds = np.where(targets[:, 12] == cls)[0]
            if cls_inds.size == 0:
                continue
            targets[cls_inds, 14:] -= means[cls, :]
            targets[cls_inds, 14:] /= divisors[cls, :]
    # These values will be needed for making predictions
    # (the predictions will need to be unnormalized and uncentered).
    return means.ravel(), stds.ravel()

3._compute_targets(ex_rois, gt_rois)

根据ex_rois和gt_rois计算回归目标值,与bbox_transform.py中bbox_transform(ex_rois, gt_rois)函数类似,被prepare_roidb(...)函数调用

# 计算回归目标值,与bbox_transform.py中函数类似
def _compute_targets(ex_rois, gt_rois):
    """
    Compute bounding-box regression targets for paired boxes.

    Columns 0-3 of both inputs are read as (x1, y1, x2, y2). Widths and
    heights have cfg.EPS added. The four targets per row are:

        dx = (gt center x - ex center x) / ex width
        dy = (gt center y - ex center y) / ex height
        dw = log(gt width / ex width)
        dh = log(gt height / ex height)

    Args:
        ex_rois: 2-D array of example boxes.
        gt_rois: 2-D array of the boxes each example is regressed towards,
            row-aligned with ex_rois.

    Returns:
        float32 array of shape (ex_rois.shape[0], 4) with columns
        (dx, dy, dw, dh).
    """
    def _extent_and_center(rois):
        # Width/height padded by cfg.EPS, plus the resulting box center.
        width = rois[:, 2] - rois[:, 0] + cfg.EPS
        height = rois[:, 3] - rois[:, 1] + cfg.EPS
        center_x = rois[:, 0] + 0.5 * width
        center_y = rois[:, 1] + 0.5 * height
        return width, height, center_x, center_y

    ex_w, ex_h, ex_cx, ex_cy = _extent_and_center(ex_rois)
    gt_w, gt_h, gt_cx, gt_cy = _extent_and_center(gt_rois)

    columns = (
        (gt_cx - ex_cx) / ex_w,   # dx
        (gt_cy - ex_cy) / ex_h,   # dy
        np.log(gt_w / ex_w),      # dw
        np.log(gt_h / ex_h),      # dh
    )

    deltas = np.zeros((ex_rois.shape[0], 4), dtype=np.float32)
    for col, values in enumerate(columns):
        deltas[:, col] = values
    return deltas

猜你喜欢

转载自www.cnblogs.com/deeplearning1314/p/11325014.html