TensorFlow Faster R-CNN source code walkthrough (TFFRCNN), part 13: gt_data_layer/roidb.py

This post is part of a series of notes analysing the source code of the CharlesShang/TFFRCNN repository on GitHub.

--------------- personal study notes ---------------

---------------- Author: Wu Jiang --------------

------ Click here for the original post on cnblogs ------

 

The functions defined in this file are similar to those in roi_data_layer/roidb.py.

1.prepare_roidb(imdb)

Adds an 'info_boxes' field to each entry of imdb.roidb. The field has shape N x 18, where the 18 columns hold (cx, cy, scale_ind, box, scale_ind_map, box_map, gt_label, gt_sublabel, target). No call site for this function was found.

The purpose of get_boxes_grid(image_height, image_width) (implemented as a compiled C extension) is unclear.

Open question: why does imdb.roidb hold both ground-truth RoIs and ordinary RoIs (that is, both ex_rois and gt_rois)?

By default the config has no cfg.TRAIN.SCALE_MAPPING value, so this function would raise an error if it ran; it is therefore never executed.

# Adds an 'info_boxes' field to every entry of imdb.roidb.
# Each row of 'info_boxes' has 18 columns:
# (cx, cy, scale_ind, box, scale_ind_map, box_map, gt_label, gt_sublabel, target)
def prepare_roidb(imdb):
    """Attach per-image 'image' and 'info_boxes' fields to ``imdb.roidb``.

    If ``<imdb.cache_path>/<imdb.name>_gt_roidb_prepared.pkl`` exists, it is
    unpickled into ``imdb._roidb`` and the function returns immediately.

    Otherwise, for every image index ``i`` the entry ``roidb[i]`` gets:

    * ``'image'``: ``imdb.image_path_at(i)``
    * ``'info_boxes'``: a float32 array of shape (M, 18). It is empty when the
      image has no boxes. Otherwise it has one row per grid box whose best
      overlap with a rescaled box reaches ``cfg.TRAIN.FG_THRESH`` and whose
      best-matching label is in ``1 .. imdb.num_classes - 1``, accumulated
      over every scale in ``cfg.TRAIN.SCALES``. Column layout::

          0      cx
          1      cy
          2      scale_ind
          3:7    grid box
          7      scale_ind_map
          8:12   grid box rescaled by scale_map / scale
          12     gt_label
          13     gt_sublabel (never written here, stays 0)
          14:18  regression targets from _compute_targets

    Args:
        imdb: dataset object providing ``cache_path``, ``name``, ``roidb``,
            ``image_index``, ``image_path_at(i)`` and ``num_classes``.

    Returns:
        None. The entries of ``imdb.roidb`` are modified in place, or
        ``imdb._roidb`` is replaced by the cached copy.
    """
    cache_file = os.path.join(imdb.cache_path, imdb.name + '_gt_roidb_prepared.pkl')
    if os.path.exists(cache_file):
        # NOTE: unpickling can execute arbitrary code; the cache file must
        # come from a trusted location.
        with open(cache_file, 'rb') as fid:
            imdb._roidb = cPickle.load(fid)
        # Parenthesised single-argument form prints the same text under
        # both Python 2 and Python 3.
        print('{} gt roidb prepared loaded from {}'.format(imdb.name, cache_file))
        return
    # NOTE(review): nothing visible in this function writes cache_file;
    # confirm whether the cache is produced elsewhere.

    roidb = imdb.roidb
    # Iterate over every image in the dataset.
    for i in range(len(imdb.image_index)):
        roidb[i]['image'] = imdb.image_path_at(i)
        boxes = roidb[i]['boxes']
        labels = roidb[i]['gt_classes']
        info_boxes = np.zeros((0, 18), dtype=np.float32)
        if boxes.shape[0] == 0:
            # No boxes for this image: store the empty (0, 18) array.
            roidb[i]['info_boxes'] = info_boxes
            continue

        # Compute grid boxes; PIL reports size as (width, height).
        s = PIL.Image.open(imdb.image_path_at(i)).size
        image_height = s[1]
        image_width = s[0]
        # NOTE(review): the semantics of get_boxes_grid are not visible here;
        # it is used as returning (boxes_grid, cx, cy), all indexable per box.
        boxes_grid, cx, cy = get_boxes_grid(image_height, image_width)

        # For each training scale.
        for scale_ind, scale in enumerate(cfg.TRAIN.SCALES):
            boxes_rescaled = boxes * scale

            # Compute overlaps between grid boxes and the rescaled boxes.
            # np.float64 is what the removed np.float alias resolved to.
            overlaps = bbox_overlaps(boxes_grid.astype(np.float64),
                                     boxes_rescaled.astype(np.float64))
            # Best overlap of each grid box with any rescaled box ...
            max_overlaps = overlaps.max(axis=1)
            argmax_overlaps = overlaps.argmax(axis=1)
            # ... and the label of that best-matching box.
            max_classes = labels[argmax_overlaps]

            # Select positive boxes, grouped class by class (class 0 skipped).
            fg_inds = []
            for k in range(1, imdb.num_classes):
                fg_inds.extend(
                    np.where((max_classes == k) &
                             (max_overlaps >= cfg.TRAIN.FG_THRESH))[0])

            if len(fg_inds) > 0:
                gt_inds = argmax_overlaps[fg_inds]
                # Bounding-box regression targets.
                gt_targets = _compute_targets(boxes_grid[fg_inds, :],
                                              boxes_rescaled[gt_inds, :])
                # Scale mapping for RoI pooling.
                # NOTE(review): the surrounding notes say cfg.TRAIN.SCALE_MAPPING
                # is not defined by default, in which case this line raises.
                scale_ind_map = cfg.TRAIN.SCALE_MAPPING[scale_ind]
                scale_map = cfg.TRAIN.SCALES[scale_ind_map]
                # Construct the rows for the positive boxes; the 18 columns are
                # (cx, cy, scale_ind, box, scale_ind_map, box_map, gt_label, gt_sublabel, target)
                info_box = np.zeros((len(fg_inds), 18), dtype=np.float32)
                info_box[:, 0] = cx[fg_inds]
                info_box[:, 1] = cy[fg_inds]
                info_box[:, 2] = scale_ind
                info_box[:, 3:7] = boxes_grid[fg_inds, :]
                info_box[:, 7] = scale_ind_map
                info_box[:, 8:12] = boxes_grid[fg_inds, :] * scale_map / scale
                info_box[:, 12] = labels[gt_inds]
                info_box[:, 14:] = gt_targets
                info_boxes = np.vstack((info_boxes, info_box))
        roidb[i]['info_boxes'] = info_boxes

2.add_bbox_regression_targets(roidb)

Normalizes the bounding-box regression targets, and returns their mean and standard deviation for use in the testing phase. No call site for this function was found.

# Normalizes the regression targets and returns their per-class mean and
# standard deviation for use in the testing phase.
def add_bbox_regression_targets(roidb):
    """Add information needed to train bounding-box regressors.

    Computes, per class, the mean and standard deviation of the regression
    targets stored in columns 14: of every ``roidb[i]['info_boxes']`` (rows
    are assigned to a class by column 12), then normalizes those targets in
    place: the class mean is subtracted, and the result is divided by the
    class standard deviation when its first component is non-zero.

    Args:
        roidb: non-empty sequence of dicts. Each must hold an 'info_boxes'
            array; the first must also hold 'gt_overlaps', whose number of
            columns is taken as the number of classes.

    Returns:
        Tuple ``(means, stds)``: the (num_classes, 4) statistics flattened to
        1-D with ``ravel()``. Class 0 is never accumulated, so its rows are 0.

    Raises:
        AssertionError: if ``roidb`` is empty or its first entry has no
            'info_boxes' field.
    """
    assert len(roidb) > 0
    assert 'info_boxes' in roidb[0], 'Did you call prepare_roidb first?'

    num_images = len(roidb)
    # Infer the number of classes from the number of columns in gt_overlaps.
    num_classes = roidb[0]['gt_overlaps'].shape[1]

    # Compute values needed for means and stds:
    # var(x) = E(x^2) - E(x)^2
    # cfg.EPS keeps the divisions below finite for classes with no samples.
    class_counts = np.zeros((num_classes, 1)) + cfg.EPS
    sums = np.zeros((num_classes, 4))
    squared_sums = np.zeros((num_classes, 4))
    # info_boxes columns (18 in total):
    # (cx, cy, scale_ind, 4 box, scale_ind_map, 4 box_map, gt_label, gt_sublabel, 4 target)
    for im_i in range(num_images):
        targets = roidb[im_i]['info_boxes']
        for cls in range(1, num_classes):
            cls_inds = np.where(targets[:, 12] == cls)[0]
            if cls_inds.size > 0:
                class_counts[cls] += cls_inds.size
                sums[cls, :] += targets[cls_inds, 14:].sum(axis=0)
                squared_sums[cls, :] += (targets[cls_inds, 14:] ** 2).sum(axis=0)

    # Per-class mean and standard deviation of the regression targets.
    means = sums / class_counts
    stds = np.sqrt(squared_sums / class_counts - means ** 2)

    # Normalize the targets in place.
    for im_i in range(num_images):
        targets = roidb[im_i]['info_boxes']
        for cls in range(1, num_classes):
            cls_inds = np.where(targets[:, 12] == cls)[0]
            roidb[im_i]['info_boxes'][cls_inds, 14:] -= means[cls, :]
            # Only the first std component is checked before dividing.
            if stds[cls, 0] != 0:
                roidb[im_i]['info_boxes'][cls_inds, 14:] /= stds[cls, :]

    # These values will be needed for making predictions
    # (the predictions will need to be unnormalized and uncentered).
    # ravel() flattens the (num_classes, 4) arrays to 1-D.
    return means.ravel(), stds.ravel()

3._compute_targets(ex_rois, gt_rois)

Computes the regression targets from ex_rois and gt_rois. It works much like bbox_transform(ex_rois, gt_rois) in bbox_transform.py, and is called by prepare_roidb(...).

# Computes the regression targets; similar to the function in bbox_transform.py.
def _compute_targets(ex_rois, gt_rois):
    """Compute bounding-box regression targets for an image.

    Both inputs are indexed as ``[:, 0..3]``; the code treats columns 0/2 as
    the x extent and columns 1/3 as the y extent of each box.

    Args:
        ex_rois: array of example boxes, one per row.
        gt_rois: array of matching ground-truth boxes, one per row.

    Returns:
        float32 array of shape (ex_rois.shape[0], 4) with columns
        (dx, dy, dw, dh): centre offsets divided by the example box size,
        and log ratios of ground-truth to example width and height.
    """
    def _extent_and_centre(rois):
        # cfg.EPS is added to each side length, keeping it non-zero for
        # zero-size boxes ahead of the division and log below.
        width = rois[:, 2] - rois[:, 0] + cfg.EPS
        height = rois[:, 3] - rois[:, 1] + cfg.EPS
        centre_x = rois[:, 0] + 0.5 * width
        centre_y = rois[:, 1] + 0.5 * height
        return width, height, centre_x, centre_y

    ex_w, ex_h, ex_cx, ex_cy = _extent_and_centre(ex_rois)
    gt_w, gt_h, gt_cx, gt_cy = _extent_and_centre(gt_rois)

    deltas = (
        (gt_cx - ex_cx) / ex_w,   # dx
        (gt_cy - ex_cy) / ex_h,   # dy
        np.log(gt_w / ex_w),      # dw
        np.log(gt_h / ex_h),      # dh
    )

    # Fill a preallocated float32 array column by column.
    targets = np.zeros((ex_rois.shape[0], 4), dtype=np.float32)
    for column, values in enumerate(deltas):
        targets[:, column] = values
    return targets

Guess you like

Origin www.cnblogs.com/deeplearning1314/p/11325014.html