This post is part of a series of notes analyzing the source code of the CharlesShang/TFFRCNN repository on GitHub.
--------------- personal study notes ---------------
---------------- Author: Wu Jiang --------------
------ Click here for a link to the original post on Blog Park ------
The functions defined in this file are similar to those in roi_data_layer/roidb.py.
1.prepare_roidb(imdb)
Adds an 'info_boxes' field to each entry of imdb.roidb, with shape N x 18; the 18 columns hold (cx, cy, scale_ind, box, scale_ind_map, box_map, gt_label, gt_sublabel, target). This function is not called anywhere.
The purpose of get_boxes_grid(image_height, image_width) (a function compiled from C) is unclear.
Why does imdb.roidb generally contain both ground-truth RoIs and ordinary RoIs (ex_rois and gt_rois)?
cfg.TRAIN.SCALE_MAPPING has no default value, so this function would raise an error; it has never actually been executed.
# Adds an 'info_boxes' field to every entry of imdb.roidb.
# Its 18 columns are (cx, cy, scale_ind, box, scale_ind_map, box_map,
# gt_label, gt_sublabel, target).
def prepare_roidb(imdb):
    """Attach an 'info_boxes' array to every entry of ``imdb.roidb``.

    If ``<imdb.cache_path>/<imdb.name>_gt_roidb_prepared.pkl`` exists, the
    pickled roidb is loaded into ``imdb._roidb`` and the function returns
    immediately. Otherwise each roidb entry is updated in place:

    * ``roidb[i]['image']`` is set to ``imdb.image_path_at(i)``;
    * ``roidb[i]['info_boxes']`` is set to a float32 array with 18 columns,
      one row per selected (foreground) grid box, accumulated over every
      scale in ``cfg.TRAIN.SCALES``.

    Column layout of ``info_boxes``:
    ``0`` cx, ``1`` cy, ``2`` scale_ind, ``3:7`` grid box, ``7``
    scale_ind_map, ``8:12`` grid box rescaled by ``scale_map / scale``,
    ``12`` gt label, ``13`` never written here (stays 0), ``14:18``
    regression targets.

    Args:
        imdb: dataset object providing ``cache_path``, ``name``, ``roidb``,
            ``image_index``, ``image_path_at(i)`` and ``num_classes``.

    Returns:
        None. The roidb entries are modified in place.

    NOTE(review): the code shown here never writes ``cache_file``; the save
    step presumably lives elsewhere or was omitted -- confirm.
    """
    cache_file = os.path.join(imdb.cache_path,
                              imdb.name + '_gt_roidb_prepared.pkl')
    if os.path.exists(cache_file):
        # NOTE: unpickling executes arbitrary code; only load trusted caches.
        with open(cache_file, 'rb') as fid:
            imdb._roidb = cPickle.load(fid)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('{} gt roidb prepared loaded from {}'.format(imdb.name,
                                                           cache_file))
        return

    roidb = imdb.roidb
    # Walk over every image of the dataset.
    for i in xrange(len(imdb.image_index)):
        roidb[i]['image'] = imdb.image_path_at(i)
        boxes = roidb[i]['boxes']
        labels = roidb[i]['gt_classes']
        info_boxes = np.zeros((0, 18), dtype=np.float32)

        # No boxes for this image: store the empty (0, 18) array.
        if boxes.shape[0] == 0:
            roidb[i]['info_boxes'] = info_boxes
            continue

        # Compute grid boxes. PIL reports size as (width, height).
        s = PIL.Image.open(imdb.image_path_at(i)).size
        image_height = s[1]
        image_width = s[0]
        # get_boxes_grid is defined outside this block.
        # NOTE(review): its exact semantics are not visible here -- confirm.
        boxes_grid, cx, cy = get_boxes_grid(image_height, image_width)

        # For each training scale.
        # NOTE(review): cfg.TRAIN.SCALES is reportedly (600,) by default
        # -- confirm against the config.
        for scale_ind, scale in enumerate(cfg.TRAIN.SCALES):
            boxes_rescaled = boxes * scale

            # Compute overlaps between grid boxes and rescaled boxes.
            # np.float64 replaces the removed alias np.float (same dtype).
            overlaps = bbox_overlaps(boxes_grid.astype(np.float64),
                                     boxes_rescaled.astype(np.float64))
            # Best-overlapping box (and its overlap) for each grid box.
            max_overlaps = overlaps.max(axis=1)
            argmax_overlaps = overlaps.argmax(axis=1)
            # Class label of that best-overlapping box.
            max_classes = labels[argmax_overlaps]

            # Select positive boxes: class k (k >= 1) with overlap at or
            # above cfg.TRAIN.FG_THRESH.
            fg_inds = []
            for k in xrange(1, imdb.num_classes):
                fg_inds.extend(
                    np.where((max_classes == k) &
                             (max_overlaps >= cfg.TRAIN.FG_THRESH))[0])

            if len(fg_inds) > 0:
                gt_inds = argmax_overlaps[fg_inds]
                # Bounding-box regression targets.
                gt_targets = _compute_targets(boxes_grid[fg_inds, :],
                                              boxes_rescaled[gt_inds, :])
                # Scale mapping for RoI pooling.
                # NOTE(review): cfg.TRAIN.SCALE_MAPPING reportedly has no
                # default value, in which case this lookup raises -- confirm.
                scale_ind_map = cfg.TRAIN.SCALE_MAPPING[scale_ind]
                scale_map = cfg.TRAIN.SCALES[scale_ind_map]
                # Construct the list of positive boxes; 18 columns:
                # (cx, cy, scale_ind, box, scale_ind_map, box_map,
                #  gt_label, gt_sublabel, target)
                info_box = np.zeros((len(fg_inds), 18), dtype=np.float32)
                info_box[:, 0] = cx[fg_inds]
                info_box[:, 1] = cy[fg_inds]
                info_box[:, 2] = scale_ind
                info_box[:, 3:7] = boxes_grid[fg_inds, :]
                info_box[:, 7] = scale_ind_map
                info_box[:, 8:12] = boxes_grid[fg_inds, :] * scale_map / scale
                info_box[:, 12] = labels[gt_inds]
                # Column 13 (gt_sublabel) is intentionally left at zero here.
                info_box[:, 14:] = gt_targets
                info_boxes = np.vstack((info_boxes, info_box))

        roidb[i]['info_boxes'] = info_boxes
2.add_bbox_regression_targets(roidb)
Normalizes the bounding-box regression targets and returns the means and standard deviations of the targets for use in the testing phase. This function is not called anywhere.
# Normalizes the regression targets and returns their means and standard
# deviations for use in the testing phase.
def add_bbox_regression_targets(roidb):
    """Add information needed to train bounding-box regressors.

    Computes, per class, the mean and standard deviation of the regression
    targets stored in columns ``14:`` of each ``roidb[i]['info_boxes']``
    (rows are assigned to a class via column ``12``), then normalizes those
    targets in place: the class mean is subtracted and, when the first
    standard-deviation component of the class is non-zero, the result is
    divided by the class standard deviation.

    Args:
        roidb: non-empty sequence of dicts, each holding an 'info_boxes'
            array; ``roidb[0]['gt_overlaps']`` supplies the class count
            through its number of columns.

    Returns:
        Tuple ``(means.ravel(), stds.ravel())``: the flattened
        ``(num_classes, 4)`` arrays of per-class means and standard
        deviations.

    Raises:
        AssertionError: if ``roidb`` is empty or its first entry has no
            'info_boxes' key.
    """
    assert len(roidb) > 0
    assert 'info_boxes' in roidb[0], 'Did you call prepare_roidb first?'

    num_images = len(roidb)
    # Infer the number of classes from the number of columns in gt_overlaps.
    num_classes = roidb[0]['gt_overlaps'].shape[1]

    # Compute values needed for means and stds:
    # var(x) = E(x^2) - E(x)^2
    # Counts start at cfg.EPS so classes with no samples do not divide by 0.
    class_counts = np.zeros((num_classes, 1)) + cfg.EPS
    sums = np.zeros((num_classes, 4))
    squared_sums = np.zeros((num_classes, 4))
    # info_boxes columns: (cx, cy, scale_ind, 4 box, scale_ind_map,
    # 4 box_map, gt_label, gt_sublabel, 4 target) -- 18 in total.
    for im_i in xrange(num_images):
        targets = roidb[im_i]['info_boxes']
        for cls in xrange(1, num_classes):
            cls_inds = np.where(targets[:, 12] == cls)[0]
            if cls_inds.size > 0:
                class_counts[cls] += cls_inds.size
                sums[cls, :] += targets[cls_inds, 14:].sum(axis=0)
                squared_sums[cls, :] += (targets[cls_inds, 14:] ** 2).sum(axis=0)

    # Per-class mean and standard deviation of the regression targets.
    means = sums / class_counts
    stds = np.sqrt(squared_sums / class_counts - means ** 2)

    # Normalize the targets in place.
    for im_i in xrange(num_images):
        targets = roidb[im_i]['info_boxes']
        for cls in xrange(1, num_classes):
            cls_inds = np.where(targets[:, 12] == cls)[0]
            roidb[im_i]['info_boxes'][cls_inds, 14:] -= means[cls, :]
            # Only the first std component is checked before dividing.
            if stds[cls, 0] != 0:
                roidb[im_i]['info_boxes'][cls_inds, 14:] /= stds[cls, :]

    # These values will be needed for making predictions
    # (the predictions will need to be unnormalized and uncentered).
    # ravel() flattens the (num_classes, 4) arrays to 1-D.
    return means.ravel(), stds.ravel()
3._compute_targets(ex_rois, gt_rois)
Computes the regression targets from ex_rois and gt_rois; it is similar to the function bbox_transform(ex_rois, gt_rois) in bbox_transform.py, and is called by prepare_roidb(...).
# Computes the regression targets; similar to the function in
# bbox_transform.py.
def _compute_targets(ex_rois, gt_rois):
    """Return bounding-box regression targets for paired boxes.

    Row ``i`` of ``ex_rois`` is matched with row ``i`` of ``gt_rois``; both
    are indexed as columns 0..3, read here as (x1, y1, x2, y2).

    Returns:
        float32 array of shape ``(ex_rois.shape[0], 4)`` holding
        ``(dx, dy, dw, dh)``: centre offsets divided by the ex box size, and
        the log of the gt/ex width and height ratios.
    """

    def _extent_and_centre(rois):
        # cfg.EPS is added to each extent, which keeps it from being
        # exactly zero in the divisions and logs below.
        w = rois[:, 2] - rois[:, 0] + cfg.EPS
        h = rois[:, 3] - rois[:, 1] + cfg.EPS
        centre_x = rois[:, 0] + 0.5 * w
        centre_y = rois[:, 1] + 0.5 * h
        return w, h, centre_x, centre_y

    src_w, src_h, src_cx, src_cy = _extent_and_centre(ex_rois)
    dst_w, dst_h, dst_cx, dst_cy = _extent_and_centre(gt_rois)

    deltas = (
        (dst_cx - src_cx) / src_w,   # dx
        (dst_cy - src_cy) / src_h,   # dy
        np.log(dst_w / src_w),       # dw
        np.log(dst_h / src_h),       # dh
    )

    targets = np.zeros((ex_rois.shape[0], 4), dtype=np.float32)
    for col, values in enumerate(deltas):
        targets[:, col] = values
    return targets