TensorFlow Faster R-CNN Source Code Walkthrough (TFFRCNN) (11) gt_data_layer/minibatch.py

This post is part of a series of code-reading notes on the CharlesShang/TFFRCNN source code on GitHub.

--------------- Personal study notes ---------------

--------------- Author: Wu Jiang ---------------


As with roi_data_layer/minibatch.py, some of the functions in this file may never actually be executed.

"""Compute minibatch blobs for training a Fast R-CNN network."""

1.get_minibatch(roidb, num_classes)

Updates each roidb[i]'s 'info_boxes' field (its exact contents are unclear; what does the hard-coded 18 mean?), and adds a 'data' field (the image data blob) and a 'parameters' field (the related parameters: num_scale, the number of image scales; num_aspect, the number of aspect ratios; then cfg.TRAIN.SCALES, cfg.TRAIN.SCALE_MAPPING, cfg.TRAIN.ASPECT_HEIGHTS and cfg.TRAIN.ASPECT_WIDTHS. The last three keys do not exist in the default config, so referencing them should raise an error; possibly this function is simply never executed). Called by _get_next_minibatch(...) in gt_data_layer/layer.py. A standalone sketch of the resulting parameters_blob layout follows the code below.

# update roidb[i]['info_boxes'], add the 'data' and 'parameters' fields
def get_minibatch(roidb, num_classes):
    """Given a roidb, construct a minibatch sampled from it."""
    num_images = len(roidb)
    # default TRAIN.BATCH_SIZE = 128
    assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \
        'num_images ({}) must divide BATCH_SIZE ({})'. \
        format(num_images, cfg.TRAIN.BATCH_SIZE)
    # Get the input image blob, formatted for caffe
    im_blob = _get_image_blob(roidb)

    # build the box information blob
    # the 18 here is hard-coded; what do the 18 columns of info_boxes refer to?
    info_boxes_blob = np.zeros((0, 18), dtype=np.float32)
    # default TRAIN.SCALES = (600,)
    num_scale = len(cfg.TRAIN.SCALES)
    for i in xrange(num_images):
        info_boxes = roidb[i]['info_boxes']
        # change the batch index
        # columns 2 and 7 appear to store the batch index of the scaled image each box belongs to
        # (hence the "change the batch index" comment and the offset by i * num_scale); why exactly
        # these two columns is not documented
        info_boxes[:,2] += i * num_scale
        info_boxes[:,7] += i * num_scale
        info_boxes_blob = np.vstack((info_boxes_blob, info_boxes))

    # build the parameter blob
    # default TRAIN.ASPECTS = (1,) -- only a single aspect ratio? (aspect ratios to use during training)
    num_aspect = len(cfg.TRAIN.ASPECTS)
    num = 2 + 2 * num_scale + 2 * num_aspect   # = 6 with the defaults
    # parameters_blob stores the following, in order:
    # num_scale -- number of image scales, len(cfg.TRAIN.SCALES) = 1
    # num_aspect -- number of aspect ratios, len(cfg.TRAIN.ASPECTS) = 1
    # cfg.TRAIN.SCALES  (600,)
    # cfg.TRAIN.SCALE_MAPPING  -- not defined in the default config, so this should raise an error; perhaps this function is never called
    # cfg.TRAIN.ASPECT_HEIGHTS -- not defined in the default config, so this should raise an error
    # cfg.TRAIN.ASPECT_WIDTHS  -- not defined in the default config, so this should raise an error
    parameters_blob = np.zeros((num), dtype=np.float32)
    parameters_blob[0] = num_scale
    parameters_blob[1] = num_aspect
    parameters_blob[2:2+num_scale] = cfg.TRAIN.SCALES
    parameters_blob[2+num_scale:2+2*num_scale] = cfg.TRAIN.SCALE_MAPPING
    parameters_blob[2+2*num_scale:2+2*num_scale+num_aspect] = cfg.TRAIN.ASPECT_HEIGHTS
    parameters_blob[2+2*num_scale+num_aspect:2+2*num_scale+2*num_aspect] = cfg.TRAIN.ASPECT_WIDTHS
    # For debug visualizations
    # _vis_minibatch(im_blob, rois_blob, labels_blob, sublabels_blob)
    blobs = {'data': im_blob,
             'info_boxes': info_boxes_blob,
             'parameters': parameters_blob}
    return blobs
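
To make the parameter layout concrete, here is a minimal standalone sketch (not repo code) that fills a parameters_blob with the default num_scale = num_aspect = 1; the values used for SCALE_MAPPING / ASPECT_HEIGHTS / ASPECT_WIDTHS are hypothetical placeholders, since those keys are missing from the default config:

import numpy as np

num_scale, num_aspect = 1, 1                 # len(cfg.TRAIN.SCALES), len(cfg.TRAIN.ASPECTS)
scales = (600,)                              # cfg.TRAIN.SCALES
scale_mapping = (0,)                         # hypothetical: cfg.TRAIN.SCALE_MAPPING is not in the default config
aspect_heights = (1,)                        # hypothetical: cfg.TRAIN.ASPECT_HEIGHTS is not in the default config
aspect_widths = (1,)                         # hypothetical: cfg.TRAIN.ASPECT_WIDTHS is not in the default config

num = 2 + 2 * num_scale + 2 * num_aspect     # = 6 with the defaults
parameters_blob = np.zeros((num), dtype=np.float32)
parameters_blob[0] = num_scale
parameters_blob[1] = num_aspect
parameters_blob[2:2 + num_scale] = scales
parameters_blob[2 + num_scale:2 + 2 * num_scale] = scale_mapping
parameters_blob[2 + 2 * num_scale:2 + 2 * num_scale + num_aspect] = aspect_heights
parameters_blob[2 + 2 * num_scale + num_aspect:] = aspect_widths
print(parameters_blob)                       # -> [1., 1., 600., 0., 1., 1.]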

2._get_image_blob(roidb)

Subtracts the pixel means from each image in the passed-in roidb and rescales it; the processed images are collected in the processed_ims list, which is passed to im_list_to_blob(...) to build the image data blob. Called by get_minibatch(...); the result becomes the 'data' field of blobs.

The difference from _get_image_blob(...) in roi_data_layer/minibatch.py (which rescales with a single target_size only) is that this version builds a multi-scale pyramid using TRAIN.SCALES_BASE = (0.25, 0.5, 1.0, 2.0, 3.0). Why use multiple scales here? (As noted above, this code path does not appear to actually run.) A quick check of the pyramid sizes follows the code below.

def _get_image_blob(roidb):
    """Builds an input blob from the images in the roidb at the different scales."""
    num_images = len(roidb)
    # list of the rescaled images; passed to im_list_to_blob(...) to build the image data blob
    processed_ims = []
    for i in xrange(num_images):
        # read image
        im = cv2.imread(roidb[i]['image'])
        if roidb[i]['flipped']:
            im = im[:, ::-1, :]
        im_orig = im.astype(np.float32, copy=True)
        im_orig -= cfg.PIXEL_MEANS
        # build image pyramid
        # this is where it differs from _get_image_blob(...) in roi_data_layer/minibatch.py!!!
        # default TRAIN.SCALES_BASE = (0.25, 0.5, 1.0, 2.0, 3.0)
        # why multiple scales here??? (scales used to compute real features)
        for im_scale in cfg.TRAIN.SCALES_BASE:
            im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale,
                        interpolation=cv2.INTER_LINEAR)
            processed_ims.append(im)
    # Create a blob to hold the input images; im_list_to_blob(...) is defined in blob.py
    blob = im_list_to_blob(processed_ims)
    return blob 
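
A quick standalone check (not repo code, using a made-up 400x600 mean-subtracted image) of how many entries the SCALES_BASE pyramid adds per image and what sizes they get:

import numpy as np
import cv2

im = np.zeros((400, 600, 3), dtype=np.float32)        # stand-in for a mean-subtracted 400x600 BGR image
scales_base = (0.25, 0.5, 1.0, 2.0, 3.0)              # default cfg.TRAIN.SCALES_BASE
pyramid = [cv2.resize(im, None, None, fx=s, fy=s, interpolation=cv2.INTER_LINEAR)
           for s in scales_base]
print([p.shape[:2] for p in pyramid])
# [(100, 150), (200, 300), (400, 600), (800, 1200), (1200, 1800)]

So every image contributes five entries to processed_ims, which im_list_to_blob(...) then pads to a common size.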

3._project_im_rois(im_rois, im_scale_factor)

Scales the RoIs by the image scale factor; no call site found. A hypothetical usage example follows the code below.

def _project_im_rois(im_rois, im_scale_factor):
    """Project image RoIs into the rescaled training image."""
    rois = im_rois * im_scale_factor
    return rois
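
A hypothetical usage example (the function is redefined here only so the snippet runs on its own):

import numpy as np

def _project_im_rois(im_rois, im_scale_factor):        # same body as above
    rois = im_rois * im_scale_factor
    return rois

im_rois = np.array([[10., 20., 110., 220.]], dtype=np.float32)
print(_project_im_rois(im_rois, 0.5))                  # [[  5.  10.  55. 110.]]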

4._get_bbox_regression_labels(bbox_target_data, num_classes)

Expands the N x 5 bbox_targets into an N x (4*num_classes) array in which only the ground-truth class has non-zero regression targets (the shape the network expects), and builds an N x (4*num_classes) bbox_loss_weights array; returns bbox_targets and bbox_loss_weights. No call site found. A tiny worked example follows the code below.

# expand the N x 5 bbox_targets to N x (4*num_classes); only the ground-truth class gets non-zero targets
# build the N x (4*num_classes) bbox_loss_weights
def _get_bbox_regression_labels(bbox_target_data, num_classes):
    """
    Bounding-box regression targets are stored in a compact form in the roidb.
    This function expands those targets into the 4-of-4*K representation used
    by the network (i.e. only one class has non-zero targets). The loss weights
    are similarly expanded.
    Returns:
        bbox_target_data (ndarray): N x 4K blob of regression targets
        bbox_loss_weights (ndarray): N x 4K blob of loss weights
    """
    clss = bbox_target_data[:, 0]
    bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
    bbox_loss_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
    inds = np.where(clss > 0)[0]  # skip background (class 0)
    for ind in inds:
        cls = clss[ind]
        start = 4 * cls
        end = start + 4
        # expand the N x 5 bbox_targets to N x (4*num_classes); only this row's class gets non-zero targets
        bbox_targets[ind, start:end] = bbox_target_data[ind, 1:]
        # shape N x (4*num_classes); only this row's class gets [1, 1, 1, 1], everything else stays 0
        bbox_loss_weights[ind, start:end] = [1., 1., 1., 1.]
    return bbox_targets, bbox_loss_weights
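
A tiny worked example (hypothetical numbers, num_classes = 3) reproducing the compact-to-expanded mapping: a row labeled class 2 puts its 4 targets into columns 8..11:

import numpy as np

num_classes = 3
# one row: [cls, dx, dy, dw, dh], with made-up regression values for class 2
bbox_target_data = np.array([[2, 0.1, -0.2, 0.05, 0.3]], dtype=np.float32)

bbox_targets = np.zeros((1, 4 * num_classes), dtype=np.float32)
bbox_loss_weights = np.zeros_like(bbox_targets)
cls = int(bbox_target_data[0, 0])
bbox_targets[0, 4 * cls:4 * cls + 4] = bbox_target_data[0, 1:]
bbox_loss_weights[0, 4 * cls:4 * cls + 4] = 1.
print(bbox_targets)       # targets land in columns 8..11 only
print(bbox_loss_weights)  # 1s in columns 8..11, 0 elsewhere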

5._vis_minibatch(im_blob, rois_blob, labels_blob, sublabels_blob)

Draws the RoI rectangles and prints the class / subclass labels for debugging; no call site found.

# draw the RoI rectangles and print the related labels
def _vis_minibatch(im_blob, rois_blob, labels_blob, sublabels_blob):
    """Visualize a mini-batch for debugging."""
    import matplotlib.pyplot as plt
    for i in xrange(rois_blob.shape[0]):
        # rois[0] is the source-image index; the RoI coordinates are taken from rois[2:]
        rois = rois_blob[i, :]
        # index of the image this RoI comes from
        im_ind = rois[0]
        roi = rois[2:]
        im = im_blob[im_ind, :, :, :].transpose((1, 2, 0)).copy()
        im += cfg.PIXEL_MEANS
        im = im[:, :, (2, 1, 0)]
        im = im.astype(np.uint8)
        cls = labels_blob[i]
        subcls = sublabels_blob[i]
        plt.imshow(im)
        print 'class: ', cls, ' subclass: ', subcls
        plt.gca().add_patch(
            plt.Rectangle((roi[0], roi[1]), roi[2] - roi[0],
                          roi[3] - roi[1], fill=False,
                          edgecolor='r', linewidth=3)
            )
        plt.show()

Reposted from www.cnblogs.com/deeplearning1314/p/11325018.html