Analysis of mmdetection's train.py (taking RetinaNet as an example)

Execution flow of mmdetection/tools/train.py

tools/train.py
  
train_detector(
        model,
        datasets,
        cfg,
        distributed=distributed,
        validate=args.validate,
        logger=logger)
mmdet/apis/train.py
    _dist_train(model, dataset, cfg, validate=validate)
         constructs the dataloader, model, optimizer, and runner,
         registers many hook functions,
       and then executes runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
mmcv/runner/runner.py
    def run(self, data_loaders, workflow, max_epochs, **kwargs):
        while self.epoch < max_epochs:
            epoch_runner = getattr(self, mode)
            epoch_runner(data_loaders[i], **kwargs)

    def train(self, data_loader, **kwargs):
        for i, data_batch in enumerate(data_loader):
            self._inner_iter = i
            self.call_hook('before_train_iter')
            outputs = self.batch_processor(
                self.model, data_batch, train_mode=True, **kwargs)
            self.outputs = outputs
            self.call_hook('after_train_iter')
            self._iter += 1
        self.call_hook('after_train_epoch')

call_hook dispatches by name to every registered hook:

    def call_hook(self, fn_name):
        for hook in self._hooks:
            getattr(hook, fn_name)(self)
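As a minimal, self-contained sketch of this hook mechanism (SimpleRunner and LogHook are illustrative stand-ins, not mmcv's actual classes):

    class LogHook:
        """A toy hook exposing two of the callback names the runner dispatches on."""
        def before_train_iter(self, runner):
            print('starting iter', runner._iter)
        def after_train_iter(self, runner):
            print('finished iter', runner._iter)

    class SimpleRunner:
        def __init__(self):
            self._hooks = []
            self._iter = 0
        def register_hook(self, hook):
            self._hooks.append(hook)
        def call_hook(self, fn_name):
            # dispatch by method name to every registered hook
            for hook in self._hooks:
                if hasattr(hook, fn_name):
                    getattr(hook, fn_name)(self)

    runner = SimpleRunner()
    runner.register_hook(LogHook())
    runner.call_hook('before_train_iter')  # -> starting iter 0

This name-based dispatch is why registering hooks (optimizer step, LR schedule, checkpointing, logging) is enough to extend training without touching the loop itself.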

Model construction:

mmdetection-rotated/mmdet/apis/train.py

def _dist_train(model, dataset, cfg, validate=False):
    model = MMDistributedDataParallel(model.cuda())

If the model is RetinaNet, execution goes to

mmdetection-rotated/mmdet/models/detectors/retinanet.py

if it is Faster R-CNN, execution goes to

mmdet/models/detectors/faster_rcnn.py

 

Taking RetinaNet as an example (it inherits from SingleStageDetector):

Initialization:

mmdetection-rotated/mmdet/models/detectors/retinanet.py

class RetinaNet(SingleStageDetector):

    def __init__(self,
                 backbone,
                 neck,
                 bbox_head,
                 train_cfg=None,
                 test_cfg=None,
                 pretrained=None):
        super(RetinaNet, self).__init__(backbone, neck, bbox_head, train_cfg,
                                        test_cfg, pretrained)

mmdetection-rotated/mmdet/models/detectors/single_stage.py

class SingleStageDetector(BaseDetector):

    def __init__(self,
                 backbone,
                 neck=None,
                 bbox_head=None,
                 train_cfg=None,
                 test_cfg=None,
                 pretrained=None):
        super(SingleStageDetector, self).__init__()
        self.backbone = builder.build_backbone(backbone)
        if neck is not None:
            self.neck = builder.build_neck(neck)
        self.bbox_head = builder.build_head(bbox_head)
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        self.init_weights(pretrained=pretrained)

Following the config file, the backbone, neck, and bbox_head are constructed in turn.

Take the construction of bbox_head as an example.

Config file:
bbox_head=dict(
        type='RetinaHead',
        num_classes=2,
        in_channels=256,
        stacked_convs=4,
        feat_channels=256,
        octave_base_scale=4,
        scales_per_octave=3,
        anchor_ratios=[0.3, 0.5, 0.8],
        anchor_angle=[-60, -30, 0, 30, 60, 90],
        anchor_strides=[8, 16, 32, 64, 128],
        target_means=[.0, .0, .0, .0, .0],
        target_stds=[1.0, 1.0, 1.0, 1.0, 1.0],
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0))

# The matching constructor is looked up according to type='RetinaHead'
self.bbox_head = builder.build_head(bbox_head)
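build_head resolves the type string through a registry; conceptually it works like this simplified sketch (HEADS and register_head here are illustrative, not mmdet's actual Registry class):

    HEADS = {}

    def register_head(cls):
        # map the class name to the class itself
        HEADS[cls.__name__] = cls
        return cls

    def build_head(cfg):
        cfg = dict(cfg)                   # copy so the original config is not mutated
        head_cls = HEADS[cfg.pop('type')]
        return head_cls(**cfg)            # remaining keys become constructor kwargs

    @register_head
    class RetinaHead:
        def __init__(self, num_classes, in_channels, **kwargs):
            self.num_classes = num_classes

    head = build_head(dict(type='RetinaHead', num_classes=2, in_channels=256))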

This dispatches to mmdetection-rotated/mmdet/models/anchor_heads/retina_head.py:
class RetinaHead(AnchorHead):

    def __init__(self,
                 num_classes,
                 in_channels,
                 stacked_convs=4,
                 octave_base_scale=4,
                 scales_per_octave=3,
                 conv_cfg=None,
                 norm_cfg=None,
                 **kwargs):
        self.stacked_convs = stacked_convs
        self.octave_base_scale = octave_base_scale
        self.scales_per_octave = scales_per_octave
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        octave_scales = np.array(
            [2**(i / scales_per_octave) for i in range(scales_per_octave)])
        anchor_scales = octave_scales * octave_base_scale
        super(RetinaHead, self).__init__(
            num_classes, in_channels, anchor_scales=anchor_scales, **kwargs)
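For octave_base_scale=4 and scales_per_octave=3 this produces three anchor scales per pyramid level; a quick standalone check:

    import numpy as np

    scales_per_octave, octave_base_scale = 3, 4
    octave_scales = np.array(
        [2**(i / scales_per_octave) for i in range(scales_per_octave)])
    anchor_scales = octave_scales * octave_base_scale
    print(anchor_scales)  # [4.      5.0397  6.3496]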

RetinaHead's __init__ then calls AnchorHead's constructor:

mmdetection-rotated/mmdet/models/anchor_heads/anchor_head.py

class AnchorHead(nn.Module):
    """Anchor-based head (RPN, RetinaNet, SSD, etc.).

    Args:
        in_channels (int): Number of channels in the input feature map.
        feat_channels (int): Number of channels of the feature map.
        anchor_scales (Iterable): Anchor scales.
        anchor_ratios (Iterable): Anchor aspect ratios.
        anchor_strides (Iterable): Anchor strides.
        anchor_base_sizes (Iterable): Anchor base sizes.
        target_means (Iterable): Mean values of regression targets.
        target_stds (Iterable): Std values of regression targets.
        loss_cls (dict): Config of classification loss.
        loss_bbox (dict): Config of localization loss.
    """
    def __init__(self,
                 num_classes,
                 in_channels,
                 feat_channels=256,
                 anchor_scales=[8, 16, 32],
                 anchor_ratios=[0.5, 1.0, 2.0],
                 anchor_angle=[-30.0, 0.0, 30.0, 60.0, 90.0, 120.0],
                 anchor_strides=[4, 8, 16, 32, 64],
                 target_means=(.0, .0, .0, .0),
                 target_stds=(1.0, 1.0, 1.0, 1.0),
                 loss_cls=dict(
                     type='CrossEntropyLoss',
                     use_sigmoid=True,
                     loss_weight=1.0),
                 loss_bbox=dict(
                     type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)):
        # Generate the anchors: one AnchorGenerator per feature-map stride
        for anchor_base in self.anchor_strides:
            self.anchor_generators.append(
                AnchorGenerator(anchor_base, self.anchor_scales,
                                self.anchor_ratios, self.anchor_angle))
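With the bbox_head config above, every feature-map location therefore gets one anchor per (scale, ratio, angle) combination (a quick count; the rotated AnchorGenerator is assumed to enumerate all combinations):

    num_scales = 3  # scales_per_octave
    anchor_ratios = [0.3, 0.5, 0.8]
    anchor_angle = [-60, -30, 0, 30, 60, 90]
    num_anchors = num_scales * len(anchor_ratios) * len(anchor_angle)
    print(num_anchors)  # 54 anchors per location, at each of the 5 FPN strides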

Model training:

Taking RetinaNet as an example: RetinaNet inherits from SingleStageDetector, so it also inherits its training procedure.

mmdet/models/detectors/single_stage.py

First, features are extracted to obtain the feature maps (backbone + FPN, corresponding to extract_feat) -> then boxes are regressed and classified on the feature maps, giving each anchor's class scores and box predictions (corresponding to bbox_head) -> from the ground-truth annotations (gt_bboxes, gt_labels, img_metas, self.train_cfg), each anchor is assigned a label (0 = negative sample; a positive integer = the assigned class; -1 = neither positive nor negative, i.e. ignored) and a regression target (usually decided by the IoU between the anchor and the ground-truth boxes) -> positive and negative samples are drawn according to the sampling strategy -> for the sampled examples, the losses are computed from the model outputs and the corresponding labels and targets (corresponding to bbox_head.loss).

    def forward_train(self,
                      img,
                      img_metas,
                      gt_bboxes,
                      gt_labels,
                      gt_bboxes_ignore=None):
        x = self.extract_feat(img)
        outs = self.bbox_head(x)
        loss_inputs = outs + (gt_bboxes, gt_labels, img_metas, self.train_cfg)
        losses = self.bbox_head.loss(
            *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
        return losses

Feature extraction is typically backbone + FPN.

RetinaNet uses two convolutional branches, one to regress the bbox offsets and one to classify:

        for i in range(self.stacked_convs):
            chn = self.in_channels if i == 0 else self.feat_channels
            self.cls_convs.append(
                ConvModule(
                    chn,
                    self.feat_channels,
                    3,
                    stride=1,
                    padding=1,
                    conv_cfg=self.conv_cfg,
                    norm_cfg=self.norm_cfg))
            self.reg_convs.append(
                ConvModule(
                    chn,
                    self.feat_channels,
                    3,
                    stride=1,
                    padding=1,
                    conv_cfg=self.conv_cfg,
                    norm_cfg=self.norm_cfg))
        self.retina_cls = nn.Conv2d(
            self.feat_channels,
            self.num_anchors * self.cls_out_channels,
            3,
            padding=1)
        self.retina_reg = nn.Conv2d(
            self.feat_channels, self.num_anchors * 5, 3, padding=1)  # an axis-aligned bbox has 4 values; a rotated bbox has 5 (the 5th is the angle)

    def forward_single(self, x):
        cls_feat = x
        reg_feat = x
        for cls_conv in self.cls_convs:
            cls_feat = cls_conv(cls_feat)
        for reg_conv in self.reg_convs:
            reg_feat = reg_conv(reg_feat)
        cls_score = self.retina_cls(cls_feat)
        bbox_pred = self.retina_reg(reg_feat)
        return cls_score, bbox_pred
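A quick shape check of the two branch outputs on one pyramid level (a standalone sketch; num_anchors=54 follows from the 3 scales x 3 ratios x 6 angles above, and cls_out_channels=1 assumes a single foreground class with sigmoid classification):

    import torch
    import torch.nn as nn

    num_anchors, cls_out_channels, feat_channels = 54, 1, 256
    retina_cls = nn.Conv2d(feat_channels, num_anchors * cls_out_channels, 3, padding=1)
    retina_reg = nn.Conv2d(feat_channels, num_anchors * 5, 3, padding=1)

    x = torch.randn(1, feat_channels, 32, 32)   # one FPN level
    print(retina_cls(x).shape)  # torch.Size([1, 54, 32, 32])
    print(retina_reg(x).shape)  # torch.Size([1, 270, 32, 32]) -> 5 regression values per anchor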

Computing the loss:

mmdetection-rotated/mmdet/models/anchor_heads/anchor_head.py

In self.bbox_head.loss():

The regression targets and the classification label of every anchor are first computed from the ground-truth boxes and the anchors:

cls_reg_targets = anchor_target(
    anchor_list,
    valid_flag_list,
    gt_bboxes,
    img_metas,
    self.target_means,
    self.target_stds,
    cfg,
    gt_bboxes_ignore_list=gt_bboxes_ignore,
    gt_labels_list=gt_labels,
    label_channels=label_channels,
    sampling=self.sampling)
(all_labels, all_label_weights, all_bbox_targets, all_bbox_weights,
 pos_inds_list, neg_inds_list) = multi_apply(
     anchor_target_single,
     anchor_list,
     valid_flag_list,
     gt_bboxes_list,
     gt_bboxes_ignore_list,
     gt_labels_list,
     img_metas,
     target_means=target_means,
     target_stds=target_stds,
     cfg=cfg,
     label_channels=label_channels,
     sampling=sampling,
     unmap_outputs=unmap_outputs)
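multi_apply is a small mmdet utility that maps a function over the per-image lists and regroups the per-image outputs into per-field lists; its core is roughly:

    from functools import partial

    def multi_apply(func, *args, **kwargs):
        # bind the shared kwargs, map over the per-image positional lists, then
        # transpose [(a1, b1), (a2, b2), ...] into ([a1, a2], [b1, b2])
        pfunc = partial(func, **kwargs) if kwargs else func
        map_results = map(pfunc, *args)
        return tuple(map(list, zip(*map_results)))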

The per-image function anchor_target_single does the following:

def anchor_target_single(...):  # signature elided; called once per image via multi_apply
    bbox_assigner = build_assigner(cfg.assigner)
    assign_result = bbox_assigner.assign(anchors, gt_bboxes,
                                         gt_bboxes_ignore, gt_labels)
    bbox_sampler = PseudoSampler()
    sampling_result = bbox_sampler.sample(assign_result, anchors,
                                          gt_bboxes)

The bbox_assigner.assign function computes the IoU between the candidate boxes (proposals/anchors) and the ground-truth boxes to decide which are positive samples, negative samples, and ignored (neither positive nor negative) samples.

The assigner is configured as:

    assigner=dict(
        type='MaxIoUAssigner',
        pos_iou_thr=0.5,
        neg_iou_thr=0.4,
        min_pos_iou=0,
        ignore_iof_thr=-1),

mmdetection-rotated/mmdet/core/bbox/assigners/max_iou_assigner_horizontal.py

class MaxIoUAssigner(BaseAssigner):
    def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
        """Assign a gt bbox to every bbox (proposal/anchor); each bbox will be
        assigned -1, 0, or a positive number. -1 means don't care, 0 means
        negative sample, and a positive number is the (1-based) index of the
        assigned gt.

        The assignment is done in the following steps, and the order matters:

        1. assign every bbox to -1
        2. assign proposals whose iou with all gts < neg_iou_thr to 0
        3. for each bbox, if the iou with its nearest gt >= pos_iou_thr,
           assign it to that gt
        4. for each gt bbox, assign its nearest proposals (may be more than
           one) to itself
        """
        # bbox_overlaps in mmdetection-rotated/mmdet/core/bbox/geometry.py
        # computes the pairwise IoU between proposals and ground-truth boxes
        bboxes = bboxes[:, :5]
        overlaps = bbox_overlaps(gt_bboxes, bboxes)

        # based on the IoU results, each proposal will be labeled 0, a positive
        # index, or -1; overlaps with ignored gt regions are masked out first
        if (self.ignore_iof_thr > 0) and (gt_bboxes_ignore is not None) and (
                gt_bboxes_ignore.numel() > 0):
            if self.ignore_wrt_candidates:
                ignore_overlaps = bbox_overlaps(
                    bboxes, gt_bboxes_ignore, mode='iof')
                ignore_max_overlaps, _ = ignore_overlaps.max(dim=1)
            else:
                ignore_overlaps = bbox_overlaps(
                    gt_bboxes_ignore, bboxes, mode='iof')
                ignore_max_overlaps, _ = ignore_overlaps.max(dim=0)
            overlaps[:, ignore_max_overlaps > self.ignore_iof_thr] = -1

        assign_result = self.assign_wrt_overlaps(overlaps, gt_labels)
        return assign_result
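As a toy re-implementation of the four assignment steps on a small IoU matrix (illustrative only; the real assign_wrt_overlaps additionally honors min_pos_iou and a few low-quality-match details):

    import torch

    def toy_assign(overlaps, pos_iou_thr=0.5, neg_iou_thr=0.4):
        # overlaps: (num_gts, num_bboxes) IoU matrix
        num_gts, num_bboxes = overlaps.size()
        assigned = overlaps.new_full((num_bboxes,), -1, dtype=torch.long)  # step 1: all -1
        max_overlaps, argmax = overlaps.max(dim=0)   # best gt for each bbox
        assigned[max_overlaps < neg_iou_thr] = 0     # step 2: negatives
        pos = max_overlaps >= pos_iou_thr
        assigned[pos] = argmax[pos] + 1              # step 3: positives (1-based gt index)
        gt_argmax = overlaps.argmax(dim=1)           # best bbox for each gt
        for i in range(num_gts):                     # step 4: every gt keeps its nearest bbox
            assigned[gt_argmax[i]] = i + 1
        return assigned

    overlaps = torch.tensor([[0.60, 0.20, 0.45],
                             [0.10, 0.30, 0.55]])
    print(toy_assign(overlaps))  # tensor([1, 0, 2])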

The pairwise IoU between the proposals and the ground-truth boxes is computed by bbox_overlaps in mmdetection-rotated/mmdet/core/bbox/geometry.py.

Sampling: according to the chosen sampling strategy (hard sampling such as OHEM or IoU-balanced sampling; soft sampling such as Focal Loss, GHM, or PISA), either the samples (proposals) are subsampled before the classification and regression losses are computed, or all samples are used but with different per-sample loss weights.

mmdet/core/anchor/anchor_target.py

def anchor_target_single(...):

    # When a sampling strategy is specified, sample accordingly (hard sampling);
    # otherwise use all positive and negative samples (soft sampling)
    if sampling:
        assign_result, sampling_result = assign_and_sample(
            anchors, gt_bboxes, gt_bboxes_ignore, None, cfg)
    else:

        bbox_assigner = build_assigner(cfg.assigner)
        assign_result = bbox_assigner.assign(anchors, gt_bboxes,
                                         gt_bboxes_ignore, gt_labels)
        bbox_sampler = PseudoSampler()  # use all positive and negative samples (soft sampling)
        sampling_result = bbox_sampler.sample(assign_result, anchors,
                                          gt_bboxes)
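As a rough sketch of the difference (illustrative, not mmdet's actual sampler classes): a pseudo sampler returns every assigned positive and negative, while a hard sampler such as random sampling caps their counts:

    import torch

    def pseudo_sample(assigned):
        # 'assigned' follows the assigner convention: -1 ignore, 0 negative, >0 positive
        pos_inds = torch.nonzero(assigned > 0, as_tuple=False).squeeze(-1)
        neg_inds = torch.nonzero(assigned == 0, as_tuple=False).squeeze(-1)
        return pos_inds, neg_inds          # soft sampling: keep everything

    def random_sample(assigned, num=256, pos_fraction=0.5):
        pos_inds, neg_inds = pseudo_sample(assigned)
        num_pos = min(len(pos_inds), int(num * pos_fraction))
        num_neg = min(len(neg_inds), num - num_pos)
        pos_inds = pos_inds[torch.randperm(len(pos_inds))[:num_pos]]
        neg_inds = neg_inds[torch.randperm(len(neg_inds))[:num_neg]]
        return pos_inds, neg_inds          # hard sampling: fixed budget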

anchor_target_single finally yields (labels, label_weights, bbox_targets, bbox_weights, pos_inds, neg_inds) for all the anchors in the image.

Here labels is the class of each anchor (0 = negative sample; a positive integer = the class the anchor is assigned to; -1 = neither positive nor negative, i.e. ignored);

label_weights assigns different classification weights to positives, negatives, or different target classes, and can be used to counteract class imbalance;
bbox_targets holds the regression targets computed by bbox2delta from the anchors and the ground-truth boxes;
bbox_weights is the weight each box contributes to the regression loss;

pos_inds and neg_inds are the indices of the positive and negative samples.

# bbox2delta: given an anchor's center, width, height and angle, and the matching
# ground-truth box's center, width, height and angle, compute the values that
# bbox regression has to learn
import numpy as np
import torch

def bbox2delta(proposals, gt, means=[0, 0, 0, 0, 0], stds=[1, 1, 1, 1, 1]):
    assert proposals.size() == gt.size()
    proposals = proposals.float()
    gt = gt.float()
    px = proposals[..., 0]
    py = proposals[..., 1]
    pw = proposals[..., 2]
    ph = proposals[..., 3]
    pa = proposals[..., 4]

    gx = gt[..., 0]
    gy = gt[..., 1]
    gw = gt[..., 2]
    gh = gt[..., 3]
    ga = gt[..., 4]

    dx = (gx - px) / pw
    dy = (gy - py) / ph
    dw = torch.log(gw / pw)
    dh = torch.log(gh / ph)
    da = (ga - pa) * np.pi / 180  # angle difference, degrees -> radians
    deltas = torch.stack([dx, dy, dw, dh, da], dim=-1)
    means = deltas.new_tensor(means).unsqueeze(0)
    stds = deltas.new_tensor(stds).unsqueeze(0)
    deltas = deltas.sub_(means).div_(stds)
    return deltas
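A quick numeric check with a made-up anchor and ground-truth box in (cx, cy, w, h, angle) format:

    proposals = torch.tensor([[50., 50., 20., 10., 0.]])
    gt = torch.tensor([[55., 52., 40., 10., 30.]])
    print(bbox2delta(proposals, gt))
    # tensor([[0.2500, 0.2000, 0.6931, 0.0000, 0.5236]])
    # dx = 5/20, dy = 2/10, dw = log(2), dh = log(1), da = 30 * pi/180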

(3) Compute the loss on the sampled positive and negative samples.

mmdetection-rotated/mmdet/models/anchor_heads/anchor_head.py


class AnchorHead(nn.Module):
    def __init__(self, ...):
        # the loss functions are built in __init__
        self.loss_cls = build_loss(loss_cls)
        self.loss_bbox = build_loss(loss_bbox)

The losses in our cfg file are defined as:
loss_cls=dict(
    type='FocalLoss',
    use_sigmoid=True,
    gamma=2.0,
    alpha=0.25,
    loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0)
Classification loss:

Defined in mmdetection-rotated/mmdet/models/losses/focal_loss.py:
       loss_cls = self.loss_weight * sigmoid_focal_loss(
                pred,
                target,
                weight,
                gamma=self.gamma,
                alpha=self.alpha,
                reduction=reduction,
                avg_factor=avg_factor)
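sigmoid_focal_loss here is mmdet's compiled CUDA op; for intuition, a pure-PyTorch equivalent of the binary, per-element form (my own sketch, not the library function) is:

    import torch
    import torch.nn.functional as F

    def sigmoid_focal_loss_ref(pred, target, gamma=2.0, alpha=0.25):
        # pred: raw logits; target: 0/1 labels of the same shape
        p = pred.sigmoid()
        pt = p * target + (1 - p) * (1 - target)              # probability of the true class
        alpha_t = alpha * target + (1 - alpha) * (1 - target)
        ce = F.binary_cross_entropy_with_logits(pred, target, reduction='none')
        return alpha_t * (1 - pt) ** gamma * ce               # down-weight easy examples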
Bbox regression loss:

mmdetection-rotated/mmdet/models/losses/smooth_l1_loss.py
import torch

def smooth_l1_loss(pred, target, beta=1.0):
    assert beta > 0
    assert pred.size() == target.size() and target.numel() > 0
    diff = torch.abs(pred - target)
    loss = torch.where(diff < beta, 0.5 * diff * diff / beta,
                       diff - 0.5 * beta)
    return loss
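With beta=0.11 as in the config, errors below 0.11 are penalized quadratically and larger ones linearly; for example:

    pred = torch.tensor([0.0, 0.0])
    target = torch.tensor([0.05, 1.0])
    print(smooth_l1_loss(pred, target, beta=0.11))
    # tensor([0.0114, 0.9450])  # 0.5*0.05^2/0.11 and 1.0 - 0.5*0.11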




In loss(), the steps are:
(1) compute each anchor's classification label and the regression targets of the positive samples;
(2) sample the positive and negative examples;
(3) compute the losses:
        losses_cls, losses_bbox = multi_apply(
            self.loss_single,
            cls_scores,
            bbox_preds,
            labels_list,
            label_weights_list,
            bbox_targets_list,
            bbox_weights_list,
            num_total_samples=num_total_samples,
            cfg=cfg)

where self.loss_single is defined as:
def loss_single(self, cls_score, bbox_pred, labels, label_weights,
                bbox_targets, bbox_weights, num_total_samples, cfg):
    loss_cls = self.loss_cls(
        cls_score, labels, label_weights, avg_factor=num_total_samples)
    loss_bbox = self.loss_bbox(
        bbox_pred,
        bbox_targets,
        bbox_weights,
        avg_factor=num_total_samples)
    return loss_cls, loss_bbox

Model inference (at test time):

First, features are extracted to obtain the feature maps (backbone + FPN) -> then boxes are regressed and classified on the feature maps, giving each anchor's class and the proposals -> the top-N highest-scoring proposals in the image are kept, and the predicted deltas are converted into bboxes so that the IoU between proposals can be computed later; duplicate predictions are suppressed with NMS.

    def simple_test(self, img, img_meta, rescale=False):
        x = self.extract_feat(img)
        outs = self.bbox_head(x)
        bbox_inputs = outs + (img_meta, self.test_cfg, rescale)
        bbox_list = self.bbox_head.get_bboxes(*bbox_inputs)
        bbox_results = [
            bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes)
            for det_bboxes, det_labels in bbox_list
        ]
        return bbox_results[0]

Here self.bbox_head.get_bboxes(*bbox_inputs) keeps the top-N highest-scoring proposals, converts the predicted deltas into bboxes, and suppresses duplicate predictions with NMS:
mmdetection-rotated/mmdet/models/anchor_heads/anchor_head.py
    def get_bboxes(self, cls_scores, bbox_preds, img_metas, cfg, rescale=False):
        proposals = self.get_bboxes_single(cls_score_list, bbox_pred_list,
                                           mlvl_anchors, img_shape,
                                           scale_factor, cfg, rescale)

    # inside get_bboxes_single:
        _, topk_inds = max_scores.topk(nms_pre)  # first keep the nms_pre highest-scoring proposals
        bboxes = delta2bbox(anchors, bbox_pred, self.target_means,
                            self.target_stds, img_shape)  # convert the predicted deltas into bboxes
        det_bboxes, det_labels = multiclass_nms(mlvl_bboxes, mlvl_scores,
                                                cfg.score_thr, cfg.nms,
                                                cfg.max_per_img)  # suppress duplicate boxes with NMS
        return det_bboxes, det_labels
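delta2bbox is the inverse of bbox2delta shown earlier; a rough sketch for the rotated 5-parameter case (my own simplification, assuming zero means, unit stds, and ignoring the clamping to img_shape that the real function performs):

    import numpy as np
    import torch

    def delta2bbox_sketch(anchors, deltas):
        # invert the bbox2delta transform: deltas -> absolute (cx, cy, w, h, angle)
        px, py, pw, ph, pa = anchors.unbind(dim=-1)
        dx, dy, dw, dh, da = deltas.unbind(dim=-1)
        gx = px + pw * dx
        gy = py + ph * dy
        gw = pw * torch.exp(dw)
        gh = ph * torch.exp(dh)
        ga = pa + da * 180 / np.pi  # radians back to degrees
        return torch.stack([gx, gy, gw, gh, ga], dim=-1)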



Source: blog.csdn.net/qq_32425195/article/details/104148107