The Model Loading Process of the TensorFlow Object Detection Network

A look at the details of how the training program loads the model.

The main file for model training is train.py. In it, the call

model_config, train_config, input_config = get_configs_from_pipeline_file()

reads the configuration information; internally protobuf is used to parse the file.
This yields three configs: model_config, train_config and input_config (in their training form).
Afterwards functools.partial is used to bind default arguments to model_builder.build,
and likewise to input_reader_builder.build.
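For reference, here is a rough sketch of what those lines do, based on the old train.py; the exact helper signature and the sample config path below are illustrative and vary between versions of the API.

import functools

import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import input_reader_builder, model_builder
from object_detection.protos import pipeline_pb2


def get_configs_from_pipeline_file(pipeline_config_path):
  # Parse the pipeline .config file, a TrainEvalPipelineConfig proto written in
  # protobuf text format.
  pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
  with tf.gfile.GFile(pipeline_config_path, 'r') as f:
    text_format.Merge(f.read(), pipeline_config)
  return (pipeline_config.model,
          pipeline_config.train_config,
          pipeline_config.train_input_reader)


model_config, train_config, input_config = get_configs_from_pipeline_file(
    'samples/configs/faster_rcnn_resnet101_pets.config')  # illustrative path

# Bind the parsed configs as default arguments; later code then only needs to call
# model_fn() / create_input_dict_fn() without repeating the configs.
model_fn = functools.partial(model_builder.build,
                             model_config=model_config,
                             is_training=True)
create_input_dict_fn = functools.partial(input_reader_builder.build, input_config)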
—————————————————————————————————————————————————————
object_detection/builders/model_builder.py
The code that builds the network model is:

def build(model_config, is_training):
  if not isinstance(model_config, model_pb2.DetectionModel):
    raise ValueError('model_config not of type model_pb2.DetectionModel.')
  # Get the model type (meta architecture) from the config
  meta_architecture = model_config.WhichOneof('model')
  # Dispatch to the concrete builder
  if meta_architecture == 'ssd':
    return _build_ssd_model(model_config.ssd, is_training)
  if meta_architecture == 'faster_rcnn':
    return _build_faster_rcnn_model(model_config.faster_rcnn, is_training)
  raise ValueError('Unknown meta architecture: {}'.format(meta_architecture))

Here we take the 'faster_rcnn' model as the example and step into _build_faster_rcnn_model:

def _build_faster_rcnn_model(frcnn_config, is_training):
  """Builds a Faster R-CNN or R-FCN detection model.

  If the type of second_stage_box_predictor is rfcn_box_predictor the model is
  R-FCN; otherwise it is Faster R-CNN.

  Args:
    frcnn_config: the config that describes the model architecture.
    is_training: whether the model is being built for training.
  """
  # Number of object classes to detect
  num_classes = frcnn_config.num_classes
  # Build the image resizer function
  image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)
  # Build the feature extractor (the backbone network)
  feature_extractor = _build_faster_rcnn_feature_extractor(
      frcnn_config.feature_extractor, is_training)
  # Whether to build only the first stage (the RPN)
  first_stage_only = frcnn_config.first_stage_only
  # Build the anchor generator
  first_stage_anchor_generator = anchor_generator_builder.build(
      frcnn_config.first_stage_anchor_generator)
  # Atrous (dilated) convolution rate used in the first stage
  first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate
  # Hyperparameters (arg_scope) for the first-stage box predictor convolutions
  first_stage_box_predictor_arg_scope = hyperparams_builder.build(
      frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)
  # Kernel size of the first-stage box predictor
  first_stage_box_predictor_kernel_size = (
      frcnn_config.first_stage_box_predictor_kernel_size)
  # Output depth of the RPN box predictor
  first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth
  # Minibatch size for the first-stage (RPN) sampler
  first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size
  # Desired fraction of positive anchors in each image's RPN minibatch
  first_stage_positive_balance_fraction = (
      frcnn_config.first_stage_positive_balance_fraction)
  # Score threshold for first-stage NMS
  first_stage_nms_score_threshold = frcnn_config.first_stage_nms_score_threshold
  # IoU threshold for first-stage NMS
  first_stage_nms_iou_threshold = frcnn_config.first_stage_nms_iou_threshold
  # Maximum number of proposals the RPN passes on
  first_stage_max_proposals = frcnn_config.first_stage_max_proposals
  # First-stage localization loss weight
  first_stage_loc_loss_weight = (
      frcnn_config.first_stage_localization_loss_weight)
  # First-stage objectness loss weight
  first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight
  # Side length (height == width) to which each proposal's features are cropped before the second stage
  initial_crop_size = frcnn_config.initial_crop_size
  # Kernel size and stride of the max-pool applied after the ROI crop
  maxpool_kernel_size = frcnn_config.maxpool_kernel_size
  maxpool_stride = frcnn_config.maxpool_stride
  # Build the second-stage box predictor (hyperparams_builder.build is passed in as the arg_scope factory)
  second_stage_box_predictor = box_predictor_builder.build(
      hyperparams_builder.build,
      frcnn_config.second_stage_box_predictor,
      is_training=is_training,
      num_classes=num_classes)
  # Number of proposals sampled per image for the second stage
  second_stage_batch_size = frcnn_config.second_stage_batch_size
  # Desired fraction of positive boxes in the second-stage sample
  second_stage_balance_fraction = frcnn_config.second_stage_balance_fraction
  # Build the post-processing functions (NMS and score conversion)
  (second_stage_non_max_suppression_fn, second_stage_score_conversion_fn
  ) = post_processing_builder.build(frcnn_config.second_stage_post_processing)
  # Loss weights for second-stage localization and classification
  second_stage_localization_loss_weight = (
      frcnn_config.second_stage_localization_loss_weight)
  second_stage_classification_loss_weight = (
      frcnn_config.second_stage_classification_loss_weight)
  # Hard example mining is off by default,
  hard_example_miner = None
  # but it is enabled if the config contains a hard_example_miner field
  if frcnn_config.HasField('hard_example_miner'):
    hard_example_miner = losses_builder.build_hard_example_miner(
        frcnn_config.hard_example_miner,
        second_stage_classification_loss_weight,
        second_stage_localization_loss_weight)
  # Collect the configured components into a kwargs dict
  common_kwargs = {
      'is_training': is_training,
      'num_classes': num_classes,
      'image_resizer_fn': image_resizer_fn,
      'feature_extractor': feature_extractor,
      'first_stage_only': first_stage_only,
      'first_stage_anchor_generator': first_stage_anchor_generator,
      'first_stage_atrous_rate': first_stage_atrous_rate,
      'first_stage_box_predictor_arg_scope':
      first_stage_box_predictor_arg_scope,
      'first_stage_box_predictor_kernel_size':
      first_stage_box_predictor_kernel_size,
      'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
      'first_stage_minibatch_size': first_stage_minibatch_size,
      'first_stage_positive_balance_fraction':
      first_stage_positive_balance_fraction,
      'first_stage_nms_score_threshold': first_stage_nms_score_threshold,
      'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold,
      'first_stage_max_proposals': first_stage_max_proposals,
      'first_stage_localization_loss_weight': first_stage_loc_loss_weight,
      'first_stage_objectness_loss_weight': first_stage_obj_loss_weight,
      'second_stage_batch_size': second_stage_batch_size,
      'second_stage_balance_fraction': second_stage_balance_fraction,
      'second_stage_non_max_suppression_fn':
      second_stage_non_max_suppression_fn,
      'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
      'second_stage_localization_loss_weight':
      second_stage_localization_loss_weight,
      'second_stage_classification_loss_weight':
      second_stage_classification_loss_weight,
      'hard_example_miner': hard_example_miner}
  # If the second-stage predictor is an RfcnBoxPredictor build an RFCNMetaArch, otherwise a FasterRCNNMetaArch
  if isinstance(second_stage_box_predictor, box_predictor.RfcnBoxPredictor):
    return rfcn_meta_arch.RFCNMetaArch(
        second_stage_rfcn_box_predictor=second_stage_box_predictor,
        **common_kwargs)
  else:
    return faster_rcnn_meta_arch.FasterRCNNMetaArch(
        initial_crop_size=initial_crop_size,
        maxpool_kernel_size=maxpool_kernel_size,
        maxpool_stride=maxpool_stride,
        second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
        **common_kwargs)

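Before moving on to the individual sub-builders, here is a hedged, version-dependent sketch of how the object returned by _build_faster_rcnn_model is used downstream (it continues from the train.py sketch above; the placeholder shape is made up, and method signatures changed in later releases).

# The meta-architecture implements the generic DetectionModel interface used by
# trainer.py / eval.py.
detection_model = model_fn()                       # the functools.partial bound in train.py
images = tf.placeholder(tf.float32, [1, None, None, 3])
preprocessed = detection_model.preprocess(images)  # applies image_resizer_fn plus model-specific normalization
prediction_dict = detection_model.predict(preprocessed)
detections = detection_model.postprocess(prediction_dict)
# For training, groundtruth has to be provided before computing the loss:
#   detection_model.provide_groundtruth(groundtruth_boxes_list, groundtruth_classes_list)
#   losses_dict = detection_model.loss(prediction_dict)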
—————————————————————————————————————————————————————
Next, each sub-builder is described in turn.
First, the image resizer built by image_resizer_builder:

# Builds the image resizing function
def build(image_resizer_config):
  # Check that the config has the right type
  if not isinstance(image_resizer_config, image_resizer_pb2.ImageResizer):
    raise ValueError('image_resizer_config not of type '
                     'image_resizer_pb2.ImageResizer.')
  # Check which branch of image_resizer_oneof is set; first the keep_aspect_ratio_resizer case
  if image_resizer_config.WhichOneof(
      'image_resizer_oneof') == 'keep_aspect_ratio_resizer':
    # Aspect-ratio-preserving resize: bind min/max dimensions onto preprocessor.resize_to_range via functools.partial
    keep_aspect_ratio_config = image_resizer_config.keep_aspect_ratio_resizer
    if not (keep_aspect_ratio_config.min_dimension
            <= keep_aspect_ratio_config.max_dimension):
      raise ValueError('min_dimension > max_dimension')
    return functools.partial(
        preprocessor.resize_to_range,
        min_dimension=keep_aspect_ratio_config.min_dimension,
        max_dimension=keep_aspect_ratio_config.max_dimension)
  # Otherwise check for fixed_shape_resizer, i.e. resizing to a fixed size
  if image_resizer_config.WhichOneof(
      'image_resizer_oneof') == 'fixed_shape_resizer':
    # Bind the target height/width onto preprocessor.resize_image (an interpolated resize) via functools.partial
    fixed_shape_resizer_config = image_resizer_config.fixed_shape_resizer
    return functools.partial(preprocessor.resize_image,
                             new_height=fixed_shape_resizer_config.height,
                             new_width=fixed_shape_resizer_config.width)
  raise ValueError('Invalid image resizer option.')

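A small usage sketch, not from the original file (the config text and the exact return value of resize_to_range differ between versions), showing that the builder hands back a function that only needs the image tensor:

import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import image_resizer_builder
from object_detection.protos import image_resizer_pb2

resizer_config = text_format.Merge("""
    keep_aspect_ratio_resizer {
      min_dimension: 600
      max_dimension: 1024
    }""", image_resizer_pb2.ImageResizer())

image_resizer_fn = image_resizer_builder.build(resizer_config)
# min_dimension / max_dimension are already bound by functools.partial.
image = tf.placeholder(tf.float32, shape=[None, None, 3])
resized = image_resizer_fn(image)  # shorter side scaled to 600, longer side capped at 1024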
—————————————————————————————————————————————————————
After the resizer comes the feature extractor used by faster_rcnn_meta_arch, i.e. the _build_faster_rcnn_feature_extractor function:

def _build_faster_rcnn_feature_extractor(
    feature_extractor_config, is_training, reuse_weights=None):
  # Get the feature extractor type from the config, e.g. 'faster_rcnn_resnet101'
  feature_type = feature_extractor_config.type
  # Get first_stage_features_stride; it must be 8 or 16, otherwise an error is raised later
  first_stage_features_stride = (
      feature_extractor_config.first_stage_features_stride)
  # Check whether this feature extractor type has a built-in implementation
  if feature_type not in FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP:
    raise ValueError('Unknown Faster R-CNN feature_extractor: {}'.format(
        feature_type))
  feature_extractor_class = FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP[
      feature_type]
  # Return an instantiated feature extractor
  return feature_extractor_class(
      is_training, first_stage_features_stride, reuse_weights)

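FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP is just a dict defined at the top of model_builder.py that maps the type string to a feature extractor class; it looks roughly like the following (the exact set of entries depends on the API version).

from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as frcnn_resnet_v1

FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP = {
    'faster_rcnn_resnet50':
        frcnn_resnet_v1.FasterRCNNResnet50FeatureExtractor,
    'faster_rcnn_resnet101':
        frcnn_resnet_v1.FasterRCNNResnet101FeatureExtractor,
    'faster_rcnn_resnet152':
        frcnn_resnet_v1.FasterRCNNResnet152FeatureExtractor,
    'faster_rcnn_inception_resnet_v2':
        frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor,
}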
—————————————————————————————————————————————————————
Next, the anchor generator:

def build(anchor_generator_config):
 # The oneof selects either grid_anchor_generator or ssd_anchor_generator; here we look at grid_anchor_generator
 if not isinstance(anchor_generator_config,
                   anchor_generator_pb2.AnchorGenerator):
   raise ValueError('anchor_generator_config not of type '
                    'anchor_generator_pb2.AnchorGenerator')
 if anchor_generator_config.WhichOneof(
     'anchor_generator_oneof') == 'grid_anchor_generator':
   grid_anchor_generator_config = anchor_generator_config.grid_anchor_generator
   # Initialize a GridAnchorGenerator with the configured parameters
   return grid_anchor_generator.GridAnchorGenerator(
       scales=[float(scale) for scale in grid_anchor_generator_config.scales],
       aspect_ratios=[float(aspect_ratio)
                      for aspect_ratio
                      in grid_anchor_generator_config.aspect_ratios],
       base_anchor_size=[grid_anchor_generator_config.height,
                         grid_anchor_generator_config.width],
       anchor_stride=[grid_anchor_generator_config.height_stride,
                      grid_anchor_generator_config.width_stride],
       anchor_offset=[grid_anchor_generator_config.height_offset,
                      grid_anchor_generator_config.width_offset])
 elif anchor_generator_config.WhichOneof(
     'anchor_generator_oneof') == 'ssd_anchor_generator':
   ssd_anchor_generator_config = anchor_generator_config.ssd_anchor_generator
   return multiple_grid_anchor_generator.create_ssd_anchors(
       num_layers=ssd_anchor_generator_config.num_layers,
       min_scale=ssd_anchor_generator_config.min_scale,
       max_scale=ssd_anchor_generator_config.max_scale,
       aspect_ratios=ssd_anchor_generator_config.aspect_ratios,
       reduce_boxes_in_lowest_layer=(ssd_anchor_generator_config
                                     .reduce_boxes_in_lowest_layer))
 else:
   raise ValueError('Empty anchor generator.')

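A hedged usage sketch for the grid_anchor_generator case (the 38x50 feature-map shape is made up): the meta-architecture later calls generate() with the shape of the RPN feature map and gets back a BoxList with one anchor per (location, scale, aspect ratio) combination.

first_stage_anchor_generator = anchor_generator_builder.build(
    frcnn_config.first_stage_anchor_generator)
anchors = first_stage_anchor_generator.generate(feature_map_shape_list=[(38, 50)])
num_anchors = anchors.num_boxes()  # a tensor equal to 38 * 50 * len(scales) * len(aspect_ratios)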
—————————————————————————————————————————————————————
Next, the hyperparams_builder.build module:

def build(hyperparams_config, is_training):
  # Builds a tf-slim arg_scope from the given config.
  # The returned arg_scope carries the weight initializer, regularizer, activation
  # function and batch norm settings; if batch_norm is not defined, no BN layer is added.
  # Whether the BN parameters are trained depends on both the is_training argument
  # and hyperparams_config.batch_norm.train.
  if not isinstance(hyperparams_config,
                    hyperparams_pb2.Hyperparams):
    raise ValueError('hyperparams_config not of type '
                     'hyperparams_pb.Hyperparams.')
  # If the config has a batch_norm field, enable slim.batch_norm
  batch_norm = None
  batch_norm_params = None
  if hyperparams_config.HasField('batch_norm'):
    batch_norm = slim.batch_norm
    batch_norm_params = _build_batch_norm_params(
        hyperparams_config.batch_norm, is_training)

  affected_ops = [slim.conv2d, slim.separable_conv2d, slim.conv2d_transpose]
  if hyperparams_config.HasField('op') and (
      hyperparams_config.op == hyperparams_pb2.Hyperparams.FC):
    affected_ops = [slim.fully_connected]
  with slim.arg_scope(
      affected_ops,
      weights_regularizer=_build_regularizer(
          hyperparams_config.regularizer),
      weights_initializer=_build_initializer(
          hyperparams_config.initializer),
      activation_fn=_build_activation_fn(hyperparams_config.activation),
      normalizer_fn=batch_norm,
      normalizer_params=batch_norm_params) as sc:
    return sc

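A short hedged sketch of how the returned arg_scope is consumed (feature_map and the scope name are made up): every convolution created inside the with-block inherits the configured initializer, regularizer, activation function and optional batch norm.

import tensorflow as tf
from object_detection.builders import hyperparams_builder

slim = tf.contrib.slim

arg_scope = hyperparams_builder.build(
    frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training=True)
with slim.arg_scope(arg_scope):
  # Picks up weights_initializer / weights_regularizer / activation_fn / normalizer_fn
  # from the arg_scope without repeating them here.
  rpn_features = slim.conv2d(feature_map, num_outputs=512, kernel_size=[3, 3],
                             scope='rpn_conv')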
—————————————————————————————————————————————————————

Next, the box predictor builder:

def build(argscope_fn, box_predictor_config, is_training, num_classes):
  # Configures box prediction based on the config file.
  # argscope_fn takes two inputs: a hyperparams_pb2.Hyperparams proto and a flag
  # indicating whether the model is being trained.
  if not isinstance(box_predictor_config, box_predictor_pb2.BoxPredictor):
    raise ValueError('box_predictor_config not of type '
                     'box_predictor_pb2.BoxPredictor.')
  # Read which branch of box_predictor_oneof is set; in our example it is rfcn_box_predictor
  box_predictor_oneof = box_predictor_config.WhichOneof('box_predictor_oneof')

  if  box_predictor_oneof == 'convolutional_box_predictor':
    conv_box_predictor = box_predictor_config.convolutional_box_predictor
    conv_hyperparams = argscope_fn(conv_box_predictor.conv_hyperparams,
                                   is_training)
    box_predictor_object = box_predictor.ConvolutionalBoxPredictor(
        is_training=is_training,
        num_classes=num_classes,
        conv_hyperparams=conv_hyperparams,
        min_depth=conv_box_predictor.min_depth,
        max_depth=conv_box_predictor.max_depth,
        num_layers_before_predictor=(conv_box_predictor.
                                     num_layers_before_predictor),
        use_dropout=conv_box_predictor.use_dropout,
        dropout_keep_prob=conv_box_predictor.dropout_keep_probability,
        kernel_size=conv_box_predictor.kernel_size,
        box_code_size=conv_box_predictor.box_code_size,
        apply_sigmoid_to_scores=conv_box_predictor.apply_sigmoid_to_scores)
    return box_predictor_object

  if box_predictor_oneof == 'mask_rcnn_box_predictor':
    mask_rcnn_box_predictor = box_predictor_config.mask_rcnn_box_predictor
    fc_hyperparams = argscope_fn(mask_rcnn_box_predictor.fc_hyperparams,
                                 is_training)
    conv_hyperparams = None
    if mask_rcnn_box_predictor.HasField('conv_hyperparams'):
      conv_hyperparams = argscope_fn(mask_rcnn_box_predictor.conv_hyperparams,
                                     is_training)
    box_predictor_object = box_predictor.MaskRCNNBoxPredictor(
        is_training=is_training,
        num_classes=num_classes,
        fc_hyperparams=fc_hyperparams,
        use_dropout=mask_rcnn_box_predictor.use_dropout,
        dropout_keep_prob=mask_rcnn_box_predictor.dropout_keep_probability,
        box_code_size=mask_rcnn_box_predictor.box_code_size,
        conv_hyperparams=conv_hyperparams,
        predict_instance_masks=mask_rcnn_box_predictor.predict_instance_masks,
        mask_prediction_conv_depth=(mask_rcnn_box_predictor.
                                    mask_prediction_conv_depth),
        predict_keypoints=mask_rcnn_box_predictor.predict_keypoints)
    return box_predictor_object
  # The rfcn_box_predictor branch, which is the one that matters here
  if box_predictor_oneof == 'rfcn_box_predictor':
    rfcn_box_predictor = box_predictor_config.rfcn_box_predictor
    # Build the conv hyperparams arg_scope via argscope_fn (i.e. hyperparams_builder.build)
    conv_hyperparams = argscope_fn(rfcn_box_predictor.conv_hyperparams,
                                   is_training)
    # Instantiate a box predictor that predicts class and location for each positive ROI;
    # this is the second-stage structure of R-FCN
    box_predictor_object = box_predictor.RfcnBoxPredictor(
        is_training=is_training,
        num_classes=num_classes,
        conv_hyperparams=conv_hyperparams,
        crop_size=[rfcn_box_predictor.crop_height,
                   rfcn_box_predictor.crop_width],
        num_spatial_bins=[rfcn_box_predictor.num_spatial_bins_height,
                          rfcn_box_predictor.num_spatial_bins_width],
        depth=rfcn_box_predictor.depth,
        box_code_size=rfcn_box_predictor.box_code_size)
    return box_predictor_object
  raise ValueError('Unknown box predictor: {}'.format(box_predictor_oneof))

—————————————————————————————————————————————————————

The argscope_fn passed in above is hyperparams_builder.build, the same function that was already shown earlier; it returns the tf-slim arg_scope described in that section.

—————————————————————————————————————————————————————

With the boxes and the predicted classes obtained, some post-processing remains; let's look at what post_processing_builder.build(frcnn_config.second_stage_post_processing) does.

def build(post_processing_config):
  # Builds callable post-processing operations, mainly NMS and score conversion, based on the config
  if not isinstance(post_processing_config, post_processing_pb2.PostProcessing):
    raise ValueError('post_processing_config not of type '
                     'post_processing_pb2.Postprocessing.')
  # Build the NMS function
  non_max_suppressor_fn = _build_non_max_suppressor(
      post_processing_config.batch_non_max_suppression)
  # Build the score conversion function
  score_converter_fn = _build_score_converter(
      post_processing_config.score_converter)
  return non_max_suppressor_fn, score_converter_fn

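For completeness, _build_score_converter (paraphrased here; the exact code differs slightly between versions) simply maps the score_converter enum to a TensorFlow op, so the "score conversion" is identity, sigmoid or softmax rather than any sorting:

def _build_score_converter(score_converter_config):
  # Map the enum in PostProcessing.score_converter to the corresponding TF op.
  if score_converter_config == post_processing_pb2.PostProcessing.IDENTITY:
    return tf.identity
  if score_converter_config == post_processing_pb2.PostProcessing.SIGMOID:
    return tf.sigmoid
  if score_converter_config == post_processing_pb2.PostProcessing.SOFTMAX:
    return tf.nn.softmax
  raise ValueError('Unknown score converter.')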
—————————————————————————————————————————————————————

For the NMS builder, the function of interest is post_processing.batch_multiclass_non_max_suppression, onto which the configured thresholds are bound via functools.partial:

def _build_non_max_suppressor(nms_config):
  if nms_config.iou_threshold < 0 or nms_config.iou_threshold > 1.0:
    raise ValueError('iou_threshold not in [0, 1.0].')
  if nms_config.max_detections_per_class > nms_config.max_total_detections:
    raise ValueError('max_detections_per_class should be no greater than '
                     'max_total_detections.')

  non_max_suppressor_fn = functools.partial(
      post_processing.batch_multiclass_non_max_suppression,
      score_thresh=nms_config.score_threshold,
      iou_thresh=nms_config.iou_threshold,
      max_size_per_class=nms_config.max_detections_per_class,
      max_total_size=nms_config.max_total_detections)
  return non_max_suppressor_fn

—————————————————————————————————————————————————————

Next comes build_hard_example_miner, which relates to the loss:

def build_hard_example_miner(config,
                             classification_weight,
                             localization_weight):
  # The core is losses.HardExampleMiner; since it is not used in this example we do not go deeper here
  loss_type = None
  if config.loss_type == losses_pb2.HardExampleMiner.BOTH:
    loss_type = 'both'
  if config.loss_type == losses_pb2.HardExampleMiner.CLASSIFICATION:
    loss_type = 'cls'
  if config.loss_type == losses_pb2.HardExampleMiner.LOCALIZATION:
    loss_type = 'loc'

  max_negatives_per_positive = None
  num_hard_examples = None
  if config.max_negatives_per_positive > 0:
    max_negatives_per_positive = config.max_negatives_per_positive
  if config.num_hard_examples > 0:
    num_hard_examples = config.num_hard_examples
  # Just an instantiation; see losses.HardExampleMiner for details
  hard_example_miner = losses.HardExampleMiner(
      num_hard_examples=num_hard_examples,
      iou_threshold=config.iou_threshold,
      loss_type=loss_type,
      cls_loss_weight=classification_weight,
      loc_loss_weight=localization_weight,
      max_negatives_per_positive=max_negatives_per_positive,
      min_negatives_per_image=config.min_negatives_per_image)
  return hard_example_miner

—————————————————————————————————————————————————————
Finally, the most important part: rfcn_meta_arch.RFCNMetaArch, which is simply the initialization of RFCNMetaArch. It builds a Faster R-CNN model and then replaces the second stage.

class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
  """R-FCN Meta-architecture definition."""

  def __init__(self,
               is_training,
               num_classes,
               image_resizer_fn,
               feature_extractor,
               first_stage_only,
               first_stage_anchor_generator,
               first_stage_atrous_rate,
               first_stage_box_predictor_arg_scope,
               first_stage_box_predictor_kernel_size,
               first_stage_box_predictor_depth,
               first_stage_minibatch_size,
               first_stage_positive_balance_fraction,
               first_stage_nms_score_threshold,
               first_stage_nms_iou_threshold,
               first_stage_max_proposals,
               first_stage_localization_loss_weight,
               first_stage_objectness_loss_weight,
               second_stage_rfcn_box_predictor,
               second_stage_batch_size,
               second_stage_balance_fraction,
               second_stage_non_max_suppression_fn,
               second_stage_score_conversion_fn,
               second_stage_localization_loss_weight,
               second_stage_classification_loss_weight,
               hard_example_miner,
               parallel_iterations=16):
    super(RFCNMetaArch, self).__init__(
        is_training,
        num_classes,
        image_resizer_fn,
        feature_extractor,
        first_stage_only,
        first_stage_anchor_generator,
        first_stage_atrous_rate,
        first_stage_box_predictor_arg_scope,
        first_stage_box_predictor_kernel_size,
        first_stage_box_predictor_depth,
        first_stage_minibatch_size,
        first_stage_positive_balance_fraction,
        first_stage_nms_score_threshold,
        first_stage_nms_iou_threshold,
        first_stage_max_proposals,
        first_stage_localization_loss_weight,
        first_stage_objectness_loss_weight,
        None,  # initial_crop_size is not used in R-FCN
        None,  # maxpool_kernel_size is not used in R-FCN
        None,  # maxpool_stride is not used in R-FCN
        None,  # fully_connected_box_predictor is not used in R-FCN.
        second_stage_batch_size,
        second_stage_balance_fraction,
        second_stage_non_max_suppression_fn,
        second_stage_score_conversion_fn,
        second_stage_localization_loss_weight,
        second_stage_classification_loss_weight,
        hard_example_miner,
        parallel_iterations)

    self._rfcn_box_predictor = second_stage_rfcn_box_predictor


Reposted from blog.csdn.net/weixin_43843657/article/details/88392252