一，配置文件

下面的配置文件是自定义的，各字段的含义可参考 MMAction2 官方文档中“时空动作检测的配置文件系统”一节。
# Model settings
model = dict(
    # Type of the spatio-temporal detector.
    type='FastRCNN',
    # Backbone settings.
    backbone=dict(
        type='ResNet3dSlowFast',
        # URL or file path of a pretrained model (none given here).
        pretrained=None,
        # tau: corresponds to the parameter tau in the SlowFast paper.
        resample_rate=4,
        # alpha: corresponds to the parameter alpha in the paper.
        speed_ratio=4,
        # beta_inv: the inverse of beta in the paper.
        channel_ratio=8,
        # Slow pathway.
        slow_pathway=dict(
            type='resnet3d',
            depth=50,
            # No pretrained model for this pathway.
            pretrained=None,
            # Whether lateral connections are used.
            lateral=True,
            # Kernel size of the first conv layer.
            conv1_kernel=(1, 7, 7),
            # Dilation of each stage. Default: (1, 1, 1, 1).
            dilations=(1, 1, 1, 1),
            # Temporal stride of the first conv layer.
            conv1_stride_t=1,
            # Temporal stride of the first pooling layer.
            pool1_stride_t=1,
            # Inflate dims of each block. Default: (0, 0, 1, 1).
            inflate=(0, 0, 1, 1),
            # Spatial strides of the residual blocks of each stage.
            # Default: (1, 2, 2, 2).
            spatial_strides=(1, 2, 2, 1),
        ),
        # Fast pathway.
        fast_pathway=dict(
            type='resnet3d',
            depth=50,
            # No pretrained model for this pathway.
            pretrained=None,
            # Whether lateral connections are used.
            lateral=False,
            # Number of base channels.
            base_channels=8,
            # Kernel size of the first conv layer.
            conv1_kernel=(5, 7, 7),
            # Temporal stride of the first conv layer.
            conv1_stride_t=1,
            # Temporal stride of the first pooling layer.
            pool1_stride_t=1,
            # Spatial strides of the residual blocks of each stage.
            # Default: (1, 2, 2, 2).
            spatial_strides=(1, 2, 2, 1),
        ),
    ),
    # RoI head settings.
    roi_head=dict(
        type='Via3RoIHead',
        # RoI feature extractor settings.
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor3D',
            # Type of the RoI op.
            roi_layer_type='RoIAlign',
            # Output feature size of the RoI op.
            output_size=8,
            # Whether the temporal dimension is pooled.
            with_temporal_pool=True,
        ),
        # Bounding-box head settings.
        bbox_head=dict(
            type='BBoxHeadAVA',
            # Number of input feature channels: 2048 + 256.
            in_channels=2304,
            # Number of action classes + 1 (background).
            num_classes=7,
            # Parameter for evaluating multilabel accuracy. Default: (3, 5).
            topk=(1, 1),
            # Whether the dataset is multilabel.
            multilabel=True,
            dropout_ratio=0.5,
        ),
    ),
    # Training hyper-parameters of FastRCNN.
    train_cfg=dict(
        rcnn=dict(
            assigner=dict(
                type='MaxIoUAssignerAVA',
                # IoU threshold for positives: > pos_iou_thr -> positive.
                pos_iou_thr=0.9,
                # IoU threshold for negatives: < neg_iou_thr -> negative.
                neg_iou_thr=0.9,
                # Minimum acceptable IoU for a positive sample.
                min_pos_iou=0.9,
            ),
            sampler=dict(
                type='RandomSampler',
                # Batch size of the sampler.
                num=32,
                # Fraction of positive boxes in a sampled batch.
                pos_fraction=1,
                # Upper bound of the negative-to-positive ratio.
                neg_pos_ub=-1,
                # Whether ground-truth boxes are added as proposals.
                add_gt_as_proposals=True,
            ),
            # Loss weight of positive samples.
            pos_weight=1.0,
            # Whether debug mode is on.
            debug=False,
        ),
    ),
    # Testing hyper-parameters of FastRCNN.
    test_cfg=dict(
        rcnn=dict(
            # Threshold applied to an action.
            action_thr=0.0,
        ),
    ),
)
# Image normalization parameters (per-channel mean and std; no BGR conversion).
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_bgr=False,
)

# Preprocessing pipeline for training data: a list of steps, in order.
train_pipeline = [
    # Select which video frames are sampled.
    dict(
        type='SampleVia3Frames',
        # Number of frames in each output clip.
        clip_len=32,
        # Temporal interval between adjacent sampled frames.
        frame_interval=2,
    ),
    # Given the frame indices, load and decode the corresponding frames.
    dict(type='RawFrameDecode'),
    # Randomly rescale the short side within the given range.
    dict(type='RandomRescale', scale_range=(256, 320)),
    # Randomly crop to the given size.
    dict(type='RandomCrop', size=256),
    # Flip the images with probability flip_ratio.
    dict(type='Flip', flip_ratio=0.5),
    # Normalize the images with the given mean / std.
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_bgr=False,
    ),
    # Convert the images to the given input layout.
    dict(
        type='FormatShape',
        # Final layout of the image batch.
        input_format='NCTHW',
        # Drop the N dimension when N == 1.
        collapse=True,
    ),
    # Rename keys according to the mapping.
    dict(type='Rename', mapping=dict(imgs='img')),
    # Convert the listed entries to Tensor.
    dict(
        type='ToTensor',
        keys=['img', 'proposals', 'gt_bboxes', 'gt_labels'],
    ),
    # Wrap some entries in a DataContainer.
    dict(
        type='ToDataContainer',
        fields=[
            dict(
                # Keys converted to DataContainer.
                key=['proposals', 'gt_bboxes', 'gt_labels'],
                # Whether these tensors are stacked.
                stack=False,
            ),
        ],
    ),
    # Decide which keys are passed on to the spatio-temporal detector.
    dict(
        type='Collect',
        # Input keys.
        keys=['img', 'proposals', 'gt_bboxes', 'gt_labels'],
        # Input meta keys.
        meta_keys=[
            'original_shape', 'img_shape', 'flip_direction', 'img_norm_cfg',
        ],
    ),
]
# Preprocessing pipeline for validation data: a list of steps, in order.
val_pipeline = [
    # Select which video frames are sampled.
    dict(
        type='SampleVia3Frames',
        # Number of frames in each output clip.
        clip_len=32,
        # Temporal interval between adjacent sampled frames.
        frame_interval=2,
    ),
    # Given the frame indices, load and decode the corresponding frames.
    dict(type='RawFrameDecode'),
    # Resize the images to the given scale.
    dict(type='Resize', scale=(-1, 256)),
    # Normalize the images with the given mean / std.
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_bgr=False,
    ),
    # Convert the images to the given input layout.
    dict(
        type='FormatShape',
        # Final layout of the image batch.
        input_format='NCTHW',
        # Drop the N dimension when N == 1.
        collapse=True,
    ),
    # Rename keys according to the mapping.
    dict(type='Rename', mapping=dict(imgs='img')),
    # Convert the listed entries to Tensor.
    dict(type='ToTensor', keys=['img', 'proposals']),
    # Wrap some entries in a DataContainer.
    dict(
        type='ToDataContainer',
        fields=[
            dict(
                # Key converted to DataContainer.
                key='proposals',
                # Whether these tensors are stacked.
                stack=False,
            ),
        ],
    ),
    # Decide which keys are passed on to the spatio-temporal detector.
    dict(
        type='Collect',
        # Input keys.
        keys=['img', 'proposals'],
        # Input meta keys.
        meta_keys=['img_shape'],
        # Whether the data is wrapped in a nested list.
        nested=True,
    ),
]

# Dataset type used for training, validation and testing.
dataset_type = 'VIA3Dataset'

# Root folders of the training images and annotations.
# (The original author also noted 'data/Interaction/...' as an alternative
# location.)
train_images_root, train_annotations_root = (
    './Datasets/Interaction/images/train',
    './Datasets/Interaction/annotations/train',
)

# Root folders of the test images and annotations.
test_images_root, test_annotations_root = (
    './Datasets/Interaction/images/test',
    './Datasets/Interaction/annotations/test',
)

# One dataset config per training sequence (seq1 .. seq20). They differ only
# in the annotation / proposal file names. `custom_classes` is left as None;
# the original author kept ['None', 'handshake', 'point', 'push'] as a
# commented-out alternative.
train_seq1_cfg = dict(
    type=dataset_type,
    data_prefix=train_images_root,
    ann_file=f'{train_annotations_root}/seq1.json',
    proposal_file=f'{train_annotations_root}/seq1_proposal.json',
    attribute='person',
    custom_classes=None,
    pipeline=train_pipeline,
)

train_seq2_cfg = dict(
    type=dataset_type,
    data_prefix=train_images_root,
    ann_file=f'{train_annotations_root}/seq2.json',
    proposal_file=f'{train_annotations_root}/seq2_proposal.json',
    attribute='person',
    custom_classes=None,
    pipeline=train_pipeline,
)

train_seq3_cfg = dict(
    type=dataset_type,
    data_prefix=train_images_root,
    ann_file=f'{train_annotations_root}/seq3.json',
    proposal_file=f'{train_annotations_root}/seq3_proposal.json',
    attribute='person',
    custom_classes=None,
    pipeline=train_pipeline,
)

train_seq4_cfg = dict(
    type=dataset_type,
    data_prefix=train_images_root,
    ann_file=f'{train_annotations_root}/seq4.json',
    proposal_file=f'{train_annotations_root}/seq4_proposal.json',
    attribute='person',
    custom_classes=None,
    pipeline=train_pipeline,
)

train_seq5_cfg = dict(
    type=dataset_type,
    data_prefix=train_images_root,
    ann_file=f'{train_annotations_root}/seq5.json',
    proposal_file=f'{train_annotations_root}/seq5_proposal.json',
    attribute='person',
    custom_classes=None,
    pipeline=train_pipeline,
)

train_seq6_cfg = dict(
    type=dataset_type,
    data_prefix=train_images_root,
    ann_file=f'{train_annotations_root}/seq6.json',
    proposal_file=f'{train_annotations_root}/seq6_proposal.json',
    attribute='person',
    custom_classes=None,
    pipeline=train_pipeline,
)

train_seq7_cfg = dict(
    type=dataset_type,
    data_prefix=train_images_root,
    ann_file=f'{train_annotations_root}/seq7.json',
    proposal_file=f'{train_annotations_root}/seq7_proposal.json',
    attribute='person',
    custom_classes=None,
    pipeline=train_pipeline,
)

train_seq8_cfg = dict(
    type=dataset_type,
    data_prefix=train_images_root,
    ann_file=f'{train_annotations_root}/seq8.json',
    proposal_file=f'{train_annotations_root}/seq8_proposal.json',
    attribute='person',
    custom_classes=None,
    pipeline=train_pipeline,
)

train_seq9_cfg = dict(
    type=dataset_type,
    data_prefix=train_images_root,
    ann_file=f'{train_annotations_root}/seq9.json',
    proposal_file=f'{train_annotations_root}/seq9_proposal.json',
    attribute='person',
    custom_classes=None,
    pipeline=train_pipeline,
)

train_seq10_cfg = dict(
    type=dataset_type,
    data_prefix=train_images_root,
    ann_file=f'{train_annotations_root}/seq10.json',
    proposal_file=f'{train_annotations_root}/seq10_proposal.json',
    attribute='person',
    custom_classes=None,
    pipeline=train_pipeline,
)

train_seq11_cfg = dict(
    type=dataset_type,
    data_prefix=train_images_root,
    ann_file=f'{train_annotations_root}/seq11.json',
    proposal_file=f'{train_annotations_root}/seq11_proposal.json',
    attribute='person',
    custom_classes=None,
    pipeline=train_pipeline,
)

train_seq12_cfg = dict(
    type=dataset_type,
    data_prefix=train_images_root,
    ann_file=f'{train_annotations_root}/seq12.json',
    proposal_file=f'{train_annotations_root}/seq12_proposal.json',
    attribute='person',
    custom_classes=None,
    pipeline=train_pipeline,
)

train_seq13_cfg = dict(
    type=dataset_type,
    data_prefix=train_images_root,
    ann_file=f'{train_annotations_root}/seq13.json',
    proposal_file=f'{train_annotations_root}/seq13_proposal.json',
    attribute='person',
    custom_classes=None,
    pipeline=train_pipeline,
)

train_seq14_cfg = dict(
    type=dataset_type,
    data_prefix=train_images_root,
    ann_file=f'{train_annotations_root}/seq14.json',
    proposal_file=f'{train_annotations_root}/seq14_proposal.json',
    attribute='person',
    custom_classes=None,
    pipeline=train_pipeline,
)

train_seq15_cfg = dict(
    type=dataset_type,
    data_prefix=train_images_root,
    ann_file=f'{train_annotations_root}/seq15.json',
    proposal_file=f'{train_annotations_root}/seq15_proposal.json',
    attribute='person',
    custom_classes=None,
    pipeline=train_pipeline,
)

train_seq16_cfg = dict(
    type=dataset_type,
    data_prefix=train_images_root,
    ann_file=f'{train_annotations_root}/seq16.json',
    proposal_file=f'{train_annotations_root}/seq16_proposal.json',
    attribute='person',
    custom_classes=None,
    pipeline=train_pipeline,
)

train_seq17_cfg = dict(
    type=dataset_type,
    data_prefix=train_images_root,
    ann_file=f'{train_annotations_root}/seq17.json',
    proposal_file=f'{train_annotations_root}/seq17_proposal.json',
    attribute='person',
    custom_classes=None,
    pipeline=train_pipeline,
)

train_seq18_cfg = dict(
    type=dataset_type,
    data_prefix=train_images_root,
    ann_file=f'{train_annotations_root}/seq18.json',
    proposal_file=f'{train_annotations_root}/seq18_proposal.json',
    attribute='person',
    custom_classes=None,
    pipeline=train_pipeline,
)

train_seq19_cfg = dict(
    type=dataset_type,
    data_prefix=train_images_root,
    ann_file=f'{train_annotations_root}/seq19.json',
    proposal_file=f'{train_annotations_root}/seq19_proposal.json',
    attribute='person',
    custom_classes=None,
    pipeline=train_pipeline,
)

train_seq20_cfg = dict(
    type=dataset_type,
    data_prefix=train_images_root,
    ann_file=f'{train_annotations_root}/seq20.json',
    proposal_file=f'{train_annotations_root}/seq20_proposal.json',
    attribute='person',
    custom_classes=None,
    pipeline=train_pipeline,
)



# Dataset configs for the test sequences (seq5, seq10, seq15, seq20). They
# read from the test roots and use the validation pipeline. `custom_classes`
# is left as None; the original author kept
# ['None', 'handshake', 'point', 'push'] as a commented-out alternative.
test_seq5_cfg = dict(
    type=dataset_type,
    data_prefix=test_images_root,
    ann_file=f'{test_annotations_root}/seq5.json',
    proposal_file=f'{test_annotations_root}/seq5_proposal.json',
    attribute='person',
    custom_classes=None,
    pipeline=val_pipeline,
)

test_seq10_cfg = dict(
    type=dataset_type,
    data_prefix=test_images_root,
    ann_file=f'{test_annotations_root}/seq10.json',
    proposal_file=f'{test_annotations_root}/seq10_proposal.json',
    attribute='person',
    custom_classes=None,
    pipeline=val_pipeline,
)

test_seq15_cfg = dict(
    type=dataset_type,
    data_prefix=test_images_root,
    ann_file=f'{test_annotations_root}/seq15.json',
    proposal_file=f'{test_annotations_root}/seq15_proposal.json',
    attribute='person',
    custom_classes=None,
    pipeline=val_pipeline,
)

test_seq20_cfg = dict(
    type=dataset_type,
    data_prefix=test_images_root,
    ann_file=f'{test_annotations_root}/seq20.json',
    proposal_file=f'{test_annotations_root}/seq20_proposal.json',
    attribute='person',
    custom_classes=None,
    pipeline=val_pipeline,
)

# Data loading settings
data = dict(
    # Batch size on a single GPU (4 videos per GPU). The original author also
    # tried 8 and 2 here (left as commented-out alternatives).
    videos_per_gpu=4,
    # Number of dataloader workers per GPU. Alternatives tried: 10 and 3.
    workers_per_gpu=5,
    # Extra dataloader settings for validation: batch size 1 per GPU.
    val_dataloader=dict(videos_per_gpu=1),
    # Extra dataloader settings for testing: batch size 1 per GPU.
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type='ConcatDataset',
        # train_seq5_cfg, train_seq10_cfg, train_seq15_cfg and train_seq20_cfg
        # are intentionally not listed (they were commented out); the same
        # sequence numbers are the ones used by `val` and `test` below.
        datasets=[
            train_seq1_cfg,
            train_seq2_cfg,
            train_seq3_cfg,
            train_seq4_cfg,
            train_seq6_cfg,
            train_seq7_cfg,
            train_seq8_cfg,
            train_seq9_cfg,
            train_seq11_cfg,
            train_seq12_cfg,
            train_seq13_cfg,
            train_seq14_cfg,
            train_seq16_cfg,
            train_seq17_cfg,
            train_seq18_cfg,
            train_seq19_cfg,
        ],
        separate_eval=True,
    ),
    val=dict(
        type='ConcatDataset',
        datasets=[
            test_seq5_cfg,
            test_seq10_cfg,
            test_seq15_cfg,
            test_seq20_cfg,
        ],
        separate_eval=True,
    ),
    # Same dataset list as `val`.
    test=dict(
        type='ConcatDataset',
        datasets=[
            test_seq5_cfg,
            test_seq10_cfg,
            test_seq15_cfg,
            test_seq20_cfg,
        ],
        separate_eval=True,
    ),
)
# Optimizer settings
# Settings used to build the optimizer. Supported:
# (1) all native PyTorch optimizers, with the same arguments as in PyTorch;
# (2) custom optimizers built on top of a `constructor`.
# See "tutorials/5_new_modules.md" for details.
optimizer = dict(
    # Optimizer type, see
    # https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/optimizer/default_constructor.py#L13
    type='SGD',
    # Learning rate; see the PyTorch documentation for argument details.
    lr=0.01,
    # Momentum.
    momentum=0.9,
    # Weight decay of the (SGD) optimizer.
    weight_decay=1e-05,
)

# Settings used to build the optimizer hook.
optimizer_config = dict(
    # Use gradient clipping.
    grad_clip=dict(max_norm=40, norm_type=2),
)
# Learning-rate schedule settings
# Settings used to register the learning-rate updater hook.
lr_config = dict(
    # Scheduler policy; CosineAnnealing, Cyclic, etc. are also supported. See
    # https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py#L9
    policy='step',
    # Steps at which the learning rate decays.
    step=[10, 15],
    # Warmup strategy.
    warmup='linear',
    # Whether warmup is counted in epochs (True) or iterations.
    warmup_by_epoch=True,
    # Warmup length.
    warmup_iters=5,
    # The initial learning rate is warmup_ratio * lr.
    warmup_ratio=0.1,
)

# Total number of training epochs (the author also noted 500 as an
# alternative).
total_epochs = 20
# Checkpoint hook settings; see
# https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/checkpoint.py
checkpoint_config = dict(
    # Interval at which checkpoints are saved.
    interval=1,
)

# Runner workflow. [('train', 1)] means a single workflow named 'train'
# that is run once.
workflow = [('train', 1)]

# Settings for validation during training.
# NOTE(review): the original also had a commented-out variant with a
# `save_best` key whose value was garbled in extraction -- presumably
# evaluation = dict(interval=1, save_best='mAP@0.5IOU'); confirm before use.
evaluation = dict(
    # Interval at which validation is run.
    interval=1,
)

# Settings used to register the logger hooks.
log_config = dict(
    # Interval at which logs are printed.
    interval=20,
    # Hooks executed during training.
    hooks=[dict(type='TextLoggerHook')],
)
# Runtime settings
# Settings for distributed training (port, multi-GPU communication backend,
# etc.).
dist_params = dict(backend='nccl')

# Log level.
log_level = 'INFO'

# `work_dir` (the folder for logs and checkpoints of this experiment) is not
# set here; the author left
# './work_dirs/ava/slowfast_kinetics_pretrained_r50_8x8x1_20e_ava_rgb'
# as a commented-out example.

# Load a model from the given path as the pretrained model. This option is
# not used for resuming an interrupted training run.
load_from = (
    'https://download.openmmlab.com/mmaction/recognition/slowfast/'
    'slowfast_r50_8x8x1_256e_kinetics400_rgb/'
    'slowfast_r50_8x8x1_256e_kinetics400_rgb_20200716-73547d2b.pth'
)

# Checkpoint to resume from; training would continue from the epoch saved in
# it. None disables resuming (a `latest.pth` under work_dirs was left as a
# commented-out example).
resume_from = None

find_unused_parameters = False
gpu_ids = range(1)
omnisource = False
module_hooks = []
二，自定义ava数据集加载过程解析

/JN-OpenLib-mmaction2/mmaction/models/heads/roi_head.py
（原文此处为该文件的代码截图，图片未能保留）
/JN-OpenLib-mmaction2/mmaction/datasets/pipelines/loading.py
（原文此处为该文件的代码截图，图片未能保留）
/JN-OpenLib-mmaction2/mmaction/datasets/via3_dataset.py
（原文此处为该文件的代码截图，图片未能保留）
【mmaction2 slowfast 行为分析(商用级别)】配置文件和自定义ava数据集加载过程解析

目录

一，配置文件

二，自定义ava数据集加载过程解析

猜你喜欢