-
Training is required first, and the training will automatically generate: latest.pth weight file
-
Generate ".pkl" file from weight file;
Take the following faster_rcnn
as an example, –out is only the address of the generated weight file, and result is the name of the generated pkl file;
python ./tools/test.py ./configs/faster_rcnn_r50_fpn_1x.py ./work_dirs/faster_rcnn_r50_fpn_1x/latest.pth --out=result.pkl
- Draw the confusion matrix,
take the faster_rcnn algorithm as an example
# !python tools/analysis_tools/confusion_matrix.py -h
!python tools/analysis_tools/confusion_matrix.py \
configs/faster_rcnn/faster-rcnn_r50_fpn_2x_voc_cc.py \
work_dirs/faster-rcnn_r50_fpn_2x_voc/result_epoch_24.pkl \
work_dirs/faster-rcnn_r50_fpn_2x_voc \
--show
Reference website: https://zhuanlan.zhihu.com/p/607576946
- Calculate FLOPs and Params
python tools/get_flops.py ${
CONFIG_FILE} [--shape ${
INPUT_SHAPE}]
Among them, "CONFIG_FILE" represents the currently used algorithm, and "INPUT_SHAPE" represents the size of the input image. Every time I input an error, it is very uncomfortable.
case:
python tools/get_flops.py tools/analysis_tools/get_flops.py configs/faster_cnn/faster_cnn_r50_fpn_1x.coco.py
Because every time I input: [–shape ${INPUT_SHAPE}], an error is reported, so I didn’t input it, and changed the default value in the py file as input.
I found a way later, as follows:
python tools/get_flops.py tools/analysis_tools/get_flops.py configs/faster_cnn/faster_cnn_r50_fpn_1x.coco.py --shape 1000 608
Among them, "–shape 1000 608" represents the size of the input image, and "3, 1000, 608" represents 3 channels
References: https://zhuanlan.zhihu.com/p/607576946
- Output FPS, I use the official code provided, but there are always problems, probably because my data set is in VOC format, so I re-find a code on the Internet to replace the official "benchmark.py" code, the content as follows:
import argparse
import time
import torch
from mmcv import Config, DictAction
from mmcv.cnn import fuse_conv_bn
from mmcv.parallel import MMDataParallel
from mmcv.runner import load_checkpoint, wrap_fp16_model
from mmdet.datasets import (build_dataloader, build_dataset,
replace_ImageToTensor)
from mmdet.models import build_detector
def parse_args():
parser = argparse.ArgumentParser(description='MMDet benchmark a model')
parser.add_argument('config', help='test config file path')
parser.add_argument('checkpoint', help='checkpoint file')
parser.add_argument(
'--log-interval', default=1, help='interval of logging')
parser.add_argument(
'--fuse-conv-bn',
action='store_true',
help='Whether to fuse conv and bn, this will slightly increase'
'the inference speed')
parser.add_argument(
'--cfg-options',
nargs='+',
action=DictAction,
help='override some settings in the used config, the key-value pair '
'in xxx=yyy format will be merged into config file. If the value to '
'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
'Note that the quotation marks are necessary and that no white space '
'is allowed.')
args = parser.parse_args()
return args
def main():
args = parse_args()
cfg = Config.fromfile(args.config)
if args.cfg_options is not None:
cfg.merge_from_dict(args.cfg_options)
# import modules from string list.
if cfg.get('custom_imports', None):
from mmcv.utils import import_modules_from_strings
import_modules_from_strings(**cfg['custom_imports'])
# set cudnn_benchmark
if cfg.get('cudnn_benchmark', False):
torch.backends.cudnn.benchmark = True
cfg.model.pretrained = None
cfg.data.test.test_mode = True
# build the dataloader
samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1)
if samples_per_gpu > 1:
# Replace 'ImageToTensor' to 'DefaultFormatBundle'
cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)
dataset = build_dataset(cfg.data.test)
data_loader = build_dataloader(
dataset,
samples_per_gpu=1,
workers_per_gpu=cfg.data.workers_per_gpu,
dist=False,
shuffle=False)
# build the model and load checkpoint
cfg.model.train_cfg = None
model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
fp16_cfg = cfg.get('fp16', None)
if fp16_cfg is not None:
wrap_fp16_model(model)
load_checkpoint(model, args.checkpoint, map_location='cpu')
if args.fuse_conv_bn:
model = fuse_conv_bn(model)
model = MMDataParallel(model, device_ids=[0])
model.eval()
# the first several iterations may be very slow so skip them
num_warmup = 5
pure_inf_time = 0
# benchmark with 2000 image and take the average
for i, data in enumerate(data_loader):
torch.cuda.synchronize()
start_time = time.perf_counter()
with torch.no_grad():
model(return_loss=False, rescale=True, **data)
torch.cuda.synchronize()
elapsed = time.perf_counter() - start_time
if i >= num_warmup:
pure_inf_time += elapsed
if (i + 1) % args.log_interval == 0:
fps = (i + 1 - num_warmup) / pure_inf_time
print(f'Done image [{
i + 1:<3}/ 2000], fps: {
fps:.1f} img / s')
if (i + 1) == 2000:
pure_inf_time += elapsed
fps = (i + 1 - num_warmup) / pure_inf_time
print(f'Overall fps: {
fps:.1f} img / s')
break
if __name__ == '__main__':
main()
Just remember to copy and paste.
As shown in the figure below, remember to configure the two files "config" and "checkpoint".
The running code is as follows:
python tools/analysis_tools/benchmark_new.py configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py configs/faster_rcnn_log_faster_rcnn_r50_fpn_1x_coco/latest.pth --fuse-conv-bn
The running results are as follows:
The above problem is to output each picture, if you need the average value, please use the following code:
import argparse
import time
import torch
from mmcv import Config, DictAction
from mmcv.cnn import fuse_conv_bn
from mmcv.parallel import MMDataParallel
from mmcv.runner import load_checkpoint, wrap_fp16_model
from mmdet.datasets import (build_dataloader, build_dataset,
replace_ImageToTensor)
from mmdet.models import build_detector
def parse_args():
parser = argparse.ArgumentParser(description='MMDet benchmark a model')
parser.add_argument('config', help='test config file path')
parser.add_argument('checkpoint', help='checkpoint file')
parser.add_argument(
'--log-interval', default=1, help='interval of logging')
parser.add_argument(
'--fuse-conv-bn',
action='store_true',
help='Whether to fuse conv and bn, this will slightly increase'
'the inference speed')
parser.add_argument(
'--cfg-options',
nargs='+',
action=DictAction,
help='override some settings in the used config, the key-value pair '
'in xxx=yyy format will be merged into config file. If the value to '
'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
'Note that the quotation marks are necessary and that no white space '
'is allowed.')
args = parser.parse_args()
return args
def main():
args = parse_args()
cfg = Config.fromfile(args.config)
if args.cfg_options is not None:
cfg.merge_from_dict(args.cfg_options)
# import modules from string list.
if cfg.get('custom_imports', None):
from mmcv.utils import import_modules_from_strings
import_modules_from_strings(**cfg['custom_imports'])
# set cudnn_benchmark
if cfg.get('cudnn_benchmark', False):
torch.backends.cudnn.benchmark = True
cfg.model.pretrained = None
cfg.data.test.test_mode = True
# build the dataloader
samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1)
if samples_per_gpu > 1:
# Replace 'ImageToTensor' to 'DefaultFormatBundle'
cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)
dataset = build_dataset(cfg.data.test)
data_loader = build_dataloader(
dataset,
samples_per_gpu=1,
workers_per_gpu=cfg.data.workers_per_gpu,
dist=False,
shuffle=False)
# build the model and load checkpoint
cfg.model.train_cfg = None
model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))
fp16_cfg = cfg.get('fp16', None)
if fp16_cfg is not None:
wrap_fp16_model(model)
load_checkpoint(model, args.checkpoint, map_location='cpu')
if args.fuse_conv_bn:
model = fuse_conv_bn(model)
model = MMDataParallel(model, device_ids=[0])
model.eval()
# the first several iterations may be very slow so skip them
num_warmup = 5
pure_inf_time = 0
ii = 0
sum = 0
sum1 = 0
# benchmark with 2000 image and take the average
count = 0
sum = 0
for i, data in enumerate(data_loader):
# print(i)
# print(data)
# print(0000)
torch.cuda.synchronize()
start_time = time.perf_counter()
with torch.no_grad():
model(return_loss=False, rescale=True, **data)
torch.cuda.synchronize()
elapsed = time.perf_counter() - start_time
#if i >= num_warmup:
pure_inf_time += elapsed
#if (i + 1) % args.log_interval == 0:
count += 1
print('count:',count)
fps = (i + 1 - num_warmup) / pure_inf_time
print('fps:', fps)
sum += fps
print('sum:', sum)
# print(fps)
print(sum/count)
#print(f'Done image [{i + 1:<3}/ 2000], fps: {fps:.1f} img / s')
sys.stdout = Logger(sys.stdout) # record log
# sys.stderr = Logger(sys.stderr) # record error
#print('fps: ', sum/count, 'img/s')
if (i + 1) == 2000:
pure_inf_time += elapsed
fps = (i + 1 - num_warmup) / pure_inf_time
# print(fps+'00000000')
print(f'Overall fps: {
fps:.1f} img / s')
# print(".1f")
break
import sys, os, time
sys.setrecursionlimit(3000)
class Logger(object):
def __init__(self, stream=sys.stdout):
output_dir = "./config/" # folder
if not os.path.exists(output_dir):
os.makedirs(output_dir)
#log_name = '{}.txt'.format(time.strftime('%Y-%m-%d-%H-%M',time.localtime(time.time())))
log_name_time = time.strftime('%Y-%m-%d-%H-%M-%S',time.localtime(time.time()))
log_name = log_name_time + ".txt"
filename = os.path.join(output_dir, log_name)
self.terminal = stream
self.log = open(filename, 'a+')
def write(self, message):
self.terminal.write(message)
self.log.write(message)
def flush(self):
pass
if __name__ == '__main__':
main()
5. Output json result file
This command will output “results.bbox.json” file.
python tools/test.py [configs_file] [pth] --eval-options "jsonfile_prefix=results_name" --eval bbox
Since my data set itself is in the format of VOC, but I want to generate three evaluation indicators of APs, APm, and APl. Remember, these three evaluation indicators are only available in the coco data set format. After a long time of tossing, if you want to generate these indicators with this mmdetectio, you must first convert the format of the data set to the coco format, otherwise you will always report errors.
When we convert, we use the py file that comes with mmdetection. The code is as follows:
python tools/dataset_converters/pascal_voc.py ../../../data/devkit ../../../data/coco
Mainly by using voc's xml file to generate coco's json tag file. Then, copy the picture to the coco dataset.
As shown in the figure below, one is in voc format and the other is in coco format. After conversion, it is shown in the figure below.
python tools/dataset_converters/pascal_voc.py data/VOCdevkit -o data/coco --out-format coco
After the conversion we use tools/test.py
or directly usemmdet/datasets/coco.py
As a reminder, tools/test.py
the conversion is performed while testing, and mmdet/datasets/coco.py
it is a direct test, so it is necessary to generate a ".pkl" file first, so it will be much faster. The "pkl" generation method needs to use the "pth" file to generate, you can refer to the first question.
If your method is still in voc format, you need to set the method to coco format, otherwise it will not correspond, so you 或者
will call this coco dataset when you call tools/test.py mmdet/datasets/coco.py`, which is what I said before In that format, remember that in theory you only need to change the "test" set, and the others don't need to be changed. Therefore, when you call the test set to test the three evaluation indicators you need, APs, APm, and APl.
mmdet/datasets/coco.py
Remember to change the "CLASSES" of this file, it seems to be wrong, it is not mmdet/datasets/coco.py
a file to run. However, this place is better off changing. The method seems to need to call
CLASSES = ('fire', 'yanwu',)
Let's take faster_r_cnn as an example. The modified coco method is as follows. Remember that we only modified the test set, because we are only testing and will not verify.
model = dict(
type='FasterRCNN',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch',
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
ratios=[0.5, 1.0, 2.0],
strides=[4, 8, 16, 32, 64]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0.0, 0.0, 0.0, 0.0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
roi_head=dict(
type='StandardRoIHead',
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=2,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0.0, 0.0, 0.0, 0.0],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
train_cfg=dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
match_low_quality=True,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=-1,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_pre=2000,
max_per_img=1000,
nms=dict(type='nms', iou_threshold=0.7),
min_bbox_size=0),
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
match_low_quality=False,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False)),
test_cfg=dict(
rpn=dict(
nms_pre=1000,
max_per_img=1000,
nms=dict(type='nms', iou_threshold=0.7),
min_bbox_size=0),
rcnn=dict(
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.5),
max_per_img=100)))
dataset_type = 'VOCDataset'
data_root = '/home/yuan3080/桌面/detection_paper_6/mmdetection-master1/mmdetection-master_yanhuo/data/VOCdevkit/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1000, 600),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img'])
])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type='RepeatDataset',
times=3,
dataset=dict(
type='VOCDataset',
ann_file=[
'/home/yuan3080/桌面/detection_paper_6/mmdetection-master1/mmdetection-master_yanhuo/data/VOCdevkit/VOC2007/ImageSets/Main/trainval.txt',
'/home/yuan3080/桌面/detection_paper_6/mmdetection-master1/mmdetection-master_yanhuo/data/VOCdevkit/VOC2007/ImageSets/Main/trainval.txt'
],
img_prefix=[
'/home/yuan3080/桌面/detection_paper_6/mmdetection-master1/mmdetection-master_yanhuo/data/VOCdevkit/VOC2007/',
'/home/yuan3080/桌面/detection_paper_6/mmdetection-master1/mmdetection-master_yanhuo/data/VOCdevkit/VOC2007/'
],
pipeline=[
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
])),
val=dict(
type='VOCDataset',
ann_file=
'/home/yuan3080/桌面/detection_paper_6/mmdetection-master1/mmdetection-master_yanhuo/data/VOCdevkit/VOC2007/ImageSets/Main/test.txt',
img_prefix=
'/home/yuan3080/桌面/detection_paper_6/mmdetection-master1/mmdetection-master_yanhuo/data/VOCdevkit/VOC2007/',
pipeline=[
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1000, 600),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img'])
])
]),
test=dict(
# 1
type='CocoDataset',
ann_file= 'data/coco/voc07_test.json',
img_prefix="data/VOCdevkit/",
#
pipeline=[
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1000, 600),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img'])
])
]))
evaluation = dict(interval=1, metric='bbox') #
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[8, 11])
runner = dict(type='EpochBasedRunner', max_epochs=100)
checkpoint_config = dict(interval=20)
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
custom_hooks = [dict(type='NumClassCheckHook')]
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
opencv_num_threads = 0
mp_start_method = 'fork'
auto_scale_lr = dict(enable=False, base_batch_size=32)
work_dir = 'configs/faster_rcnn_log_faster_rcnn_r50_fpn_1x_coco/'
auto_resume = False
gpu_ids = [0]
Remember to observe the path and type below. It seems that there is no need to change the others. If you need to change it, you can try more.
The code running under the "eval_metric.py" file is as follows:
python tools/analysis_tools/eval_metric.py configs/faster_rcnn_log_faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco1.py result.pkl --eval bbox
The resulting effect is shown below.
The code to run under the "test.py" file is as follows:
python tools/test.py configs/faster_rcnn_log_faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco1.py configs/faster_rcnn_log_faster_rcnn_r50_fpn_1x_coco/latest.pth --eval bbox
The effect is as follows.