跟着官方demo一起做
参考自:
github-facebookresearch detectron2
detectron2 官方教程
detectron2 官方demo
beginner tutorials
install detectron2
参考之前的文章:windows安装detectron2
安装完成,开始下一步,学会如何运行detectron2,完成第一个demo。
跟着官方demo一起做
1. Run a pre-train detectron2 model
先导入需要的模块。
import cv2
import os
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog
下载一张COCO数据集的图片。
# Load a sample COCO image from disk and display it.
im = cv2.imread("1.jpg")
# cv2.imread returns None (instead of raising) when the file is missing or
# unreadable; fail fast here rather than crash cryptically inside imshow.
if im is None:
    raise FileNotFoundError("could not read image '1.jpg'")
cv2.imshow("image1", im)
cv2.waitKey(0)
加载配置文件cfg
model_zoo
相当于一系列现成模型的集合。
其中,WEIGHTS 指定模型的权重文件。
如果没有,会自动下载。这里可能速度会很慢。可以点这里 。下载之后,文件会存在C:\Users\your_name\.torch\iopath_cache\detectron2\COCO-InstanceSegmentation\mask_rcnn_R_50_FPN_3x\137849600
下。
# Build the config: start from the Mask R-CNN R50-FPN 3x baseline shipped
# with the model zoo, then override the test-time score threshold.
CONFIG_FILE = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(CONFIG_FILE))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # drop detections below 0.5 confidence
# Pretrained weights from the model zoo; downloaded automatically if absent.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(CONFIG_FILE)
predictor = DefaultPredictor(cfg)
outputs = predictor(im)
可视化输出
# Draw the predictions on top of the input image.
# OpenCV images are BGR while Visualizer expects RGB, hence [:, :, ::-1].
metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])
v = Visualizer(im[:, :, ::-1], metadata, scale=1.2)
out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
cv2.imshow('image2', out.get_image()[:, :, ::-1])
cv2.waitKey(0)
完整代码如下:
import cv2
import os
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog
if __name__ == "__main__":
    # Load and show the input image (BGR, as read by OpenCV).
    im = cv2.imread("1.jpg")
    # cv2.imread returns None on a missing/unreadable file; fail fast with a
    # clear message instead of crashing later inside imshow/predictor.
    if im is None:
        raise FileNotFoundError("could not read image '1.jpg'")
    cv2.imshow("image1", im)
    cv2.waitKey(0)

    # Configure a pretrained Mask R-CNN (R50-FPN, 3x schedule) from the model zoo.
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
    # Weights come from the model zoo; downloaded automatically if absent.
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
    predictor = DefaultPredictor(cfg)

    # Run inference and inspect the predicted classes and boxes.
    outputs = predictor(im)
    print(outputs["instances"].pred_classes)
    print(outputs["instances"].pred_boxes)

    # Visualize: Visualizer expects RGB, OpenCV delivers BGR, hence [:, :, ::-1].
    v = Visualizer(im[:, :, ::-1],
                   MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
                   scale=1.2)
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    cv2.imshow('image2', out.get_image()[:, :, ::-1])
    cv2.waitKey(0)
2. Train on a custom dataset
这里,我们需要
- 训练模型
- 自定义数据集
下载新的数据集。
这个数据集只有一个分类。我们需要做的是 balloon(气球)的识别。
先导入需要的模块
import os
import json
import random
import cv2
from detectron2.engine.defaults import DefaultPredictor
from detectron2.model_zoo.model_zoo import get
import numpy as np
from detectron2.structures import BoxMode
from detectron2.data import DatasetCatalog,MetadataCatalog
from detectron2.utils.visualizer import Visualizer,ColorMode
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.evaluation import COCOEvaluator,inference_on_dataset
from detectron2.data import build_detection_test_loader
# Root directory of the balloon dataset; expected to contain the
# "train" and "val" subfolders (names are concatenated as img_dir + split).
img_dir = "your_dir"
# The single object category in this dataset; also used as the dataset-name prefix.
class_names = "balloon"
数据集加载
接着,我们需要把balloon
数据集 转换成detectron2
的格式,并在上面register
。
这里用到一个函数DatasetCatalog.register(dataset_name,function)
function
被调用之后,会返回一个包含所有图片对象的list
dataset_dicts
每个对象包含file_name
,height
,width
等属性。具体看这里。我们需要做的就是,构建、封装这样的对象。
def get_ballon_dicts(img_dir):
    """Convert VIA balloon annotations into detectron2's dataset-dict format.

    Args:
        img_dir: directory containing the images and a "via_region_data.json"
            annotation file (exported by the VIA annotation tool).

    Returns:
        A list with one record per image; each record holds "file_name",
        "image_id", "height", "width" and an "annotations" list of instances.

    Raises:
        ValueError: if a region carries unexpected region_attributes.
    """
    # Locate and parse the annotation file.
    json_file = os.path.join(img_dir, "via_region_data.json")
    with open(json_file) as f:
        imgs_anns = json.load(f)

    # Example of one imgs_anns value:
    # {
    #   "fileref": "", "size": "1115004",
    #   "filename": "34020010494_e5cb88e1c4_k.jpg",
    #   "base64_img_data": "", "file_attributes": {},
    #   "regions": {
    #     "0": {
    #       "shape_attributes": {"name": "polygon",
    #                            "all_points_x": [...], "all_points_y": [...]},
    #       "region_attributes": {}
    #     }
    #   }
    # }
    dataset_dicts = []
    # Iterate over every annotated image.
    for idx, v in enumerate(imgs_anns.values()):
        record = {}

        filename = os.path.join(img_dir, v["filename"])
        # Read the image only to obtain its height and width.
        height, width = cv2.imread(filename=filename).shape[:2]

        record["file_name"] = filename
        record["image_id"] = idx
        record["height"] = height
        record["width"] = width

        # One image may contain several annotated balloon regions,
        # keyed by region index ("0", "1", ...).
        annos = v["regions"]
        objs = []
        for _, anno in annos.items():
            # This dataset never uses per-region attributes. Raise explicitly
            # rather than assert: asserts are stripped under "python -O".
            if anno["region_attributes"]:
                raise ValueError("unexpected non-empty region_attributes")
            anno = anno["shape_attributes"]
            px = anno["all_points_x"]
            py = anno["all_points_y"]
            # Shift polygon vertices to pixel centers, then flatten
            # [(x1, y1), (x2, y2), ...] -> [x1, y1, x2, y2, ...].
            poly = [(x + 0.5, y + 0.5) for x, y in zip(px, py)]
            poly = [coord for point in poly for coord in point]

            obj = {
                # Axis-aligned bounding box: top-left and bottom-right corners.
                "bbox": [
                    np.min(px),
                    np.min(py),
                    np.max(px),
                    np.max(py)
                ],
                "bbox_mode": BoxMode.XYXY_ABS,
                "segmentation": [poly],
                # There is only the single "balloon" class.
                "category_id": 0
            }
            objs.append(obj)
        # end-for annos.items(): done with every region of this image
        record["annotations"] = objs
        dataset_dicts.append(record)
    # end-for imgs_anns.values(): done with every image
    return dataset_dicts
数据集加载测试
我们使用def test_data_loading()
函数来验证,数据集是否正常加载。
def test_data_loading():
    """Register the balloon splits and visually spot-check three random samples."""
    for split in ["train", "val"]:
        # Bind the current split as a default argument so each lambda
        # captures its own value.
        DatasetCatalog.register(class_names + "_" + split,
                                lambda split=split: get_ballon_dicts(img_dir + split))
        MetadataCatalog.get(class_names + "_" + split).set(thing_classes=[class_names])
    balloon_metadata = MetadataCatalog.get(class_names + "_train")
    dataset_dicts = get_ballon_dicts(img_dir + "train")
    # Draw the ground-truth annotations of 3 randomly chosen images.
    for sample in random.sample(dataset_dicts, 3):
        img = cv2.imread(sample["file_name"])
        visualizer = Visualizer(img[:, :, ::-1], metadata=balloon_metadata, scale=0.5)
        drawn = visualizer.draw_dataset_dict(sample)
        cv2.imshow(sample["file_name"], drawn.get_image()[:, :, ::-1])
        cv2.waitKey(0)
训练
接着,我们就要开始训练模型了。
注意要使用GPU,即cfg.MODEL.DEVICE = "cuda" # cpu or cuda
。
CPU某些操作可能不支持,导致程序假死。
训练前,记得需要注册数据集。
# Register both dataset splits with detectron2 before training.
for d in ["train", "val"]:
    # Bind the current split as a default argument so each lambda keeps its
    # own value (late-binding closure pitfall); the parameter is named
    # "split" to avoid shadowing the builtin dir().
    DatasetCatalog.register(class_names + "_" + d,
                            lambda split=d: get_ballon_dicts(img_dir + split))
    MetadataCatalog.get(class_names + "_" + d).set(thing_classes=[class_names])
def train():
    """Fine-tune a COCO-pretrained Mask R-CNN on the balloon training split."""
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    # Start from COCO-pretrained weights (downloaded automatically if absent).
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
    cfg.MODEL.DEVICE = "cuda"  # cpu or cuda; some ops may hang on CPU
    cfg.DATASETS.TRAIN = (class_names + "_train",)
    cfg.DATASETS.TEST = ()
    cfg.DATALOADER.NUM_WORKERS = 2
    cfg.SOLVER.IMS_PER_BATCH = 2
    cfg.SOLVER.BASE_LR = 0.00025  # learning rate
    cfg.SOLVER.MAX_ITER = 300     # total iterations
    cfg.SOLVER.STEPS = []         # no learning-rate decay
    # Shrink training images so a small GPU can cope.
    cfg.INPUT.MIN_SIZE_TRAIN = (64, 64)
    # RoI mini-batch size per image (detectron2 default is 512).
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 8
    # Only the single "balloon" class.
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    trainer = DefaultTrainer(cfg)
    trainer.resume_or_load(resume=False)
    trainer.train()
评估模型
def evaluate():
    """Run the fine-tuned model on random val images and display the predictions."""
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    # Weights produced by train() in the output directory.
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7
    cfg.DATALOADER.NUM_WORKERS = 2
    cfg.MODEL.DEVICE = "cuda"  # cpu or cuda
    cfg.SOLVER.IMS_PER_BATCH = 2
    # Only the single "balloon" class.
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
    predictor = DefaultPredictor(cfg)

    dataset_dicts = get_ballon_dicts(img_dir + "val")
    balloon_metadata = MetadataCatalog.get(class_names + "_val")
    # Visualize predictions on a few random validation samples.
    for sample in random.sample(dataset_dicts, 3):
        image = cv2.imread(sample["file_name"])
        outputs = predictor(image)
        drawer = Visualizer(image[:, :, ::-1],
                            metadata=balloon_metadata,
                            scale=0.5,
                            instance_mode=ColorMode.IMAGE_BW)
        result = drawer.draw_instance_predictions(outputs["instances"].to("cpu"))
        cv2.imshow(sample["file_name"], result.get_image()[:, :, ::-1])
        cv2.waitKey(0)
使用COCO数据集标准评估
这里可能有点错误。仅参考即可。
def evaluate_coco():
    """Evaluate the fine-tuned model on the val split with the COCO AP metric.

    Prints the AP results returned by inference_on_dataset.
    """
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    cfg.DATASETS.TRAIN = (class_names + "_train",)
    cfg.DATASETS.TEST = ()
    # Weights produced by train() in the output directory.
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7
    cfg.DATALOADER.NUM_WORKERS = 2
    cfg.MODEL.DEVICE = "cuda"  # cpu or cuda
    cfg.SOLVER.IMS_PER_BATCH = 2
    # Only the single "balloon" class.
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
    trainer = DefaultTrainer(cfg)
    # BUG FIX: DefaultTrainer builds a randomly initialized model; the
    # fine-tuned weights in cfg.MODEL.WEIGHTS are only loaded by
    # resume_or_load(). Without this call the evaluation below would run
    # on an untrained model.
    trainer.resume_or_load(resume=False)
    evaluator = COCOEvaluator(dataset_name=class_names + "_val", cfg=cfg,
                              distributed=True, output_dir=cfg.OUTPUT_DIR)
    val_loader = build_detection_test_loader(cfg, class_names + "_val")
    print(inference_on_dataset(trainer.model, val_loader, evaluator))
完整代码
import os
import json
import random
import cv2
from detectron2.engine.defaults import DefaultPredictor
from detectron2.model_zoo.model_zoo import get
import numpy as np
from detectron2.structures import BoxMode
from detectron2.data import DatasetCatalog,MetadataCatalog
from detectron2.utils.visualizer import Visualizer,ColorMode
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.evaluation import COCOEvaluator,inference_on_dataset
from detectron2.data import build_detection_test_loader
# Root directory of the balloon dataset; expected to contain the
# "train" and "val" subfolders (names are concatenated as img_dir + split).
img_dir = "your_dir"
# The single object category in this dataset; also used as the dataset-name prefix.
class_names = "balloon"
def get_ballon_dicts(img_dir):
    """Convert VIA balloon annotations into detectron2's dataset-dict format.

    Args:
        img_dir: directory containing the images and a "via_region_data.json"
            annotation file (exported by the VIA annotation tool).

    Returns:
        A list with one record per image; each record holds "file_name",
        "image_id", "height", "width" and an "annotations" list of instances.

    Raises:
        ValueError: if a region carries unexpected region_attributes.
    """
    # Locate and parse the annotation file.
    json_file = os.path.join(img_dir, "via_region_data.json")
    with open(json_file) as f:
        imgs_anns = json.load(f)

    # Example of one imgs_anns value:
    # {
    #   "fileref": "", "size": "1115004",
    #   "filename": "34020010494_e5cb88e1c4_k.jpg",
    #   "base64_img_data": "", "file_attributes": {},
    #   "regions": {
    #     "0": {
    #       "shape_attributes": {"name": "polygon",
    #                            "all_points_x": [...], "all_points_y": [...]},
    #       "region_attributes": {}
    #     }
    #   }
    # }
    dataset_dicts = []
    # Iterate over every annotated image.
    for idx, v in enumerate(imgs_anns.values()):
        record = {}

        filename = os.path.join(img_dir, v["filename"])
        # Read the image only to obtain its height and width.
        height, width = cv2.imread(filename=filename).shape[:2]

        record["file_name"] = filename
        record["image_id"] = idx
        record["height"] = height
        record["width"] = width

        # One image may contain several annotated balloon regions,
        # keyed by region index ("0", "1", ...).
        annos = v["regions"]
        objs = []
        for _, anno in annos.items():
            # This dataset never uses per-region attributes. Raise explicitly
            # rather than assert: asserts are stripped under "python -O".
            if anno["region_attributes"]:
                raise ValueError("unexpected non-empty region_attributes")
            anno = anno["shape_attributes"]
            px = anno["all_points_x"]
            py = anno["all_points_y"]
            # Shift polygon vertices to pixel centers, then flatten
            # [(x1, y1), (x2, y2), ...] -> [x1, y1, x2, y2, ...].
            poly = [(x + 0.5, y + 0.5) for x, y in zip(px, py)]
            poly = [coord for point in poly for coord in point]

            obj = {
                # Axis-aligned bounding box: top-left and bottom-right corners.
                "bbox": [
                    np.min(px),
                    np.min(py),
                    np.max(px),
                    np.max(py)
                ],
                "bbox_mode": BoxMode.XYXY_ABS,
                "segmentation": [poly],
                # There is only the single "balloon" class.
                "category_id": 0
            }
            objs.append(obj)
        # end-for annos.items(): done with every region of this image
        record["annotations"] = objs
        dataset_dicts.append(record)
    # end-for imgs_anns.values(): done with every image
    return dataset_dicts
def test_data_loading():
    """Register the balloon splits and visually spot-check one random sample."""
    for split in ["train", "val"]:
        # Bind the current split as a default argument so each lambda
        # captures its own value.
        DatasetCatalog.register(class_names + "_" + split,
                                lambda split=split: get_ballon_dicts(img_dir + split))
        MetadataCatalog.get(class_names + "_" + split).set(thing_classes=[class_names])
    balloon_metadata = MetadataCatalog.get(class_names + "_train")
    dataset_dicts = get_ballon_dicts(img_dir + "train")
    # Draw the ground-truth annotations of one randomly chosen image.
    for sample in random.sample(dataset_dicts, 1):
        img = cv2.imread(sample["file_name"])
        visualizer = Visualizer(img[:, :, ::-1], metadata=balloon_metadata, scale=0.5)
        drawn = visualizer.draw_dataset_dict(sample)
        cv2.imshow(sample["file_name"], drawn.get_image()[:, :, ::-1])
        cv2.waitKey(0)
def train():
    """Fine-tune a COCO-pretrained Mask R-CNN on the balloon training split."""
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    # Start from COCO-pretrained weights (downloaded automatically if absent).
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
    cfg.MODEL.DEVICE = "cuda"  # cpu or cuda; some ops may hang on CPU
    cfg.DATASETS.TRAIN = (class_names + "_train",)
    cfg.DATASETS.TEST = ()
    cfg.DATALOADER.NUM_WORKERS = 2
    cfg.SOLVER.IMS_PER_BATCH = 2
    cfg.SOLVER.BASE_LR = 0.00025  # learning rate
    cfg.SOLVER.MAX_ITER = 300     # total iterations
    cfg.SOLVER.STEPS = []         # no learning-rate decay
    # Shrink training images so a small GPU can cope.
    cfg.INPUT.MIN_SIZE_TRAIN = (64, 64)
    # RoI mini-batch size per image (detectron2 default is 512).
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 8
    # Only the single "balloon" class.
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    trainer = DefaultTrainer(cfg)
    trainer.resume_or_load(resume=False)
    trainer.train()
def evaluate():
    """Run the fine-tuned model on random val images and display the predictions."""
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    # Weights produced by train() in the output directory.
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7
    cfg.DATALOADER.NUM_WORKERS = 2
    cfg.MODEL.DEVICE = "cuda"  # cpu or cuda
    cfg.SOLVER.IMS_PER_BATCH = 2
    # Only the single "balloon" class.
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
    predictor = DefaultPredictor(cfg)

    dataset_dicts = get_ballon_dicts(img_dir + "val")
    balloon_metadata = MetadataCatalog.get(class_names + "_val")
    # Visualize predictions on a few random validation samples.
    for sample in random.sample(dataset_dicts, 3):
        image = cv2.imread(sample["file_name"])
        outputs = predictor(image)
        drawer = Visualizer(image[:, :, ::-1],
                            metadata=balloon_metadata,
                            scale=0.5,
                            instance_mode=ColorMode.IMAGE_BW)
        result = drawer.draw_instance_predictions(outputs["instances"].to("cpu"))
        cv2.imshow(sample["file_name"], result.get_image()[:, :, ::-1])
        cv2.waitKey(0)
def evaluate_coco():
    """Evaluate the fine-tuned model on the val split with the COCO AP metric.

    Prints the AP results returned by inference_on_dataset.
    """
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    cfg.DATASETS.TRAIN = (class_names + "_train",)
    cfg.DATASETS.TEST = ()
    # Weights produced by train() in the output directory.
    cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7
    cfg.DATALOADER.NUM_WORKERS = 2
    cfg.MODEL.DEVICE = "cuda"  # cpu or cuda
    cfg.SOLVER.IMS_PER_BATCH = 2
    # Only the single "balloon" class.
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
    trainer = DefaultTrainer(cfg)
    # BUG FIX: DefaultTrainer builds a randomly initialized model; the
    # fine-tuned weights in cfg.MODEL.WEIGHTS are only loaded by
    # resume_or_load(). Without this call the evaluation below would run
    # on an untrained model.
    trainer.resume_or_load(resume=False)
    evaluator = COCOEvaluator(dataset_name=class_names + "_val", cfg=cfg,
                              distributed=True, output_dir=cfg.OUTPUT_DIR)
    val_loader = build_detection_test_loader(cfg, class_names + "_val")
    print(inference_on_dataset(trainer.model, val_loader, evaluator))
if __name__ == "__main__":
    # Register the balloon splits with detectron2 before anything uses them.
    for d in ["train", "val"]:
        # Bind the split via a default argument (late-binding closure pitfall);
        # the parameter is named "split" to avoid shadowing the builtin dir().
        DatasetCatalog.register(class_names + "_" + d,
                                lambda split=d: get_ballon_dicts(img_dir + split))
        MetadataCatalog.get(class_names + "_" + d).set(thing_classes=[class_names])
    # test_data_loading()  # NOTE: also registers the splits; run it INSTEAD of
    #                      # the loop above, not after it, or registration fails.
    # Train the model.
    train()
    # Predict / evaluate.
    # evaluate()
    # evaluate_coco()