利用SAM实现自动标注
目录
一、下载安装
1.1 下载SAM-Tool和SAM
git clone https://github.com/zhouayi/SAM-Tool.git
git clone https://github.com/facebookresearch/segment-anything.git
1.2 下载SAM模型文件
wget https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth
1.3 安装SAM
进入SAM文件夹:cd segment-anything
安装SAM:pip install -e .
二、配置项目
(1)将SAM-Tool中的helpers文件夹复制到SAM主文件夹中(在SAM-Tool目录下执行):cp -r helpers/ ../segment-anything/
(2)构建数据文件夹格式为dataset_path/images和dataset_path/embeddings(images里用来放图片)
三、提取信息
(1)提取图片embedding:python helpers/extract_embeddings.py --checkpoint-path sam_vit_h_4b8939.pth --dataset-folder 你的数据集地址 --device cuda
(2)运行完在embeddings下会有对应的npy文件
四、获得SAM onnx文件
(1)首先环境中要有onnx和onnxruntime(-gpu)
(2)生成onnx模型文件(--orig-im-size后面依次为图片的高、宽,可按实际图片尺寸调整):python helpers/generate_onnx.py --checkpoint-path sam_vit_h_4b8939.pth --onnx-model-path ./sam_onnx.onnx --orig-im-size 720 1280
(3)运行完会有对应的sam_onnx.onnx文件,将其复制到SAM-Tool主文件夹中:cp sam_onnx.onnx ../SAM-Tool/
五、标注
(1)opencv库要删掉opencv-python,改用headless版的:pip install opencv-python-headless
(2)调用标注程序命令为(后面可改标注类别,用逗号隔开):python segment_anything_annotator.py --onnx-model-path sam_onnx.onnx --dataset-path /home/dxfcv/workspace/sunsirui/label/segment-anything/dataset --categories cement,metal,plastics
(3)标注框出现后开始标注,首先点击目标,然后添加对象,最后选定类别,接下来下一张,标完所有就保存,最后会在数据文件夹下出现annotations.json的coco格式文件
(4)查看标注文件:python cocoviewer.py -i ../segment-anything/dataset/ -a ../segment-anything/dataset/annotations.json
六、格式转换
6.1 json转voc
import json
import xml.etree.ElementTree as ET
import os
jsonPath = "/home/dxfcv/workspace/sunsirui/label/dataset/annotations.json"  # input COCO json; change this
vocPath = "/home/dxfcv/workspace/sunsirui/label/dataset/voc"  # output folder for the VOC xml files; change this


def convert_coco_to_voc(json_path, voc_path):
    """Write one Pascal VOC XML file per image listed in a COCO json file.

    Args:
        json_path: Path of the COCO annotation file. Its ``images``,
            ``annotations`` and ``categories`` lists are read.
        voc_path: Output directory; it is created when missing. Each image
            produces ``<image file stem>.xml`` inside it.

    Raises:
        KeyError: If an annotation refers to a category id that does not
            appear in ``categories``.
    """
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Resolve class names through the category "id" field instead of the
    # list position, so ids that do not start at 0 still map correctly.
    category_names = {cat['id']: cat['name'] for cat in data['categories']}

    # Group the annotations per image once instead of rescanning the whole
    # annotation list for every image.
    anns_by_image = {}
    for ann_data in data['annotations']:
        anns_by_image.setdefault(ann_data['image_id'], []).append(ann_data)

    os.makedirs(voc_path, exist_ok=True)

    for img_data in data['images']:
        image_name = os.path.basename(img_data["file_name"])

        # Build the VOC XML header for this image.
        xml_file = ET.Element('annotation')
        ET.SubElement(xml_file, 'folder').text = 'VOC'
        ET.SubElement(xml_file, 'filename').text = image_name
        source = ET.SubElement(xml_file, 'source')
        ET.SubElement(source, 'database').text = 'My Database'
        ET.SubElement(source, 'annotation').text = 'COCO'
        ET.SubElement(source, 'image').text = 'flickr'
        size = ET.SubElement(xml_file, 'size')
        ET.SubElement(size, 'width').text = str(img_data['width'])
        ET.SubElement(size, 'height').text = str(img_data['height'])
        ET.SubElement(size, 'depth').text = '3'
        ET.SubElement(xml_file, 'segmented').text = '0'

        for ann_data in anns_by_image.get(img_data['id'], []):
            # COCO stores [x, y, width, height]; VOC wants the two corners.
            bbox = ann_data['bbox']
            x_min = bbox[0]
            y_min = bbox[1]
            x_max = bbox[0] + bbox[2]
            y_max = bbox[1] + bbox[3]

            obj = ET.SubElement(xml_file, 'object')
            ET.SubElement(obj, 'name').text = category_names[ann_data['category_id']]
            ET.SubElement(obj, 'pose').text = 'Unspecified'
            ET.SubElement(obj, 'truncated').text = '0'
            ET.SubElement(obj, 'difficult').text = '0'
            bndbox = ET.SubElement(obj, 'bndbox')
            ET.SubElement(bndbox, 'xmin').text = str(int(x_min))
            ET.SubElement(bndbox, 'ymin').text = str(int(y_min))
            ET.SubElement(bndbox, 'xmax').text = str(int(x_max))
            ET.SubElement(bndbox, 'ymax').text = str(int(y_max))

        # Swap the extension with splitext so non-.jpg images also get a
        # proper .xml name rather than a file named after the image itself.
        xml_name = os.path.splitext(image_name)[0] + '.xml'
        with open(os.path.join(voc_path, xml_name), 'wb') as f:
            f.write(ET.tostring(xml_file))


if __name__ == '__main__':
    convert_coco_to_voc(jsonPath, vocPath)
6.2 json转yolo
import os
import json
from tqdm import tqdm
import argparse
def convert(size, box):
    """Normalise a COCO-style box into YOLO centre/size fractions.

    size: image width and height, as (w, h)
    box:  x, y, w, h, where x, y is the corner the box extends from
    Returns (x_center / image_width, y_center / image_height,
             width / image_width, height / image_height).
    """
    # Reciprocals are taken first and then multiplied in, so the floating
    # point results match a plain "value * (1 / dimension)" computation.
    scale_x = 1. / (size[0])
    scale_y = 1. / (size[1])

    center_x = box[0] + box[2] / 2.0
    center_y = box[1] + box[3] / 2.0

    return (
        center_x * scale_x,
        center_y * scale_y,
        box[2] * scale_x,
        box[3] * scale_y,
    )
def export_yolo_labels(json_file, save_dir):
    """Convert a COCO annotation file into per-image YOLO ``.txt`` labels.

    Writes ``classes.txt`` (one category name per line, in the order of the
    ``categories`` list) and one ``<image stem>.txt`` per image into
    ``save_dir``. Each label line is
    ``class_index x_center y_center width height`` with the box values
    produced by ``convert``. Images without annotations get an empty file.

    Args:
        json_file: Path of the COCO json file.
        save_dir: Directory receiving the label files; created when missing.
    """
    with open(json_file, 'r') as f:
        data = json.load(f)

    # Create the output folder when it does not exist yet.
    os.makedirs(save_dir, exist_ok=True)

    # Map each COCO category id to its 0-based position, and record the
    # class names in that same order in classes.txt.
    id_map = {}
    with open(os.path.join(save_dir, 'classes.txt'), 'w') as f:
        for i, category in enumerate(data['categories']):
            f.write(f"{category['name']}\n")
            id_map[category['id']] = i

    # Group the annotations per image once instead of rescanning the whole
    # annotation list for every image.
    anns_by_image = {}
    for ann in data['annotations']:
        anns_by_image.setdefault(ann['image_id'], []).append(ann)

    for img in tqdm(data['images']):
        # basename copes with file names that have no directory part or a
        # deeper path, where split('/')[1] fails or picks a directory.
        filename = os.path.basename(img["file_name"])
        img_size = (img["width"], img["height"])

        # The label file shares the image's name, only the suffix differs.
        head, _ = os.path.splitext(filename)
        txt_name = head + ".txt"

        with open(os.path.join(save_dir, txt_name), 'w') as f_txt:
            for ann in anns_by_image.get(img["id"], []):
                box = convert(img_size, ann["bbox"])
                # Five fields per line: class index plus the normalised box.
                f_txt.write("%s %s %s %s %s\n" % (
                    id_map[ann["category_id"]], box[0], box[1], box[2], box[3]))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--json_file', default='/home/dxfcv/workspace/sunsirui/label/dataset/annotations.json',
                        type=str, help="coco file path")
    parser.add_argument('--save_dir', default='/home/dxfcv/workspace/sunsirui/label/dataset/labels', type=str,
                        help="where to save .txt labels")
    arg = parser.parse_args()
    export_yolo_labels(arg.json_file, arg.save_dir)