Using SAM for Automatic Labeling

1. Download and install

1.1 Download SAM-Tool and SAM

git clone https://github.com/zhouayi/SAM-Tool.git
git clone https://github.com/facebookresearch/segment-anything.git

1.2 Download the SAM model file

wget https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth

1.3 Install SAM

Enter the SAM folder: cd segment-anything
Install SAM: pip install -e .
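
To confirm the install works, here is a minimal check (assumes the checkpoint from step 1.2 sits in the current directory):

import torch
from segment_anything import sam_model_registry

# Load the ViT-H checkpoint downloaded in step 1.2
sam = sam_model_registry["vit_h"](checkpoint="sam_vit_h_4b8939.pth")
sam.to("cuda" if torch.cuda.is_available() else "cpu")
print("SAM loaded OK")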

2. Configuration items

(1) Copy the helpers folder from SAM-Tool into the SAM main folder: cp -r helpers/ …/segment-anything/
(2) Build the dataset folder structure as dataset_path/images and dataset_path/embeddings (the images to label go into images; a minimal sketch follows)
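
A minimal sketch that creates this layout (the dataset_path below is a placeholder; use your own):

import os

dataset_path = "/path/to/dataset"  # placeholder, replace with your own path
os.makedirs(os.path.join(dataset_path, "images"), exist_ok=True)
os.makedirs(os.path.join(dataset_path, "embeddings"), exist_ok=True)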

3. Extract image embeddings

(1) Extract the image embeddings: python helpers/extract_embeddings.py --checkpoint-path sam_vit_h_4b8939.pth --dataset-folder <your dataset path> --device cuda
(2) After it finishes, a corresponding .npy file appears under embeddings for each image, which can be checked as below
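
A quick way to verify the output (the embeddings path is a placeholder; the exact shape depends on the SAM image encoder):

import os
import numpy as np

emb_dir = "/path/to/dataset/embeddings"  # placeholder path
first = sorted(f for f in os.listdir(emb_dir) if f.endswith(".npy"))[0]
emb = np.load(os.path.join(emb_dir, first))
print(first, emb.shape)  # ViT-H image embeddings are typically (1, 256, 64, 64)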

4. Get the SAM ONNX file

(1) First, make sure onnx and onnxruntime (or onnxruntime-gpu) are installed in the environment
(2) Generate the ONNX model (the image height/width below can be adjusted): python helpers/generate_onnx.py --checkpoint-path sam_vit_h_4b8939.pth --onnx-model-path ./sam_onnx.onnx --orig-im-size 720 1280
(3) After running, a sam_onnx.onnx file is produced; move it to the SAM-Tool main folder: cp sam_onnx.onnx …/SAM-Tool/ (a quick load check follows)
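
Before labeling, it is worth checking that the exported file loads; a minimal sketch using the onnxruntime installed in step (1):

import onnxruntime as ort

sess = ort.InferenceSession("sam_onnx.onnx", providers=["CPUExecutionProvider"])
print([i.name for i in sess.get_inputs()])  # the SAM decoder expects the image embedding plus prompt inputs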

5. Labeling

(1) For OpenCV, uninstall opencv-python and install the headless build instead: pip install opencv-python-headless
(2) Launch the annotation program (the categories after --categories can be changed; separate them with commas): python segment_anything_annotator.py --onnx-model-path sam_onnx.onnx --dataset-path /home/dxfcv/workspace/sunsirui/label/segment-anything/dataset --categories cement,metal,plastics
(3) Once the labeling window appears, annotate each image: first click the target, then add the object, then select its category, and move on to the next image. Save everything when done; a COCO-format annotations.json will appear under the dataset folder
(4) View the annotation file: python cocoviewer.py -i .../segment-anything/dataset/ -a .../segment-anything/dataset/annotations.json (or inspect it directly with the sketch below)
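
Alternatively, annotations.json can be sanity-checked directly; a minimal sketch (the path is a placeholder):

import json
from collections import Counter

with open("/path/to/dataset/annotations.json") as f:  # placeholder path
    coco = json.load(f)

print(len(coco["images"]), "images,", len(coco["annotations"]), "annotations")
print(Counter(a["category_id"] for a in coco["annotations"]))  # instances per category id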

6. Format conversion

6.1 Convert JSON to VOC

import json
import xml.etree.ElementTree as ET
import os

jsonPath = "/home/dxfcv/workspace/sunsirui/label/dataset/annotations.json"  # change this to your input JSON
vocPath = "/home/dxfcv/workspace/sunsirui/label/dataset/voc"  # change this to your output folder
os.makedirs(vocPath, exist_ok=True)  # make sure the output folder exists
with open(jsonPath, 'r') as f:
    data = json.load(f)

info = data["info"]
images = data["images"]
annotations = data["annotations"]
categories = data["categories"]
# COCO category ids are not guaranteed to be list indices, so build an id -> name lookup
category_map = {c["id"]: c["name"] for c in categories}

# Process each image
for img_data in images:
    # Create the VOC XML tree for this image
    xml_file = ET.Element('annotation')
    ET.SubElement(xml_file, 'folder').text = 'VOC'
    ET.SubElement(xml_file, 'filename').text = os.path.basename(img_data["file_name"])
    source = ET.SubElement(xml_file, 'source')
    ET.SubElement(source, 'database').text = 'My Database'
    ET.SubElement(source, 'annotation').text = 'COCO'
    ET.SubElement(source, 'image').text = 'flickr'
    size = ET.SubElement(xml_file, 'size')
    ET.SubElement(size, 'width').text = str(img_data['width'])
    ET.SubElement(size, 'height').text = str(img_data['height'])
    ET.SubElement(size, 'depth').text = '3'
    ET.SubElement(xml_file, 'segmented').text = '0'

    # Collect all boxes annotated on this image
    bbox_list = []
    category_ids = []
    for ann_data in annotations:
        if ann_data['image_id'] == img_data['id']:
            bbox = ann_data['bbox']
            bbox_list.append(bbox)
            category_ids.append(ann_data['category_id'])

    # Process each box
    for i in range(len(bbox_list)):
        bbox = bbox_list[i]
        category_id = category_ids[i]
        # Convert COCO (x, y, w, h) to VOC (xmin, ymin, xmax, ymax)
        x_min = bbox[0]
        y_min = bbox[1]
        x_max = bbox[0] + bbox[2]
        y_max = bbox[1] + bbox[3]
        class_name = category_map[category_id]  # look up by id, not by list index
        # Create the VOC XML object entry
        obj = ET.SubElement(xml_file, 'object')
        ET.SubElement(obj, 'name').text = class_name
        ET.SubElement(obj, 'pose').text = 'Unspecified'
        ET.SubElement(obj, 'truncated').text = '0'
        ET.SubElement(obj, 'difficult').text = '0'
        bndbox = ET.SubElement(obj, 'bndbox')
        ET.SubElement(bndbox, 'xmin').text = str(int(x_min))
        ET.SubElement(bndbox, 'ymin').text = str(int(y_min))
        ET.SubElement(bndbox, 'xmax').text = str(int(x_max))
        ET.SubElement(bndbox, 'ymax').text = str(int(y_max))

    # Save the XML into the VOC output folder
    xml_str = ET.tostring(xml_file)
    xml_name = os.path.splitext(os.path.basename(img_data["file_name"]))[0] + '.xml'
    with open(os.path.join(vocPath, xml_name), 'wb') as f:
        f.write(xml_str)
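
To spot-check the result, one generated XML can be parsed back (assumes the script above has just run, so vocPath, os and ET are in scope):

# Spot-check: parse one generated XML and list its objects
sample = sorted(f for f in os.listdir(vocPath) if f.endswith('.xml'))[0]
root = ET.parse(os.path.join(vocPath, sample)).getroot()
for obj in root.iter('object'):
    print(obj.findtext('name'), [b.text for b in obj.find('bndbox')])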

6.2 Convert JSON to YOLO


import os
import json
from tqdm import tqdm
import argparse
 
 
def convert(size, box):
    '''
    size: image width and height (w, h)
    box: COCO format x, y, w, h (top-left corner plus size)
    returns: x_center/image_width, y_center/image_height, width/image_width, height/image_height
    '''
 
    dw = 1. / (size[0])
    dh = 1. / (size[1])
    x = box[0] + box[2] / 2.0
    y = box[1] + box[3] / 2.0
    w = box[2]
    h = box[3]
 
    x = x * dw
    w = w * dw
    y = y * dh
    h = h * dh
    return (x, y, w, h)
 
 
if __name__ == '__main__':
 
    parser = argparse.ArgumentParser()
    parser.add_argument('--json_file', default='/home/dxfcv/workspace/sunsirui/label/dataset/annotations.json',
                        type=str, help="coco file path")
    parser.add_argument('--save_dir', default='/home/dxfcv/workspace/sunsirui/label/dataset/labels', type=str,
                        help="where to save .txt labels")
    arg = parser.parse_args()
 
    data = json.load(open(arg.json_file, 'r'))
 
    # Create the folder for the .txt labels if it does not exist
    if not os.path.exists(arg.save_dir):
        os.makedirs(arg.save_dir)
 
    id_map = {}  # maps COCO category ids to contiguous YOLO class indices
 
    # Parse the categories field and write the class names to classes.txt
    with open(os.path.join(arg.save_dir, 'classes.txt'), 'w') as f:
        for i, category in enumerate(data['categories']):
            f.write(f"{category['name']}\n")
            id_map[category['id']] = i
 
    for img in tqdm(data['images']):
 
        # Parse the images field: file name, width, height and image id
        # filename = os.path.join(arg.image_file, img["file_name"])
        filename = os.path.basename(img["file_name"])  # more robust than split('/')[1]
        img_width = img["width"]
        img_height = img["height"]
        img_id = img["id"]
        head, tail = os.path.splitext(filename)
 
        # The .txt file shares its name with the image; only the extension differs
        txt_name = head + ".txt"
        f_txt = open(os.path.join(arg.save_dir, txt_name), 'w')
 
        for ann in data['annotations']:
            if ann['image_id'] == img_id:
                box = convert((img_width, img_height), ann["bbox"])
 
                # Write one line with 5 fields: class index and the normalized box
                f_txt.write("%s %s %s %s %s\n" % (
                    id_map[ann["category_id"]], box[0], box[1], box[2], box[3]))
 
        f_txt.close()
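
As a sanity check on the math in convert(): a 50x80 box at (100, 200) in a 1280x720 image maps to a normalized center and size, e.g.

print(convert((1280, 720), [100, 200, 50, 80]))
# center x = (100 + 25) / 1280 ≈ 0.0977, center y = (200 + 40) / 720 ≈ 0.3333
# width = 50 / 1280 ≈ 0.0391, height = 80 / 720 ≈ 0.1111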

Origin blog.csdn.net/qq_37249793/article/details/131956211