Training DAMO-YOLO on a custom dataset and deploying inference with onnxruntime

DAMO-YOLO is an object detection algorithm developed by the Intelligent Computing Laboratory of Alibaba's DAMO Academy that balances speed and accuracy, maintaining fast inference while delivering high detection accuracy.

DAMO-YOLO introduces a series of new techniques on top of the YOLO framework and significantly reworks the whole detection pipeline. Specifically, these include:

  1. A new detection backbone obtained by NAS search: the MAE-NAS method is used to quickly find network structures suited to detection tasks, such as MAE-NAS-L35 and MAE-NAS-L45.
  2. A deeper neck built with RepGFPN for efficient multi-scale feature fusion, which improves feature representation and model performance, together with a streamlined ZeroHead head that removes redundant parameters and computation to improve both speed and accuracy.
  3. Knowledge distillation to further improve accuracy: a large model serves as the teacher and a small model as the student, and distillation improves the student's generalization ability (a generic sketch of this idea is shown after the links below).
  4. DAMO-YOLO also provides efficient training strategies and convenient, easy-to-use deployment tools that help solve practical problems in industrial deployment. The code and documentation, as well as an online demo on the ModelScope platform, can be reached through the links below.

1. ModelScope platform: https://modelscope.cn/models/damo/cv_tinynas_objectdetection_damoyolo/summary
2. GitHub repository: https://github.com/tinyvision/DAMO-YOLO
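
As a rough illustration of the distillation idea in point 3, here is a generic knowledge-distillation loss sketch in PyTorch. It only shows the teacher/student idea at the logit level; DAMO-YOLO's own distillation operates on detection features and is more involved, so treat this as a conceptual example rather than the repository's implementation:

import torch.nn.functional as F

def distillation_loss(student_logits, teacher_logits, targets, T=2.0, alpha=0.5):
    # Soft targets from the teacher, softened by temperature T
    soft_loss = F.kl_div(
        F.log_softmax(student_logits / T, dim=1),
        F.softmax(teacher_logits / T, dim=1),
        reduction='batchmean',
    ) * (T * T)
    # Ordinary supervised loss on the ground-truth labels
    hard_loss = F.cross_entropy(student_logits, targets)
    # Blend the two terms; alpha controls how much the student follows the teacher
    return alpha * soft_loss + (1 - alpha) * hard_loss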

1. Data preparation

DAMO-YOLO currently supports the COCO and VOC dataset formats; this article recommends the COCO format. If you annotated your data as XML files with labelimg, you can use the voc2coco.py script below for one-click conversion (the data needs to be organized as shown below, otherwise you will need to change the paths inside the script):

.
├── ./voc2coco.py
├── ./data
│   ├── ./data/Annotations
│   │   ├── ./data/Annotations/0.xml
│   │   ├── ./data/Annotations/1000.xml
│   │   ├── ...
│   ├── ./data/images
│   │   ├── ./data/images/0.jpg
│   │   ├── ./data/images/1000.jpg
│   │   ├── ...

The voc2coco.py script is shown below; note that you need to change category_set to the categories of your own dataset:

import xml.etree.ElementTree as ET
import os
import json
import collections
import random
import shutil


category_set = ['ship']

# Random seed (any integer works)
random_seed = 42

# Seed the random number generator so the train/val split is reproducible
random.seed(random_seed)


coco_train = dict()
coco_train['images'] = []
coco_train['type'] = 'instances'
coco_train['annotations'] = []
coco_train['categories'] = []

coco_val = dict()
coco_val['images'] = []
coco_val['type'] = 'instances'
coco_val['annotations'] = []
coco_val['categories'] = []

# category_set = dict()
image_set = set()
train_image_id = 1
val_image_id = 200000  # Assuming you have less than 200000 images
category_item_id = 1
annotation_id = 1


def split_list_by_ratio(input_list, ratio=0.8):
    # Index at which to split the list
    split_index = int(len(input_list) * ratio)
    # Shuffle the list in place
    random.shuffle(input_list)
    # Split into two lists and return them
    return input_list[:split_index], input_list[split_index:]

def addCatItem(name):
    '''
    Add an entry to the categories section of the COCO json
    '''
    global category_item_id
    category_item = collections.OrderedDict()
    category_item['supercategory'] = 'none'
    category_item['id'] = category_item_id
    category_item['name'] = name
    coco_train['categories'].append(category_item)
    coco_val['categories'].append(category_item)
    category_item_id += 1


def addImgItem(file_name, size, img_suffixes, is_train):
    global train_image_id  # declare as global so the counter persists across calls
    global val_image_id  # declare as global so the counter persists across calls
    # global image_id
    if file_name is None:
        raise Exception('Could not find filename tag in xml file.')
    if size['width'] is None:
        raise Exception('Could not find width tag in xml file.')
    if size['height'] is None:
        raise Exception('Could not find height tag in xml file.')
    # image_item = dict()    # use collections.OrderedDict() to keep the keys in a fixed order
    image_item = collections.OrderedDict()
    jpg_name = os.path.splitext(file_name)[0] + img_suffixes
    image_item['file_name'] = jpg_name
    image_item['width'] = size['width']
    image_item['height'] = size['height']
    # image_item['id'] = image_id
    # coco['images'].append(image_item)
    if is_train:
        image_item['id'] = train_image_id
        coco_train['images'].append(image_item)
        image_id = train_image_id
        train_image_id += 1
    else:
        image_item['id'] = val_image_id
        coco_val['images'].append(image_item)
        image_id = val_image_id
        val_image_id += 1
    image_set.add(jpg_name)
    image_id = image_id + 1
    return image_id


def addAnnoItem(object_name, image_id, category_id, bbox, is_train):
    global annotation_id
    # annotation_item = dict()
    annotation_item = collections.OrderedDict()
    annotation_item['segmentation'] = []
    seg = []
    # bbox[] is x,y,w,h
    # left_top
    seg.append(bbox[0])
    seg.append(bbox[1])
    # left_bottom
    seg.append(bbox[0])
    seg.append(bbox[1] + bbox[3])
    # right_bottom
    seg.append(bbox[0] + bbox[2])
    seg.append(bbox[1] + bbox[3])
    # right_top
    seg.append(bbox[0] + bbox[2])
    seg.append(bbox[1])
    annotation_item['segmentation'].append(seg)
    annotation_item['area'] = bbox[2] * bbox[3]
    annotation_item['iscrowd'] = 0
    annotation_item['image_id'] = image_id
    annotation_item['bbox'] = bbox
    annotation_item['category_id'] = category_id
    annotation_item['id'] = annotation_id
    annotation_item['ignore'] = 0
    annotation_id += 1
    # coco['annotations'].append(annotation_item)
    if is_train:
        coco_train['annotations'].append(annotation_item)
    else:
        coco_val['annotations'].append(annotation_item)

def parseXmlFiles(xml_path, xmllist, img_suffixes, is_train):
    for f in xmllist:
        if not f.endswith('.xml'):
            continue

        bndbox = dict()
        size = dict()
        current_image_id = None
        current_category_id = None
        file_name = None
        size['width'] = None
        size['height'] = None
        size['depth'] = None

        xml_file = os.path.join(xml_path, f)
        print(xml_file)

        tree = ET.parse(xml_file)
        root = tree.getroot()  # grab the root element

        if root.tag != 'annotation':  # the root tag must be 'annotation'
            raise Exception('pascal voc xml root element should be annotation, rather than {}'.format(root.tag))

        # elem is <folder>, <filename>, <size>, <object>
        for elem in root:
            current_parent = elem.tag
            current_sub = None
            object_name = None

            # elem.tag, elem.attrib,elem.text
            if elem.tag == 'folder':
                continue

            if elem.tag == 'filename':
                file_name = elem.text
                if file_name in category_set:
                    raise Exception('file_name duplicated')

            # add img item only after parse <size> tag
            elif current_image_id is None and file_name is not None and size['width'] is not None:
                if file_name not in image_set:
                    current_image_id = addImgItem(file_name, size, img_suffixes, is_train)  # add the image entry
                    print('add image with {} and {}'.format(file_name, size))
                else:
                    raise Exception('duplicated image: {}'.format(file_name))
                    # subelem is <width>, <height>, <depth>, <name>, <bndbox>
            for subelem in elem:
                bndbox['xmin'] = None
                bndbox['xmax'] = None
                bndbox['ymin'] = None
                bndbox['ymax'] = None

                current_sub = subelem.tag
                if current_parent == 'object' and subelem.tag == 'name':
                    object_name = subelem.text
                    # if object_name not in category_set:
                    #    current_category_id = addCatItem(object_name)
                    # else:
                    # current_category_id = category_set[object_name]
                    current_category_id = category_set.index(object_name) + 1  # the list index starts at 0 but COCO category ids start at 1, hence +1
                elif current_parent == 'size':
                    if size[subelem.tag] is not None:
                        raise Exception('xml structure broken at size tag.')
                    size[subelem.tag] = int(subelem.text)

                # option is <xmin>, <ymin>, <xmax>, <ymax>, when subelem is <bndbox>
                for option in subelem:
                    if current_sub == 'bndbox':
                        if bndbox[option.tag] is not None:
                            raise Exception('xml structure corrupted at bndbox tag.')
                        bndbox[option.tag] = int(option.text)

                # only after parse the <object> tag
                if bndbox['xmin'] is not None:
                    if object_name is None:
                        raise Exception('xml structure broken at bndbox tag')
                    if current_image_id is None:
                        raise Exception('xml structure broken at bndbox tag')
                    if current_category_id is None:
                        raise Exception('xml structure broken at bndbox tag')
                    bbox = []
                    # x
                    bbox.append(bndbox['xmin'])
                    # y
                    bbox.append(bndbox['ymin'])
                    # w
                    bbox.append(bndbox['xmax'] - bndbox['xmin'])
                    # h
                    bbox.append(bndbox['ymax'] - bndbox['ymin'])
                    print(
                        'add annotation with {},{},{},{}'.format(object_name, current_image_id - 1, current_category_id,
                                                                 bbox))
                    addAnnoItem(object_name, current_image_id - 1, current_category_id, bbox, is_train)



def copy_img(img_path, file_list, img_suffixes, new_folder):
    # global train_image_id  # declare train_image_id as a global variable
    # global val_image_id  # declare val_image_id as a global variable

    parent_directory = os.path.dirname(img_path)
    dest_folder = os.path.join(parent_directory, new_folder)
    # Create the destination folder if it does not exist
    if not os.path.exists(dest_folder):
        os.makedirs(dest_folder)

    for each_file in file_list:
        file_prefix = os.path.splitext(each_file)[0]
        old_img_path = os.path.join(img_path, file_prefix + img_suffixes)
        new_img_path = os.path.join(dest_folder, file_prefix + img_suffixes)
        shutil.copy(old_img_path, new_img_path)
        # print(f'copied image to {new_img_path}')

        # update image_id
        # if new_folder == 'train':
        #     train_image_id += 1
        # else:
        #     val_image_id += 1



def check_image_folder_suffix(folder_path):
    # Collect the extensions of all files in the folder into a set
    file_suffixes = set()
    for file_name in os.listdir(folder_path):
        if os.path.isfile(os.path.join(folder_path, file_name)):
            _, file_suffix = os.path.splitext(file_name)
            file_suffixes.add(file_suffix)

    # If the set contains exactly one extension, all images share the same suffix and it is returned; otherwise raise
    assert len(file_suffixes) == 1, "the image folder contains more than one file extension"
    return file_suffixes.pop()



if __name__ == '__main__':
    # Folders holding the images and xml annotations
    img_path = 'data/images'
    xml_path = 'data/Annotations'
    # Make sure the image folder contains only one file extension
    img_suffixes = check_image_folder_suffix(img_path)
    annotation_folder = os.path.join('data', 'annotations')
    os.makedirs(annotation_folder, exist_ok=True)
    # Paths of the generated COCO-format json files
    train_json_file = os.path.join(annotation_folder, 'instances_train2017.json')
    val_json_file = os.path.join(annotation_folder, 'instances_val2017.json')
    # Add the categories section
    for categoryname in category_set:
        addCatItem(categoryname)
    # Get the list of all XML files
    xmllist = os.listdir(xml_path)
    # Randomly split into train/val lists with an 8:2 ratio
    train_list, val_list = split_list_by_ratio(xmllist, ratio=0.8)
    print(train_list)
    print('--------------------')
    print(val_list)
    # Copy images into the new train2017/val2017 folders
    copy_img(img_path, train_list, img_suffixes, 'train2017')
    copy_img(img_path, val_list, img_suffixes, 'val2017')
    parseXmlFiles(xml_path, train_list, img_suffixes, True)
    parseXmlFiles(xml_path, val_list, img_suffixes, False)
    json.dump(coco_train, open(train_json_file, 'w'))
    json.dump(coco_val, open(val_json_file, 'w'))

After the script finishes, the train2017, val2017 and annotations folders will be generated under the data folder. Note that the names in the COCO dataset you prepare need to match the layout below, otherwise you will have to change the damo-yolo source code:

.
├── ./voc2coco.py
├── ./data
│   ├── ./data/train2017
│   │   ├── ./data/train2017/0.jpg
│   │   ├── ./data/train2017/3.jpg
│   │   ├── ...
│   ├── ./data/val2017
│   │   ├── ./data/val2017/5.jpg
│   │   ├── ./data/val2017/16.jpg
│   │   ├── ...
│   ├── ./data/annotations
│   │   ├── ./data/annotations/instances_train2017.json
│   │   ├── ./data/annotations/instances_val2017.json

Finally, move the entire data folder mentioned above to the datasets folder.

2. Install dependencies

Just follow the official installation steps; newer versions of python and torch also work. Pay special attention to the command export PYTHONPATH=$PWD:$PYTHONPATH. It is a shell command that adds the current working directory (obtained through $PWD) to Python's module search path, so that Python can find and import the custom modules in the repository. If you reopen the terminal, you need to export it again; if you don't want that hassle, you can also write the line into your ~/.bashrc file (remember to use your own path). In addition, do not install the repository with python setup.py install. I originally thought that was how it should be installed, but it kept reporting that the package does not exist. There is also a discussion of this point in the official repository: https://github.com/tinyvision/DAMO-YOLO/issues/13

(1) Install DAMO-YOLO

git clone https://github.com/tinyvision/DAMO-YOLO.git
cd DAMO-YOLO/
conda create -n DAMO-YOLO python=3.7 -y
conda activate DAMO-YOLO
conda install pytorch==1.7.0 torchvision==0.8.0 torchaudio==0.7.0 cudatoolkit=10.2 -c pytorch
pip install -r requirements.txt
export PYTHONPATH=$PWD:$PYTHONPATH
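
To confirm that PYTHONPATH is set correctly, you can run a quick check from the DAMO-YOLO directory; this is a minimal sketch that simply imports torch and the repository's damo package:

import torch
import damo  # resolves only if PYTHONPATH points at the repo root

print('torch:', torch.__version__, 'cuda available:', torch.cuda.is_available())
print('damo package found at:', list(damo.__path__)[0])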

(2) Install pycocotools

pip install cython;
pip install git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI # for Linux
pip install git+https://github.com/philferriere/cocoapi.git#subdirectory=PythonAPI # for Windows

3. Modify the configuration file

The DAMO-YOLO base network comes in T/S/M/L and other sizes, as well as some lightweight variants. Download the torch pre-trained model corresponding to the model size you need; here we take S as an example. Find the corresponding configuration file configs/damoyolo_tinynasL25_S.py. You can modify batch_size/base_lr_per_img/image_max_range according to your own dataset, and num_classes and class_names under ZeroHead need to be changed to the number and names of the classes in your own dataset.

Pay special attention to this: if you want to start from a pre-trained model, add self.train.finetune_path to the config file, followed by the path of the downloaded weights.
If an out-of-memory error is reported during the validation phase, you need to reduce the test batch_size in damo/config/base.py.
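
Before starting training, you can optionally check that the downloaded pre-trained checkpoint loads correctly. This is a minimal sketch; the filename is just an example of the S-model weights and should be replaced with whatever file you downloaded:

import torch

ckpt = torch.load('damoyolo_tinynasL25_S.pth', map_location='cpu')
print(type(ckpt))
if isinstance(ckpt, dict):
    # show the top-level keys so you can see what the checkpoint contains
    print('top-level keys:', list(ckpt.keys())[:10])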

4. Training->Validation->Inference->Export

Single-GPU training:

python -m torch.distributed.launch --nproc_per_node=1 tools/train.py -f configs/damoyolo_tinynasL25_S.py

Multi-GPU training:

python -m torch.distributed.launch --nproc_per_node=4 tools/train.py -f configs/damoyolo_tinynasL25_S.py

Model evaluation:

python -m torch.distributed.launch --nproc_per_node=1 tools/eval.py -f configs/damoyolo_tinynasL25_S.py -c workdirs/damoyolo_tinynasL25_S/latest_ckpt.pth --fuse --conf 0.25 --nms 0.45

Model inference:

python -m torch.distributed.launch --nproc_per_node=1 tools/demo.py -p datasets/JPEGImages/11.jpg -f configs/damoyolo_tinynasL25_S.py --engine workdirs/damoyolo_tinynasL25_S/latest_ckpt.pth --infer_size 640 640

Model export:

# onnx export 
python tools/converter.py -f configs/damoyolo_tinynasL25_S.py -c workdirs/damoyolo_tinynasL25_S/latest_ckpt.pth --batch_size 1 --img_size 640

# trt export
python tools/converter.py -f configs/damoyolo_tinynasL25_S.py -c workdirs/damoyolo_tinynasL25_S/latest_ckpt.pth --batch_size 1 --img_size 640 --trt --end2end --trt_eval
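
If you want to double-check the exported ONNX file before deploying it, a minimal sketch like the following can be used (the output filename is an assumption; use whatever path the converter actually wrote):

import onnx

onnx_model = onnx.load('damoyolo_tinynasL25_S.onnx')
onnx.checker.check_model(onnx_model)  # raises if the graph is structurally invalid
print('inputs :', [i.name for i in onnx_model.graph.input])
print('outputs:', [o.name for o in onnx_model.graph.output])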

5. onnxruntime inference

Enter the damo-onnx folder and modify the relevant parameters in infer.py, mainly the model path, image path, etc. The code is shown below; it traverses all images in the input folder, runs inference on them, and saves the results to the specified output folder (note that you need to change the category names in damo-onnx/coco_classes.txt):

import cv2
import os
import copy
from damoyolo.damoyolo_onnx import DAMOYOLO
import torch

def main():
    # Specify parameters
    model_path = 'damoyolo/model/damoyolo_tinynasL35_M.onnx'
    score_th = 0.4
    nms_th = 0.85
    coco_classes = get_coco_classes()

    # Input image folder and output image folder
    input_folder = '/home/lzj/ssd2t/01.my_algo/damo-yolo/datasets/data/val2017'
    output_folder = 'output_images_folder'

    # Create the output folder
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)


    # Initialize the model
    model = DAMOYOLO(model_path)
    print(f"Model loaded: {model_path}")
    # Iterate over the images in the input folder
    for filename in os.listdir(input_folder):
        if filename.endswith(('.jpg', '.png', '.jpeg')):
            image_path = os.path.join(input_folder, filename)
            image = cv2.imread(image_path)

            # Run inference
            bboxes, scores, class_ids = model(image, nms_th=nms_th)

            # Draw the results and save the image
            result_image = draw_debug(image, score_th, bboxes, scores, class_ids, coco_classes)
            output_path = os.path.join(output_folder, filename)
            cv2.imwrite(output_path, result_image)
            print(f"Output saved: {output_path}")

def draw_debug(image, score_th, bboxes, scores, class_ids, coco_classes):
    debug_image = copy.deepcopy(image)

    for bbox, score, class_id in zip(bboxes, scores, class_ids):
        x1, y1, x2, y2 = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])

        if score_th > score:
            continue

        # Draw the bounding box
        debug_image = cv2.rectangle(debug_image, (x1, y1), (x2, y2), (0, 255, 0), thickness=2)

        # Show the class name and score
        score = '%.2f' % score
        text = '%s:%s' % (str(coco_classes[int(class_id)]), score)
        debug_image = cv2.putText(debug_image, text, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), thickness=2)

    return debug_image

def get_coco_classes():
    with open('coco_classes.txt', 'rt') as f:
        coco_classes = f.read().rstrip('\n').split('\n')
    return coco_classes

if __name__ == '__main__':
    main()
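
If you are unsure what input size or tensor names the ONNX model expects, onnxruntime itself can tell you. This small sketch (assuming onnxruntime is installed and using the same model path as above) prints the model's inputs and outputs:

import onnxruntime as ort

session = ort.InferenceSession('damoyolo/model/damoyolo_tinynasL35_M.onnx',
                               providers=['CPUExecutionProvider'])
for inp in session.get_inputs():
    print('input :', inp.name, inp.shape, inp.type)
for out in session.get_outputs():
    print('output:', out.name, out.shape, out.type)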

At this point, you have finished training a DAMO-YOLO model on your own dataset and running inference with ONNX. The complete code is available on GitHub: https://github.com/ZhijunLStudio/DAMO-YOLO-ONNX
If you also need TensorRT inference, you can refer to this repository: https://github.com/hpc203/DAMO-YOLO-detect-onnxrun-cpp-py

References:
1. https://github.com/tinyvision/DAMO-YOLO
2. https://github.com/Kazuhito00/DAMO-YOLO-ONNX-Sample
