paddledet トレーニング回転ターゲット検出 ppyoloe-r トレーニング独自のデータセット

1. データ変換
labelme2coco をベースに、labelme でアノテーションしたポリゴンの点を OpenCV(cv2.minAreaRect)で最小外接矩形の 4 頂点の座標に変換します。

# encoding=utf-8
import argparse
import collections
import datetime
import glob
import json
import os
import os.path as osp
import sys
import uuid
import cv2
import imgviz
import numpy as np

import labelme

# pycocotools is needed by main() for mask encoding, area and bounding-box
# computation; abort early with an install hint when it is missing.
try:
    import pycocotools.mask
except ImportError:
    print("Please install pycocotools:\n\n    pip install pycocotools\n")
    sys.exit(1)


def _shape_to_flat_points(points, shape_type):
    """Convert one labelme shape into a flat ``[x0, y0, x1, y1, ...]`` list.

    Args:
        points: The shape's ``points`` entry from the labelme JSON
            (a list of ``[x, y]`` pairs).
        shape_type: The shape's ``shape_type`` entry (may be ``None``).

    Returns:
        A flat list of coordinates:
        * ``rectangle`` -> the 4 axis-aligned corners (8 values),
        * ``circle``    -> a regular polygon approximating the circle,
        * ``polygon``   -> the 4 corners of the minimum-area (rotated)
          bounding rectangle computed by OpenCV (8 values),
        * anything else -> the original points, flattened.
    """
    if shape_type == "rectangle":
        (x1, y1), (x2, y2) = points
        x1, x2 = sorted([x1, x2])
        y1, y2 = sorted([y1, y2])
        return [x1, y1, x2, y1, x2, y2, x1, y2]

    if shape_type == "circle":
        (x1, y1), (x2, y2) = points
        r = np.linalg.norm([x2 - x1, y2 - y1])
        # r(1-cos(a/2))<x, a=2*pi/N => N>pi/arccos(1-x/r)
        # x: tolerance of the gap between the arc and the line segment
        if r > 0.5:
            n_points_circle = max(int(np.pi / np.arccos(1 - 1 / r)), 12)
        else:
            # For r <= 0.5 the arccos argument leaves [-1, 1] (or divides by
            # zero), which would produce NaN; fall back to the minimum count.
            n_points_circle = 12
        i = np.arange(n_points_circle)
        x = x1 + r * np.sin(2 * np.pi / n_points_circle * i)
        y = y1 + r * np.cos(2 * np.pi / n_points_circle * i)
        return np.stack((x, y), axis=1).flatten().tolist()

    if shape_type == "polygon":
        # Minimum-area enclosing rectangle -> its 4 corner points.
        rect = cv2.minAreaRect(np.float32(points))
        return cv2.boxPoints(rect).flatten().tolist()

    # Other shape types: keep the points but store them flat, so every
    # segmentation entry has the same [x0, y0, x1, y1, ...] layout.
    return np.asarray(points).flatten().tolist()


def main(
    input_dir='G:/customer/visionary_s_3d_dete/zhixi',
    output_dir='dataset/zhixi',
):
    """Convert a folder of labelme JSON files into a COCO-style dataset.

    For every ``*.json`` in ``input_dir`` the embedded image is written to
    ``<output_dir>/JPEGImages/<name>.jpg`` and its shapes are collected into
    ``<output_dir>/annotations.json``. Shapes sharing the same label and
    ``group_id`` are merged into one annotation. Every annotation is assigned
    to the single category ``sack`` (id 0), regardless of its label text.

    Args:
        input_dir: Directory containing the labelme ``*.json`` files.
        output_dir: Directory to write the dataset to. It is created if
            missing; an existing directory is reused and files in it are
            overwritten.
    """
    # exist_ok so that re-running the conversion does not crash.
    os.makedirs(osp.join(output_dir, "JPEGImages"), exist_ok=True)

    data = dict(
        images=[
            # license, url, file_name, height, width, date_captured, id
        ],
        annotations=[
            # segmentation, area, iscrowd, image_id, bbox, category_id, id
        ],
        categories=[
            # supercategory, id, name
        ],
    )
    data["categories"].append(
        dict(supercategory=None, id=0, name='sack',)
    )

    out_ann_file = osp.join(output_dir, "annotations.json")
    # Sorted so that image ids are reproducible between runs.
    label_files = sorted(glob.glob(osp.join(input_dir, "*.json")))
    for image_id, filename in enumerate(label_files):
        print("Generating dataset from:", filename)

        label_file = labelme.LabelFile(filename=filename)

        base = osp.splitext(osp.basename(filename))[0]
        out_img_file = osp.join(output_dir, "JPEGImages", base + ".jpg")

        img = labelme.utils.img_data_to_arr(label_file.imageData)
        imgviz.io.imsave(out_img_file, img)
        data["images"].append(
            dict(
                license=0,
                url=None,
                # Stored relative to the directory holding the annotation file.
                file_name=osp.relpath(out_img_file, osp.dirname(out_ann_file)),
                height=img.shape[0],
                width=img.shape[1],
                date_captured=None,
                id=image_id,
            )
        )

        masks = {}  # for area
        segmentations = collections.defaultdict(list)  # for segmentation
        for shape in label_file.shapes:
            points = shape["points"]
            label = shape["label"]
            group_id = shape.get("group_id")
            shape_type = shape.get("shape_type")
            mask = labelme.utils.shape_to_mask(
                img.shape[:2], points, shape_type
            )

            # Ungrouped shapes get a unique id so they are never merged.
            if group_id is None:
                group_id = uuid.uuid1()

            instance = (label, group_id)

            if instance in masks:
                masks[instance] = masks[instance] | mask
            else:
                masks[instance] = mask

            segmentations[instance].append(
                _shape_to_flat_points(points, shape_type)
            )
        segmentations = dict(segmentations)

        for instance, mask in masks.items():
            mask = np.asfortranarray(mask.astype(np.uint8))
            mask = pycocotools.mask.encode(mask)
            area = float(pycocotools.mask.area(mask))
            bbox = pycocotools.mask.toBbox(mask).flatten().tolist()

            data["annotations"].append(
                dict(
                    id=len(data["annotations"]),
                    image_id=image_id,
                    category_id=0,
                    segmentation=segmentations[instance],
                    area=area,
                    bbox=bbox,
                    iscrowd=0,
                )
            )
    with open(out_ann_file, "w", encoding="utf-8") as f:
        json.dump(data, f)

# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    main()

2. data.yml ファイルを変更します。
データセットのパスは実際の環境に合わせて変更してください。うまく動作しない場合は、デバッグして問題の原因箇所を確認してください。

# Dataset configuration for rotated-box (RBOX) training on the converted data.
metric: RBOX
# NOTE(review): the conversion script in this article registers a single
# category ('sack'); presumably this should match the number of categories
# in annotations.json - confirm before training.
num_classes: 15

# Training split, read from a COCO-format annotation file.
TrainDataset:
  !COCODataSet
    image_dir:
    # NOTE(review): the conversion script writes <output_dir>/annotations.json,
    # not annotations/annotations.json - move the file or adjust this path.
    anno_path: annotations/annotations.json
    dataset_dir: ../dataset/zhixi
    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']

# Evaluation split; points at the same annotation file and directory as
# TrainDataset.
EvalDataset:
  !COCODataSet
    image_dir:
    anno_path: annotations/annotations.json
    dataset_dir: ../dataset/zhixi
    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']

# Inference input; uses the same annotation path and dataset directory.
TestDataset:
  !ImageFolder
    anno_path: annotations/annotations.json
    dataset_dir: ../dataset/zhixi

3. トレーニング時のエラー
RuntimeError: (PreconditionNotMet) Paddle が依存するサードパーティのダイナミックライブラリ (cublas64_102.dll;cublas64_10.dll) が正しく構成されていません。(エラーコードは126)

解決策: パスは C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0\bin です。

bin パス内の cublas64_100.dll の名前を cublas64_10.dll に変更します。

bin パス内の cusolver64_100.dll の名前を cusolver64_10.dll に変更します。
—————————————————
著作権表示: この記事は CSDN ブロガー「Earl Li's Finger Sand」のオリジナル記事であり、CC 4.0 BY-SA 著作権契約に従います。転載する場合は、元の情報源へのリンクとこの声明を添付してください。
元のリンク: https://blog.csdn.net/m0_37690102/article/details/123474171

paddledet トレーニング回転ターゲット検出 ppyoloe-r トレーニング独自のデータセット

おすすめ