PPOCRLabel标注格式和icdar2015格式互转

1、PPOCRLabel标注格式转icdar2015格式

# -*- coding : UTF-8 -*-
# @file   : conver_json_icdar.py
# @Time   : 2021/4/9 11:24
# @Author : wmz
 
import os
import json
 
 
def json_2_icdar(js_path, ic_path):
    """Convert a PPOCRLabel ``Label.txt`` file into ICDAR2015-style txt files.

    Each non-blank line of ``js_path`` must be ``<image path>\\t<JSON list>``,
    where every JSON item is a dict with a ``'transcription'`` string and a
    ``'points'`` list of ``[x, y]`` pairs.  For every image one text file is
    written under ``ic_path`` (same relative path, extension swapped for
    ``.txt``); each line of it is ``x1,y1,...,xn,yn,transcription``.

    Args:
        js_path: Path of the PPOCRLabel label file (read as UTF-8).
        ic_path: Directory that receives the generated ``.txt`` files.
            Missing directories are created.

    Raises:
        IndexError: If a non-blank line has no tab-separated JSON column.
        json.JSONDecodeError: If the JSON column cannot be parsed.
    """
    with open(js_path, 'r', encoding='utf-8') as f:
        for line in f:
            # Tolerate blank / trailing empty lines instead of crashing.
            if not line.strip():
                continue
            fields = line.split('\t')
            # Swap only the real extension.  A plain substring replace would
            # also rewrite "jpg" inside directory names and would leave
            # non-jpg images (png, jpeg, ...) untouched, so the text output
            # could overwrite the image itself.
            stem = os.path.splitext(fields[0])[0]
            dst_file = os.path.join(ic_path, stem + '.txt')
            # Parse before opening the output so a bad line leaves no
            # half-written file behind.
            boxes = json.loads(fields[1])
            dst_dir = os.path.dirname(dst_file)
            if dst_dir:
                # The image path may carry a sub-directory component.
                os.makedirs(dst_dir, exist_ok=True)
            with open(dst_file, 'w', encoding='utf-8') as out:
                for box in boxes:
                    # Flatten [[x, y], ...] into integer x1,y1,x2,y2,...
                    coords = [int(v) for point in box['points'] for v in point]
                    pts = ','.join(map(str, coords))
                    out.write(pts + ',' + box['transcription'] + '\n')
 
 
if __name__ == "__main__":
    # Example run: the label file lives in the same folder that receives the
    # generated ICDAR-style txt files.
    label_file = r"C:\Users\WT\Desktop\hkb-bz\Label.txt"
    output_dir = r"C:\Users\WT\Desktop\hkb-bz"
    json_2_icdar(js_path=label_file, ic_path=output_dir)

2、icdar2015数据集转换成PaddleOCR标注数据格式

import os
import argparse
import json
 
 
def gen_rec_label(input_path, out_label):
    """Convert an ICDAR2015 recognition ground-truth file to PaddleOCR format.

    Every non-blank input line is expected to look like
    ``<image name>, "<label>"``.  The output holds one
    ``<image name>\\t<label>`` line per input line.  As before, all spaces
    and double quotes are removed from the line content.

    Args:
        input_path: Ground-truth text file (UTF-8, an optional BOM is
            ignored).
        out_label: Destination label file, written as UTF-8.

    Raises:
        IndexError: If a non-blank line contains no comma.
    """
    with open(out_label, 'w', encoding='utf-8') as out_file:
        # 'utf-8-sig' drops a leading BOM so it cannot leak into the first
        # image name.
        with open(input_path, 'r', encoding='utf-8-sig') as f:
            for line in f:
                compact = line.strip('\n').replace(" ", "")
                if not compact.strip():
                    # Skip blank lines instead of failing on the missing
                    # label field.
                    continue
                # Split on the first comma only: the label itself may
                # contain commas and must not be truncated.
                parts = compact.split(',', 1)
                img_path, label = parts[0], parts[1]
                label = label.replace("\"", "")
                out_file.write(img_path + '\t' + label + '\n')
 
 
def gen_det_label(root_path, input_dir, out_label):
    """Merge ICDAR2015 detection ground-truth files into one PaddleOCR label file.

    Every file in ``input_dir`` is read as a list of lines of the form
    ``x1,y1,x2,y2,x3,y3,x4,y4,transcription``.  One output line is written
    per file: ``<image path>\\t<JSON list>``, where each JSON item is
    ``{"transcription": ..., "points": [[x, y], ...]}`` with integer
    coordinates.

    The image path is ``root_path`` + the label file's base name (minus a
    leading ``gt_`` if present) + ``".jpg"``.

    Args:
        root_path: Prefix prepended verbatim to every image file name.
        input_dir: Directory holding the per-image ground-truth files.
        out_label: Destination label file, written as UTF-8.

    Raises:
        IndexError: If a non-blank line has fewer than nine comma-separated
            fields.
        ValueError: If a coordinate is not numeric.
    """
    with open(out_label, 'w', encoding='utf-8') as out_file:
        # Sort so the output order does not depend on the file system.
        for label_file in sorted(os.listdir(input_dir)):
            stem = os.path.splitext(label_file)[0]
            # Strip the ICDAR "gt_" prefix only when it is really there;
            # blindly cutting three characters corrupts other file names.
            if stem.startswith('gt_'):
                stem = stem[3:]
            img_path = root_path + stem + ".jpg"
            label = []
            # 'UTF-8-sig' transparently removes the BOM that ICDAR files carry.
            with open(os.path.join(input_dir, label_file), "r",
                      encoding='UTF-8-sig') as f:
                for line in f:
                    line = line.strip("\n\r")
                    if not line.strip():
                        # Blank lines carry no annotation.
                        continue
                    # At most eight splits: everything after the eighth
                    # comma is the transcription, which may itself contain
                    # commas.
                    tmp = line.split(',', 8)
                    points = [
                        [int(float(tmp[i])), int(float(tmp[i + 1]))]
                        for i in range(0, 8, 2)
                    ]
                    label.append({"transcription": tmp[8], "points": points})

            out_file.write(img_path + '\t' + json.dumps(
                label, ensure_ascii=False) + '\n')
 
 
if __name__ == "__main__":
    # Command-line entry point: pick the converter with --mode and pass the
    # remaining options through to it.
    # Each entry is (flag, default value, help text); all options are strings.
    option_specs = (
        ('--mode',
         "rec",
         'Generate rec_label or det_label, can be set rec or det'),
        ('--root_path',
         "text_localization/ch4_test_images/",
         'The root directory of images.Only takes effect when mode=det '),
        ('--input_path',
         "icdar2015/text_localization/Challenge4_Test_Task1_GT",
         'Input_label or input path to be converted'),
        ('--output_label',
         "text_localization/test_icdar2015_label.txt",
         'Output file name'),
    )
    parser = argparse.ArgumentParser()
    for flag, default_value, help_text in option_specs:
        parser.add_argument(
            flag, type=str, default=default_value, help=help_text)
    args = parser.parse_args()

    # Any other --mode value is silently ignored.
    if args.mode == "det":
        gen_det_label(args.root_path, args.input_path, args.output_label)
    elif args.mode == "rec":
        print("Generate rec label")
        gen_rec_label(args.input_path, args.output_label)

参考:
https://blog.csdn.net/qq_41672428/article/details/110427385
https://blog.csdn.net/juluwangriyue/article/details/115565665

猜你喜欢

转载自blog.csdn.net/hhhhhhhhhhwwwwwwwwww/article/details/124094061