图像识别数据集处理——Python 文件操作

版权声明:转载请说明来源,谢谢 https://blog.csdn.net/wsp_1138886114/article/details/84029488

通过 excel 获取数据集

数据集放在一个总文件夹中,Excel 中记录了各图像的标识。我们需要读取 Excel 中的图像文件名,并据此获取相应的图像。
示例文件结构如下:

源目录
 ├── 二级目录1
 │   ├──example_01.jpg
 │   └──example_02.jpg
 ├── 二级目录2
 │   ├──example_03.jpg
 │   ├──example_04.jpg
 │   └──example_05.jpg
目标目录

Excel 文件:img_list.xlsx(代码读取第一个工作表的第一列,并按末尾 14 个字符匹配文件,即 example_01.jpg 这类文件名的长度)
在这里插入图片描述

# -*- coding: utf-8 -*-
import xlrd
import os
import shutil

def read_excel(excel_path):
    """Return the image keys recorded in the first column of a workbook.

    Only the first sheet is read. Each first-column cell is converted to a
    string and trimmed to its last 14 characters, so an entry that carries a
    longer prefix is reduced to its fixed-width tail.

    Args:
        excel_path: Path of the workbook to open with ``xlrd``.

    Returns:
        A list with one trimmed string per row, in sheet order.
    """
    first_sheet = xlrd.open_workbook(excel_path).sheet_by_index(0)
    return [
        str(first_sheet.row_values(row_idx)[0])[-14:]
        for row_idx in range(first_sheet.nrows)
    ]


def file_and_forder(original_path):
    """Collect the path of every file found below ``original_path``.

    The tree is walked recursively with ``os.walk``. Each result is the
    directory reported by the walk joined to the file name with a literal
    ``"/"``.

    Args:
        original_path: Root directory to walk.

    Returns:
        A list of ``"<directory>/<file name>"`` strings in walk order.
    """
    return [
        current_dir + "/" + name
        for current_dir, _subdirs, names in os.walk(original_path)
        for name in names
    ]


def copy_img_move(original_path, Target_path, excel_path):
    """Copy every image listed in the Excel workbook into ``Target_path``.

    Files below ``original_path`` are found recursively and matched against
    the workbook by the last 14 characters of their path, which is the same
    fixed-width key that ``read_excel`` returns.

    Args:
        original_path: Root directory that is searched for candidate files.
        Target_path: Directory that receives the copies; created if missing.
        excel_path: Workbook whose entries select the files to copy.
    """
    # A set gives constant-time membership tests for the per-file check below.
    wanted_keys = set(read_excel(excel_path))
    folder_filename_list = file_and_forder(original_path)

    # shutil.copy treats a non-existent destination as a *file name*, so
    # without this every match would overwrite one single file instead of
    # being placed inside the target folder.
    os.makedirs(Target_path, exist_ok=True)

    for filename_single in folder_filename_list:
        print("filename_single",filename_single)
        if filename_single[-14:] in wanted_keys:
            shutil.copy(filename_single, Target_path)
    print("处理完成!")


if __name__ == "__main__":
    # Script entry point: run the copy with the example source directory,
    # target directory and workbook used throughout this article.
    copy_img_move(
        original_path="./源目录",
        Target_path="./目标目录",
        excel_path="img_list.xlsx",
    )

通过 json 获取数据集

import json
import shutil
import os
from glob import glob
from tqdm import tqdm

# Create the 59 class folders (0-58) that will receive the image files.
# Each folder is created on its own with exist_ok=True, so a folder that
# already exists or a missing ./data/train parent does not stop the
# remaining folders from being created.
for i in range(0,59):
    os.makedirs("./data/train/" + str(i), exist_ok=True)
    
# Load the training and validation annotation lists. The context managers
# close each file handle as soon as its content has been parsed.
with open("./data/labels/train_annotations.json","r",encoding="utf-8") as train_fp:
    file_train = json.load(train_fp)
with open("./data/labels/validation_annotations.json","r",encoding="utf-8") as val_fp:
    file_val = json.load(val_fp)

# Both annotation lists are concatenated into one list of records.
file_list = file_train + file_val

# Copy each annotated image into the folder named after its class index.
for record in tqdm(file_list):
    image_name = record["image_id"]
    label = record["disease_class"]
    # Records labelled 44 or 45 are skipped entirely.
    if label in (44, 45):
        continue
    # Labels above 45 are shifted down by two, closing the gap left by the
    # two skipped classes.
    if label > 45:
        label -= 2
    shutil.copy(
        "./data/images/" + image_name,
        "./data/train/" + str(label) + "/",
    )


猜你喜欢

转载自blog.csdn.net/wsp_1138886114/article/details/84029488
今日推荐