CIFAR-10 および CIFAR-100 データセットを、ラベルごとのフォルダー構成(ImageFolder で読み込める形式)に変換します。

次の Python スクリプトを、データセットのソースファイル(展開済みのバッチファイル)と同じフォルダーに置いて実行します。すると、データは ImageFolder で読み込める形式に変換されます(train フォルダーと val フォルダーに分かれ、それぞれの中にラベル名の付いたフォルダーがクラス数分だけ作成されます)。

CIFAR-10

import pickle
import numpy as np
import os
import cv2

def unpickle(file):
    """Load and return the object stored in the pickle file at path ``file``.

    The file is opened in binary mode and decoded with ``encoding='bytes'``,
    so 8-bit strings pickled by Python 2 come back as ``bytes`` objects.
    That is why the rest of this script indexes the result with keys such
    as ``b'data'``.

    Only use this on trusted files: unpickling can execute arbitrary code.
    """
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='bytes')

# Root folders of the two output splits, relative to the working directory.
loc_1 = './train'
loc_2 = './val'

# Create each split folder unless something already exists at that path.
for _split_dir in (loc_1, loc_2):
    if not os.path.exists(_split_dir):
        os.mkdir(_split_dir)

def _save_images(batch, label_key, label_names, out_dir, index_offset=0):
    """Write every image of one unpickled batch to ``out_dir/<label>/``.

    Args:
        batch: dict returned by ``unpickle``. ``batch[b'data']`` holds one
            flattened image per row and ``batch[label_key]`` holds the
            matching class indices.
        label_key: bytes key of the label list (``b'labels'`` for CIFAR-10).
        label_names: class names as bytes, indexed by class index.
        out_dir: root folder of the split; the per-class sub-folders must
            already exist.
        index_offset: value added to the row position to build the number
            embedded in the file name.

    Raises:
        OSError: if OpenCV reports that an image could not be written.
    """
    for j, (row, label_idx) in enumerate(zip(batch[b'data'], batch[label_key])):
        # (3, 32, 32) channel planes -> (32, 32, 3) height x width x channel.
        img = np.transpose(np.reshape(row, (3, 32, 32)), (1, 2, 0))
        # The CIFAR format stores the planes in R, G, B order, whereas
        # cv2.imwrite treats the last axis as B, G, R. Reverse the channel
        # axis so red and blue are not swapped in the saved file.
        img = np.ascontiguousarray(img[:, :, ::-1])
        label = label_names[label_idx].decode()
        img_name = label + '_' + str(index_offset + j) + '.jpg'
        img_save_path = os.path.join(out_dir, label, img_name)
        # cv2.imwrite signals failure through its return value, not an
        # exception; surface it instead of silently dropping images.
        if not cv2.imwrite(img_save_path, img):
            raise OSError('failed to write ' + img_save_path)


def unzip():
    """Convert the CIFAR-10 python batches in the working directory to JPEGs.

    Reads ``batches.meta``, ``data_batch_1`` .. ``data_batch_5`` and
    ``test_batch`` from the current directory. Training images are written
    to ``loc_1/<label>/`` and test images to ``loc_2/<label>/`` as
    ``<label>_<index>.jpg``.

    Raises:
        OSError: if an image cannot be written.
    """
    label_names = unpickle('batches.meta')[b'label_names']

    # One sub-folder per class in both splits.
    for name in label_names:
        for split_dir in (loc_1, loc_2):
            os.makedirs(os.path.join(split_dir, name.decode()), exist_ok=True)

    for i in range(1, 6):
        data_name = 'data_batch_' + str(i)
        # Offsetting by i * 10000 keeps file names unique across batches.
        _save_images(unpickle(data_name), b'labels', label_names, loc_1,
                     index_offset=i * 10000)
        print(data_name + ' finished')

    _save_images(unpickle('test_batch'), b'labels', label_names, loc_2)
    print('test_batch finished')

# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    unzip()

変換前:
(図:変換前のファイル構成 — batches.meta、data_batch_1〜data_batch_5、test_batch)

変換後:
(図:変換後のフォルダー構成 — train/<ラベル名>/ と val/<ラベル名>/ の中に JPEG 画像が保存される)

CIFAR-100

import pickle
import numpy as np
import os
import cv2

def unpickle(file):
    """Read the pickle file at path ``file`` and return the stored object.

    The file is decoded with ``encoding='bytes'``, so 8-bit strings pickled
    by Python 2 are returned as ``bytes``; the rest of this script therefore
    uses keys such as ``b'data'``. ``encoding='latin1'`` is the alternative,
    which would decode those strings to ``str`` instead.

    Only use this on trusted files: unpickling can execute arbitrary code.
    """
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='bytes')

# Root folders of the two output splits, relative to the working directory.
loc_1 = './train'
loc_2 = './val'

# Create each split folder unless something already exists at that path.
for _split_dir in (loc_1, loc_2):
    if not os.path.exists(_split_dir):
        os.mkdir(_split_dir)

def _save_images(batch, label_key, label_names, out_dir, index_offset=0):
    """Write every image of one unpickled batch to ``out_dir/<label>/``.

    Args:
        batch: dict returned by ``unpickle``. ``batch[b'data']`` holds one
            flattened image per row and ``batch[label_key]`` holds the
            matching class indices.
        label_key: bytes key of the label list (``b'fine_labels'`` for the
            100 CIFAR-100 classes).
        label_names: class names as bytes, indexed by class index.
        out_dir: root folder of the split; the per-class sub-folders must
            already exist.
        index_offset: value added to the row position to build the number
            embedded in the file name.

    Raises:
        OSError: if OpenCV reports that an image could not be written.
    """
    for j, (row, label_idx) in enumerate(zip(batch[b'data'], batch[label_key])):
        # (3, 32, 32) channel planes -> (32, 32, 3) height x width x channel.
        img = np.transpose(np.reshape(row, (3, 32, 32)), (1, 2, 0))
        # The CIFAR format stores the planes in R, G, B order, whereas
        # cv2.imwrite treats the last axis as B, G, R. Reverse the channel
        # axis so red and blue are not swapped in the saved file.
        img = np.ascontiguousarray(img[:, :, ::-1])
        label = label_names[label_idx].decode()
        img_name = label + '_' + str(index_offset + j) + '.jpg'
        img_save_path = os.path.join(out_dir, label, img_name)
        # cv2.imwrite signals failure through its return value, not an
        # exception; surface it instead of silently dropping images.
        if not cv2.imwrite(img_save_path, img):
            raise OSError('failed to write ' + img_save_path)


def unzip():
    """Convert the CIFAR-100 python files in the working directory to JPEGs.

    Reads ``meta``, ``train`` and ``test`` from the current directory.
    Training images are written to ``loc_1/<label>/`` and test images to
    ``loc_2/<label>/`` as ``<label>_<index>.jpg``, using the fine labels
    as class names.

    Raises:
        OSError: if an image cannot be written.
    """
    meta = unpickle('meta')
    print(meta.keys())
    label_names = meta[b'fine_label_names']

    # One sub-folder per class in both splits.
    for name in label_names:
        for split_dir in (loc_1, loc_2):
            os.makedirs(os.path.join(split_dir, name.decode()), exist_ok=True)

    data_name = 'train'
    # Training file names have always started at index 10000; keep that
    # offset so re-running the script produces the same file names.
    _save_images(unpickle(data_name), b'fine_labels', label_names, loc_1,
                 index_offset=10000)
    print(data_name + ' finished')

    _save_images(unpickle('test'), b'fine_labels', label_names, loc_2)
    print('test_batch finished')

# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    unzip()

変換前:
(図:変換前のファイル構成 — meta、train、test)

変換後:
(図:変換後のフォルダー構成 — train/<ラベル名>/ と val/<ラベル名>/ の中に JPEG 画像が保存される)

おすすめ

転載: blog.csdn.net/qq_16763983/article/details/127154817