MNIST、Fashion-MNIST、SmallNORB三款标准图像数据集

本部分介绍三款标准图像识别数据集,分别为MNIST、Fashion-MNIST、SmallNORB。

目录

一、MNIST

二、Fashion-MNIST

三、SmallNORB

四、解析代码


一、MNIST

链接:https://pan.baidu.com/s/1onS1F4jWgtbUURymJXFtAg 
提取码:5lfn 


二、Fashion-MNIST

链接:https://pan.baidu.com/s/1NFqZ1SJ__EZJ8boWbx_Mng 
提取码:je02

三、SmallNORB

链接:https://pan.baidu.com/s/1VapMXBTZwRP1vcCsznN7Lg 
提取码:ziek

四、解析代码

code1~code3分别用于提取三款数据集的数据,code4对三种数据集进行统一封装。最终的数据集形式转化为[图像样本个数, 单一图像拉伸为一维向量后的长度],例如:MNIST解析之后为[60000, 784]。

code1:py for MNIST

# import cPickle
import pickle
import gzip
import numpy as np

def load_data(path=r'D:\dataset\mnist.pkl.gz'):
    """Load the pickled MNIST splits from a gzip-compressed file.

    Args:
        path: Location of the ``mnist.pkl.gz`` archive. Defaults to the
            location this script originally hard-coded.

    Returns:
        The ``(training_data, validation_data, test_data)`` tuple exactly as
        it is stored in the pickle.
    """
    # NOTE(review): unpickling can execute arbitrary code; only point this at
    # a trusted copy of the dataset.
    # The context manager closes the file even if unpickling raises.
    with gzip.open(path, 'rb') as f:
        training_data, validation_data, test_data = pickle.load(f, encoding='bytes')
    return (training_data, validation_data, test_data)

def vectorized_result(j):
    """Return a (10, 1) array of zeros with 1.0 written at index ``j``.

    This is the one-hot column-vector encoding of class index ``j``.
    """
    one_hot = np.zeros(shape=(10, 1))
    one_hot[j] = 1.0
    return one_hot

def load_data_wrapper():
    """Return the MNIST splits as lists of ``(input, target)`` pairs.

    Every image is reshaped to a (784, 1) column vector.  Training and
    validation targets are converted with ``vectorized_result``; test
    targets are left as the raw labels.

    Returns:
        ``(training_data, validation_data, test_data)``, each a list of
        2-tuples.  Lists are returned rather than bare ``zip`` objects
        because a Python 3 ``zip`` is a one-shot iterator: it has no
        ``len()`` and yields nothing on a second pass.
    """
    tr_d, va_d, te_d = load_data()

    def as_columns(images):
        # One (784, 1) column vector per image.
        return [np.reshape(x, (784, 1)) for x in images]

    def as_one_hot(labels):
        return [vectorized_result(y) for y in labels]

    training_data = list(zip(as_columns(tr_d[0]), as_one_hot(tr_d[1])))
    validation_data = list(zip(as_columns(va_d[0]), as_one_hot(va_d[1])))
    test_data = list(zip(as_columns(te_d[0]), te_d[1]))

    return (training_data, validation_data, test_data)


code2:py for Fashion-MNIST

import os
import gzip
import numpy as np
# import matplotlib.pyplot as plt

'''
0:T-shirt/top
1:Trouser
2:Pullover
3:Dress
4:Coat
5:Sandal
6:Shirt
7:Sneaker
8:Bag
9:Ankle boot
'''
def vectorized_result(j):
    """One-hot encode class index ``j`` as a (10, 1) float column vector."""
    column = np.zeros((10, 1))
    column[j] = 1.0
    return column

def load_mnist(path, kind='train'):
    """Read one split of gzip-compressed idx image and label files.

    Args:
        path: Directory holding ``<kind>-labels-idx1-ubyte.gz`` and
            ``<kind>-images-idx3-ubyte.gz``.
        kind: File-name prefix selecting the split (``'train'`` by default).

    Returns:
        ``(images, labels)``: ``images`` is a uint8 array of shape
        ``(len(labels), 784)`` and ``labels`` is a 1-D uint8 array.
    """
    label_file = os.path.join(path, '%s-labels-idx1-ubyte.gz' % kind)
    image_file = os.path.join(path, '%s-images-idx3-ubyte.gz' % kind)

    with gzip.open(label_file, 'rb') as fh:
        label_bytes = fh.read()
    # The first 8 bytes of the label file are skipped; the rest are labels.
    labels = np.frombuffer(label_bytes, dtype=np.uint8, offset=8)

    with gzip.open(image_file, 'rb') as fh:
        image_bytes = fh.read()
    # The first 16 bytes of the image file are skipped; every following run
    # of 784 bytes becomes one row.
    flat_pixels = np.frombuffer(image_bytes, dtype=np.uint8, offset=16)
    images = flat_pixels.reshape(len(labels), 784)

    return images, labels

def fashionmnist_loader(path=r'D:\dataset\fashionMnist'):
    """Load Fashion-MNIST with scaled pixels and one-hot training labels.

    Args:
        path: Directory containing the ``train-*`` and ``t10k-*`` ``.gz``
            files read by ``load_mnist``. Defaults to the directory this
            script originally hard-coded.

    Returns:
        ``(x_train, y_train, x_test, y_test)``: both image arrays are divided
        by 255.0, ``y_train`` is a ``(len(x_train), 10)`` one-hot float array,
        and ``y_test`` is returned as loaded (raw labels, not one-hot).
    """
    x_train, y_train = load_mnist(path)
    x_test, y_test = load_mnist(path, 't10k')

    # One-hot encode all training labels with a single indexed assignment:
    # row i gets a 1.0 in column y_train[i].
    y_tr = np.zeros((len(x_train), 10))
    y_tr[np.arange(len(x_train)), np.asarray(y_train, dtype=np.intp)] = 1.0

    return x_train / 255.0, y_tr, x_test / 255.0, y_test

code3:py for SmallNORB

import numpy as np
import pickle
# from sklearn.decomposition import PCA


def smallnorb_loader(data_dir=r'D:\dataset'):
    """Load the pre-pickled SmallNORB train and test arrays.

    Args:
        data_dir: Directory containing the four ``smallnorb_*.pkl`` files.
            Defaults to the directory this script originally hard-coded.

    Returns:
        ``(x_train, y_train, x_test, y_test)`` exactly as stored in the
        pickle files.
    """
    import os  # local import: os is not imported at module level here

    def _load(filename):
        # NOTE(review): unpickling can execute arbitrary code; only load
        # files from a trusted source.
        # The context manager closes each file as soon as it has been read.
        with open(os.path.join(data_dir, filename), 'rb') as fh:
            return pickle.load(fh, encoding='bytes')

    x_train = _load('smallnorb_x_train_24300x2048.pkl')
    y_train = _load('smallnorb_y_train.pkl')
    x_test = _load('smallnorb_x_test_24300x2048.pkl')
    y_test = _load('smallnorb_y_test.pkl')

    return x_train, y_train, x_test, y_test

code4:py for get_Dataset

import numpy as np
import mnist_loader
import fashionmnist_loader
import smallNORBpkl_loader

def get_Dataset(name='mnist'):
    """Return ``(x_train, y_train, x_test, y_test)`` for the named dataset.

    Args:
        name: ``'mnist'``, ``'fashion'`` or ``'smallnorb'``.

    Returns:
        A 4-tuple of arrays:

        * ``'mnist'``: the training and validation pairs from
          ``mnist_loader.load_data_wrapper()`` are concatenated; each input
          and each target column is flattened into one row of ``x_train`` /
          ``y_train``.  ``x_test`` is built the same way and ``y_test`` is a
          1-D float array of the raw test labels.
        * ``'fashion'``: whatever ``fashionmnist_loader.fashionmnist_loader()``
          returns.
        * ``'smallnorb'``: the loader's arrays, with the training labels
          expanded to a ``(len(labels), 5)`` one-hot float array.

    Raises:
        ValueError: If ``name`` is not one of the supported dataset names
            (previously this case silently returned ``None``, which made the
            caller's tuple-unpacking fail with an unrelated ``TypeError``).
    """
    if name == 'mnist':
        t, v, tt = mnist_loader.load_data_wrapper()
        # Validation pairs are appended to the training pairs, so both end up
        # in x_train / y_train.
        validation_data = list(v)
        training_data = list(t) + validation_data
        testing_data = list(tt)

        n_features = len(training_data[0][0])
        n_classes = len(training_data[0][1])

        x_train = np.zeros((len(training_data), n_features))
        y_train = np.zeros((len(training_data), n_classes))
        for i, (image, target) in enumerate(training_data):
            # Flatten the column vectors into rows.
            x_train[i] = np.ravel(image)
            y_train[i] = np.ravel(target)

        x_test = np.zeros((len(testing_data), n_features))
        y_test = np.zeros(len(testing_data))
        for i, (image, label) in enumerate(testing_data):
            x_test[i] = np.ravel(image)
            y_test[i] = label
        return x_train, y_train, x_test, y_test

    if name == 'fashion':
        return fashionmnist_loader.fashionmnist_loader()

    if name == 'smallnorb':
        x_train, y_tr, x_test, y_test = smallNORBpkl_loader.smallnorb_loader()
        # One-hot encode the training labels over 5 classes.
        y_train = np.zeros((len(y_tr), 5))
        for i, label in enumerate(y_tr):
            y_train[i][int(label)] = 1.0
        return x_train, y_train, x_test, y_test

    raise ValueError(
        "unknown dataset name %r; expected 'mnist', 'fashion' or 'smallnorb'"
        % (name,)
    )

使用code:

import get_Dataset

# Load one dataset by name; get_Dataset handles 'mnist', 'fashion' and 'smallnorb'.
x_train, y_train, x_test, y_test = get_Dataset.get_Dataset(name='mnist')
发布了331 篇原创文章 · 获赞 135 · 访问量 11万+

猜你喜欢

转载自blog.csdn.net/Triple_WDF/article/details/103343946