上一篇文章已经实现了本地数据集的导入函数,现在用这个函数来导入数据并可视化:
导入数据:
def _read_idx_payload(path, header_size):
    """Return the data section of a gzip-compressed IDX file as flat uint8.

    Args:
        path: Location of the ``.gz`` file to read.
        header_size: Number of leading header bytes to skip before the data.

    Returns:
        A 1-D ``numpy.uint8`` array over everything after the header.
    """
    with gzip.open(path, 'rb') as fh:
        return np.frombuffer(fh.read(), np.uint8, offset=header_size)


def load_data(data_folder):
    """Load the four gzip-compressed MNIST files stored in *data_folder*.

    The folder must contain ``train-labels-idx1-ubyte.gz``,
    ``train-images-idx3-ubyte.gz``, ``t10k-labels-idx1-ubyte.gz`` and
    ``t10k-images-idx3-ubyte.gz``.

    Args:
        data_folder: Directory that holds the four files listed above.

    Returns:
        ``(x_train, y_train), (x_test, y_test)``. Each ``y`` is a 1-D uint8
        array of labels; each ``x`` is a uint8 array reshaped to
        ``(len(y), 28, 28)``.

    Raises:
        OSError: If one of the files is missing or cannot be decompressed.
        ValueError: If an image file's payload cannot be reshaped to
            ``(len(labels), 28, 28)``.
    """
    label_header = 8    # bytes skipped at the start of a label file
    image_header = 16   # bytes skipped at the start of an image file

    files = [
        'train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz',
        't10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz',
    ]
    paths = [os.path.join(data_folder, fname) for fname in files]

    # Labels are read before images because the label count fixes the
    # first dimension of the image array.
    y_train = _read_idx_payload(paths[0], label_header)
    x_train = _read_idx_payload(paths[1], image_header).reshape(
        len(y_train), 28, 28)

    y_test = _read_idx_payload(paths[2], label_header)
    x_test = _read_idx_payload(paths[3], image_header).reshape(
        len(y_test), 28, 28)

    return (x_train, y_train), (x_test, y_test)
以上就是上一篇文章中实现的 load_data 函数。
可视化:
import os
import gzip
import numpy as np
import matplotlib.pyplot as plt
# Load the MNIST arrays from a local folder (machine-specific path).
(train_images, train_labels), (test_images, test_labels) = load_data('/home/peterwang/Downloads/Mnist')

fig = plt.figure()
# Draw the first 15 training images on a 3x5 grid of subplots.
for i in range(15):
    plt.subplot(3, 5, i + 1)  # subplot indices are 1-based
    plt.imshow(train_images[i], cmap='Greys')  # render with the 'Greys' colormap
    plt.title("Label:" + str(train_labels[i]))  # caption each image with its label
    # Remove the tick marks on the x and y axes.
    plt.xticks([])
    plt.yticks([])
# Display the figure once, after every subplot has been drawn.
plt.show()
这里取前15张图片,可视化结果如图:
参考:B站教学视频(原文中的视频链接缺失,待补充)。