【Pytorch】CIFAR-10+AlexNet,.pth格式转化为.onnx格式并调用(学习笔记)

1、CIFAR数据集准备

Step1:CIFAR-10数据集下载

在GitHub上找的压缩包:-VGG19-CIFAR10-/数据集/CIFAR10/cifar-10-batches-py at master · shiyadong123/-VGG19-CIFAR10- · GitHub

Step2:解压

我的了解:data_batch_1到data_batch_5以及test_batch文件都包含一万张分辨率为32 * 32的图片信息

  • data_batch为字典,包含四个字典键:
  • b'batch_label'
  • b'labels' 标签
  • b'data' 图片像素值
  • b'filenames'
    如果希望观察其具体内容及形式,可在下面代码中加个print(dict)试试。

Step3:提前准备好存放图片的文件夹

类似这样↓
在这里插入图片描述
分为test和train两个文件夹,每个文件夹里各包含十个子文件夹,命名为0~9(与CIFAR-10的标签值一一对应,代码中直接用标签值作为子文件夹名)。
(养成文件名和文件夹名不含中文而且取怪一点的好习惯,否则可能会喜提ERROR)

Step4:文件转图片

把文件夹里的六个文件转化为图片↓

import numpy as np  
import cv2  
  
import matplotlib.pyplot as plt  
  
def unpickle(file, data_dir="cifar-10-batches-py/"):
    """Load one CIFAR-10 batch file and return its dict.

    The pickled dict uses bytes keys (b'data', b'labels', ...), hence
    encoding='bytes'.  ``data_dir`` defaults to the original hard-coded
    directory, so existing calls keep working.
    """
    import pickle
    # Renamed the local from ``dict`` to avoid shadowing the builtin.
    with open(data_dir + file, 'rb') as fo:
        batch = pickle.load(fo, encoding='bytes')
    return batch
  
# Open one batch at a time; change the file name below for each of the six files
# (data_batch_1 .. data_batch_5 and test_batch).
data_batch = unpickle("data_batch_1")

cifar_data = data_batch[b'data']     # dict keys are bytes, hence the b'' prefix
cifar_label = data_batch[b'labels']
cifar_data = np.array(cifar_data)    # array form is easier to reshape/index
print(cifar_data.shape)              # (10000, 3072)
cifar_label = np.array(cifar_label)
print(cifar_label.shape)             # (10000,)

# CIFAR-10 class names indexed by label 0-9; used in the saved file names.
# Fixed the original typo 'brid' -> 'bird'.
label_name = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

# Convert flat CIFAR rows into image files on disk.
def imwrite_images(k, offset=40000):
    """Save the first ``k`` rows of ``cifar_data`` (k in 1..10000) as JPEGs
    under picture//train//<label>//.

    ``offset`` is added to the running index in the file name (default 40000
    preserves the original hard-coded value), so batches converted in
    separate runs do not overwrite each other.
    """
    for i in range(k):
        image = cifar_data[i]
        # Each row is 3072 values: 1024 red, then 1024 green, then 1024 blue.
        image = image.reshape(-1, 1024)
        r = image[0, :].reshape(32, 32)  # red plane
        g = image[1, :].reshape(32, 32)  # green plane
        b = image[2, :].reshape(32, 32)  # blue plane
        img = np.zeros((32, 32, 3), dtype=np.uint8)
        # cv2.imwrite expects BGR channel order; the original assigned
        # R,G,B to channels 0,1,2, which saved red/blue swapped.
        img[:, :, 0] = b
        img[:, :, 1] = g
        img[:, :, 2] = r
        print(i)
        cv2.imwrite("picture//train//" + str(cifar_label[i]) + "//" + str(label_name[cifar_label[i]]) + "_" + str(i + offset) + ".jpg", img)
    # The original printed this inside the loop with broken tab+space
    # indentation (a TabError in Python 3); print it once, after the loop.
    print("第%d张图片保存完毕" % k)
imwrite_images(10000)

欧了.

2、AlexNet实现

Step1:训练模型

根据自己条件设置参数
我先尝试用的BATCH_SIZE=50,好像有点小,得到五个模型,最高准确率是第五次53%。
后来BATCH_SIZE=1000时显示内存不足,就用的500。
用了四个小时,得到20个model
最后准确率稳定在79%左右,最高的是第十九次,但当时结果没有保存,具体数值忘了。
在这里插入图片描述

import torch  
#import os  
from torch import nn  
from torch.nn import functional as F  
#from torch.autograd import Variable  
import matplotlib.pyplot as plt  
from torchvision.datasets import ImageFolder  
import torch.optim as optim  
import torch.utils.data  
#from PIL import Image  
import torchvision.transforms as transforms  
  
# Hyperparameters
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # prefer GPU when available
EPOCH = 20          # number of epochs; used below only for the plot x-axis (the loop itself runs range(6, 26))
BATCH_SIZE = 500    # per the author's notes, 1000 ran out of memory
  
# 网络模型构建  
class AlexNet(nn.Module):
    """Scaled-down AlexNet for classifying 65x65 RGB images.

    The convolutional stack reduces a 3x65x65 input to 128x6x6; the fully
    connected classifier then maps the flattened 4608 features to
    ``num_classes`` logits (default 10, for CIFAR-10).
    """

    def __init__(self, num_classes=10):
        super(AlexNet, self).__init__()
        # Five conv layers with three 3x3/stride-2 max-pools in between.
        conv_stack = [
            nn.Conv2d(3, 48, kernel_size=11),             # 65 -> 55
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),        # 55 -> 27
            nn.Conv2d(48, 128, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),        # 27 -> 13
            nn.Conv2d(128, 192, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),        # 13 -> 6
        ]
        self.features = nn.Sequential(*conv_stack)
        # Two hidden FC layers with dropout, then the output layer.
        fc_stack = [
            nn.Linear(6 * 6 * 128, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(2048, num_classes),
        ]
        self.classifier = nn.Sequential(*fc_stack)

    def forward(self, x):
        feats = self.features(x)
        flat = feats.view(feats.size(0), -1)  # same as torch.flatten(x, 1)
        return self.classifier(flat)
  
# Per-channel normalization with the standard ImageNet mean / std.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Training set: sub-folders under train/ are the class labels.
path_1 = r'D://code//5000//ML//picture//train//'
trans_1 = transforms.Compose([
    transforms.Resize((65,65)),   # the AlexNet above expects 65x65 input
    transforms.ToTensor(),
    normalize,
])

# Dataset and loader
train_set = ImageFolder(root=path_1, transform=trans_1)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE,
                                           shuffle=True, num_workers=0)

# Test set
path_2 = r'D://code//5000//ML//picture//test//'
trans_2 = transforms.Compose([
    transforms.Resize((65,65)),
    transforms.ToTensor(),
    normalize,
])
test_data = ImageFolder(root=path_2, transform=trans_2)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=BATCH_SIZE,
                                          shuffle=True, num_workers=0)

# Validation set.  NOTE: it points at the same directory as the test set, so
# "validation" results here are really test-set results.
path_3 = r'D://code//5000//ML//picture//test//'
# Fixed: the original defined path_3 but built valid_data from path_2
# (same value, so behavior is unchanged; path_3 is no longer dead).
valid_data = ImageFolder(root=path_3, transform=trans_2)
valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=BATCH_SIZE,
                                           shuffle=True, num_workers=0)

# Model
model = AlexNet().to(DEVICE)
# Plain SGD with momentum and L2 weight decay.
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.0005)
  
  
# 训练过程  
def train_model(model, device, train_loader, optimizer, epoch):  
    train_loss = 0  
    model.train()  
    for batch_index, (data, label) in enumerate(train_loader):  
        data, label = data.to(device), label.to(device)  
        optimizer.zero_grad()  
        output = model(data)  
        loss = F.cross_entropy(output, label)  
        loss.backward()  
        optimizer.step()  
        if batch_index % 300 == 0:  
            train_loss = loss.item()  
            print('Train Epoch:{}\ttrain loss:{:.6f}'.format(epoch, loss.item()))  
  
    return train_loss  
  
# 测试部分的函数  
def test_model(model, device, test_loader):  
    model.eval()  
    correct = 0.0  
    test_loss = 0.0  
  
    # 不需要梯度的记录  
    with torch.no_grad():  
        for data, label in test_loader:  
            data, label = data.to(device), label.to(device)  
            output = model(data)  
            test_loss += F.cross_entropy(output, label).item()  
            pred = output.argmax(dim=1)  
            correct += pred.eq(label.view_as(pred)).sum().item()  
        test_loss /= len(test_loader.dataset)  
        print('Test_average_loss:{:.4f},Accuracy:{:3f}\n'.format(  
            test_loss, 100 * correct / len(test_loader.dataset)  
        ))  
        acc = 100 * correct / len(test_loader.dataset)  
  
        return test_loss, acc  
  
# ---- Training driver ----
Train_Loss_list = []
Valid_Loss_list = []
Valid_Accuracy_list = []

# Epochs are numbered 6..25 (continuing from an earlier 5-epoch run),
# so checkpoints are saved as model6.pth .. model25.pth.
for epoch in range(6, 26):
    train_loss = train_model(model, DEVICE, train_loader, optimizer, epoch)
    Train_Loss_list.append(train_loss)
    # Saves the whole module (not just state_dict); loading it later
    # requires the AlexNet class to be importable.
    torch.save(model, r'save_model\model%s.pth' % epoch)

    # Evaluate on the validation loader.
    test_loss, acc = test_model(model, DEVICE, valid_loader)
    Valid_Loss_list.append(test_loss)
    Valid_Accuracy_list.append(acc)

# Pick the epoch with the lowest validation loss.
# Fixes from the original: (1) it kept a duplicate list named ``list``,
# shadowing the builtin; (2) it mapped index -> file as min_index + 1,
# but the files are model6..model25, so the correct offset is + 6.
min_num = min(Valid_Loss_list)
min_index = Valid_Loss_list.index(min_num)
best_epoch = min_index + 6

print('model%s' % best_epoch)
print('验证集最高准确率: ')
print('{}'.format(Valid_Accuracy_list[min_index]))

# Reload the best checkpoint and evaluate on the test set.
# NOTE(review): torch.load of a full pickled model executes arbitrary code
# from the file — only load checkpoints you created yourself.
model = torch.load(r'save_model\model%s.pth' % best_epoch)
model.eval()

# test_model returns (loss, accuracy); the original printed the whole tuple.
test_loss, accuracy = test_model(model, DEVICE, test_loader)
print('测试集准确率')
print('{}%'.format(accuracy))

# ---- Plots ----
# Font settings so Chinese axis labels render correctly.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# 20 points on the x-axis, matching the 20 epochs recorded above.
x1 = range(0, EPOCH)
y1 = Train_Loss_list
y2 = Valid_Loss_list
y3 = Valid_Accuracy_list

plt.subplot(221)
plt.plot(x1, y1, '-o')
plt.ylabel('训练集损失')
plt.xlabel('轮数')

plt.subplot(222)
plt.plot(x1, y2, '-o')
plt.ylabel('验证集损失')
plt.xlabel('轮数')

plt.subplot(212)
plt.plot(x1, y3, '-o')
plt.ylabel('验证集准确率')
plt.xlabel('轮数')

plt.show()

Step2:pth转onnx

将准确率最高的model24.pth转化为model24.onnx

import torch  
import torch.nn as nn  
import torchvision  
  
class AlexNet(nn.Module):
    """AlexNet variant matching the training script.

    Re-declared here because torch.load must be able to resolve the pickled
    model's class.  Maps a 3x65x65 input to ``num_classes`` logits.
    """

    def __init__(self, num_classes=10):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            # conv1 (65 -> 55), pooled to 27x27
            nn.Conv2d(3, 48, kernel_size=11),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # conv2, pooled to 13x13
            nn.Conv2d(48, 128, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            # conv3-conv5, pooled to 6x6
            nn.Conv2d(128, 192, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(6 * 6 * 128, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(2048, num_classes),
        )

    def forward(self, x):
        out = self.features(x)
        out = out.flatten(start_dim=1)  # collapse 128x6x6 to 4608 features
        out = self.classifier(out)
        return out
  
  
# Export the trained checkpoint to ONNX with a fixed 1x3x65x65 input shape.
# NOTE(review): torch.load of a full pickled model executes code from the
# file — only load checkpoints you created yourself.
model = torch.load(r'D:\code\5000\Alexnet\save_model\model24.pth',map_location='cpu')
model.eval()  # disable dropout so the exported graph is deterministic
sample = torch.randn(1, 3, 65,65)  # dummy input that fixes the export shape
torch.onnx.export(
    model,
    sample,
    "model24.onnx",
    verbose=True,
    input_names=["input"],    # names referenced later by cv2.dnn
    output_names=["output"],
)

Step3:调用onnx预测

Example 1

尝试输入一张图片进行预测,随便选用的airplane_44.jpg

import cv2 as cv  
import numpy as np  
  
def img_process(image, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)):
    """Scale an HxWx3 uint8 image to [0, 1] and normalize per channel.

    Parameters:
        image: HxWx3 array of pixel values in 0..255.
        mean, std: per-channel normalization constants.  The defaults keep
            the original behaviour, (x/255 - 0.5) / 0.5, i.e. output in
            roughly [-1, 1].

    Returns:
        float32 array of the same shape.

    NOTE(review): training normalized with mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225]; the 0.5/0.5 defaults here do not match that —
    confirm which normalization the deployed model actually expects.
    """
    mean = np.asarray(mean, dtype=np.float32).reshape(1, 1, 3)
    std = np.asarray(std, dtype=np.float32).reshape(1, 1, 3)
    return ((image / 255. - mean) / std).astype(np.float32)
  
# Load one test image and classify it with the exported ONNX model via
# OpenCV's DNN module.
img = cv.imread("D://code//5000//ML//picture//test//0//airplane_44.jpg")  # BGR, uint8
# NOTE(review): imshow without a cv.waitKey() call may not render — confirm.
cv.imshow("img",img)
img_t = cv.resize(img,(65,65))    # resize to the 65x65 input the model was exported with
img_t = img_process(img_t)
# blobFromImage below already produces NCHW, so the manual transpose and
# batch-dimension steps stay disabled:
#img_t = np.transpose(img_t,[2,0,1])
#img_t = img_t[np.newaxis,:]   # add a batch dimension

layerNames = ["output"]   # must match output_names used at ONNX export time
# Builds a 1x3x65x65 blob; swapRB=True converts OpenCV's BGR order to RGB.
blob=cv.dnn.blobFromImage(img_t,scalefactor=1.0,swapRB=True,crop=False)
net = cv.dnn.readNetFromONNX("model24.onnx")
net.setInput(blob)
outs = net.forward(layerNames)
print(outs)   # raw class scores (no softmax); argmax is the predicted label

输出

C:\Users\acer\.conda\envs\pytorch\python.exe D:/code/5000/Alexnet/predict.py
(array([[ 4.9695697 , -2.0295908 ,  1.7662119 , -1.8614476 ,  1.3672656 ,
        -3.35308   , -1.7841126 , -1.9462793 ,  2.6365623 ,  0.49384755]],
      dtype=float32),)

进程已结束,退出代码0

最大的就是第一个,标签为0,就对了嘛。

Example 2

再换成5号文件夹里的dog_9997.jpg
得到

C:\Users\acer\.conda\envs\pytorch\python.exe D:/code/5000/Alexnet/predict.py
(array([[-1.675911  , -3.8933187 ,  3.03588   ,  2.0604815 ,  0.62322545,
         4.3882637 , -0.34412646,  1.3543239 , -2.5193105 , -2.8624332 ]],
      dtype=float32),)

进程已结束,退出代码0

第六个最大,标签为5,欧了。
至少说明模型能用,至于为什么准确率只有百分之八十……有待探究。

猜你喜欢

转载自blog.csdn.net/U202113837/article/details/129847985