Use the GPU to run MNIST (LeNet-5 model) with PyTorch and TensorFlow respectively [this also gives a preliminary understanding of how to convert between TensorFlow and PyTorch]

PyTorch version:

There are several steps in total:

1. Load the dataset (you can of course use your own dataset here, but remember to adjust the parameters: h, w, channel)

2. Define the neural network model (two ways of writing it are shown here)

3. Check the model

4. Move the model to the GPU

5. Define the loss function and optimizer

6. Train

7. Plot the loss curve

8. Evaluate the model

# LeNet-5 network structure
import torch.nn as nn
import torch
import torchvision
from torchvision import transforms
from torch.utils import data
import matplotlib.pyplot as plt
import torch.nn.functional as F
# Define the dataset loading function
def load_data_mnist(batch_size):
    '''Download the MNIST dataset and load it into memory'''
    train_dataset=torchvision.datasets.MNIST(root='dataset',train=True,transform=transforms.ToTensor(),download=True)
    test_dataset=torchvision.datasets.MNIST(root='dataset',train=False,transform=transforms.ToTensor(),download=True)
    return (data.DataLoader(train_dataset,batch_size,shuffle=True),
           data.DataLoader(test_dataset,batch_size,shuffle=False))

# LeNet-5's performance on the MNIST dataset
batch_size=64
train_iter,test_iter=load_data_mnist(batch_size=batch_size)
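# (Optional sanity check, not in the original steps) one mini-batch should have shape [batch_size, channel, h, w] = [64, 1, 28, 28]
images, labels = next(iter(train_iter))
print(images.shape, labels.shape)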
###########################################################################
# First way of writing the network model
# net = nn.Sequential(
#     nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.Sigmoid(),
#     nn.AvgPool2d(kernel_size=2, stride=2),
#     nn.Conv2d(6, 16, kernel_size=5), nn.Sigmoid(),
#     nn.AvgPool2d(kernel_size=2, stride=2), nn.Flatten(),
#     nn.Linear(16 * 5 * 5, 120), nn.Sigmoid(),
#     nn.Linear(120, 84), nn.Sigmoid(),
#     nn.Linear(84, 10))


# Second way of writing the network model
class net1(nn.Module):
    def __init__(self):
        super(net1, self).__init__()
        self.C1 = nn.Conv2d(1, 6, kernel_size=5, padding=2)
        self.C2 = nn.Conv2d(6, 16, kernel_size=5)
        self.linear1 = nn.Linear(16 * 5 * 5, 120)
        self.linear2 = nn.Linear(120, 84)
        self.linear3 = nn.Linear(84, 10)

    def forward(self, x):
        n = self.C1(x)
        n = nn.Sigmoid()(n)
        n = nn.AvgPool2d(kernel_size=2, stride=2)(n)
        n = self.C2(n)
        n = nn.Sigmoid()(n)
        n = nn.AvgPool2d(kernel_size=2, stride=2)(n)
        n = nn.Flatten()(n)
        n = self.linear1(n)
        n = nn.Sigmoid()(n)
        n = self.linear2(n)
        n = nn.Sigmoid()(n)
        n = self.linear3(n)

        return n


net = net1()
######################################################################
# Check the model
x = torch.rand(size=(1, 1, 28, 28), dtype=torch.float32)

# For the first form of the model

# for layer in net:
#     x = layer(x)
#     print(layer.__class__.__name__, 'output shape:\t', x.shape)


# For the second form of the model

model_modules = [x for x in net.modules()]
print(model_modules)
print(len(model_modules))
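# (Optional) a forward pass on the dummy input also confirms the final output shape
print(net(x).shape)   # expected: torch.Size([1, 10]), one score per digit class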
####################################################################
# Get the GPU device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# Move the network to the GPU
net.to(device)
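# (Optional) confirm that the model's parameters are now on the GPU
print(next(net.parameters()).device)   # prints cuda:0 when a GPU is available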
######################################################################

# Loss function
loss_function=nn.CrossEntropyLoss()
# Optimizer
optimizer=torch.optim.Adam(net.parameters())
#####################################################################
# Start training
num_epochs = 10
train_loss = []

for epoch in range(num_epochs):
    for batch_idx, (x, y) in enumerate(train_iter):
        # x = x.view(x.size(0), 28 * 28)   # only needed for a fully-connected model

        # Move the inputs and labels to the GPU
        x, y = x.to(device), y.to(device)         # x is the inputs, y is the labels

        out = net(x)
        y_onehot = F.one_hot(y, num_classes=10).float()  # convert the labels to one-hot encoding

        loss = loss_function(out, y_onehot)  # cross-entropy loss (one-hot targets need PyTorch >= 1.10; passing y directly also works)
        # Zero the gradients
        optimizer.zero_grad()
        loss.backward()
        # w' = w - lr * grad
        optimizer.step()
        train_loss.append(loss.item())
        if batch_idx % 10 == 0:
            print(epoch, batch_idx, loss.item())


####################################################################

# Plot the loss curve
plt.figure(figsize=(8,3))
plt.grid(True,linestyle='--',alpha=0.5)
plt.plot(train_loss,label='loss')
plt.legend(loc="best")
plt.show()
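# (Optional sketch, not in the original code) the per-batch loss is noisy; a moving average gives a smoother curve
import numpy as np
window = 50
smoothed = np.convolve(train_loss, np.ones(window) / window, mode='valid')
plt.figure(figsize=(8,3))
plt.grid(True, linestyle='--', alpha=0.5)
plt.plot(smoothed, label='smoothed loss')
plt.legend(loc="best")
plt.show()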
################################################
# Evaluate the model

total_correct = 0
with torch.no_grad():   # no gradients are needed during evaluation
    for batch_idx, (x, y) in enumerate(test_iter):
        # x = x.view(x.size(0),28*28)   # only needed for a fully-connected model

        # Move the inputs and labels to the GPU
        x, y = x.to(device), y.to(device)  # x is the inputs, y is the labels

        out = net(x)
        pred = out.argmax(dim=1)
        correct = pred.eq(y).sum().float().item()
        total_correct += correct

total_num = len(test_iter.dataset)
test_acc = total_correct / total_num
print(total_correct, total_num)
print("test acc:", test_acc)

Matplotlib shows the training loss curve.

#############################################################################

If the trained model is relatively large and contains multiple parts, nn.Module subclasses can be nested and composed to build up the final large model.

For example:

Here I split the nn.Conv2d(1, 6, kernel_size=5, padding=2) layer out into its own module.

By calling the net2 module inside the net1 class, the complete model is assembled.


class net2(nn.Module):
    def __init__(self):
        super(net2, self).__init__()
        self.CCC1 = nn.Conv2d(1, 6, kernel_size=5, padding=2)

    def forward(self,x):
        x = self.CCC1(x)

        return x



class net1(nn.Module):
    def __init__(self):
        super(net1, self).__init__()
        # self.C1 = nn.Conv2d(1, 6, kernel_size=5, padding=2)
        self.net22 = net2()

        self.C2 = nn.Conv2d(6, 16, kernel_size=5)
        self.linear1 = nn.Linear(16 * 5 * 5, 120)
        self.linear2 = nn.Linear(120, 84)
        self.linear3 = nn.Linear(84, 10)

    def forward(self, x):
        # n = self.C1(x)
        n = self.net22(x)
        n = nn.Sigmoid()(n)
        n = nn.AvgPool2d(kernel_size=2, stride=2)(n)
        n = self.C2(n)
        n = nn.Sigmoid()(n)
        n = nn.AvgPool2d(kernel_size=2, stride=2)(n)
        n = nn.Flatten()(n)
        n = self.linear1(n)
        n = nn.Sigmoid()(n)
        n = self.linear2(n)
        n = nn.Sigmoid()(n)
        n = self.linear3(n)

        return n


net = net1()

############################################################################

Another very neat way to check the model:

(But you need to install the torchsummary package first)

from torchsummary import summary
summary(net, (1, 28, 28))
# The first argument is the instantiated model; the second is the input size as (C, H, W).
# batch_size is reported as -1 automatically, so it does not need to be passed here.
# This call must come after net.to(device), otherwise it raises an error.


The summary prints a table with each layer's output size and parameter count, along with which device the model's parameters live on.

TensorFlow version:


import tensorflow as tf
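# TensorFlow places operations on a visible GPU automatically; this lists the GPUs it can see
print(tf.config.list_physical_devices('GPU'))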

# Load MNIST dataset
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Normalize pixel values to be between 0 and 1
x_train, x_test = x_train / 255.0, x_test / 255.0

# Define model architecture
model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(10)
])

# Compile model
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam',
              loss=loss_fn,
              metrics=['accuracy'])

# Train model
model.fit(x_train, y_train, epochs=5)

# Evaluate model on test data
model.evaluate(x_test, y_test)
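
Note that the Keras model above is a plain fully-connected baseline rather than LeNet-5. As a sketch (not part of the original code) of how the LeNet-5 layout used in the PyTorch section, with sigmoid activations and average pooling, could be written in Keras, assuming the same 28x28 grayscale input:

# LeNet-5 in Keras, mirroring the PyTorch model above (a sketch under the assumptions stated above)
lenet = tf.keras.models.Sequential([
  tf.keras.layers.Reshape((28, 28, 1), input_shape=(28, 28)),   # add the channel dimension: H, W, C
  tf.keras.layers.Conv2D(6, kernel_size=5, padding='same', activation='sigmoid'),
  tf.keras.layers.AveragePooling2D(pool_size=2, strides=2),
  tf.keras.layers.Conv2D(16, kernel_size=5, activation='sigmoid'),
  tf.keras.layers.AveragePooling2D(pool_size=2, strides=2),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(120, activation='sigmoid'),
  tf.keras.layers.Dense(84, activation='sigmoid'),
  tf.keras.layers.Dense(10)
])

lenet.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])
lenet.fit(x_train, y_train, epochs=5)
lenet.evaluate(x_test, y_test)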

Origin blog.csdn.net/djdjdhch/article/details/130435960