[PyTorch Basics] Neural Network Tutorial Notes

Code from https://github.com/yunjey/pytorch-tutorial, a very good tutorial.

1. Basics

1.1 linear_regression.py

#linear_regression.py
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt


# Hyper-parameters
input_size = 1
output_size = 1
num_epochs = 60
learning_rate = 0.001

# Toy dataset
x_train = np.array([[3.3], [4.4], [5.5], [6.71], [6.93], [4.168], 
                    [9.779], [6.182], [7.59], [2.167], [7.042], 
                    [10.791], [5.313], [7.997], [3.1]], dtype=np.float32)
print(x_train.shape)
y_train = np.array([[1.7], [2.76], [2.09], [3.19], [1.694], [1.573], 
                    [3.366], [2.596], [2.53], [1.221], [2.827], 
                    [3.465], [1.65], [2.904], [1.3]], dtype=np.float32)
print(y_train.shape)
# Linear regression model
model = nn.Linear(input_size, output_size)

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# Train the model
for epoch in range(num_epochs):
    # Convert numpy arrays to torch tensors
    inputs = torch.from_numpy(x_train)
    targets = torch.from_numpy(y_train)
    # Forward pass
    outputs = model(inputs)
    loss = criterion(outputs, targets)
    # Backward and optimize
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if (epoch+1) % 5 == 0:
        print("Epoch [{}/{}], Loss: {:.4f}".format(epoch+1, num_epochs, loss.item()))
# Plot the graph
predicted = model(torch.from_numpy(x_train)).detach().numpy()

plt.plot(x_train, y_train, 'ro', label='Original data')
plt.plot(x_train, predicted, label='Fitted line')
plt.legend()
plt.show()
# Save the model checkpoint
# torch.save(model.state_dict(), 'model.pth')
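
The commented-out line above hints at checkpointing; nn.Module has no .save() method, so the state_dict goes through torch.save. A minimal sketch (my own addition, run right after the script above; the filename 'linear_model.pth' is just a placeholder):

# Save only the learned parameters, then restore them into a fresh model
torch.save(model.state_dict(), 'linear_model.pth')

restored = nn.Linear(input_size, output_size)
restored.load_state_dict(torch.load('linear_model.pth'))
print(restored.weight, restored.bias)  # same parameters as the trained model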

1.2 logistic_regression.py


#logistic_regression.py
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms


# Hyper-parameters 
input_size = 28 * 28    # 784
num_classes = 10
num_epochs = 5
batch_size = 100
learning_rate = 0.001

# MNIST dataset (images and labels)
train_dataset = torchvision.datasets.MNIST(root='/home/cw/pytorch-tutorial-1/data', 
                                           train=True, 
                                           transform=transforms.ToTensor(),
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='/home/cw/pytorch-tutorial-1/data', 
                                          train=False, 
                                          transform=transforms.ToTensor())


# Data loader (input pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=batch_size, 
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
                                          batch_size=batch_size, 
                                          shuffle=False)

# Logistic regression model
model = nn.Linear(input_size, num_classes)

# Loss and optimizer
# nn.CrossEntropyLoss() computes softmax internally
criterion = nn.CrossEntropyLoss()  
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)  
 
# Train the model
total_step = len(train_loader)
#print(total_step) # 600
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Reshape images to (batch_size, input_size)
        images = images.reshape(-1, input_size)  # torch.Size([100, 1, 28, 28]) ==> torch.Size([100, 784]); key point: each batch trains 100 images together, each image flattened to a 784-dim vector and mapped to 10 class scores by the linear layer

        '''
        print(labels)
            tensor([0, 8, 3, 4, 7, 0, 6, 3, 0, 7, 1, 1, 5, 3, 7, 1, 2, 6, 2, 1, 6, 3, 1, 1,
            4, 3, 1, 9, 5, 1, 4, 4, 6, 6, 3, 3, 9, 7, 9, 8, 8, 2, 6, 3, 3, 2, 0, 8,
            8, 8, 8, 3, 8, 7, 5, 0, 7, 8, 3, 5, 1, 1, 8, 6, 9, 5, 2, 2, 1, 4, 6, 3,
            0, 9, 2, 5, 9, 6, 3, 5, 4, 1, 1, 9, 3, 7, 3, 9, 3, 3, 8, 8, 8, 4, 3, 0,
            9, 1, 6, 3])
            torch.Size([100])
        print(images.shape) 
            torch.Size([100, 1, 28, 28])
        '''
        # Forward pass
        outputs = model(images)
        #print(outputs.shape)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

# Test the model
# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, input_size)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))

# Save the model checkpoint
torch.save(model.state_dict(), 'model.ckpt')
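
Regarding the comment that nn.CrossEntropyLoss() computes softmax internally: a small standalone check (my own addition, with made-up logits) that it matches log_softmax followed by the negative log-likelihood loss:

import torch
import torch.nn as nn
import torch.nn.functional as F

logits = torch.randn(4, 10)            # 4 samples, 10 class scores each
labels = torch.tensor([0, 3, 7, 9])    # arbitrary target classes

ce = nn.CrossEntropyLoss()(logits, labels)
nll = F.nll_loss(F.log_softmax(logits, dim=1), labels)
print(torch.allclose(ce, nll))         # expected: True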

1.3 feedforward_neural_network.py


#feedforward_neural_network.py
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms


# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Hyper-parameters 
input_size = 784
hidden_size = 500
num_classes = 10
num_epochs = 5
batch_size = 100
learning_rate = 0.001

# MNIST dataset 
train_dataset = torchvision.datasets.MNIST(root='/home/cw/pytorch-tutorial-1/data', 
                                           train=True, 
                                           transform=transforms.ToTensor(),  
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='/home/cw/pytorch-tutorial-1/data', 
                                          train=False, 
                                          transform=transforms.ToTensor())

# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=batch_size, 
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
                                          batch_size=batch_size, 
                                          shuffle=False)

# Fully connected neural network with one hidden layer
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size) 
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)  
        
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out



model = NeuralNet(input_size, hidden_size, num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  

# Train the model
total_step = len(train_loader)
#print(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):  
        # Move tensors to the configured device
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

# Test the model
# In test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))

# Save the model checkpoint
#torch.save(model.state_dict(), 'model.ckpt')
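
The same one-hidden-layer network can also be written compactly with nn.Sequential; a structurally equivalent sketch (not from the original tutorial):

import torch
import torch.nn as nn

# Equivalent in structure to NeuralNet(input_size=784, hidden_size=500, num_classes=10)
seq_model = nn.Sequential(
    nn.Linear(784, 500),   # input_size -> hidden_size
    nn.ReLU(),
    nn.Linear(500, 10))    # hidden_size -> num_classes

x = torch.randn(100, 784)  # dummy batch of flattened MNIST images
print(seq_model(x).shape)  # torch.Size([100, 10])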

2. Intermediate

2.1 convolutional_neural_network.py

import torch 
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms


# Device configuration
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Hyper parameters
num_epochs = 5
num_classes = 10
batch_size = 100
learning_rate = 0.001

# MNIST dataset
train_dataset = torchvision.datasets.MNIST(root='/home/cw/pytorch-tutorial-1/data',
                                           train=True, 
                                           transform=transforms.ToTensor(),
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='/home/cw/pytorch-tutorial-1/data',
                                          train=False, 
                                          transform=transforms.ToTensor())

# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size, 
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size, 
                                          shuffle=False)

# Convolutional neural network (two convolutional layers)
class ConvNet(nn.Module):
    def __init__(self, num_classes=10):
        super(ConvNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            # nn.MaxPool2d(kernel_size=2, stride=2))
            nn.AdaptiveAvgPool2d((14, 14))
        )

        # Conv2d(1, 16, kernel_size=5, stride=1, padding=2)
        # out_channels = 16
        # 28 - 5 + 4 + 1 = 28
        # [100, 1, 28, 28] ==> [100, 16, 28, 28]

        # AdaptiveAvgPool2d((14, 14)) (same output size as the commented-out MaxPool2d(2, 2))
        # 28 / 2 = 14
        # [100, 16, 28, 28] ==> [100, 16, 14, 14]
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            #nn.MaxPool2d(kernel_size=2, stride=2))
            nn.AdaptiveAvgPool2d((7, 7))
        )

        # Conv2d(16, 32, kernel_size=5, stride=1, padding=2)
        # out_channels = 32
        # 14 - 5 + 4 + 1 = 14
        # [100, 16, 14, 14] ==> [100, 32, 14, 14]

        # AdaptiveAvgPool2d((7, 7)) (same output size as the commented-out MaxPool2d(2, 2))
        # 14 / 2 = 7
        # [100, 32, 14, 14] ==> [100, 32, 7, 7]
        self.fc = nn.Linear(7*7*32, num_classes)
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1) # [100, 32, 7, 7] ==> [100, 32*7*7]
        out = self.fc(out)
        return out

model = ConvNet(num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        #print(images.shape)
        images = images.to(device) #[100, 1, 28, 28]
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

# Test the model
model.eval()  # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))

# Save the model checkpoint
torch.save(model.state_dict(), 'model.ckpt')
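
To double-check the shape trace in the ConvNet comments, here is a small standalone sketch (my own addition) that re-creates layer1/layer2 with the same definitions and pushes a dummy MNIST batch through them:

import torch
import torch.nn as nn

layer1 = nn.Sequential(
    nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
    nn.BatchNorm2d(16),
    nn.ReLU(),
    nn.AdaptiveAvgPool2d((14, 14)))
layer2 = nn.Sequential(
    nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
    nn.BatchNorm2d(32),
    nn.ReLU(),
    nn.AdaptiveAvgPool2d((7, 7)))

x = torch.randn(100, 1, 28, 28)              # one dummy batch
out1 = layer1(x)
print(out1.shape)                            # torch.Size([100, 16, 14, 14])
out2 = layer2(out1)
print(out2.shape)                            # torch.Size([100, 32, 7, 7])
print(out2.reshape(out2.size(0), -1).shape)  # torch.Size([100, 1568]), i.e. 7*7*32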

2.2 deep_residual_network.py

# ---------------------------------------------------------------------------- #
# An implementation of https://arxiv.org/pdf/1512.03385.pdf                    #
# See section 4.2 for the model architecture on CIFAR-10                       #
# Some part of the code was referenced from below                              #
# https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py   #
# ---------------------------------------------------------------------------- #

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms


# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper-parameters
num_epochs = 80
batch_size = 100
learning_rate = 0.001
# Note: the argument to Compose is just a list, and each element of that list is a transform operation to apply.
# Image preprocessing modules
transform = transforms.Compose([
    transforms.Pad(4),                 # pad 4 pixels on each side
    transforms.RandomHorizontalFlip(), # flip horizontally with probability p (default 0.5)
    transforms.RandomCrop(32),         # random 32x32 crop
    transforms.ToTensor()])            # convert to tensor and scale values to [0, 1]

# CIFAR-10 dataset
train_dataset = torchvision.datasets.CIFAR10(root='/home/Dataset/cifar10',
                                             train=True, 
                                             transform=transform,
                                             download=False)

test_dataset = torchvision.datasets.CIFAR10(root='/home/Dataset/cifar10',
                                            train=False, 
                                            transform=transforms.ToTensor())

# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

# 3x3 convolution
def conv3x3(in_channels, out_channels, stride=1):
    return nn.Conv2d(in_channels, out_channels, kernel_size=3, 
                     stride=stride, padding=1, bias=False)

# Residual block
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = conv3x3(in_channels, out_channels, stride)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample
        
    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out

# ResNet
    # conv3x3(3, 16)
    # BN(16)
    # ReLU

    # layers
        # layer1
        # in_channels = 16
        # out_channels = 16, blocks = 2, stride = 1
        # downsample = None
        # in_channels = out_channels = 16
        # layers.append(block(16, 16))
        # nn.Sequential(block(16, 16))
            # conv3x3
            # bn
            # relu
            # conv3x3
            # bn
        # [100, 16, 32, 32]

    # layer2
        # in_channels = 16
        # out_channels = 32, blocks = 2, stride = 2
        # since 16 != 32:
        #   downsample = nn.Sequential(conv3x3(16, 32, stride=2), nn.BatchNorm2d(32))
        # conv3x3 ==> (32 - 3 + 2) / 2 + 1 = 16
        # [100, 16, 32, 32] ==> [100, 32, 16, 16]
        # in_channels = out_channels = 32
        # layers.append(block(32, 32))
        # nn.Sequential(block(32, 32))
            # conv3x3
            # bn
            # relu
            # conv3x3
            # bn
        # [100, 32, 16, 16]

    # layer3
        # in_channels = 32, out_channels = 64, stride = 2
        # downsample = nn.Sequential(conv3x3(32, 64, stride=2), nn.BatchNorm2d(64))
        # conv3x3 ==> (16 - 3 + 2) / 2 + 1 = 8
        # [100, 32, 16, 16] ==> [100, 64, 8, 8]
        # in_channels = out_channels = 64
        # layers.append(block(64, 64))
        # nn.Sequential(block(64, 64))
            # conv3x3
            # bn
            # relu
            # conv3x3
            # bn
        # [100, 64, 8, 8]
    # avg_pool: [100, 64, 8, 8] ==> [100, 64, 1, 1]
    # fc: [100, 64] ==> [100, 10]
    
class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes=10):
        super(ResNet, self).__init__()
        self.in_channels = 16
        self.conv = conv3x3(3, 16)  #[100, 3, 32, 32] ==> [100, 16, 32, 32]
        self.bn = nn.BatchNorm2d(16) 
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self.make_layer(block, 16, layers[0]) 
        self.layer2 = self.make_layer(block, 32, layers[1], 2)
        self.layer3 = self.make_layer(block, 64, layers[2], 2) # [100, 64, 8, 8]
        self.avg_pool = nn.AvgPool2d(8) # [100, 64, 8, 8] ==> [100, 64, 1, 1]
        self.fc = nn.Linear(64, num_classes) # [100, 64] ==> [100, 10]


    def make_layer(self, block, out_channels, blocks, stride=1):
        downsample = None
        if (stride != 1) or (self.in_channels != out_channels):
            downsample = nn.Sequential(
                conv3x3(self.in_channels, out_channels, stride=stride),
                nn.BatchNorm2d(out_channels))
            
        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels
        for i in range(1, blocks):
            layers.append(block(out_channels, out_channels))
        return nn.Sequential(*layers)
    
    def forward(self, x):
        out = self.conv(x)
        out = self.bn(out)
        out = self.relu(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out
    
model = ResNet(ResidualBlock, [2, 2, 2]).to(device)


# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# For updating learning rate
def update_lr(optimizer, lr):    
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
# Train the model
total_step = len(train_loader)
curr_lr = learning_rate
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 100 == 0:
            print ("Epoch [{}/{}], Step [{}/{}] Loss: {:.4f}"
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

    # Decay learning rate
    if (epoch+1) % 20 == 0:
        curr_lr /= 3
        update_lr(optimizer, curr_lr)

# Test the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the model on the test images: {} %'.format(100 * correct / total))

# Save the model checkpoint
torch.save(model.state_dict(), 'resnet.ckpt')
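
To verify the shape trace in the notes above, one can push a dummy CIFAR-10 batch through the model layer by layer. A sketch (my own addition), assuming the ResNet and ResidualBlock classes from this section are in scope:

import torch

net = ResNet(ResidualBlock, [2, 2, 2])
net.eval()                                         # use BatchNorm running statistics
with torch.no_grad():
    x = torch.randn(100, 3, 32, 32)                # dummy CIFAR-10 batch
    out = net.relu(net.bn(net.conv(x)))
    print(out.shape)                               # torch.Size([100, 16, 32, 32])
    out = net.layer1(out); print(out.shape)        # torch.Size([100, 16, 32, 32])
    out = net.layer2(out); print(out.shape)        # torch.Size([100, 32, 16, 16])
    out = net.layer3(out); print(out.shape)        # torch.Size([100, 64, 8, 8])
    out = net.avg_pool(out); print(out.shape)      # torch.Size([100, 64, 1, 1])
    out = net.fc(out.view(out.size(0), -1))
    print(out.shape)                               # torch.Size([100, 10])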

A quick note on F.binary_cross_entropy_with_logits:

import torch
import torch.nn.functional as F

input = torch.randn(3, requires_grad=True)
print(input)
# tensor([ 0.3107,  0.6120, -1.0163], requires_grad=True)

target = torch.empty(3).random_(2)
print(target)
# tensor([1., 0., 0.])

loss = F.binary_cross_entropy_with_logits(input, target)  # reduction defaults to 'mean'
print(loss)
# tensor(0.6347, grad_fn=<BinaryCrossEntropyWithLogitsBackward>)


# Manual reimplementation:
# first apply sigmoid to input,
# then average the negative log-likelihood with torch.mean

pred = torch.sigmoid(input)
print(pred)
# tensor([0.5771, 0.6484, 0.2658], grad_fn=<SigmoidBackward>)

print(1.0 * torch.log(torch.tensor(0.5771)))      # tensor(-0.5497)
print(1.0 * torch.log(torch.tensor(1 - 0.6484)))  # tensor(-1.0453)

result = -torch.mean(target * torch.log(pred) + (1 - target) * torch.log(1 - pred))
print(result)
# tensor(0.6347, grad_fn=<NegBackward>)
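
As a side note (not part of the original post), the reason for preferring binary_cross_entropy_with_logits over sigmoid followed by binary_cross_entropy is numerical stability; internally a log-sum-exp style formulation is used. A sketch of the standard stable identity on toy data:

import torch
import torch.nn.functional as F

x = torch.randn(5)                   # logits
z = torch.empty(5).random_(2)        # binary targets in {0, 1}

# Stable form: max(x, 0) - x*z + log(1 + exp(-|x|)), averaged over elements
stable = (x.clamp(min=0) - x * z + torch.log1p(torch.exp(-x.abs()))).mean()
reference = F.binary_cross_entropy_with_logits(x, z)
print(torch.allclose(stable, reference))  # expected: True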


Reposted from blog.csdn.net/weixin_43823854/article/details/109045795