MNIST classification task:
- Basic network construction and training methods, with analysis of commonly used functions
- The torch.nn.functional module
- The nn.Module module

Read the MNIST dataset
- The dataset will be downloaded automatically if it is not already on disk
```python
%matplotlib inline
from pathlib import Path
import requests

# Download the MNIST dataset if it is not already on disk
DATA_PATH = Path("data")
PATH = DATA_PATH / "mnist"
PATH.mkdir(parents=True, exist_ok=True)

URL = "http://deeplearning.net/data/mnist/"
FILENAME = "mnist.pkl.gz"

if not (PATH / FILENAME).exists():
    content = requests.get(URL + FILENAME).content
    (PATH / FILENAME).open("wb").write(content)

# The file is a gzipped pickle holding (train, validation, test) splits
import pickle
import gzip

with gzip.open((PATH / FILENAME).as_posix(), "rb") as f:
    ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding="latin-1")

# Each image is stored as a flat vector of 784 pixels (28 x 28)
from matplotlib import pyplot
import numpy as np

pyplot.imshow(x_train[0].reshape((28, 28)), cmap="gray")
print(x_train.shape)
```
Note that the data must be converted to torch.Tensor before it can be used in the training steps below.
```python
import torch

# Convert the NumPy arrays to tensors
x_train, y_train, x_valid, y_valid = map(
    torch.tensor, (x_train, y_train, x_valid, y_valid)
)
n, c = x_train.shape

print(x_train, y_train)
print(x_train.shape)
print(y_train.min(), y_train.max())
```
torch.nn.functional — many of the layers and functions we need live here
torch.nn.functional contains many functions that we will use frequently. So when should you use nn.Module and when nn.functional? As a rule of thumb: if the model has learnable parameters, it is best to use nn.Module; in other cases (stateless operations such as activations and losses), nn.functional is simpler.
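For example, ReLU has no learnable parameters, so its functional and module forms are interchangeable, while a linear layer carries a weight and a bias that nn.Module tracks for you. A minimal sketch of the contrast (the tensors here are illustrative):

```python
import torch
from torch import nn
import torch.nn.functional as F

x = torch.randn(4, 784)

# Stateless op: the functional form and the module form give identical results
assert torch.equal(F.relu(x), nn.ReLU()(x))

# Stateful op: nn.Linear owns its weight and bias and exposes them via
# .parameters(), which is what lets an optimizer update them later
linear = nn.Linear(784, 10)
print(linear(x).shape)  # torch.Size([4, 10])
```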
```python
import torch.nn.functional as F

loss_func = F.cross_entropy

def model(xb):
    # A bare linear model: matrix-multiply by the weights and add the bias
    return xb.mm(weights) + bias
```
```python
bs = 64                 # batch size
xb = x_train[0:bs]      # a mini-batch from x
yb = y_train[0:bs]
weights = torch.randn([784, 10], dtype=torch.float, requires_grad=True)
bias = torch.zeros(10, requires_grad=True)

print(loss_func(model(xb), yb))
```
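Before wrapping things in nn.Module, note what training looks like with bare tensors: every step must call backward(), update each tensor by hand, and zero the gradients. A hedged sketch of one such manual step (lr is an illustrative, untuned learning rate), to motivate the machinery introduced next:

```python
lr = 0.5  # illustrative learning rate

loss = loss_func(model(xb), yb)
loss.backward()

with torch.no_grad():
    # Manual parameter update and gradient reset
    weights -= weights.grad * lr
    bias -= bias.grad * lr
    weights.grad.zero_()
    bias.grad.zero_()
```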
Creating a model class simplifies the code further
- The class must inherit from nn.Module and call nn.Module's constructor in its own constructor
- There is no need to write a backpropagation function: nn.Module uses autograd to implement backpropagation automatically
- The learnable parameters in a Module can be returned as an iterator through named_parameters() or parameters()
```python
from torch import nn

class Mnist_NN(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden1 = nn.Linear(784, 128)
        self.hidden2 = nn.Linear(128, 256)
        self.out = nn.Linear(256, 10)

    def forward(self, x):
        x = F.relu(self.hidden1(x))
        x = F.relu(self.hidden2(x))
        x = self.out(x)
        return x
```
```python
net = Mnist_NN()
print(net)
```
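A quick forward-pass check (illustrative, reusing the mini-batch xb from above): the freshly constructed network maps a batch of flat images to one logit per class.

```python
print(net(xb).shape)  # torch.Size([64, 10]) — one logit per class
```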
```python
# Print the name, value, and shape of every learnable parameter
for name, parameter in net.named_parameters():
    print(name, parameter, parameter.size())
```
Output (abridged; each parameter prints as `Parameter containing: tensor([...], requires_grad=True)` followed by its shape):

```
hidden1.weight  torch.Size([128, 784])
hidden1.bias    torch.Size([128])
hidden2.weight  torch.Size([256, 128])
hidden2.bias    torch.Size([256])
out.weight      torch.Size([10, 256])
out.bias        torch.Size([10])
```
Use TensorDataset and DataLoader to simplify
```python
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

train_ds = TensorDataset(x_train, y_train)
train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True)

valid_ds = TensorDataset(x_valid, y_valid)
valid_dl = DataLoader(valid_ds, batch_size=bs * 2)
```
```python
def get_data(train_ds, valid_ds, bs):
    return (
        DataLoader(train_ds, batch_size=bs, shuffle=True),
        DataLoader(valid_ds, batch_size=bs * 2),
    )
```
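Not in the original notebook, but a quick sanity check: pulling a single mini-batch out of the loader shows the shapes the model will receive.

```python
# Illustrative check: one mini-batch from the training loader
xb, yb = next(iter(train_dl))
print(xb.shape, yb.shape)  # expected: torch.Size([64, 784]) torch.Size([64])
```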
- In general, call model.train() when training the model, so that layers such as Batch Normalization and Dropout behave in training mode
- At test time, call model.eval(), so that Dropout is disabled and Batch Normalization uses its running statistics (see the sketch just after this list)
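A minimal sketch of the difference, using a standalone nn.Dropout layer (illustrative only; the Mnist_NN above uses neither Dropout nor Batch Normalization):

```python
drop = nn.Dropout(p=0.5)
x = torch.ones(1, 8)

drop.train()    # training mode: entries randomly zeroed, the rest scaled by 1/(1-p)
print(drop(x))

drop.eval()     # evaluation mode: Dropout is a no-op
print(drop(x))  # all ones
```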
```python
import numpy as np

def fit(steps, model, loss_func, opt, train_dl, valid_dl):
    for step in range(steps):
        model.train()
        for xb, yb in train_dl:
            loss_batch(model, loss_func, xb, yb, opt)

        model.eval()
        with torch.no_grad():
            losses, nums = zip(
                *[loss_batch(model, loss_func, xb, yb) for xb, yb in valid_dl]
            )
        # Size-weighted average, since the last batch may be smaller
        val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums)

        print('Current step: ' + str(step), 'Validation loss: ' + str(val_loss))
```
```python
from torch import optim

def get_model():
    model = Mnist_NN()
    return model, optim.SGD(model.parameters(), lr=0.001)
```
```python
def loss_batch(model, loss_func, xb, yb, opt=None):
    loss = loss_func(model(xb), yb)

    if opt is not None:
        loss.backward()
        opt.step()
        opt.zero_grad()

    return loss.item(), len(xb)
```
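loss_batch returns the batch size alongside the loss so that fit can form a size-weighted average over the validation set. This matters because the last batch is usually smaller: with 10,000 validation samples and batch_size = bs * 2 = 128, the loader yields 78 full batches plus a final batch of 16, so an unweighted mean of per-batch losses would slightly over-weight that last small batch.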
Three lines get it done!
```python
train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
model, opt = get_model()
fit(25, model, loss_func, opt, train_dl, valid_dl)
```
```
Current step: 0 Validation loss: 2.2796445930480957
Current step: 1 Validation loss: 2.2440698066711424
Current step: 2 Validation loss: 2.1889826164245605
Current step: 3 Validation loss: 2.0985311767578123
Current step: 4 Validation loss: 1.9517273582458496
Current step: 5 Validation loss: 1.7341805934906005
Current step: 6 Validation loss: 1.4719875366210937
Current step: 7 Validation loss: 1.2273896869659424
Current step: 8 Validation loss: 1.0362271406173706
Current step: 9 Validation loss: 0.8963696184158325
Current step: 10 Validation loss: 0.7927186088562012
Current step: 11 Validation loss: 0.7141492074012756
Current step: 12 Validation loss: 0.6529350900650024
Current step: 13 Validation loss: 0.60417300491333
Current step: 14 Validation loss: 0.5643046331882476
Current step: 15 Validation loss: 0.5317994566917419
Current step: 16 Validation loss: 0.5047958114624024
Current step: 17 Validation loss: 0.4813900615692139
Current step: 18 Validation loss: 0.4618900228500366
Current step: 19 Validation loss: 0.4443243554592133
Current step: 20 Validation loss: 0.4297310716629028
Current step: 21 Validation loss: 0.416976597738266
Current step: 22 Validation loss: 0.406348459148407
Current step: 23 Validation loss: 0.3963301926612854
Current step: 24 Validation loss: 0.38733808159828187
```

Code repository: https://gitee.com/code-wenjiahao/neural-network-practical-classification-and-regression-tasks/tree/master
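Not part of the original notebook, but a natural follow-up: a hedged sketch of measuring classification accuracy on the validation set with the trained model (reusing model and valid_dl from above):

```python
# Illustrative: fraction of validation images classified correctly
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for xb, yb in valid_dl:
        preds = model(xb).argmax(dim=1)
        correct += (preds == yb).sum().item()
        total += len(yb)

print('Validation accuracy:', correct / total)
```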