foreword
I recently learned about K-fold cross-validation , and I can't wait to experiment to see if its effect is as good as the teacher said. I hereby write this article.
The running environment of this article is: Jupyter Notebook with scikit-learn and PyTorch installed.
How to write pytorch code using k-fold cross-validation training on Mnist data
Load the model, optimizer and loss function
[Key: fold dataset processing]
Set the K value to [2,10] for training
k-fold cross validation
Five-fold cross-validation: divide the data into 5 equal parts; in each experiment one part is used for testing and the remaining parts are used for training, and the results of the 5 experiments are averaged. As shown above, in the first experiment the first part is used as the test set and the rest as the training set. In the second experiment the second part is used as the test set and the rest as the training set. And so on~
baseline
Let's first look at the simplest, unpretentious basic model with no tricks added.
import package
#---------------------------------Torch Modules --------------------------------------------------------
from __future__ import print_function
import numpy as np
import pandas as pd
import torch.nn as nn
import math
import torch.nn.functional as F
import torch
import torchvision
from torch.nn import init
import torch.optim as optim
from torchvision import datasets, transforms
from torchvision import models
import torch.nn.functional as F
from torch.utils import data
import matplotlib.pyplot as plt
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
set some initial values
# ------------------------------ hyper-parameters ------------------------------
# Single-channel statistics passed to transforms.Normalize.
mean, std = [0.5], [0.5]

# Mini-batch size used by the data loaders.
BATCH_SIZE = 128

# Number of epochs (full passes over the training data).
Iterations = 1

# Learning rate handed to the optimizer.
learning_rate = 1e-2
Load dataset
# ------------------ download and prepare the MNIST dataset ------------------
# Both splits use the same pipeline: convert to a tensor, then normalize.
train_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean, std)]
)
test_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean, std)]
)

# Training split: downloaded into ./mnist if missing, reshuffled every epoch.
train_loader = torch.utils.data.DataLoader(
    dataset=datasets.MNIST(
        root='./mnist', train=True, download=True, transform=train_transform
    ),
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# Test split: read from the same folder, iterated in a fixed order.
test_loader = torch.utils.data.DataLoader(
    dataset=datasets.MNIST(
        root='./mnist', train=False, transform=test_transform
    ),
    batch_size=BATCH_SIZE,
    shuffle=False,
)
Transforms are used to preprocess or augment the dataset, for example rotation, cropping, etc. Here they only convert each image to a tensor and normalize it.
visualization dataset
# visualization
def show_images(imgs, num_rows, num_cols, titles=None, scale=1.5):
    """Plot a list of images on a ``num_rows`` x ``num_cols`` grid.

    Args:
        imgs: iterable of images; each one is either a tensor or anything
            ``Axes.imshow`` accepts directly (e.g. a PIL image).
        num_rows: number of grid rows.
        num_cols: number of grid columns.
        titles: optional sequence of titles, indexed like ``imgs``.
        scale: size of one grid cell, used to derive the figure size.

    Returns:
        The flattened array of axes, one entry per grid cell.
    """
    figsize = (num_cols * scale, num_rows * scale)
    # squeeze=False always returns a 2-D array of axes, so flatten() also
    # works for a 1x1 grid (where subplots would otherwise return a bare Axes).
    _, axes = plt.subplots(num_rows, num_cols, figsize=figsize, squeeze=False)
    axes = axes.flatten()
    for i, (ax, img) in enumerate(zip(axes, imgs)):
        if torch.is_tensor(img):
            # Tensor image: detach/cpu so tensors that track gradients or
            # live on another device can still be converted to NumPy.
            ax.imshow(img.detach().cpu().numpy())
        else:
            # PIL image
            ax.imshow(img)
        # Hide the tick marks and labels on both axes.
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        if titles:
            ax.set_title(titles[i])
    return axes
# Show the first 18 training images as a grid of 2 rows by 9 columns.
mnist_train = torchvision.datasets.MNIST(
    root="../data", train=True, download=True, transform=train_transform
)
X, y = next(iter(data.DataLoader(dataset=mnist_train, batch_size=18)))
show_images(imgs=X.reshape(18, 28, 28), num_rows=2, num_cols=9)
【output】
Load the model, optimizer and loss function
# Perceptron with one hidden layer: flatten -> 784-to-256 linear -> ReLU -> 256-to-10 linear.
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(in_features=784, out_features=256),
    nn.ReLU(),
    nn.Linear(in_features=256, out_features=10),
)
def init_weights(m):
    """Initialize the weights of a linear layer from a normal distribution.

    Intended to be passed to ``Module.apply``, which calls it once per
    sub-module.

    Args:
        m: the module to initialize. Linear layers get their ``weight`` drawn
            with standard deviation 0.01; the bias and every other kind of
            module are left untouched.
    """
    # isinstance instead of an exact type comparison, so subclasses of
    # nn.Linear are initialized as well.
    if isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, std=0.01)
# Run init_weights on every sub-module of the network.
model.apply(init_weights)

# Loss function: PyTorch's cross-entropy loss.
criterion = nn.CrossEntropyLoss()

# Optimizer: SGD over all parameters of the model.
optimizer = optim.SGD(model.parameters(), lr=learning_rate)
training function
# defining the training function
def train(model, optimizer, criterion, epoch, loader=None):
    """Train ``model`` for one pass over a data loader.

    Args:
        model: network to optimize; it is called on batches reshaped to
            ``(-1, 784)``.
        optimizer: optimizer that updates the model's parameters.
        criterion: loss callable invoked as ``criterion(output, target)``.
        epoch: epoch number, only used in the progress message.
        loader: data loader yielding ``(data, target)`` batches. Defaults to
            the module-level ``train_loader`` so existing calls keep working;
            pass one explicitly to train on a different split, e.g. a
            cross-validation fold.
    """
    if loader is None:
        loader = train_loader
    model.train()  # switch to training mode
    for batch_idx, (data, target) in enumerate(loader):
        # Flatten each image; the inputs themselves need no gradient.
        data = data.view(-1, 28 * 28)
        optimizer.zero_grad()             # clear gradients of the last step
        output = model(data)              # forward pass
        loss = criterion(output, target)  # loss of this batch
        loss.backward()                   # back-propagation
        optimizer.step()                  # update the weights
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(loader.dataset),
                100. * batch_idx / len(loader), loss.item()))
test function
# to evaluate the model
## validation of test accuracy
def test(model, criterion, val_loader, epoch, train=False):
    """Evaluate ``model`` on ``val_loader`` and print loss and accuracy.

    Args:
        model: network to evaluate; it is called on batches reshaped to
            ``(-1, 784)``.
        criterion: loss callable invoked as ``criterion(output, target)``.
        val_loader: data loader yielding ``(data, target)`` batches.
        epoch: unused; kept so existing calls keep working.
        train: only selects the label of the printed summary
            ("Train set" when true, "Test set" otherwise).

    Returns:
        The accuracy in percent, relative to ``len(val_loader.sampler)``.
    """
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in val_loader:
            data = data.view(-1, 28 * 28)
            output = model(data)
            batch_loss = criterion(output, target).item()
            # A criterion with mean reduction returns the batch average, so
            # weight it by the batch size; the total is then a per-sample sum
            # and dividing by the sample count gives the true average loss.
            if getattr(criterion, 'reduction', 'mean') != 'sum':
                batch_loss *= target.size(0)
            test_loss += batch_loss
            pred = output.max(1, keepdim=True)[1]  # index of the highest score
            correct += pred.eq(target.view_as(pred)).sum().item()
    num_samples = len(val_loader.sampler)
    test_loss /= num_samples  # average loss per sample
    accuracy = 100. * correct / num_samples
    label = 'Train' if train else 'Test'
    print('\n{} set: Average loss: {:.4f}, Accuracy: {}/{} ({:.4f}%)\n'.format(
        label, test_loss, correct, num_samples, accuracy))
    return accuracy


# The name starts with "test": keep pytest from collecting this helper as a test case.
test.__test__ = False
train and test baseline
# One accuracy slot per epoch for the training and test splits.
test_acc = torch.zeros(Iterations)
train_acc = torch.zeros(Iterations)

# Train the baseline perceptron; after each epoch evaluate it on both splits
# and save the whole model to disk.
for i in range(Iterations):
    train(model, optimizer, criterion, epoch=i)
    train_acc[i] = test(model, criterion, train_loader, i, train=True)
    test_acc[i] = test(model, criterion, test_loader, i)
    torch.save(model, 'perceptron.pt')
【output】
We can see that the accuracy is not very high, only 83%; one could even say that the model is not robust. So next comes our main event: we are going to use k-fold cross-validation. What needs to be modified?
- Step1, modify the data set
- Step2, set the k value
- STEP3, retrain
k fold cross validation[1-10]
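Just append the following code at the bottom. Note that the `train_loader` and `test_loader` created inside the function are local variables: they do not overwrite the global loaders defined earlier, so there is no conflict with the previous code, but any helper that should train on a fold has to be given that fold's loader explicitly.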
[Key: fold dataset processing]
# KFold comes from scikit-learn; if it is missing, install it first, e.g.
# !pip install scikit-learn -i https://pypi.mirrors.ustc.edu.cn/simple
from sklearn.model_selection import KFold

# Both official MNIST splits, read from ./mnist with their own transforms.
train_init = datasets.MNIST(
    root='./mnist', train=True, transform=train_transform
)
test_init = datasets.MNIST(
    root='./mnist', train=False, transform=test_transform
)

# The dataset for k-fold cross-validation: both splits chained together.
dataFold = torch.utils.data.ConcatDataset(datasets=[train_init, test_init])
def _train_fold_epoch(model, optimizer, criterion, loader, epoch):
    """Run one training pass of ``model`` over ``loader``, logging every 100 batches."""
    model.train()
    for batch_idx, (data, target) in enumerate(loader):
        data = data.view(-1, 28 * 28)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(loader.dataset),
                100. * batch_idx / len(loader), loss.item()))


def train_flod_Mnist(k_split_value):
    """Run k-fold cross-validation on ``dataFold``.

    For every fold a freshly initialized copy of the global ``model`` is
    trained on the fold's training subset and evaluated on its held-out
    subset, so no fold ever sees its own test samples during training.

    Args:
        k_split_value: number of folds, passed to ``KFold`` as ``n_splits``.

    Returns:
        A list with one NumPy array per fold; entry ``i`` of an array is the
        held-out accuracy (in percent) after epoch ``i``.
    """
    import copy  # local import: only needed by this function

    fold_accuracies = []
    kf = KFold(n_splits=k_split_value, shuffle=True, random_state=0)  # init KFold
    # KFold only needs the number of samples, so split an index array
    # instead of handing it the dataset object itself.
    for train_index, test_index in kf.split(np.arange(len(dataFold))):
        # Select the samples of this fold by index.
        train_fold = torch.utils.data.Subset(dataFold, train_index.tolist())
        test_fold = torch.utils.data.Subset(dataFold, test_index.tolist())
        fold_train_loader = torch.utils.data.DataLoader(
            dataset=train_fold, batch_size=BATCH_SIZE, shuffle=True)
        # Evaluation does not depend on the sample order, so no shuffling.
        fold_test_loader = torch.utils.data.DataLoader(
            dataset=test_fold, batch_size=BATCH_SIZE, shuffle=False)

        # Start every fold from scratch. Reusing the shared model would carry
        # weights over from earlier folds, which were trained on samples that
        # are held out in this fold.
        fold_model = copy.deepcopy(model)
        for layer in fold_model.modules():
            if hasattr(layer, 'reset_parameters'):
                layer.reset_parameters()
        fold_model.apply(init_weights)
        fold_optimizer = torch.optim.SGD(fold_model.parameters(), learning_rate)

        test_acc = torch.zeros([Iterations])
        train_acc = torch.zeros([Iterations])
        for i in range(Iterations):
            # Train on this fold's training subset. The module-level train()
            # always iterates the global train_loader, so it cannot be used here.
            _train_fold_epoch(fold_model, fold_optimizer, criterion,
                              fold_train_loader, i)
            train_acc[i] = test(fold_model, criterion, fold_train_loader, i, train=True)
            test_acc[i] = test(fold_model, criterion, fold_test_loader, i)
        # Keep the held-out accuracy of every epoch of this fold.
        fold_accuracies.append(test_acc.numpy())
    return fold_accuracies
What is the focus of the above code?
The first is merging the training set and the test set of the MNIST dataset; there are many pitfalls in this step.
# Load both MNIST splits from ./mnist (no download=True here), each with its own transform.
train_init = datasets.MNIST('./mnist', train=True,
transform=train_transform)
test_init = datasets.MNIST('./mnist', train=False,
transform=test_transform)
# the dataset for k fold cross validation: both splits concatenated into one dataset
dataFold = torch.utils.data.ConcatDataset([train_init, test_init])
The second is to use KFold in Sklearn to divide the dataset and convert it back to the pytorch type of Dataloader.
# Excerpt from train_flod_Mnist; k_split_value is that function's argument.
kf = KFold(n_splits=k_split_value,shuffle=True, random_state=0) # init KFold
for train_index , test_index in kf.split(dataFold): # split
    # get train, val: select the samples of each part by index
    train_fold = torch.utils.data.dataset.Subset(dataFold, train_index)
    test_fold = torch.utils.data.dataset.Subset(dataFold, test_index)
    # package type of DataLoader: wrap both subsets so they can be iterated in batches
    train_loader = torch.utils.data.DataLoader(dataset=train_fold, batch_size=BATCH_SIZE, shuffle=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_fold, batch_size=BATCH_SIZE, shuffle=True)
For KFold:
Provides train/test set indices to split the data. Splits the dataset into k consecutive folds (without shuffling the data by default).
Parameter introduction:
n_splits : int, default is 5. Indicates split into 5-fold
shuffle : bool, default is False. Whether to shuffle the data before splitting the dataset. True shuffles, False does not.
random_state: int, default is None. It only matters when shuffle is True. If random_state is None, the data split is different each time the code is run. If random_state is set to an integer, the same split is obtained on every run, which keeps the experiment repeatable. Any integer can be chosen according to your own preference (random_state=42 is a common choice), but once it is set it should not be changed during the experiment.
example:
from sklearn.model_selection import KFold
import numpy as np

# Toy data: 12 samples with 2 features each, plus random labels drawn from {1, 2}.
X = np.arange(24).reshape(12, 2)
y = np.random.choice([1, 2], size=12, p=[0.4, 0.6])

# Initialize KFold with 5 folds and no shuffling.
kf = KFold(n_splits=5, shuffle=False)

# Call split() to cut the data; it yields the train and test indices of each fold.
for train_index, test_index in kf.split(X):
    print('train_index:%s , test_index: %s ' % (train_index, test_index))
【output】
train_index:[ 3 4 5 6 7 8 9 10 11] , test_index: [0 1 2]
train_index:[ 0 1 2 6 7 8 9 10 11] , test_index: [3 4 5]
train_index:[ 0 1 2 3 4 5 8 9 10 11] , test_index: [6 7]
train_index:[ 0 1 2 3 4 5 6 7 10 11] , test_index: [8 9]
train_index:[0 1 2 3 4 5 6 7 8 9] , test_index: [10 11]
Notice:
Set shuffle=False, the result is the same every time
Set shuffle=True, the result of each run is different
Set shuffle=True and random_state=integer, the result is the same every time
Set the K value to [2,10] for training
# Run the cross-validation once for every k from 2 to 10 (inclusive) and
# store what each run returns under its k.
testAcc_compare_map = dict()
for k_split_value in range(2, 11):
    print('now k_split_value is:', k_split_value)
    testAcc_compare_map[k_split_value] = train_flod_Mnist(k_split_value)
testAcc_compare_map saves the results of training under different k values, and then we can use this dictionary variable to calculate rmse and compare the robustness of the experimental results under different k values.
【Small experiment】
I would like to post a little homework to try to help you understand k-fold cross-validation:
1. Perform k-fold cross-validation, choosing k = 1-10 (treat k = 1 as the plain baseline, since K-fold itself requires k >= 2), and plot (a) mean train log rmse vs k and (b) mean
validation log rmse vs k [20 points]
2. What happens when you increase the value of k? Explain the behavior of the two losses
as k increases [20 points]
【Supplementary Materials】
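For RMSE (root mean squared error), with targets $y_i$, predictions $\hat{y}_i$ and $n$ samples: $\mathrm{RMSE} = \sqrt{\frac{1}{n}\sum_{i=1}^{n}\left(y_i - \hat{y}_i\right)^2}$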
In the experimental results, the accuracy (ACC) changes with increasing K as follows [k=1 is the result of the baseline; the cross-validation runs cover k=[2,10]]: