深度学习torch之四（基于mnist数据集的实战练习实例）

1.程序运行时所需要的包，输入如下代码：

require 'torch'  
require 'nn'  
require 'optim'  
mnist = require 'mnist'

这里直接使用了mnist包，就不需要下载mnist数据集，方便使用。但是这也带来一个问题，可能有些小伙伴没有安装mnist则实验就没办法进行，放心，我提供了相应的解决方案，在终端你所安装torch的文件夹目录下输入：luarocks install mnist

2.载入数据集并对载入的数据集进行数据类型修改和数据的预处理：

-- Load the MNIST training and test sets through the `mnist` package.
fullset = mnist.traindataset()
testset = mnist.testdataset()

-- Carve the 60000 training images into a 50000-sample training split and a
-- 10000-sample validation split. Each split is converted to double and
-- centred by subtracting that split's own scalar mean pixel value.
do
    local function centered_split(first, last)
        local pixels = fullset.data[{{first, last}}]:double()
        return {
            size = last - first + 1,
            data = pixels - pixels:mean(),
            label = fullset.label[{{first, last}}],
        }
    end

    trainset = centered_split(1, 50000)
    validationset = centered_split(50001, 60000)
end

3.神经网络模型的建立：

-- LeNet-style CNN: two conv + max-pool stages followed by two fully
-- connected layers, ending in log-probabilities over 10 classes.
model = nn.Sequential()
do
    local layers = {
        nn.Reshape(1, 28, 28),                               -- add the channel dimension
        nn.MulConstant(1/256.0*3.2),                         -- rescale the pixel values
        nn.SpatialConvolutionMM(1, 20, 5, 5, 1, 1, 0, 0),    -- 1 -> 20 planes, 5x5 kernel
        nn.SpatialMaxPooling(2, 2 , 2, 2, 0, 0),
        nn.SpatialConvolutionMM(20, 50, 5, 5, 1, 1, 0, 0),   -- 20 -> 50 planes, 5x5 kernel
        nn.SpatialMaxPooling(2, 2 , 2, 2, 0, 0),
        nn.Reshape(4*4*50),                                  -- flatten 50 planes of 4x4
        nn.Linear(4*4*50, 500),
        nn.ReLU(),
        nn.Linear(500, 10),
        nn.LogSoftMax(),
    }
    -- Layers are appended in exactly the order listed above.
    for _, layer in ipairs(layers) do
        model:add(layer)
    end
end

4.对model内部参数进行初始化：这里不使用torch默认的参数初始化，而是使用一种更高级的初始化方法，称为xavier方法。概括来讲，就是根据每层的输入个数和输出个数来决定参数随机初始化的分布范围。注意：下面的代码通过require加载名为weight-init的模块，运行前需要保证该模块（例如weight-init.lua文件）能被Lua的require找到，比如把它放在当前工作目录下。

-- Re-initialise the model's parameters with the 'xavier' scheme provided by
-- the `weight-init` module (the module must be reachable by `require`).
do
    local init_weights = require('weight-init')
    model = init_weights(model, 'xavier')
end

5.定义损失函数：

-- Negative log-likelihood loss; it is applied to the output of the model,
-- whose last layer is nn.LogSoftMax.
criterion = nn.ClassNLLCriterion()

6.获取模型参数：调用model:getParameters()（其形式为 [flatParameters, flatGradParameters] getParameters()），返回两个展平后的张量：flatParameters（可学习参数）和flatGradParameters（对应的梯度）。下面分别用x和dl_dx来保存：

-- x: flattened learnable parameters of the model.
-- dl_dx: flattened gradients of the loss with respect to those parameters.
x, dl_dx = model:getParameters()

7.设置SGD算法所需要的参数：

-- Configuration table handed to optim.sgd on every mini-batch update.
-- NOTE(review): a momentum of 1e-4 is unusually small; confirm it is intended.
sgd_params = {  
   learningRate = 1e-2,  
   learningRateDecay = 1e-4,  
   weightDecay = 1e-3,  
   momentum = 1e-4  
}

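8.定义训练一个epoch的函数step = function(batch_size)：它先把训练集随机打乱，再按mini-batch依次调用optim.sgd更新参数，最后返回各个batch损失的平均值。其中内部定义的feval=function(x_new)是传给SGD的求值函数，输入为设定的权值x_new，输出为损失函数在当前mini-batch上的损失loss，以及损失函数在该mini-batch上的梯度值dl_dx。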

    --- Run one training epoch of mini-batch SGD over `trainset`.
    -- The sample order is reshuffled on every call. Uses the globals `model`,
    -- `criterion`, `x`, `dl_dx` and `sgd_params`.
    -- @param batch_size number of samples per mini-batch (defaults to 200)
    -- @return the mean of the per-batch losses reported by optim.sgd
    step = function(batch_size)
        local current_loss = 0
        local count = 0
        local shuffle = torch.randperm(trainset.size)
        batch_size = batch_size or 200
        for t = 1, trainset.size, batch_size do
            -- Number of samples in this mini-batch: positions t .. last of the
            -- shuffled order, inclusive (the final batch may be shorter).
            local last = math.min(t + batch_size - 1, trainset.size)
            local size = last - t + 1
            local inputs = torch.Tensor(size, 28, 28)--:cuda()
            local targets = torch.Tensor(size)--:cuda()
            for i = 1, size do
                -- t + i - 1 walks t .. last, so no shuffled sample is skipped.
                local idx = shuffle[t + i - 1]
                inputs[i] = trainset.data[idx]
                targets[i] = trainset.label[idx]
            end
            -- Shift the labels up by one: ClassNLLCriterion expects class
            -- indices starting at 1 (labels are presumably 0..9).
            targets:add(1)

            local feval = function(x_new)
                -- Sync the parameters if the optimiser passed a different tensor.
                if x ~= x_new then x:copy(x_new) end
                dl_dx:zero()

                -- Forward and backward pass on this mini-batch.
                local loss = criterion:forward(model:forward(inputs), targets)
                model:backward(inputs, criterion:backward(model.output, targets))

                return loss, dl_dx
            end

            -- fs is a table holding the loss value(s); plain SGD yields just one.
            local _, fs = optim.sgd(feval, x, sgd_params)

            count = count + 1
            current_loss = current_loss + fs[1]
        end

        -- Average the loss over the number of mini-batches.
        return current_loss / count
    end

9.最后一步：定义评估函数eval来计算分类准确率，然后最多训练max_iters个epoch，并根据验证集上的准确率提前停止训练，最后在测试集上进行评估：

--- Classification accuracy of the global `model` on a dataset.
-- @param dataset table with fields `size`, `data` and `label`
-- @param batch_size number of samples per forward pass (defaults to 200)
-- @return fraction of samples whose predicted class equals the label
eval = function(dataset, batch_size)
    local count = 0
    batch_size = batch_size or 200

    for i = 1, dataset.size, batch_size do
        -- Inclusive index range i .. last, so every sample is scored exactly
        -- once (the final batch may be shorter).
        local last = math.min(i + batch_size - 1, dataset.size)
        local inputs = dataset.data[{{i, last}}]
        local targets = dataset.label[{{i, last}}]:long()
        local outputs = model:forward(inputs)
        -- Index of the largest log-probability in each row.
        local _, indices = torch.max(outputs, 2)
        -- Output indices start at 1; shift down to compare with the labels.
        indices:add(-1)
        local guessed_right = indices:eq(targets):sum()
        count = count + guessed_right
    end

    return count / dataset.size
end
  
-- Upper bound on the number of training epochs.
max_iters = 30

-- Train epoch by epoch with early stopping on validation accuracy.
do
    local prev_acc = 0
    local drops = 0          -- consecutive epochs in which accuracy fell
    -- The loop stops when accuracy falls while `drops` already exceeds this
    -- value, i.e. with 1 it stops on the third consecutive drop.
    local allowed_drops = 1
    for epoch = 1, max_iters do
        local loss = step()
        print(string.format('Epoch: %d Current loss: %4f', epoch, loss))
        local acc = eval(validationset)
        print(string.format('Accuracy on the validation set: %4f', acc))

        local dropped = acc < prev_acc
        if dropped and drops > allowed_drops then break end
        drops = dropped and (drops + 1) or 0
        prev_acc = acc
    end
end
  
-- Final evaluation on the held-out test set. The pixels are converted to
-- double and centred by subtracting the set's own mean, the same way the
-- training and validation splits are prepared, and the accuracy is printed
-- instead of being silently discarded.
testset.data = testset.data:double()
testset.data = testset.data - testset.data:mean()
print(string.format('Accuracy on the test set: %4f', eval(testset)))

10.整体代码如下：

require 'torch'  
require 'nn'  
require 'optim'  
--require 'cunn'  
--require 'cutorch'  
mnist = require 'mnist'  
  
-- Load the MNIST training and test sets through the `mnist` package.
fullset = mnist.traindataset()
testset = mnist.testdataset()

-- Carve the 60000 training images into a 50000-sample training split and a
-- 10000-sample validation split. Each split is converted to double and
-- centred by subtracting that split's own scalar mean pixel value.
do
    local function centered_split(first, last)
        local pixels = fullset.data[{{first, last}}]:double()
        return {
            size = last - first + 1,
            data = pixels - pixels:mean(),
            label = fullset.label[{{first, last}}],
        }
    end

    trainset = centered_split(1, 50000)
    validationset = centered_split(50001, 60000)
end
  
  
-- LeNet-style CNN: two conv + max-pool stages followed by two fully
-- connected layers, ending in log-probabilities over 10 classes.
model = nn.Sequential()
do
    local layers = {
        nn.Reshape(1, 28, 28),                               -- add the channel dimension
        nn.MulConstant(1/256.0*3.2),                         -- rescale the pixel values
        nn.SpatialConvolutionMM(1, 20, 5, 5, 1, 1, 0, 0),    -- 1 -> 20 planes, 5x5 kernel
        nn.SpatialMaxPooling(2, 2 , 2, 2, 0, 0),
        nn.SpatialConvolutionMM(20, 50, 5, 5, 1, 1, 0, 0),   -- 20 -> 50 planes, 5x5 kernel
        nn.SpatialMaxPooling(2, 2 , 2, 2, 0, 0),
        nn.Reshape(4*4*50),                                  -- flatten 50 planes of 4x4
        nn.Linear(4*4*50, 500),
        nn.ReLU(),
        nn.Linear(500, 10),
        nn.LogSoftMax(),
    }
    -- Layers are appended in exactly the order listed above.
    for _, layer in ipairs(layers) do
        model:add(layer)
    end
end
  
-- Re-initialise the model's parameters with the 'xavier' scheme provided by
-- the `weight-init` module (the module must be reachable by `require`).
do
    local init_weights = require('weight-init')
    model = init_weights(model, 'xavier')
end

-- Negative log-likelihood loss, applied to the model's LogSoftMax output.
criterion = nn.ClassNLLCriterion()

-- GPU variant: uncomment the lines below (together with the cunn/cutorch
-- requires and the :cuda() calls further down) to move everything to CUDA.
--model = model:cuda()
--criterion = criterion:cuda()
--trainset.data = trainset.data:cuda()
--trainset.label = trainset.label:cuda()
--validationset.data = validationset.data:cuda()
--validationset.label = validationset.label:cuda()

-- Configuration table handed to optim.sgd on every mini-batch update.
sgd_params = {
    learningRate = 1e-2,
    learningRateDecay = 1e-4,
    weightDecay = 1e-3,
    momentum = 1e-4,
}

-- x: flattened learnable parameters; dl_dx: flattened gradients.
x, dl_dx = model:getParameters()
  
--- Run one training epoch of mini-batch SGD over `trainset`.
-- The sample order is reshuffled on every call. Uses the globals `model`,
-- `criterion`, `x`, `dl_dx` and `sgd_params`.
-- @param batch_size number of samples per mini-batch (defaults to 200)
-- @return the mean of the per-batch losses reported by optim.sgd
step = function(batch_size)
    local current_loss = 0
    local count = 0
    local shuffle = torch.randperm(trainset.size)
    batch_size = batch_size or 200
    for t = 1, trainset.size, batch_size do
        -- Number of samples in this mini-batch: positions t .. last of the
        -- shuffled order, inclusive (the final batch may be shorter).
        local last = math.min(t + batch_size - 1, trainset.size)
        local size = last - t + 1
        local inputs = torch.Tensor(size, 28, 28)--:cuda()
        local targets = torch.Tensor(size)--:cuda()
        for i = 1, size do
            -- t + i - 1 walks t .. last, so no shuffled sample is skipped.
            local idx = shuffle[t + i - 1]
            inputs[i] = trainset.data[idx]
            targets[i] = trainset.label[idx]
        end
        -- Shift the labels up by one: ClassNLLCriterion expects class
        -- indices starting at 1 (labels are presumably 0..9).
        targets:add(1)

        local feval = function(x_new)
            -- Sync the parameters if the optimiser passed a different tensor.
            if x ~= x_new then x:copy(x_new) end
            dl_dx:zero()

            -- Forward and backward pass on this mini-batch.
            local loss = criterion:forward(model:forward(inputs), targets)
            model:backward(inputs, criterion:backward(model.output, targets))

            return loss, dl_dx
        end

        -- fs is a table holding the loss value(s); plain SGD yields just one.
        local _, fs = optim.sgd(feval, x, sgd_params)

        count = count + 1
        current_loss = current_loss + fs[1]
    end

    -- Average the loss over the number of mini-batches.
    return current_loss / count
end
  
--- Classification accuracy of the global `model` on a dataset.
-- @param dataset table with fields `size`, `data` and `label`
-- @param batch_size number of samples per forward pass (defaults to 200)
-- @return fraction of samples whose predicted class equals the label
eval = function(dataset, batch_size)
    local count = 0
    batch_size = batch_size or 200

    for i = 1, dataset.size, batch_size do
        -- Inclusive index range i .. last, so every sample is scored exactly
        -- once (the final batch may be shorter).
        local last = math.min(i + batch_size - 1, dataset.size)
        local inputs = dataset.data[{{i, last}}]--:cuda()
        local targets = dataset.label[{{i, last}}]:long()--:cuda()
        local outputs = model:forward(inputs)
        -- Index of the largest log-probability in each row.
        local _, indices = torch.max(outputs, 2)
        -- Output indices start at 1; shift down to compare with the labels.
        indices:add(-1)
        local guessed_right = indices:eq(targets):sum()
        count = count + guessed_right
    end

    return count / dataset.size
end
  
-- Upper bound on the number of training epochs.
max_iters = 30

-- Train epoch by epoch with early stopping on validation accuracy.
do
    local prev_acc = 0
    local drops = 0          -- consecutive epochs in which accuracy fell
    -- The loop stops when accuracy falls while `drops` already exceeds this
    -- value, i.e. with 1 it stops on the third consecutive drop.
    local allowed_drops = 1
    for epoch = 1, max_iters do
        local loss = step()
        print(string.format('Epoch: %d Current loss: %4f', epoch, loss))
        local acc = eval(validationset)
        print(string.format('Accuracy on the validation set: %4f', acc))

        local dropped = acc < prev_acc
        if dropped and drops > allowed_drops then break end
        drops = dropped and (drops + 1) or 0
        prev_acc = acc
    end
end
  
-- Final evaluation on the held-out test set. The pixels are converted to
-- double and centred by subtracting the set's own mean, the same way the
-- training and validation splits are prepared, and the accuracy is printed
-- instead of being silently discarded.
testset.data = testset.data:double()
testset.data = testset.data - testset.data:mean()
print(string.format('Accuracy on the test set: %4f', eval(testset)))

11.如此所有的工作就完成了，运行代码，输出如图结果：

深度学习torch之四（基于mnist数据集的实战练习实例）

猜你喜欢