Writing LeNet with Caffe Python


Foreword:

This article is a translation of "Solving in Python with LeNet", an example built on the Caffe deep learning framework. To run the code you need:

  1. Caffe installed with the Python interface built. For Windows, refer to the Caffe installation tutorial and the guide to compiling the Caffe Python interface.

  2. Python 2.7 installed (Anaconda is recommended). Start Jupyter Notebook after installation.

Note: in order to run correctly, the code in this article has been modified from the original code.

Some of the calls used in the code are explained further under the Detailed notes below.


Detailed notes

  1. caffe.NetSpec()
  2. solver.net.forward(), solver.test_nets[0].forward() and solver.step(1)

Main text:

1. Start (Setup)

Import pylab and use it for drawing.

from pylab import *

Add the caffe Python directory to the system path, then import caffe.

caffe_root = 'D:/wincaffe/caffe-master/'  # this is my caffe root directory

import sys
sys.path.insert(0, caffe_root + 'python')
import caffe

Download the data and put it in the D:\wincaffe\caffe-master\examples\mnist\mnist_data directory (I use absolute paths). The MNIST binary files need to be converted to LMDB format. I have uploaded both the original MNIST files and the converted LMDB databases to the Baidu network disk share mnist+lmdb (password: h7wh), so if you don't want to do the conversion yourself you can simply download them.
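If you would rather do the conversion yourself, the official route is the convert_mnist_data tool that ships with Caffe (driven by examples/mnist/create_mnist.sh). Purely as an illustration, the sketch below is one way to build the training LMDB from Python instead; it is not the author's script, and it assumes the raw train-images-idx3-ubyte and train-labels-idx1-ubyte files sit in mnist_data and that the lmdb Python package is installed.

# Sketch only: build the MNIST training LMDB in Python (not the author's script)
import numpy as np
import lmdb
from caffe.proto import caffe_pb2

def load_mnist(image_file, label_file):
    with open(image_file, 'rb') as f:
        images = np.frombuffer(f.read(), dtype=np.uint8, offset=16).reshape(-1, 28, 28)
    with open(label_file, 'rb') as f:
        labels = np.frombuffer(f.read(), dtype=np.uint8, offset=8)
    return images, labels

def write_lmdb(images, labels, db_path):
    env = lmdb.open(db_path, map_size=10 * images.nbytes)  # generous map size
    with env.begin(write=True) as txn:
        for i in range(len(images)):
            datum = caffe_pb2.Datum(channels=1, height=28, width=28,
                                    data=images[i].tobytes(), label=int(labels[i]))
            txn.put('{:08d}'.format(i), datum.SerializeToString())
    env.close()

images, labels = load_mnist('mnist_data/train-images-idx3-ubyte',
                            'mnist_data/train-labels-idx1-ubyte')
write_lmdb(images, labels, 'mnist_data/mnist_train_lmdb')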

2. Creating the net

Now we create a variant of LeNet, the classic 1989 convolutional network architecture.
We need two external files to help out:

  • net prototxt: defines the network architecture and points to the train/test data.
  • solver prototxt: defines the learning parameters.

from caffe import layers as L, params as P
def lenet(lmdb, batch_size):
    # our version of LeNet: a series of linear and simple nonlinear transformations
    n = caffe.NetSpec() # see Detailed notes, item 1

    n.data, n.label = L.Data(batch_size=batch_size, backend=P.Data.LMDB, source=lmdb,
                             transform_param=dict(scale=1./255), ntop=2)

    n.conv1 = L.Convolution(n.data, kernel_size=5, num_output=20, weight_filler=dict(type='xavier'))
    n.pool1 = L.Pooling(n.conv1, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    n.conv2 = L.Convolution(n.pool1, kernel_size=5, num_output=50, weight_filler=dict(type='xavier'))
    n.pool2 = L.Pooling(n.conv2, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    n.fc1 =   L.InnerProduct(n.pool2, num_output=500, weight_filler=dict(type='xavier'))
    n.relu1 = L.ReLU(n.fc1, in_place=True)
    n.score = L.InnerProduct(n.relu1, num_output=10, weight_filler=dict(type='xavier'))
    n.loss =  L.SoftmaxWithLoss(n.score, n.label)

    return n.to_proto() # write out as a prototxt file

with open('D:/wincaffe/caffe-master/examples/mnist/lenet_auto_train.prototxt', 'w') as f:
    f.write(str(lenet('D:/wincaffe/caffe-master/examples/mnist/mnist_data/mnist_train_lmdb', 64)))

with open('D:/wincaffe/caffe-master/examples/mnist/lenet_auto_test.prototxt', 'w') as f:
    f.write(str(lenet('D:/wincaffe/caffe-master/examples/mnist/mnist_data/mnist_test_lmdb', 100)))

Let's take a look at the structure of the train net:

layer {
  name: "data"
  type: "Data"
  top: "data"
  top: "label"
  transform_param {
    scale: 0.00392156862745
  }
  data_param {
    source: "D:/wincaffe/caffe-master/examples/mnist/mnist_data/mnist_train_lmdb"
    batch_size: 64
    backend: LMDB
  }
}
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  convolution_param {
    num_output: 20
    kernel_size: 5
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  convolution_param {
    num_output: 50
    kernel_size: 5
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "fc1"
  type: "InnerProduct"
  bottom: "pool2"
  top: "fc1"
  inner_product_param {
    num_output: 500
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "fc1"
  top: "fc1"
}
layer {
  name: "score"
  type: "InnerProduct"
  bottom: "fc1"
  top: "score"
  inner_product_param {
    num_output: 10
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "score"
  bottom: "label"
  top: "loss"
}

And the structure of the test net:

layer {
  name: "data"
  type: "Data"
  top: "data"
  top: "label"
  transform_param {
    scale: 0.00392156862745
  }
  data_param {
    source: "D:/wincaffe/caffe-master/examples/mnist/mnist_data/mnist_test_lmdb"
    batch_size: 100
    backend: LMDB
  }
}
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  convolution_param {
    num_output: 20
    kernel_size: 5
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  convolution_param {
    num_output: 50
    kernel_size: 5
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "fc1"
  type: "InnerProduct"
  bottom: "pool2"
  top: "fc1"
  inner_product_param {
    num_output: 500
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "fc1"
  top: "fc1"
}
layer {
  name: "score"
  type: "InnerProduct"
  bottom: "fc1"
  top: "score"
  inner_product_param {
    num_output: 10
    weight_filler {
      type: "xavier"
    }
  }
}
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "score"
  bottom: "label"
  top: "loss"
}

Now let's look at the structure of the solver. The file already exists in D:\wincaffe\caffe-master\examples\mnist, but you need to adjust the train_net and test_net paths inside it to match your own directory layout.

# The train/test net protocol buffer definition
train_net: "D:/wincaffe/caffe-master/examples/mnist/lenet_auto_train.prototxt"
test_net: "D:/wincaffe/caffe-master/examples/mnist/lenet_auto_test.prototxt"
# test_iter specifies how many forward passes the test should carry out.
# In the case of MNIST, we have test batch size 100 and 100 test iterations,
# covering the full 10,000 testing images.
test_iter: 100
# Carry out testing every 500 training iterations.
test_interval: 500
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.01
momentum: 0.9
weight_decay: 0.0005
# The learning rate policy
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# Display every 100 iterations
display: 100
# The maximum number of iterations
max_iter: 10000
# snapshot intermediate results
snapshot: 5000
snapshot_prefix: "mnist/lenet"
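For reference, under the 'inv' policy Caffe computes the learning rate at iteration i as base_lr * (1 + gamma * i)^(-power). Evaluating that formula for the values above (my own check, not part of the tutorial code) shows how the rate decays over training:

# Learning rate under the 'inv' policy: lr = base_lr * (1 + gamma * iter) ** (-power)
base_lr, gamma, power = 0.01, 0.0001, 0.75
for it in [0, 500, 5000, 10000]:
    print 'iter %5d  lr = %.6f' % (it, base_lr * (1 + gamma * it) ** (-power))
# prints roughly 0.010000, 0.009641, 0.007378, 0.005946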

3. Loading and checking the solver

Translator's note: select the device and use the GPU. (If you don't have CUDA+cuDNN installed, you can use caffe.set_mode_cpu() instead; it works, just more slowly.)

caffe.set_device(0)  # select the default GPU
caffe.set_mode_gpu() # use the GPU

### load the solver and create train and test nets
solver = None  # ignore this workaround for lmdb data (can't instantiate two solvers on the same data)
solver = caffe.SGDSolver('C:/Users/Admin512/Desktop/MyStudy/caffe_python/LeNet/mnist/lenet_auto_solver.prototxt')

Take a look at the dimensions of the intermediate variables:

Code 1:

# each output is (batch size, feature dim, spatial dim)
[(k, v.data.shape) for k, v in solver.net.blobs.items()]

Output 1:

[('data', (64L, 1L, 28L, 28L)),
 ('label', (64L,)),
 ('conv1', (64L, 20L, 24L, 24L)),
 ('pool1', (64L, 20L, 12L, 12L)),
 ('conv2', (64L, 50L, 8L, 8L)),
 ('pool2', (64L, 50L, 4L, 4L)),
 ('fc1', (64L, 500L)),
 ('score', (64L, 10L)),
 ('loss', ())]

Code 2:

# just print the weight sizes (we'll omit the biases)
[(k, v[0].data.shape) for k, v in solver.net.params.items()]

Output 2:

[('conv1', (20, 1, 5, 5)),
 ('conv2', (50, 20, 5, 5)),
 ('fc1', (500, 800)),
 ('score', (10, 500))]
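These shapes follow from simple arithmetic: a 5x5 convolution with stride 1 and no padding shrinks each spatial dimension by 4, each 2x2/stride-2 max pool halves it, and fc1's 800 inputs are just the flattened pool2 blob (50 channels x 4 x 4). A quick sanity check of that arithmetic (my own illustration, not from the original tutorial):

# Verify the blob sizes listed above (5x5 conv, stride 1, no pad; 2x2 pool, stride 2)
def out_size(size, kernel, stride=1, pad=0):
    return (size + 2 * pad - kernel) // stride + 1

s = 28                          # input images are 28x28
s = out_size(s, 5)              # conv1 -> 24
s = out_size(s, 2, stride=2)    # pool1 -> 12
s = out_size(s, 5)              # conv2 -> 8
s = out_size(s, 2, stride=2)    # pool2 -> 4
print 'pool2 spatial size:', s              # 4
print 'fc1 input dimension:', 50 * s * s    # 50 * 4 * 4 = 800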

Before training, let's check if everything is in place:

Code 1:

# see Detailed notes, item 2
solver.net.forward()  # train net 
solver.test_nets[0].forward()  # test net (there can be more than one)

Output 1:

{'loss': array(2.365971088409424, dtype=float32)}
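As a sanity check, an untrained 10-way classifier that spreads its probability roughly uniformly over the classes should start with a loss near -log(1/10):

# Expected initial loss for a 10-class softmax with near-uniform predictions
import math
print -math.log(1.0 / 10)   # 2.3026, close to the 2.366 reported above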


Code 2:

# we use a little trick to tile the first eight images
imshow(solver.net.blobs['data'].data[:8, 0].transpose(1, 0, 2).reshape(28, 8*28), cmap='gray'); axis('off')
print 'train labels:', solver.net.blobs['label'].data[:8]

Output 2:

train labels: [ 5.  0.  4.  1.  9.  2.  1.  3.]

[Figure: the first eight training digits tiled side by side]
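The "little trick" in the code above works because transposing the (8, 28, 28) stack of images to (28, 8, 28) and reshaping it to (28, 8*28) lays the eight digits out side by side in a single wide image. A tiny standalone demo of the same reshaping (my own illustration):

# Demonstrate the tiling trick on a toy stack of 8 "images" of size 28x28
import numpy as np
stack = np.arange(8 * 28 * 28).reshape(8, 28, 28)
tiled = stack.transpose(1, 0, 2).reshape(28, 8 * 28)
print tiled.shape                               # (28, 224): 8 images side by side
print np.array_equal(tiled[:, :28], stack[0])   # True: the first 28 columns are image 0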


Code 3:

imshow(solver.test_nets[0].blobs['data'].data[:8, 0].transpose(1, 0, 2).reshape(28, 8*28), cmap='gray'); axis('off')
print 'test labels:', solver.test_nets[0].blobs['label'].data[:8]

Output 3:

test labels: [ 7.  2.  1.  0.  4.  1.  4.  9.]

[Figure: the first eight test digits tiled side by side]

4. Stepping the solver

Both the train and test networks appear to be loading data and have the correct labels.

  • Let's take one step of minibatch SGD and see what happens:
# see Detailed notes, item 2
solver.step(1)
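solver.step(1) runs one full training iteration: a forward pass to compute the loss, a backward pass to compute gradients, and a parameter update according to the solver settings. As a rough conceptual sketch only (ignoring momentum, weight decay, the learning-rate schedule and the solver's internal bookkeeping), one step amounts to something like:

# Conceptual equivalent of solver.step(1) with plain SGD (sketch only;
# ignores momentum, weight decay, lr decay and diff accumulation details)
lr = 0.01                               # base_lr from the solver prototxt
solver.net.forward()                    # forward pass: fills blobs and the loss
solver.net.backward()                   # backward pass: fills the param .diff arrays
for name, params in solver.net.params.items():
    for p in params:                    # params[0] = weights, params[1] = biases (if any)
        p.data[...] -= lr * p.diff      # plain gradient-descent update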

Do we have gradients propagating through our filters? Let's look at the updates to the first layer, shown here as a 4x5 grid of 5x5 filters.

Code 4:

imshow(solver.net.params['conv1'][0].diff[:, 0].reshape(4, 5, 5, 5)
       .transpose(0, 2, 1, 3).reshape(4*5, 5*5), cmap='gray'); axis('off')

Output 4:

(-0.5, 24.5, 19.5, -0.5)

[Figure: gradient updates for the 20 conv1 filters, arranged as a 4x5 grid of 5x5 kernels]

5. Writing a custom training loop

Something is happening! Let's train the net for a while, keeping track of a few things as it goes. Note that this process is the same as training through the caffe binary. In particular:

  • logging will continue as normal;
  • snapshots will be taken at the interval specified in the solver prototxt (here, every 5000 iterations);
  • testing will happen at the specified interval (here, every 500 iterations).

Because we control the loop in Python, we are free to compute additional things as we go, as shown below. We can also do other things, for example:

  • write a custom stopping criterion (see the sketch after this list);
  • change the solving process by updating the net inside the loop.
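As an illustration of the first point, a custom stopping criterion could be as simple as leaving the loop once the training loss stays below a threshold for a few consecutive iterations (the threshold and patience values here are arbitrary, for illustration only):

# Sketch of a custom stopping criterion (illustrative values only)
loss_threshold = 0.05
patience = 10
below = 0
for it in range(10000):
    solver.step(1)
    if solver.net.blobs['loss'].data < loss_threshold:
        below += 1
        if below >= patience:
            print 'Stopping early at iteration', it
            break
    else:
        below = 0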

Code 5:

%%time
niter = 200
test_interval = 25
# losses will also be stored in the log
train_loss = zeros(niter)
test_acc = zeros(int(np.ceil(niter / test_interval)))
output = zeros((niter, 8, 10))

# the main solver loop
for it in range(niter):
    solver.step(1)  # SGD by Caffe

    # store the train loss
    train_loss[it] = solver.net.blobs['loss'].data

    # store the output on the first test batch
    # (start the forward pass at conv1 to avoid loading new data)
    solver.test_nets[0].forward(start='conv1')
    output[it] = solver.test_nets[0].blobs['score'].data[:8]

    # run a full test every so often
    # (Caffe can also do this for us and write to a log, but we show here
    #  how to do it directly in Python, where more complicated things are easier.)
    if it % test_interval == 0:
        print 'Iteration', it, 'testing...'
        correct = 0
        for test_it in range(100):
            solver.test_nets[0].forward()
            correct += sum(solver.test_nets[0].blobs['score'].data.argmax(1)
                           == solver.test_nets[0].blobs['label'].data)
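        # 100 test batches of 100 images each = 10,000 test images, hence dividing by 1e4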
        test_acc[it // test_interval] = correct / 1e4

Output 5:
(Looks like my GPU is much better than theirs!)

Iteration 0 testing...
Iteration 25 testing...
Iteration 50 testing...
Iteration 75 testing...
Iteration 100 testing...
Iteration 125 testing...
Iteration 150 testing...
Iteration 175 testing...
Wall time: 2.3 s

Let's plot the training loss and test accuracy:

Code 6:

_, ax1 = subplots()
ax2 = ax1.twinx()
ax1.plot(arange(niter), train_loss)
ax2.plot(test_interval * arange(len(test_acc)), test_acc, 'r')
ax1.set_xlabel('iteration')
ax1.set_ylabel('train loss')
ax2.set_ylabel('test accuracy')
ax2.set_title('Test Accuracy: {:.2f}'.format(test_acc[-1]))

Output 6:

<matplotlib.text.Text at 0x32b3fb38>

[Figure: training loss (blue) and test accuracy (red) over 200 iterations]

The loss drops quickly and converges (apart from some stochastic fluctuation), and the accuracy rises correspondingly. Hooray!


Because we saved the results on the first test batch, we can watch how our prediction scores evolved. The x-axis is the iteration number and the y-axis is the label.

Code 7:

for i in range(8):
    figure(figsize=(2, 2))
    imshow(solver.test_nets[0].blobs['data'].data[i, 0], cmap='gray')
    figure(figsize=(10, 2))
    imshow(output[:50, i].T, interpolation='nearest', cmap='gray')
    xlabel('iteration')
    ylabel('label')

Output 7:

[Figure: for each of the first eight test digits, the raw score for every label over the first 50 iterations]

We started with little idea about any of these digits, and ended up with the correct classification for each. If you look closely, you'll see that the last digit is the hardest: a slanted "9" that is easily confused with a "4".


Note: the plots above show the raw output scores rather than the softmax probability vectors. The probabilities, shown below, make it easier to see the network's confidence (but harder to see the scores for the difficult digits).

Code 8:

for i in range(8):
    figure(figsize=(2, 2))
    imshow(solver.test_nets[0].blobs['data'].data[i, 0], cmap='gray')
    figure(figsize=(10, 2))
    imshow(exp(output[:50, i].T) / exp(output[:50, i].T).sum(0), interpolation='nearest', cmap='gray')
    xlabel('iteration')
    ylabel('label')

Output 8:

[Figure: for each of the first eight test digits, the softmax probability of every label over the first 50 iterations]


6. Experiment with architecture and optimization

Now that we have defined, trained and tested LeNet, there are many possible directions for what to do next:

  • Define new network architectures for comparison
  • Tune optimization, for example by setting base_lr differently or simply training longer
  • Switch the solver from SGD to an adaptive method such as AdaDelta or Adam

Feel free to explore these directions by editing the all-in-one example below. Look for the "EDIT HERE" comments marking suggested places to modify.
By default it defines a simple linear classifier as a baseline.
In case your coffee hasn't kicked in and you'd like inspiration, try the following:

  1. Switch the nonlinearity from ReLU to ELU or Sigmoid.
  2. Stack more fully connected and nonlinear layers.
  3. Change the learning rate by factors of 10.
  4. Switch the solver type to Adam (see the sketch after this list).
  5. Increase niter to train longer and improve the result.
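For instance, suggestion 4 would amount to changing the solver definition in the code below roughly as follows (a sketch; in Caffe's SolverParameter, momentum plays the role of Adam's beta1 and momentum2 that of beta2):

# Sketch for suggestion 4: switch the solver below from SGD to Adam
s.type = "Adam"
s.base_lr = 0.001        # Adam is usually run with a smaller base learning rate
s.momentum = 0.9         # beta1
s.momentum2 = 0.999      # beta2 (decay rate of the second-moment estimate)
s.lr_policy = 'fixed'    # adaptive solvers are often paired with a constant rate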

Code 9:

train_net_path = 'mnist/custom_auto_train.prototxt'
test_net_path = 'mnist/custom_auto_test.prototxt'
solver_config_path = 'mnist/custom_auto_solver.prototxt'

### define net
def custom_net(lmdb, batch_size):
    # define your own net!
    n = caffe.NetSpec()

    # keep this data layer for all networks
    n.data, n.label = L.Data(batch_size=batch_size, backend=P.Data.LMDB, source=lmdb,
                             transform_param=dict(scale=1./255), ntop=2)

    # EDIT HERE to try different networks
    # this single layer defines a simple linear classifier
    # (in particular this defines a multiway logistic regression)
    n.score =   L.InnerProduct(n.data, num_output=10, weight_filler=dict(type='xavier'))

    # EDIT HERE this is the LeNet variant we have already tried
    # n.conv1 = L.Convolution(n.data, kernel_size=5, num_output=20, weight_filler=dict(type='xavier'))
    # n.pool1 = L.Pooling(n.conv1, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    # n.conv2 = L.Convolution(n.pool1, kernel_size=5, num_output=50, weight_filler=dict(type='xavier'))
    # n.pool2 = L.Pooling(n.conv2, kernel_size=2, stride=2, pool=P.Pooling.MAX)
    # n.fc1 =   L.InnerProduct(n.pool2, num_output=500, weight_filler=dict(type='xavier'))
    # EDIT HERE consider L.ELU or L.Sigmoid for the nonlinearity
    # n.relu1 = L.ReLU(n.fc1, in_place=True)
    # n.score =   L.InnerProduct(n.fc1, num_output=10, weight_filler=dict(type='xavier'))

    # keep this loss layer for all networks
    n.loss =  L.SoftmaxWithLoss(n.score, n.label)

    return n.to_proto()

with open(train_net_path, 'w') as f:
    f.write(str(custom_net('mnist/mnist_train_lmdb', 64)))    
with open(test_net_path, 'w') as f:
    f.write(str(custom_net('mnist/mnist_test_lmdb', 100)))

### define solver
from caffe.proto import caffe_pb2
s = caffe_pb2.SolverParameter()

# Set a seed for reproducible experiments:
# this controls for randomization in training.
s.random_seed = 0xCAFFE

# Specify locations of the train and (maybe) test networks.
s.train_net = train_net_path
s.test_net.append(test_net_path)
s.test_interval = 500  # Test after every 500 training iterations.
s.test_iter.append(100) # Test on 100 batches each time we test.

s.max_iter = 10000     # no. of times to update the net (training iterations)

# EDIT HERE to try different solvers
# solver types include "SGD", "Adam", and "Nesterov" among others.
s.type = "SGD"

# Set the initial learning rate for SGD.
s.base_lr = 0.01  # EDIT HERE to try different learning rates
# Set momentum to accelerate learning by
# taking weighted average of current and previous updates.
s.momentum = 0.9
# Set weight decay to regularize and prevent overfitting
s.weight_decay = 5e-4

# Set `lr_policy` to define how the learning rate changes during training.
# This is the same policy as our default LeNet.
s.lr_policy = 'inv'
s.gamma = 0.0001
s.power = 0.75
# EDIT HERE to try the fixed rate (and compare with adaptive solvers)
# `fixed` is the simplest policy that keeps the learning rate constant.
# s.lr_policy = 'fixed'

# Display the current training loss and accuracy every 1000 iterations.
s.display = 1000

# Snapshots are files used to store networks we've trained.
# We'll snapshot every 5K iterations -- twice during training.
s.snapshot = 5000
s.snapshot_prefix = 'mnist/custom_net'

# Train on the GPU
s.solver_mode = caffe_pb2.SolverParameter.GPU

# Write the solver to a temporary file and return its filename.
with open(solver_config_path, 'w') as f:
    f.write(str(s))

### load the solver and create train and test nets
solver = None  # ignore this workaround for lmdb data (can't instantiate two solvers on the same data)
solver = caffe.get_solver(solver_config_path)

### solve
niter = 250  # EDIT HERE increase to train for longer
test_interval = niter / 10
# losses will also be stored in the log
train_loss = zeros(niter)
test_acc = zeros(int(np.ceil(niter / test_interval)))

# the main solver loop
for it in range(niter):
    solver.step(1)  # SGD by Caffe

    # store the train loss
    train_loss[it] = solver.net.blobs['loss'].data

    # run a full test every so often
    # (Caffe can also do this for us and write to a log, but we show here
    #  how to do it directly in Python, where more complicated things are easier.)
    if it % test_interval == 0:
        print 'Iteration', it, 'testing...'
        correct = 0
        for test_it in range(100):
            solver.test_nets[0].forward()
            correct += sum(solver.test_nets[0].blobs['score'].data.argmax(1)
                           == solver.test_nets[0].blobs['label'].data)
        test_acc[it // test_interval] = correct / 1e4

_, ax1 = subplots()
ax2 = ax1.twinx()
ax1.plot(arange(niter), train_loss)
ax2.plot(test_interval * arange(len(test_acc)), test_acc, 'r')
ax1.set_xlabel('iteration')
ax1.set_ylabel('train loss')
ax2.set_ylabel('test accuracy')
ax2.set_title('Custom Test Accuracy: {:.2f}'.format(test_acc[-1]))

Output 9:

Iteration 0 testing...
Iteration 25 testing...
Iteration 50 testing...
Iteration 75 testing...
Iteration 100 testing...
Iteration 125 testing...
Iteration 150 testing...
Iteration 175 testing...
Iteration 200 testing...
Iteration 225 testing...

<matplotlib.text.Text at 0x7f5199af9f50>

[Figure: custom net training loss (blue) and test accuracy (red) over 250 iterations]
