用于训练的图片为28*28像素的单通道灰度图像
用Bengio封装好的mnist数据集下载地址:http://deeplearning.net/data/mnist/mnist.pkl.gz
此压缩包是训练集、验证集、测试集用pickle导出后再压缩为gzip格式的文件,所以用python的gzip模块可以直接当文件读取
1.将数据集转化为jpg图片的python代码如下:
import os
import pickle, gzip
import numpy
from matplotlib import pyplot
# Convert the pickled MNIST splits into one JPG file per sample, laid out as
# mnist/<split>/<index>_<label>.jpg.
with gzip.open('mnist.pkl.gz', 'rb') as f:
    # NOTE: unpickling can execute arbitrary code; only load an archive
    # obtained from a source you trust.
    try:
        # Python 3 needs encoding='latin1' to read numpy arrays that were
        # pickled by Python 2.
        train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
    except TypeError:
        # Python 2's pickle.load() does not accept the ``encoding`` keyword.
        train_set, valid_set, test_set = pickle.load(f)

imgs_dir = 'mnist'
datasets = {'train': train_set, 'val': valid_set, 'test': test_set}

for dataname, dataset in datasets.items():
    data_dir = os.path.join(imgs_dir, dataname)
    # Create the directory from Python instead of shelling out to
    # ``mkdir -p``, which is not available on every platform.
    if not os.path.isdir(data_dir):
        os.makedirs(data_dir)

    # zip(*dataset) pairs each image with its label.
    for i, (img, label) in enumerate(zip(*dataset)):
        filename = '{:0>6d}_{}.jpg'.format(i, label)
        filepath = os.path.join(data_dir, filename)
        # Each sample is reshaped into a 28x28 image before saving.
        img = img.reshape((28, 28))
        pyplot.imsave(filepath, img, cmap='gray')
        # Progress report every 10000 images.
        if (i + 1) % 10000 == 0:
            print('{} imgs converted!'.format(i + 1))
将代码保存为1.py 并在下载目录下运行脚本
py 1.py
即可获得大量手写体图片
2.为上一步生成的图片创建文件列表和对应标签
import os
import sys
def parse_label(filename):
    """Return the label encoded in an ``<index>_<label>.<ext>`` file name.

    The label is the second ``_``-separated field of the name with its
    extension removed. ``None`` is returned when the name has no such field
    or the field is not a non-negative integer, so unrelated directory
    entries can be skipped instead of aborting the run.
    """
    # splitext leaves extension-less names intact, unlike slicing at
    # rfind('.'), which would drop the last character when there is no dot.
    stem = os.path.splitext(filename)[0]
    fields = stem.split('_')
    if len(fields) < 2 or not fields[1].isdigit():
        return None
    return fields[1]


def write_image_list(input_path, output_path):
    """Write one ``<path> <label>`` line per labelled image in *input_path*.

    Entries are written in sorted order so that the generated list is
    reproducible; entries without a parsable label are skipped.
    """
    input_path = input_path.rstrip(os.sep)
    with open(output_path, 'w') as f:
        for filename in sorted(os.listdir(input_path)):
            label = parse_label(filename)
            if label is None:
                continue
            filepath = os.sep.join([input_path, filename])
            f.write('{} {}\n'.format(filepath, label))


if __name__ == '__main__':
    if len(sys.argv) < 3:
        sys.exit('usage: python caffe_imglist.py <image_dir> <output_list>')
    write_image_list(sys.argv[1], sys.argv[2])
保存这个文件为caffe_imglist.py
在mnist目录下依次运行下面命令
>python caffe_imglist.py train train.txt
>python caffe_imglist.py val val.txt
>python caffe_imglist.py test test.txt
然后调用caffe自带的图片转lmdb格式的小工具
>/path/to/caffe/build/tools/convert_imageset ./ train.txt train_lmdb --gray --shuffle
>/path/to/caffe/build/tools/convert_imageset ./ val.txt val_lmdb --gray --shuffle
>/path/to/caffe/build/tools/convert_imageset ./ test.txt test_lmdb --gray --shuffle
3.训练LeNet-5
用于描述数据源和网络结构的lenet_train_val.prototxt:
# LeNet definition used for training (TRAIN phase) and validation (TEST phase).
name: "LeNet"

# Training input: batches of 50 samples read from the training LMDB.
# NOTE(review): the source path is relative; confirm it matches where the
# LMDB directories were actually created.
layer {
  name: "mnist"
  type: "Data"
  top: "data"
  top: "label"
  include { phase: TRAIN }
  # Subtract 128 from every pixel, then multiply by 0.00390625 (= 1/256).
  transform_param {
    mean_value: 128
    scale: 0.00390625
  }
  data_param {
    source: "../data/train_lmdb"
    batch_size: 50
    backend: LMDB
  }
}

# Validation input: batches of 100 samples, same preprocessing as training.
layer {
  name: "mnist"
  type: "Data"
  top: "data"
  top: "label"
  include { phase: TEST }
  transform_param {
    mean_value: 128
    scale: 0.00390625
  }
  data_param {
    source: "../data/val_lmdb"
    batch_size: 100
    backend: LMDB
  }
}

# First convolution: 20 output channels, 5x5 kernel, stride 1.
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  # Learning-rate multipliers: the second param block uses twice the first.
  param { lr_mult: 1 }
  param { lr_mult: 2 }
  convolution_param {
    num_output: 20
    kernel_size: 5
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" }
  }
}

# 2x2 max pooling with stride 2.
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}

# Second convolution: 50 output channels, 5x5 kernel, stride 1.
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param { lr_mult: 1 }
  param { lr_mult: 2 }
  convolution_param {
    num_output: 50
    kernel_size: 5
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" }
  }
}

# 2x2 max pooling with stride 2.
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}

# Fully connected layer with 500 outputs.
layer {
  name: "ip1"
  type: "InnerProduct"
  bottom: "pool2"
  top: "ip1"
  param { lr_mult: 1 }
  param { lr_mult: 2 }
  inner_product_param {
    num_output: 500
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" }
  }
}

# In-place ReLU on ip1 (bottom and top are the same blob).
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "ip1"
  top: "ip1"
}

# Output layer: 10 scores, one per digit class.
layer {
  name: "ip2"
  type: "InnerProduct"
  bottom: "ip1"
  top: "ip2"
  param { lr_mult: 1 }
  param { lr_mult: 2 }
  inner_product_param {
    num_output: 10
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" }
  }
}

# Classification accuracy, computed only in the TEST phase.
layer {
  name: "accuracy"
  type: "Accuracy"
  bottom: "ip2"
  bottom: "label"
  top: "accuracy"
  include { phase: TEST }
}

# Softmax loss over the ip2 scores; present in both phases.
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "ip2"
  bottom: "label"
  top: "loss"
}
lenet_solver.prototxt:
# Solver settings for training the LeNet model defined in
# lenet_train_val.prototxt.
# The train/validate net protocol buffer definition
net: "lenet_train_val.prototxt"
# test_iter specifies how many forward passes each test run carries out.
# The TEST-phase data layer of lenet_train_val.prototxt reads val_lmdb with
# batch size 100, so 100 iterations evaluate 100 * 100 = 10,000 validation
# images per test run.
test_iter: 100
# Carry out testing every 500 training iterations.
test_interval: 500
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.01
momentum: 0.9
weight_decay: 0.0005
# The learning rate policy ("inv", parameterised by gamma and power)
lr_policy: "inv"
gamma: 0.0001
power: 0.75
# Display every 100 iterations
display: 100
# The maximum number of iterations
max_iter: 36000
# Snapshot intermediate results every 5000 iterations; snapshot file names
# start with the prefix below.
snapshot: 5000
snapshot_prefix: "mnist_lenet"
# solver mode: CPU or GPU
solver_mode: GPU
调用下面命令即可进行训练:
/path/to/caffe/build/tools/caffe train -solver lenet_solver.prototxt -gpu 0 -log_dir ./
训练结束后可以得到模型文件(文件名以solver中的snapshot_prefix开头,如mnist_lenet_iter_10000.caffemodel)
4.利用训练好的模型去识别test中的图片,基于python接口实现:
import sys
sys.path.append('/path/to/caffe/python')
import numpy as np
import cv2
import caffe
# Preprocessing constants; they mirror the mean_value / scale used by the
# data layers in lenet_train_val.prototxt.
MEAN = 128
SCALE = 0.00390625

# Path of the image list file (one "<image path> <label>" entry per line).
imglist = sys.argv[1]

caffe.set_mode_gpu()
caffe.set_device(0)
# The weights file name follows the solver's snapshot_prefix ("mnist_lenet");
# pick whichever snapshot iteration you want to evaluate.
net = caffe.Net('lenet.prototxt', 'mnist_lenet_iter_10000.caffemodel', caffe.TEST)
# One single-channel 28x28 image per forward pass.
net.blobs['data'].reshape(1, 1, 28, 28)

with open(imglist, 'r') as f:
    for line in f:
        fields = line.split()
        # Skip blank lines instead of failing on fields[0].
        if not fields:
            continue
        imgpath = fields[0]

        image = cv2.imread(imgpath, cv2.IMREAD_GRAYSCALE)
        # cv2.imread() returns None when the file cannot be read.
        if image is None:
            print('Cannot read image {}, skipped'.format(imgpath))
            continue

        # np.float was removed from recent NumPy releases; np.float64 is the
        # type it used to alias.
        image = (image.astype(np.float64) - MEAN) * SCALE
        net.blobs['data'].data[...] = image
        output = net.forward()
        # 'prob' is the Softmax output of lenet.prototxt; the index of its
        # largest entry is the predicted digit.
        pred_label = np.argmax(output['prob'][0])
        print('Predicted digit for {} is {}'.format(imgpath, pred_label))
保存为recognize_digit.py
创建一个lenet.prototxt文件:
# Deployment (inference) version of LeNet: same layers as the training
# network, with an Input layer in place of the LMDB data layers and a
# Softmax output in place of the loss/accuracy layers.
name: "LeNet"

# Network input: one single-channel 28x28 image per forward pass.
layer {
  name: "data"
  type: "Input"
  top: "data"
  input_param {
    shape: {
      dim: 1
      dim: 1
      dim: 28
      dim: 28
    }
  }
}

# First convolution: 20 output channels, 5x5 kernel, stride 1.
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param { lr_mult: 1 }
  param { lr_mult: 2 }
  convolution_param {
    num_output: 20
    kernel_size: 5
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" }
  }
}

# 2x2 max pooling with stride 2.
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}

# Second convolution: 50 output channels, 5x5 kernel, stride 1.
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param { lr_mult: 1 }
  param { lr_mult: 2 }
  convolution_param {
    num_output: 50
    kernel_size: 5
    stride: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" }
  }
}

# 2x2 max pooling with stride 2.
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}

# Fully connected layer with 500 outputs.
layer {
  name: "ip1"
  type: "InnerProduct"
  bottom: "pool2"
  top: "ip1"
  param { lr_mult: 1 }
  param { lr_mult: 2 }
  inner_product_param {
    num_output: 500
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" }
  }
}

# In-place ReLU on ip1 (bottom and top are the same blob).
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "ip1"
  top: "ip1"
}

# Output layer: 10 scores, one per digit class.
layer {
  name: "ip2"
  type: "InnerProduct"
  bottom: "ip1"
  top: "ip2"
  param { lr_mult: 1 }
  param { lr_mult: 2 }
  inner_product_param {
    num_output: 10
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" }
  }
}

# Softmax over the ip2 scores; the "prob" blob is what the recognition
# script reads.
layer {
  name: "prob"
  type: "Softmax"
  bottom: "ip2"
  top: "prob"
}
运行:
python recognize_digit.py test.txt