1. expand_mnist.py
from __future__ import print_function
import pickle
import gzip
import os.path
import random
import numpy as np

print("Expanding the MNIST training set")

if os.path.exists("E:/sxl_Programs/Python/MNIST_data/MNIST_data/mnist_expanded.pkl.gz"):
    print("The expanded training set already exists. Exiting.")
else:
    f = gzip.open("E:/sxl_Programs/Python/MNIST_data/MNIST_data/mnist.pkl.gz", 'rb')
    training_data, validation_data, test_data = pickle.load(f, encoding='bytes')
    f.close()
    expanded_training_pairs = []
    j = 0  # counter of images processed so far
    for x, y in zip(training_data[0], training_data[1]):
        expanded_training_pairs.append((x, y))
        image = np.reshape(x, (-1, 28))
        j += 1
        if j % 1000 == 0: print("Expanding image number", j)
        # Shift the image one pixel up, down, left, and right, zeroing
        # the row or column that np.roll wraps around.
        for d, axis, index_position, index in [
                (1,  0, "first", 0),
                (-1, 0, "first", 27),
                (1,  1, "last",  0),
                (-1, 1, "last",  27)]:
            new_img = np.roll(image, d, axis)
            if index_position == "first":
                new_img[index, :] = np.zeros(28)
            else:
                new_img[:, index] = np.zeros(28)
            expanded_training_pairs.append((np.reshape(new_img, 784), y))
    random.shuffle(expanded_training_pairs)
    expanded_training_data = [list(d) for d in zip(*expanded_training_pairs)]
    print("Saving expanded data. This may take a few minutes.")
    f = gzip.open("E:/sxl_Programs/Python/MNIST_data/MNIST_data/mnist_expanded.pkl.gz", "wb")
    pickle.dump((expanded_training_data, validation_data, test_data), f)
    f.close()
    print("close")
2. network3.py
import pickle
import gzip
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import conv
from theano.tensor.nnet import softmax
from theano.tensor import shared_randomstreams
from theano.tensor.signal import pool

def linear(z): return z
def ReLU(z): return T.maximum(0.0, z)
from theano.tensor.nnet import sigmoid
from theano.tensor import tanh

GPU = True
if GPU:
    print("Trying to run under a GPU. If this is not desired, then modify " +
          "network3.py\nto set the GPU flag to False.")
    try: theano.config.device = 'gpu'
    except: pass  # it's already set
    theano.config.floatX = 'float32'
else:
    print("Running with a CPU. If this is not desired, then modify " +
          "network3.py\nto set the GPU flag to True.")
#### Load the MNIST data
def load_data_shared(filename="../MNIST_data/MNIST_data/mnist.pkl.gz"):
    f = gzip.open(filename, 'rb')
    training_data, validation_data, test_data = pickle.load(f, encoding='bytes')
    f.close()
    def shared(data):
        # Place the data in shared variables, so Theano can copy them
        # to the GPU if one is available.
        shared_x = theano.shared(
            np.asarray(data[0], dtype=theano.config.floatX), borrow=True)
        shared_y = theano.shared(
            np.asarray(data[1], dtype=theano.config.floatX), borrow=True)
        return shared_x, T.cast(shared_y, "int32")
    return [shared(training_data), shared(validation_data), shared(test_data)]
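# Usage sketch (a hypothetical call, assuming mnist.pkl.gz is at the default path):
#   training_data, validation_data, test_data = load_data_shared()
#   train_x, train_y = training_data
# train_x is a shared floatX matrix (one 784-pixel row per image). The labels
# are stored as floatX too and then cast to int32, since the shared GPU storage
# wants floatX while Theano needs integer labels for indexing.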
#### The Network class
class Network(object):
    # Constructor
    def __init__(self, layers, mini_batch_size):
        self.layers = layers  # the list of layers making up the network
        self.mini_batch_size = mini_batch_size
        self.params = [param for layer in self.layers for param in layer.params]
        self.x = T.matrix("x")
        self.y = T.ivector("y")
        init_layer = self.layers[0]
        init_layer.set_inpt(self.x, self.x, self.mini_batch_size)
        for j in range(1, len(self.layers)):
            prev_layer, layer = self.layers[j-1], self.layers[j]
            layer.set_inpt(
                prev_layer.output, prev_layer.output_dropout, self.mini_batch_size)
        self.output = self.layers[-1].output
        self.output_dropout = self.layers[-1].output_dropout
    # Stochastic gradient descent.
    # Arguments: training data, number of epochs, mini-batch size,
    # learning rate, validation data, test data, L2 regularization parameter.
    def SGD(self, training_data, epochs, mini_batch_size, eta,
            validation_data, test_data, lmbda=0.0):
        training_x, training_y = training_data
        validation_x, validation_y = validation_data
        test_x, test_y = test_data
        # compute the number of mini-batches for training, validation, testing
        num_training_batches = size(training_data)//mini_batch_size
        num_validation_batches = size(validation_data)//mini_batch_size
        num_test_batches = size(test_data)//mini_batch_size
        # define the (L2-regularized) cost, symbolic gradients, and updates
        l2_norm_squared = sum([(layer.w**2).sum() for layer in self.layers])
        cost = self.layers[-1].cost(self) + \
            0.5*lmbda*l2_norm_squared/num_training_batches
        grads = T.grad(cost, self.params)
        updates = [(param, param - eta*grad)
                   for param, grad in zip(self.params, grads)]
        # define functions to train a mini-batch and to compute the
        # accuracy on validation and test mini-batches
        i = T.lscalar()  # mini-batch index
        train_mb = theano.function(
            [i], cost, updates=updates,
            givens={
                self.x:
                training_x[i*self.mini_batch_size:(i+1)*self.mini_batch_size],
                self.y:
                training_y[i*self.mini_batch_size:(i+1)*self.mini_batch_size]
            })
        validate_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={
                self.x:
                validation_x[i*self.mini_batch_size:(i+1)*self.mini_batch_size],
                self.y:
                validation_y[i*self.mini_batch_size:(i+1)*self.mini_batch_size]
            })
        test_mb_accuracy = theano.function(
            [i], self.layers[-1].accuracy(self.y),
            givens={
                self.x:
                test_x[i*self.mini_batch_size:(i+1)*self.mini_batch_size],
                self.y:
                test_y[i*self.mini_batch_size:(i+1)*self.mini_batch_size]
            })
        self.test_mb_predictions = theano.function(
            [i], self.layers[-1].y_out,
            givens={
                self.x:
                test_x[i*self.mini_batch_size:(i+1)*self.mini_batch_size]
            })
        # Do the actual training
        best_validation_accuracy = 0.0
        for epoch in range(epochs):
            for minibatch_index in range(num_training_batches):
                iteration = num_training_batches*epoch + minibatch_index
                if iteration % 10 == 0:
                    print("Training mini-batch number {0}".format(iteration))
                cost_ij = train_mb(minibatch_index)
                if (iteration+1) % num_training_batches == 0:
                    validation_accuracy = np.mean(
                        [validate_mb_accuracy(j) for j in range(num_validation_batches)])
                    print("Epoch {0}: validation accuracy {1:.2%}".format(
                        epoch, validation_accuracy))
                    if validation_accuracy >= best_validation_accuracy:
                        print("This is the best validation accuracy to date.")
                        best_validation_accuracy = validation_accuracy
                        best_iteration = iteration
                        if test_data:
                            test_accuracy = np.mean(
                                [test_mb_accuracy(j) for j in range(num_test_batches)])
                            print("The corresponding test accuracy is {0:.2%}".format(
                                test_accuracy))
        print("Finished training network.")
        print("Best validation accuracy of {0:.2%} obtained at iteration {1}".format(
            best_validation_accuracy, best_iteration))
        print("Corresponding test accuracy of {0:.2%}".format(test_accuracy))
#### Define layer types
class ConvPoolLayer(object):
    def __init__(self, filter_shape, image_shape, poolsize=(2, 2),
                 activation_fn=sigmoid):
        self.filter_shape = filter_shape
        self.image_shape = image_shape
        self.poolsize = poolsize
        self.activation_fn = activation_fn
        # initialize weights and biases
        n_out = (filter_shape[0]*np.prod(filter_shape[2:])/np.prod(poolsize))
        self.w = theano.shared(
            np.asarray(
                np.random.normal(loc=0, scale=np.sqrt(1.0/n_out), size=filter_shape),
                dtype=theano.config.floatX),
            borrow=True)
        self.b = theano.shared(
            np.asarray(
                np.random.normal(loc=0, scale=1.0, size=(filter_shape[0],)),
                dtype=theano.config.floatX),
            borrow=True)
        self.params = [self.w, self.b]
    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape(self.image_shape)
        conv_out = conv.conv2d(
            input=self.inpt, filters=self.w, filter_shape=self.filter_shape,
            image_shape=self.image_shape)
        pooled_out = pool.pool_2d(
            input=conv_out, ds=self.poolsize, ignore_border=True)
        self.output = self.activation_fn(
            pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))
        self.output_dropout = self.output  # no dropout in the convolutional layers
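# Shape check for the network built in demo.py below: a 28x28 input convolved
# with 5x5 filters (valid mode) gives 24x24 feature maps, and 2x2 max-pooling
# halves that to 12x12; the second ConvPoolLayer maps 12x12 -> 8x8 -> 4x4,
# which is why the first FullyConnectedLayer there uses n_in = 40*4*4.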
class FullyConnectedLayer(object):
    def __init__(self, n_in, n_out, activation_fn=sigmoid, p_dropout=0.0):
        self.n_in = n_in
        self.n_out = n_out
        self.activation_fn = activation_fn
        self.p_dropout = p_dropout
        # initialize weights and biases
        self.w = theano.shared(
            np.asarray(
                np.random.normal(
                    loc=0.0, scale=np.sqrt(1.0/n_out), size=(n_in, n_out)),
                dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.asarray(np.random.normal(loc=0.0, scale=1.0, size=(n_out,)),
                       dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]
    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        self.output = self.activation_fn(
            (1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
        self.y_out = T.argmax(self.output, axis=1)
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = self.activation_fn(
            T.dot(self.inpt_dropout, self.w) + self.b)
    def accuracy(self, y):
        return T.mean(T.eq(y, self.y_out))
class SoftmaxLayer(object):
    def __init__(self, n_in, n_out, p_dropout=0.0):
        self.n_in = n_in
        self.n_out = n_out
        self.p_dropout = p_dropout
        # initialize weights and biases with zeros
        self.w = theano.shared(
            np.zeros((n_in, n_out), dtype=theano.config.floatX),
            name='w', borrow=True)
        self.b = theano.shared(
            np.zeros((n_out,), dtype=theano.config.floatX),
            name='b', borrow=True)
        self.params = [self.w, self.b]
    def set_inpt(self, inpt, inpt_dropout, mini_batch_size):
        self.inpt = inpt.reshape((mini_batch_size, self.n_in))
        self.output = softmax((1-self.p_dropout)*T.dot(self.inpt, self.w) + self.b)
        self.y_out = T.argmax(self.output, axis=1)
        self.inpt_dropout = dropout_layer(
            inpt_dropout.reshape((mini_batch_size, self.n_in)), self.p_dropout)
        self.output_dropout = softmax(T.dot(self.inpt_dropout, self.w) + self.b)
    def cost(self, net):
        return -T.mean(T.log(self.output_dropout)[T.arange(net.y.shape[0]), net.y])
    def accuracy(self, y):
        return T.mean(T.eq(y, self.y_out))
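# The fancy indexing in cost() selects, for each example in the mini-batch,
# the log-probability the network assigns to the true class:
# T.log(output_dropout)[T.arange(n), y] is a length-n vector whose i-th entry
# is log p(y_i | x_i), so the cost is the mean negative log-likelihood.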
def size(data):
    return data[0].get_value(borrow=True).shape[0]

def dropout_layer(layer, p_dropout):
    srng = shared_randomstreams.RandomStreams(
        np.random.RandomState(0).randint(999999))
    mask = srng.binomial(n=1, p=1-p_dropout, size=layer.shape)
    return layer*T.cast(mask, theano.config.floatX)
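Two conventions meet in the dropout code above: during training, dropout_layer zeroes each unit with probability p_dropout via a binomial mask, while at test time set_inpt instead scales the full weighted input by (1-p_dropout), so the expected input to the next layer matches what it saw in training. A numpy-only sketch of the same idea (the activation values are made up for illustration):

import numpy as np

rng = np.random.RandomState(0)
p_dropout = 0.5
activations = np.array([0.2, 0.9, 0.4, 0.7])

# training: zero out each activation with probability p_dropout
mask = rng.binomial(n=1, p=1 - p_dropout, size=activations.shape)
train_out = activations * mask

# test: keep every unit, but scale by (1 - p_dropout) so the expected
# total input to the next layer matches the training regime
test_out = (1 - p_dropout) * activations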
3. demo.py
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# import network
# import network2
import network3
from network3 import Network
from network3 import ConvPoolLayer, FullyConnectedLayer, SoftmaxLayer
# from conv import mini_batch_size
import mnist_loader
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet import conv
from theano.tensor import shared_randomstreams
from theano.tensor.signal import pool

def ReLU(z): return T.maximum(0.0, z)

training_data, validation_data, test_data = network3.load_data_shared()
mini_batch_size = 10
################load data#################################################
# training set, validation set, test set
# training_data,validation_data,test_data=mnist_loader.load_data_wrapper()
# print("training_data")
# print(type(training_data))        # data type
# print(len(training_data))         # number of examples
# print(training_data[0][0].shape)  # each element is an (x, y) tuple; index 0 is x
# print(training_data[0][1].shape)  # index 1 is y
# print(training_data[0])
#
# print("validation_data")
# print(len(validation_data))
#
# print("test_data")
# print(len(test_data))
#
################load data#################################################
################network1#################################################
# First layer 784 neurons, hidden layer 30 neurons, 10 output neurons
# net=network.Network([784,30,10])
# # training data, epochs, mini-batch size, learning rate, test data
# net.SGD(training_data,30,10,3.0,test_data=test_data)
#
# net=network.Network([784,100,10])
# net.SGD(training_data,30,10,3.0,test_data=test_data)
#
# net=network.Network([784,50,60,10])
# net.SGD(training_data,30,10,3.0,test_data=test_data)
################network1#################################################
################network2#################################################
# # 784 input neurons, two hidden layers of 30 neurons, 10 outputs
# print("666")
# net=network2.Network([784,30,30,10],cost=network2.CrossEntropyCost)
# # net.large_weight_initializer()
# # training data, epochs, mini-batch size, learning rate, lambda, evaluation data
# net.SGD(training_data,30,10,0.1,5.0,evaluation_data=validation_data,
#         monitor_evaluation_cost=True,monitor_evaluation_accuracy=True,
#         monitor_training_cost=True,monitor_training_accuracy=True)
################network2#################################################
################network3#################################################
expanded_training_data, _, _ = network3.load_data_shared(
    "E:/sxl_Programs/Python/MNIST_data/MNIST_data/mnist_expanded.pkl.gz")
net = Network([
    ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                  filter_shape=(20, 1, 5, 5),
                  poolsize=(2, 2),
                  activation_fn=ReLU),
    ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12),
                  filter_shape=(40, 20, 5, 5),
                  poolsize=(2, 2),
                  activation_fn=ReLU),
    FullyConnectedLayer(
        n_in=40*4*4, n_out=1000, activation_fn=ReLU, p_dropout=0.5),
    FullyConnectedLayer(
        n_in=1000, n_out=1000, activation_fn=ReLU, p_dropout=0.5),
    SoftmaxLayer(n_in=1000, n_out=10, p_dropout=0.5)],
    mini_batch_size)
net.SGD(expanded_training_data, 40, mini_batch_size, 0.03,
        validation_data, test_data)
################network3#################################################
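For a sense of scale, a quick parameter count for the network above (plain Python arithmetic; weights plus biases per layer):

conv1 = 20*1*5*5 + 20       # 520
conv2 = 40*20*5*5 + 40      # 20,040
fc1 = 40*4*4*1000 + 1000    # 641,000
fc2 = 1000*1000 + 1000      # 1,001,000
sm = 1000*10 + 10           # 10,010
print(conv1 + conv2 + fc1 + fc2 + sm)  # 1,672,570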
4. Results
This code runs slowly, so I will run it when I find time and post the results here; the code itself already runs end to end.