不用框架，python实现卷积神经网络

最近学习了卷积神经网络，推荐一些比较好的学习资源

1: https://www.zybuluo.com/hanbingtao/note/485480

2: http://blog.csdn.net/u010540396/article/details/52895074

对于网址，我大部分学习的资源和数学公式都是来源于此，强烈推荐学习。

对于网址2，我下面的代码就是在其基础上改写的，作者是用matlab实现的，这对于不会matlab的同学而言，会比较费时，毕竟，

我们要做的是搞懂卷积神经网络，而不是某一个编程语言。

而且最重要的是，我自己想弄明白CNN的前向网络和误差反向传播算法，自己亲自实现一遍，更有助于理解和记忆，哪怕是看着别人的代码学会的。

A:下面代码实现是LenNet-5的代码，但是只有一个卷积层，一个mean-pooling层，和一个全连接层，出来经过softmax层。

B:使用的数据集是MNIST,你可以到http://yann.lecun.com/exdb/mnist/

C:MNIST的数据解析，可以从我下面的analysisMNIST.py中修改路径，谢谢(http://blog.csdn.net/u014046170/article/details/47445919)然后取得到数据如下情况:

D:在C解析完之后，我把label文件的内容转置了，开始的时候是一行，我改成了一列。

注:代码里面的TODO是很多公式推导，我有空会敲出来，然后也作为超链接给弄出来，怕自己下次又给忘了。

我的总共有三个文件:

这是我定义的全局变量的文件 gParam.py

#! /usr/bin/env python
# -*- coding: utf-8 -*-

TOP_PATH = '/media/autumn/Work/data/MNIST/mnist-png/'
LAB_PATH = '/media/autumn/Work/data/MNIST/mnist-png/label1.txt'
C_SIZE = 5
F_NUM = 12
P_SIZE = 2
FILE_TYPE = '.png'
MAX_ITER_NUM = 50

这是我测试的文件myCnnTest.py

#! /usr/bin/env python
# -*- coding: utf-8 -*-

from numpy import *
import numpy as np
from myCnn import Ccnn
import math
import gParam

# code
cLyNum = 20
pLyNum = 20
fLyNum = 100
oLyNum = 10
train_num = 800

myCnn = Ccnn(cLyNum, pLyNum, fLyNum, oLyNum)
ylabel = myCnn.read_label(gParam.LAB_PATH)
for iter0 in range(gParam.MAX_ITER_NUM):
	for i in range(train_num):
		data = myCnn.read_pic_data(gParam.TOP_PATH, i)	
		#print shape(data)
		ylab = int(ylabel[i])
		d_m, d_n = shape(data)
		m_c = d_m - gParam.C_SIZE + 1
		n_c = d_n - gParam.C_SIZE + 1
		m_p = m_c/myCnn.pSize
		n_p = n_c/myCnn.pSize
		state_c = zeros((m_c, n_c,myCnn.cLyNum))
		state_p = zeros((m_p, n_p, myCnn.pLyNum))
		for n in range(myCnn.cLyNum):
			state_c[:,:,n] = myCnn.convolution(data, myCnn.kernel_c[:,:,n])
			#print shape(myCnn.cLyNum)
			tmp_bias = ones((m_c,n_c)) * myCnn.cLyBias[:,n]
			state_c[:,:,n] = np.tanh(state_c[:,:,n] + tmp_bias)# 加上偏置项然后过激活函数
			state_p[:,:,n] = myCnn.pooling(state_c[:,:,n],myCnn.pooling_a)
		state_f, state_f_pre = myCnn.convolution_f1(state_p,myCnn.kernel_f, myCnn.weight_f)
		#print shape(state_f), shape(state_f_pre)
		#进入激活函数
		state_fo = zeros((1,myCnn.fLyNum))#全连接层经过激活函数的结果	
		for n in range(myCnn.fLyNum):
				state_fo[:,n] = np.tanh(state_f[:,:,n] + myCnn.fLyBias[:,n])
		#进入softmax层
		output = myCnn.softmax_layer(state_fo)
		err = -output[:,ylab]				
		#计算误差
		y_pre = output.argmax(axis=1)
		#print output	
		#计算误差		
		#print err 
		myCnn.cnn_upweight(err,ylab,data,state_c,state_p,\
							state_fo, state_f_pre, output)
		# print myCnn.kernel_c
		# print myCnn.cLyBias
		# print myCnn.weight_f
		# print myCnn.kernel_f
		# print myCnn.fLyBias
		# print myCnn.weight_output						
		
# predict
test_num = []
for i in range(100):
	test_num.append(train_num+i+1)

for i in test_num:
	data = myCnn.read_pic_data(gParam.TOP_PATH, i)	
	#print shape(data)
	ylab = int(ylabel[i])
	d_m, d_n = shape(data)
	m_c = d_m - gParam.C_SIZE + 1
	n_c = d_n - gParam.C_SIZE + 1
	m_p = m_c/myCnn.pSize
	n_p = n_c/myCnn.pSize
	state_c = zeros((m_c, n_c,myCnn.cLyNum))
	state_p = zeros((m_p, n_p, myCnn.pLyNum))
	for n in range(myCnn.cLyNum):
		state_c[:,:,n] = myCnn.convolution(data, myCnn.kernel_c[:,:,n])
		#print shape(myCnn.cLyNum)
		tmp_bias = ones((m_c,n_c)) * myCnn.cLyBias[:,n]
		state_c[:,:,n] = np.tanh(state_c[:,:,n] + tmp_bias)# 加上偏置项然后过激活函数
		state_p[:,:,n] = myCnn.pooling(state_c[:,:,n],myCnn.pooling_a)
	state_f, state_f_pre = myCnn.convolution_f1(state_p,myCnn.kernel_f, myCnn.weight_f)
	#print shape(state_f), shape(state_f_pre)
	#进入激活函数
	state_fo = zeros((1,myCnn.fLyNum))#全连接层经过激活函数的结果	
	for n in range(myCnn.fLyNum):
			state_fo[:,n] = np.tanh(state_f[:,:,n] + myCnn.fLyBias[:,n])
	#进入softmax层
	output = myCnn.softmax_layer(state_fo)	
	#计算误差
	y_pre = output.argmax(axis=1)
	print '真实数字为%d',ylab, '预测数字是%d', y_pre

接下来是CNN的核心代码，里面有中文注释，文件名是myCnn.py

#! /usr/bin/env python
# -*- coding: utf-8 -*-

from numpy import *
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mgimg
import math
import gParam
import copy
import scipy.signal as signal


# createst uniform random array w/ values in [a,b) and shape args
# return value type is ndarray
def rand_arr(a, b, *args): 
    np.random.seed(0) 
    return np.random.rand(*args) * (b - a) + a

# Class Cnn     
class Ccnn:
	def __init__(self, cLyNum, pLyNum,fLyNum,oLyNum):
		self.cLyNum = cLyNum
		self.pLyNum = pLyNum
		self.fLyNum = fLyNum
		self.oLyNum = oLyNum
		self.pSize = gParam.P_SIZE
		self.yita = 0.01
		self.cLyBias = rand_arr(-0.1, 0.1, 1,cLyNum)
		self.fLyBias = rand_arr(-0.1, 0.1, 1,fLyNum)
		self.kernel_c = zeros((gParam.C_SIZE,gParam.C_SIZE,cLyNum))
		self.kernel_f = zeros((gParam.F_NUM,gParam.F_NUM,fLyNum))
		for i in range(cLyNum):
			self.kernel_c[:,:,i] = rand_arr(-0.1,0.1,gParam.C_SIZE,gParam.C_SIZE)
		for i in range(fLyNum):
			self.kernel_f[:,:,i] = rand_arr(-0.1,0.1,gParam.F_NUM,gParam.F_NUM)
		self.pooling_a = ones((self.pSize,self.pSize))/(self.pSize**2)	
		self.weight_f = rand_arr(-0.1,0.1, pLyNum, fLyNum)
		self.weight_output = rand_arr(-0.1,0.1,fLyNum,oLyNum)
	def read_pic_data(self, path, i):
		#print 'read_pic_data'
		data = np.array([])
		full_path = path + '%d'%i + gParam.FILE_TYPE
		try:
			data = mgimg.imread(full_path) #data is np.array
			data = (double)(data)
		except IOError:
			raise Exception('open file error in read_pic_data():', full_path)
		return data		
	def read_label(self, path):
		#print 'read_label'
		ylab = []
		try:
			fobj = open(path, 'r')
			for line in fobj:
				ylab.append(line.strip())
			fobj.close()
		except IOError:
			raise Exception('open file error in read_label():', path)		
		return ylab		
	#卷积层
	def convolution(self, data, kernel):
		data_row, data_col = shape(data)
		kernel_row, kernel_col = shape(kernel)
		n = data_col - kernel_col
		m = data_row - kernel_row
		state = zeros((m+1, n+1))
		for i in range(m+1):
			for j in range(n+1):
				temp = multiply(data[i:i+kernel_row,j:j+kernel_col], kernel)
				state[i][j] = temp.sum()
		return state		
	# 池化层			
	def pooling(self, data, pooling_a):		
		data_r, data_c = shape(data)
		p_r, p_c = shape(pooling_a)
		r0 = data_r/p_r
		c0 = data_c/p_c
		state = zeros((r0,c0))
		for i in range(c0):
			for j in range(r0):
				temp = multiply(data[p_r*i:p_r*i+1,p_c*j:p_c*j+1],pooling_a)
				state[i][j] = temp.sum()
		return state
	#全连接层
	def convolution_f1(self, state_p1, kernel_f1, weight_f1):
		#池化层出来的20个特征矩阵乘以池化层与全连接层的连接权重进行相加
		#wx(这里的偏置项=0),这个结果然后再和全连接层中的神经元的核
		#进行卷积,返回值:
		#1：全连接层卷积前,只和weight_f1相加之后的矩阵
		#2：和全连接层卷积完之后的矩阵		
		n_p0, n_f = shape(weight_f1)#n_p0=20(是Feature Map的个数);n_f是100(全连接层神经元个数)
		m_p, n_p, pCnt = shape(state_p1)#这个矩阵是三维的
		m_k_f1, n_k_f1,fCnt = shape(kernel_f1)#12*12*100
		state_f1_temp = zeros((m_p,n_p,n_f))
		state_f1 = zeros((m_p - m_k_f1 + 1,n_p - n_k_f1 + 1,n_f))	
		for n in range(n_f):
			count = 0
			for m in range(n_p0):
				temp = state_p1[:,:,m] * weight_f1[m][n] 
				count = count + temp
			state_f1_temp[:,:,n] = count
			state_f1[:,:,n] = self.convolution(state_f1_temp[:,:,n], kernel_f1[:,:,n])	
		return state_f1, state_f1_temp	
	# softmax 层
	def softmax_layer(self,state_f1):
		# print 'softmax_layer'
		output = zeros((1,self.oLyNum))
		t1 = (exp(np.dot(state_f1,self.weight_output))).sum()
		for i in range(self.oLyNum):
			t0 = exp(np.dot(state_f1,self.weight_output[:,i]))
			output[:,i]=t0/t1
		return output	
	#误差反向传播更新权值
	def cnn_upweight(self,err_cost, ylab, train_data,state_c1, \
					state_s1, state_f1, state_f1_temp, output):
		#print 'cnn_upweight'
			m_data, n_data = shape(train_data)
			# softmax的资料请查看 (TODO)
			label = zeros((1,self.oLyNum))
			label[:,ylab]	= 1 
			delta_layer_output = output - label
			weight_output_temp = copy.deepcopy(self.weight_output)
			delta_weight_output_temp = zeros((self.fLyNum, self.oLyNum))
			#print shape(state_f1)
			#更新weight_output			
			for n in range(self.oLyNum):
				delta_weight_output_temp[:,n] = delta_layer_output[:,n] * state_f1
			weight_output_temp = weight_output_temp - self.yita * delta_weight_output_temp
			
			#更新bais_f和kernel_f (推导公式请查看 TODO)	
			delta_layer_f1 = zeros((1, self.fLyNum))
			delta_bias_f1 = zeros((1,self.fLyNum))
			delta_kernel_f1_temp = zeros(shape(state_f1_temp))
			kernel_f_temp = copy.deepcopy(self.kernel_f)			
			for n in range(self.fLyNum):
				count = 0
				for m in range(self.oLyNum):
					count = count + delta_layer_output[:,m] * self.weight_output[n,m]
				delta_layer_f1[:,n] = np.dot(count, (1 - np.tanh(state_f1[:,n])**2))	
				delta_bias_f1[:,n] = delta_layer_f1[:,n]
				delta_kernel_f1_temp[:,:,n] = delta_layer_f1[:,n] * state_f1_temp[:,:,n]
			# 1
			self.fLyBias = self.fLyBias - self.yita * delta_bias_f1
			kernel_f_temp = kernel_f_temp - self.yita * delta_kernel_f1_temp
			 
			#更新weight_f1
			delta_layer_f1_temp = zeros((gParam.F_NUM,gParam.F_NUM,self.fLyNum))
			delta_weight_f1_temp = zeros(shape(self.weight_f))
			weight_f1_temp = copy.deepcopy(self.weight_f)
			for n in range(self.fLyNum):
				delta_layer_f1_temp[:,:,n] = delta_layer_f1[:,n] * self.kernel_f[:,:,n]
			for n in range(self.pLyNum):
				for m in range(self.fLyNum):
					temp = delta_layer_f1_temp[:,:,m] * state_s1[:,:,n]
					delta_weight_f1_temp[n,m] = temp.sum()
			weight_f1_temp = weight_f1_temp - self.yita * delta_weight_f1_temp

			# 更新bias_c1
			n_delta_c = m_data - gParam.C_SIZE + 1
			delta_layer_p = zeros((gParam.F_NUM,gParam.F_NUM,self.pLyNum))
			delta_layer_c = zeros((n_delta_c,n_delta_c,self.pLyNum))
			delta_bias_c = zeros((1,self.cLyNum))
			for n in range(self.pLyNum):
				count = 0
				for m in range(self.fLyNum):
					count = count + delta_layer_f1_temp[:,:,m] * self.weight_f[n,m]
				delta_layer_p[:,:,n] = count
				#print shape(np.kron(delta_layer_p[:,:,n], ones((2,2))/4))
				delta_layer_c[:,:,n] = np.kron(delta_layer_p[:,:,n], ones((2,2))/4) \
									  * (1 - np.tanh(state_c1[:,:,n])**2)
				delta_bias_c[:,n] = delta_layer_c[:,:,n].sum()
			# 2
			self.cLyBias = self.cLyBias - self.yita * delta_bias_c
			#更新 kernel_c1
			delta_kernel_c1_temp = zeros(shape(self.kernel_c))
			for n in range(self.cLyNum):
				temp = delta_layer_c[:,:,n]
				r1 = map(list,zip(*temp[::1]))#逆时针旋转90度			
				r2 = map(list,zip(*r1[::1]))#再逆时针旋转90度
				temp = signal.convolve2d(train_data, r2,'valid')
				temp1 = map(list,zip(*temp[::1]))
				delta_kernel_c1_temp[:,:,n] = map(list,zip(*temp1[::1]))
			self.kernel_c = self.kernel_c - self.yita * delta_kernel_c1_temp							  					
			self.weight_f = weight_f1_temp
			self.kernel_f = kernel_f_temp
			self.weight_output = weight_output_temp				
			
		# predict
	def cnn_predict(self,data):
		return

这是单独解析MNIST的脚本,analysisMNIST.py，修改相应的路径后，运行能成功

#!/usr/bin/env python
# -*- coding: utf-8 -*-


from PIL import Image
import struct


def read_image(filename):
  f = open(filename, 'rb')


  index = 0
  buf = f.read()


  f.close()


  magic, images, rows, columns = struct.unpack_from('>IIII' , buf , index)
  index += struct.calcsize('>IIII')


  for i in xrange(images):
  #for i in xrange(2000):
    image = Image.new('L', (columns, rows))


    for x in xrange(rows):
      for y in xrange(columns):
        image.putpixel((y, x), int(struct.unpack_from('>B', buf, index)[0]))
        index += struct.calcsize('>B')


    print 'save ' + str(i) + 'image'
    image.save('/media/autumn/Work/data/MNIST/mnist-png/' + str(i) + '.png')


def read_label(filename, saveFilename):
  f = open(filename, 'rb')
  index = 0
  buf = f.read()


  f.close()


  magic, labels = struct.unpack_from('>II' , buf , index)
  index += struct.calcsize('>II')
  
  labelArr = [0] * labels
  #labelArr = [0] * 2000


  for x in xrange(labels):
  #for x in xrange(2000):
    labelArr[x] = int(struct.unpack_from('>B', buf, index)[0])
    index += struct.calcsize('>B')


  save = open(saveFilename, 'w')


  save.write(','.join(map(lambda x: str(x), labelArr)))
  save.write('\n')


  save.close()
  print 'save labels success'


if __name__ == '__main__':
  read_image('/media/autumn/Work/data/MNIST/mnist/t10k-images.idx3-ubyte')
  read_label('/media/autumn/Work/data/MNIST/mnist/t10k-labels.idx1-ubyte', '/media/autumn/Work/data/MNIST/mnist-png/label.txt')

最后：如果您想直接跑程序，您可以通过以下方式获取我的数据和源程序。由于考虑到个人的人工成本，我形式上只收取2块钱的人工费，既是对我的支持，也是对我的鼓励。谢谢大家的理解。把订单后面6位号码发送给我，我把源码和数据给您呈上。谢谢。

1：扫如下支付宝或微信二维码,支付2元

2：把支付单号的后6位，以邮件发送到我的邮箱[email protected]

3：您也可以在下方留言，把订单号写上来，我会核实。

谢谢大家。

不用框架，python实现卷积神经网络

猜你喜欢