BP神经网络包括前向传播和反向传播两个部分,反向传播的推导是BP神经网络的核心。具体参考https://blog.csdn.net/weixin_40446651/article/details/81516944
本文将描述如何使用python编写一个简单的2层BP神经网络,完成一个二分类问题。给出一系列的二维坐标点,要求以 y = 0.2x² 为界,将这些点分为两类。
首先是训练数据的创建,在[0,10)的区间内,以0.5为步长取20个点,作为输入的横坐标x。对应每个x,在[0,20]的区间上随机取整数值,作为与x对应的y值。并以 y = 0.2x² 为界,给二维坐标点(x,y)分类。在边界以上的 label=1,在边界以下(含边界)的 label=0。并将数据写入txt文件中:
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 28 18:25:06 2018
@author: LongJun
"""
import os
import numpy as np
import random
# Generate a toy binary-classification data set and write it to train.txt.
# Each point (x, y) is labelled 1 if it lies above the curve y = 0.2*x^2,
# otherwise 0.
root_dir = os.path.abspath('.')
# Fixed: the joined path used to be assigned to the misspelled name
# 'filenmae', so a bare relative filename was opened instead.
train_path = os.path.join(root_dir, 'train.txt')

X = []       # x coordinates: 0, 0.5, ..., 9.5
Y = []       # random integer y coordinate for each x
label = []   # 1 if the point lies above y = 0.2*x^2, else 0
for x in [k / 2 for k in range(20)]:
    X.append(x)
    y = random.randint(0, 20)
    Y.append(y)
    label.append(1 if y > 0.2 * x * x else 0)

# "w" instead of "a": re-running the script must not append duplicate rows.
# Values are written as plain comma-separated numbers (no list brackets),
# which is exactly what read_data's split(',') + float() parsing expects;
# the old str(list) format ("[0.0, ...]") made float('[0.0') raise.
with open(train_path, "w") as f:
    f.write(",".join(str(v) for v in X) + "\n")      # line 1: x coordinates
    f.write(",".join(str(v) for v in Y) + "\n")      # line 2: y coordinates
    f.write(",".join(str(v) for v in label) + "\n")  # line 3: 0/1 labels
BP神经网络的实现:
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 28 18:44:52 2018
@author: LongJun
"""
import os
import numpy as np
import random
import pickle
lr = 0.01  # learning rate for the gradient-descent updates in the training loop
def read_data(filename):
    """Load the training set written by the generator script.

    The file stores three lines — x coordinates, y coordinates, labels —
    either as plain comma-separated values or as a Python-list repr
    ("[0.0, 0.5, ...]").

    Returns:
        X: np.ndarray of shape (2, m) — stacked (x, y) coordinates.
        Y: np.ndarray of shape (1, m) — 0/1 labels as floats.
    """
    with open(filename, "r") as f:
        lines = [line.strip() for line in f.readlines()]

    def _parse(line):
        # strip('[]') tolerates the str(list) format the original writer
        # produced; the bare split(',') used to make float('[0.0') raise.
        return [float(tok) for tok in line.strip('[]').split(',')]

    X1 = _parse(lines[0])
    X2 = _parse(lines[1])
    Y = _parse(lines[2])
    X = np.array([X1, X2])  # shape (2, m): one column per sample
    Y = np.array([Y])       # shape (1, m)
    return X, Y
def network_init():
    """Randomly initialise the 2-10-1 network parameters.

    Weights are drawn from N(0, 1) and scaled by 0.01 to keep the first
    activations small; biases start at zero.  Returns (W1, b1, W2, b2).
    """
    hidden = 10
    W1 = 0.01 * np.random.randn(hidden, 2)  # hidden layer: 2 inputs -> 10 units
    b1 = np.zeros((hidden, 1))              # one bias per hidden unit
    W2 = 0.01 * np.random.randn(1, hidden)  # output layer: 10 units -> 1 output
    b2 = 0                                  # scalar output bias
    return W1, b1, W2, b2
def relu(x):
    """Element-wise ReLU activation: max(x, 0)."""
    return np.maximum(0.0, x)
def sigmod(x):
    """Logistic sigmoid, 1 / (1 + e^-x).  (Spelling kept to match callers.)"""
    return 1.0 / (np.exp(-x) + 1.0)
def d_sigmod(x):
    """Derivative of the sigmoid, s(x)*(1 - s(x)), written via e^-x."""
    e = np.exp(-x)
    denom = 1.0 + e
    return e / (denom * denom)
def d_relu(x):
    """Element-wise derivative of ReLU.

    Returns an array of the same shape as x: 1.0 where x >= 0 and 0.0
    elsewhere.  The subgradient at exactly 0 is taken as 1, matching the
    original implementation's `np.where(x >= 0)` mask.

    The original built the mask with np.where plus a Python loop over every
    index — O(n) interpreter-level work; the boolean cast below does the
    same thing vectorised, and also lifts the 2-D-only restriction.
    """
    return (x >= 0).astype(np.float64)
def forward_propagation(W1, b1, W2, b2, X):
    """One forward pass through the two-layer network.

    Hidden layer: ReLU(W1 @ X + b1); output layer: sigmoid(W2 @ A1 + b2).
    Returns a dict caching Z1, A1, Z2, A2 for the backward pass.
    """
    Z1 = W1 @ X + b1   # pre-activation of the hidden layer
    A1 = relu(Z1)
    Z2 = W2 @ A1 + b2  # pre-activation of the (scalar) output
    A2 = sigmod(Z2)
    return {'Z1': Z1, 'A1': A1, 'Z2': Z2, 'A2': A2}
def get_loss(Y, Y_hat):
    """Mean binary cross-entropy over the batch.

    Uses the natural logarithm: the backward pass takes dZ2 = Y_hat - Y,
    which is the gradient of the *natural-log* cross-entropy through a
    sigmoid.  The original np.log10 therefore reported a loss off by a
    factor of ln(10) relative to the gradients actually applied.

    Args:
        Y:     (1, m) array of 0/1 labels.
        Y_hat: (1, m) array of sigmoid outputs in (0, 1).
    Returns:
        Scalar mean loss.
    """
    m = Y.shape[1]
    loss = -Y * np.log(Y_hat) - (1 - Y) * np.log(1 - Y_hat)
    return np.sum(loss) / m
def back_propagation(Y_hat, Y, W1, W2, Z1):
    """Back-propagate the output error through both layers.

    dZ2 = A2 - Y is the combined gradient of cross-entropy + sigmoid;
    dZ1 = (W2^T · dZ2) ⊙ relu'(Z1) pushes it into the hidden layer.
    W1 is accepted for signature compatibility but is not needed here.
    Returns a dict with 'dZ2' and 'dZ1'.
    """
    dZ2 = Y_hat - Y
    dZ1 = W2.T.dot(dZ2) * d_relu(Z1)
    return {'dZ2': dZ2, 'dZ1': dZ1}
def grdient_get(X, A1, dZ1, dZ2):
    """Average the per-sample parameter gradients over the batch.

    Args:
        X:   (2, m) inputs.
        A1:  (10, m) hidden activations.
        dZ1: (10, m) hidden-layer error.
        dZ2: (1, m) output-layer error.
    Returns:
        Dict with 'dW1' (10, 2), 'db1' (10, 1), 'dW2' (1, 10), 'db2' (1, 1).

    Bug fix: db1 was summed from dZ2 (the output-layer error), producing a
    (1, 1) value that silently broadcast over b1.  It must come from dZ1,
    the hidden-layer error, to match dW1.
    """
    m = A1.shape[1]
    dW2 = np.dot(dZ2, A1.T) / m
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m
    dW1 = np.dot(dZ1, X.T) / m
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m  # was dZ2 — wrong layer
    return {'dW2': dW2, 'dW1': dW1, 'db1': db1, 'db2': db2}
def prosscess(X, Y, W1, b1, W2, b2):
    """Run one full training step and return (gradients, loss).

    Forward pass -> backward pass -> gradient averaging -> loss value.
    (Spelling of the name kept to match the call site.)
    """
    fwd = forward_propagation(W1, b1, W2, b2, X)
    bwd = back_propagation(fwd['A2'], Y, W1, W2, fwd['Z1'])
    grads = grdient_get(X, fwd['A1'], bwd['dZ1'], bwd['dZ2'])
    return grads, get_loss(Y, fwd['A2'])
if __name__ == '__main__':
    root_dir = os.path.abspath('.')
    # Fixed: the joined path used to be assigned to the misspelled name
    # 'filenmae' and never used; the bare relative name happened to work.
    train_path = os.path.join(root_dir, 'train.txt')
    X, Y = read_data(train_path)          # load the training set
    W1, b1, W2, b2 = network_init()       # random parameter initialisation

    # Plain batch gradient descent for 500 steps ('iter' renamed: it
    # shadowed the builtin of the same name).
    for step in range(500):
        grdient, loss = prosscess(X, Y, W1, b1, W2, b2)
        W1 = W1 - lr * grdient['dW1']
        W2 = W2 - lr * grdient['dW2']
        b2 = b2 - lr * grdient['db2']
        b1 = b1 - lr * grdient['db1']
        print("The {:d} iter loss is {:2f}".format(step, loss))

    # Persist the trained parameters; the dump order below must match the
    # load order (W1, W2, b2, b1).
    model_path = os.path.join(root_dir, 'data.pkl')
    with open(model_path, "wb") as f:
        pickle.dump(W1, f)
        pickle.dump(W2, f)
        pickle.dump(b2, f)
        pickle.dump(b1, f)
    with open(model_path, "rb") as r:
        W1 = pickle.load(r)
        W2 = pickle.load(r)
        b2 = pickle.load(r)
        b1 = pickle.load(r)

    # Sanity check: predict the class probability of one point (3, 10).
    x_val = np.array([3, 10]).reshape([2, 1])
    y_hat = forward_propagation(W1, b1, W2, b2, x_val)
    print(y_hat['A2'])