Python Implementation of a BP Neural Network

A BP neural network consists of two parts, forward propagation and back-propagation, and the derivation of back-propagation is the core of the method. For the full derivation, see https://blog.csdn.net/weixin_40446651/article/details/81516944
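For the 2-layer network built below (ReLU hidden layer, sigmoid output, cross-entropy loss), that derivation boils down to the following equations, which the code implements directly (m is the number of training samples; the notation mirrors the variable names in the code):

$$Z_1 = W_1 X + b_1, \qquad A_1 = \mathrm{ReLU}(Z_1), \qquad Z_2 = W_2 A_1 + b_2, \qquad A_2 = \sigma(Z_2)$$

$$dZ_2 = A_2 - Y, \qquad dW_2 = \frac{1}{m} dZ_2 A_1^{T}, \qquad db_2 = \frac{1}{m} \sum dZ_2$$

$$dZ_1 = W_2^{T} dZ_2 \odot \mathrm{ReLU}'(Z_1), \qquad dW_1 = \frac{1}{m} dZ_1 X^{T}, \qquad db_1 = \frac{1}{m} \sum dZ_1$$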

This post shows how to write a simple 2-layer BP neural network in Python for a binary classification problem: given a set of 2D points, split them into two classes with the curve y = 0.2x^2 as the boundary. For example, the point (3, 10) lies above the boundary (0.2·3² = 1.8 < 10), so it belongs to class 1.

First, create the training data. In the interval [0, 10], take 20 points with a step of 0.5 as the input x coordinates. For each x, draw a random integer from [0, 20] as the corresponding y value. Each point (x, y) is then labeled against the boundary y = 0.2x^2: points above the boundary get label = 1, points below get label = 0. Finally the data is written to a txt file:

# -*- coding: utf-8 -*-
"""
Created on Fri Sep 28 18:25:06 2018

@author: LongJun
"""
import os
import random

root_dir = os.path.abspath('.')                 # absolute path of the working directory
filename = os.path.join(root_dir, 'train.txt')  # where the training set is stored

X = []
Y = []
label = []
for x in [i / 2 for i in range(20)]:  # 20 x values in steps of 0.5
    X.append(x)
    y = random.randint(0, 20)         # random integer y in [0, 20]
    Y.append(y)
    if y > 0.2 * x * x:               # above the boundary y = 0.2x^2
        label.append(1)
    else:
        label.append(0)

with open(filename, "w") as f:        # "w" so reruns do not append duplicate data
    f.write(",".join(str(v) for v in X) + "\n")      # line 1: the x coordinates
    f.write(",".join(str(v) for v in Y) + "\n")      # line 2: the y coordinates
    f.write(",".join(str(v) for v in label) + "\n")  # line 3: the labels
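To sanity-check the generated data, the points can be plotted against the boundary. A minimal sketch (matplotlib is an extra dependency, not used anywhere else in this post):

import matplotlib.pyplot as plt

plt.scatter(X, Y, c=['red' if l == 1 else 'blue' for l in label])  # color each point by its label
boundary_x = [i / 10 for i in range(101)]                          # dense grid on [0, 10]
plt.plot(boundary_x, [0.2 * x * x for x in boundary_x], 'k--')     # the boundary y = 0.2x^2
plt.xlabel('x')
plt.ylabel('y')
plt.show()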

The implementation of the BP neural network:

# -*- coding: utf-8 -*-
"""
Created on Fri Sep 28 18:44:52 2018

@author: LongJun
"""

import os
import numpy as np
import pickle

lr = 0.01  # learning rate

def read_data(filename):
    with open(filename, "r") as f:
        train_datas = f.readlines()  # f.readlines() returns a list of lines
        train_datas = [train_data.strip() for train_data in train_datas]  # drop trailing newlines
        X1 = [float(data) for data in train_datas[0].split(',')]  # split on ',' and convert each field to float
        X2 = [float(data) for data in train_datas[1].split(',')]
        Y = [float(data) for data in train_datas[2].split(',')]
        X = np.array([X1, X2])  # input matrix of shape (2, m): rows are the x and y coordinates
        Y = np.array([Y])       # label matrix of shape (1, m)
    return X, Y

def network_init():
    W1 = np.random.randn(10, 2) * 0.01  # random init, scaled by 0.01 to keep the initial weights small
    b1 = np.zeros((10, 1))              # b1's shape must match W1's output dimension
    W2 = np.random.randn(1, 10) * 0.01
    b2 = np.zeros((1, 1))
    return W1, b1, W2, b2

def relu(x):
    return np.maximum(x, 0.0)  # element-wise max(x, 0)

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def d_sigmoid(x):  # sigmoid derivative; unused below, since dZ2 = A2 - Y already folds it in
    return np.exp(-x) / (1.0 + np.exp(-x)) / (1.0 + np.exp(-x))

def d_relu(x):
    return (x >= 0).astype(np.float64)  # 1 where x >= 0, else 0 (subgradient at 0 taken as 1)

def forward_propagation(W1, b1, W2, b2, X):
    Z1 = np.dot(W1, X) + b1   # matrix multiplication via np.dot; Z1 has shape (10, m)
    A1 = relu(Z1)             # ReLU activation on the hidden layer
    Z2 = np.dot(W2, A1) + b2  # Z2 has shape (1, m)
    A2 = sigmoid(Z2)          # sigmoid activation on the output
    forward_cache = {'Z1': Z1, 'A1': A1, 'Z2': Z2, 'A2': A2}
    return forward_cache

def get_loss(Y, Y_hat):
    m = Y.shape[1]
    loss = -Y * np.log(Y_hat) - (1 - Y) * np.log(1 - Y_hat)  # cross-entropy; the natural log matches dZ2 = A2 - Y
    return np.sum(loss) / m

def back_propagation(Y_hat, Y, W1, W2, Z1):
    dZ2 = Y_hat - Y                       # output-layer error for sigmoid + cross-entropy
    dZ1 = np.dot(W2.T, dZ2) * d_relu(Z1)  # dZ1 = W2^T · dZ2, gated element-wise by relu'(Z1)
    back_cache = {'dZ2': dZ2, 'dZ1': dZ1}
    return back_cache

def gradient_get(X, A1, dZ1, dZ2):
    m = A1.shape[1]
    dW2 = np.dot(dZ2, A1.T) / m
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m  # average dZ2 over the batch
    dW1 = np.dot(dZ1, X.T) / m
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m  # average dZ1 over the batch
    gradient = {'dW2': dW2, 'dW1': dW1, 'db1': db1, 'db2': db2}
    return gradient

def process(X, Y, W1, b1, W2, b2):
    forward_cache = forward_propagation(W1, b1, W2, b2, X)  # forward pass
    back_cache = back_propagation(forward_cache['A2'], Y, W1, W2, forward_cache['Z1'])  # backward pass
    gradient = gradient_get(X, forward_cache['A1'], back_cache['dZ1'], back_cache['dZ2'])  # parameter gradients
    loss = get_loss(Y, forward_cache['A2'])  # current loss
    return gradient, loss
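
# A quick finite-difference sanity check of the analytic gradients (an illustrative
# sketch; the checked entry W2[0, 0] and eps are arbitrary choices): the value of
# dW2[0, 0] from back-propagation should closely match the centered difference.
def gradient_check(X, Y, W1, b1, W2, b2, eps=1e-6):
    gradient, _ = process(X, Y, W1, b1, W2, b2)  # analytic gradient
    W2_plus, W2_minus = W2.copy(), W2.copy()
    W2_plus[0, 0] += eps                         # perturb one weight in both directions
    W2_minus[0, 0] -= eps
    _, loss_plus = process(X, Y, W1, b1, W2_plus, b2)
    _, loss_minus = process(X, Y, W1, b1, W2_minus, b2)
    numeric = (loss_plus - loss_minus) / (2 * eps)  # centered-difference estimate
    print(gradient['dW2'][0, 0], numeric)           # the two values should nearly agree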
if __name__ == '__main__':
    root_dir = os.path.abspath('.')
    filename = os.path.join(root_dir, 'train.txt')
    X, Y = read_data(filename)       # read the training data
    W1, b1, W2, b2 = network_init()  # initialize the parameter matrices
    for it in range(500):            # training loop
        gradient, loss = process(X, Y, W1, b1, W2, b2)  # gradients and loss for this iteration
        W1 = W1 - lr * gradient['dW1']  # gradient-descent updates
        W2 = W2 - lr * gradient['dW2']
        b2 = b2 - lr * gradient['db2']
        b1 = b1 - lr * gradient['db1']
        print("iter {:d}: loss = {:.4f}".format(it, loss))
    filename = os.path.join(root_dir, 'data.pkl')  # save the trained model to a .pkl file
    with open(filename, "wb") as f:
        pickle.dump(W1, f)
        pickle.dump(W2, f)
        pickle.dump(b2, f)
        pickle.dump(b1, f)
    with open(filename, "rb") as r:  # load the parameters back, in the same order they were dumped
        W1 = pickle.load(r)
        W2 = pickle.load(r)
        b2 = pickle.load(r)
        b1 = pickle.load(r)
    x_val = np.array([3, 10]).reshape([2, 1])  # a single validation point (x=3, y=10)
    y_hat = forward_propagation(W1, b1, W2, b2, x_val)
    print(y_hat['A2'])
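As a quick check on the trained parameters, the forward pass can also be scored on the full training set. A minimal sketch that continues the __main__ block above; the 0.5 decision threshold on the sigmoid output is an assumed choice, since the post itself never picks one:

    forward_cache = forward_propagation(W1, b1, W2, b2, X)  # forward pass over all training points
    preds = (forward_cache['A2'] > 0.5).astype(np.float64)  # assumed threshold: class 1 when A2 > 0.5
    print("training accuracy: {:.2%}".format(np.mean(preds == Y)))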

Reprinted from blog.csdn.net/weixin_40446651/article/details/82963663