Neural Networks Made Easy, Part 3: Building Forward and Backward Propagation to Produce Predictions

# coding: utf-8

A test of forward and backward propagation.

import numpy as np

trace = False          # set to True to print intermediate values in the backward pass
trace_forward = False  # set to True to print intermediate values in the forward pass

1. Build the class for forward and backward propagation

class FC:
    '''
    A fully connected layer. This class is not thread safe.
    '''
    # Initialize the input/output dimensions, the weights w, the bias b and the learning rate lr
    def __init__(self, in_num, out_num, lr=0.1):
        self._in_num = in_num
        self._out_num = out_num
        self.w = np.random.randn(in_num, out_num)
        #self.w = np.ones((in_num, out_num))
        self.b = np.zeros((out_num, 1))
        self.lr = lr

    # The sigmoid activation function
    def _sigmoid(self, in_data):
        return 1 / (1 + np.exp(-in_data))
        #return in_data  # identity activation, used for the hand-worked numbers below
    # The forward pass: topVal = sigmoid(w.T . in_data + b)
    def forward(self, in_data):
        self.topVal = self._sigmoid(np.dot(self.w.T, in_data) + self.b)
        if trace_forward:
            print('=== topVal {0} ==='.format(self.topVal.shape))
            print(self.topVal)
        self.bottomVal = in_data
        return self.topVal

    # The backward pass: compute the gradients, update w and b, and return
    # the residual to pass to the layer below
    def backward(self, loss):
        residual_z = loss * self.topVal * (1 - self.topVal)  # dL/dz via the sigmoid derivative y*(1-y)
        grad_w = np.dot(self.bottomVal, residual_z.T)
        # np.sum collapses residual_z to a scalar, so every bias receives the
        # same update; this simplification is kept from the original code
        grad_b = np.sum(residual_z)
        self.w -= self.lr * grad_w
        self.b -= self.lr * grad_b
        residual_x = np.dot(self.w, residual_z)  # uses the already-updated w
        if trace:
            print('=== z {0}==='.format(residual_z.shape))
            print(residual_z)
            print('=== grad_w {0}==='.format(grad_w.shape))
            print(grad_w)
            print('=== grad_b {0}==='.format(grad_b.shape))
            print(grad_b)
            print('=== self.w {0}==='.format(self.w.shape))
            print(self.w)
            print('=== self.b {0} ==='.format(self.b.shape))
            print(self.b)
            print('=== residual {0} ==='.format(residual_x.shape))
            print(residual_x)
        return residual_x
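
For reference, here is the math that backward implements, in notation of my own (not from the original post). With input x = bottomVal, output y = topVal = sigmoid(w.T x + b), and incoming gradient loss = dL/dy, the sigmoid derivative sigma'(z) = sigma(z)(1 - sigma(z)) gives

$$
\delta = \frac{\partial L}{\partial y} \odot y \odot (1 - y), \qquad
\frac{\partial L}{\partial W} = x\,\delta^{\top}, \qquad
\frac{\partial L}{\partial x} = W\,\delta,
$$

followed by the gradient-descent updates $W \leftarrow W - \mathrm{lr}\cdot\partial L/\partial W$ and $b \leftarrow b - \mathrm{lr}\cdot\mathrm{grad\_b}$. Two quirks are kept from the original code: grad_b sums $\delta$ over all entries rather than per output unit, and residual_x is computed with the already-updated $W$, a slight deviation from textbook backprop that the logs below nevertheless reflect.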

2. Build the loss function class

class SquareLoss:
    '''
    Same as above, not thread safe.
    '''
    # Forward pass of the loss
    def forward(self, y, t):
        self.loss = y - t
        if trace:
            print('=== Loss {0} ==='.format(self.loss.shape))
            print(self.loss)
        # Halved mean squared error, averaged over the batch (columns)
        return np.sum(self.loss * self.loss) / self.loss.shape[1] / 2

    # Backward pass of the loss
    def backward(self):
        if trace:
            print('=== loss {0} ==='.format(self.loss.shape))
            print(self.loss)
        return self.loss
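
In symbols (my notation again), for a batch of $N$ samples arranged as columns, forward returns

$$
L = \frac{1}{2N}\sum_{i=1}^{N} \lVert y_i - t_i \rVert^2,
$$

while backward returns the raw residual $y - t$. Strictly speaking $\partial L/\partial y = (y - t)/N$; the missing $1/N$ factor is simply absorbed into the learning rate.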

3. Build the neural network

class Net:
    def __init__(self, input_num=2, hidden_num=4, out_num=1, lr=0.1):
        self.fc1 = FC(input_num, hidden_num, lr)  # layers composed from the FC class
        self.fc2 = FC(hidden_num, out_num, lr)
        self.loss = SquareLoss()
        #print("input_num:", input_num)
        #print("hidden_num:", hidden_num)
        #print("out_num:", out_num)
        #print("lr:", lr)
        #print("self.loss:", self.loss)

    # Train the network: 10000 iterations of forward and backward propagation
    def train(self, X, y):  # X is arranged by columns, one sample per column
        for i in range(10000):
            # forward step
            layer1out = self.fc1.forward(X)
            layer2out = self.fc2.forward(layer1out)
            loss = self.loss.forward(layer2out, y)
        """
        第一次处理过程可以描述如下:sigmod我设置为原值输出
        1、第一层X(已经转置)的初值为[[0.05][0.1]]是2行1列的矩阵;
           Y(已经转置)的初值为[[0.01][0.99]]也是2行1列的矩阵;
        作为输入层的两个元素记为A,B,权值W1为[0.15, 0.25], [0.2, 0.3];
        偏移量b1为[[0.35], [0.35]],第二层隐含层有两个元素记为C,D;
        经过第一次layer1out=fc1.forward(X)处理,self._sigmoid(np.dot(self.w.T, in_data)
        W1转置后的值为[0.15,0.2],[0.25,0.3]在计算如下:
            C=W11*X11+b11=0.15*0.05+0.2*0.1+0.35=0.3775
            D=W12*X12+b12=0.25*0.05+0.3*0.1+0.35=0.3925
        2、第二层的两个元素为C,D,权值W2为[[0.4, 0.5], [0.45, 0.55]],
        self._sigmoid(np.dot(self.w.T, in_data),W2转置后的值为[[0.4,0.45],[0.5,0.55]]
        偏移量b2为[[0.6], [0.6]]
        第三层是输出层记为E,F,经过layer2out = fc2.forward(layer1out)处理后,计算如下:
            E=W21*X21+b21=0.4*0.3775+0.45*0.3925+0.6=0.927625
            F=W22*X22+b22=0.5*0.3775+0.55*0.3925+0.6=1.004625
        3、计算loss,通过loss.forward(layer2out, y),传值给forward(self, y, t),
        即y=layer2out[0.927625,1.004625],t=y=[[0.01][0.99]],则loss=y-t=[[0.917625][0.014625]]
        而最终np.sum(self.loss * self.loss) / self.loss.shape[1] / 2结果self.loss*self.loss=0.84224953125
        self.loss.shape[1]为,最终结果为0.4214456015624
        """
            if i % 1000 == 0:
                print('iter = {0}, loss ={1}'.format(i, loss))
                print('=== Label vs Prediction ===')
                print('t={0}'.format(y))
                print('y={0}'.format(layer2out))
            # backward step
            layer2loss = self.loss.backward()  # 2x1, here [[0.917625], [0.014625]]
            layer1loss = self.fc2.backward(layer2loss)  # 2x1, here [[0.02389223], [0.02694069]]
            saliency = self.fc1.backward(layer1loss)  # 2x1, here [[0.00244632], [0.00304642]]
            """
            The backward pass runs from the third (output) layer towards the input:
            first compute residual_z, then grad_w and grad_b, update the second
            layer's w and b by a step scaled with the learning rate, and keep
            propagating towards the first layer.
            """
        layer1out = self.fc1.forward(X)  # e.g. [[0.52307341], [0.53805742]] after training
        layer2out = self.fc2.forward(layer1out)  # close to the targets [[0.01], [0.99]]
        print('=== Final ===')
        print('X={0}'.format(X))
        print('t={0}'.format(y))
        print('y={0}'.format(layer2out))
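
To sanity-check the hand-worked numbers in the docstring above, here is a minimal standalone sketch (my own, assuming the identity activation, i.e. the commented-out return in_data branch of _sigmoid):

import numpy as np

X = np.array([[0.05], [0.1]])
t = np.array([[0.01], [0.99]])
w1 = np.array([[0.15, 0.25], [0.2, 0.3]]); b1 = np.array([[0.35], [0.35]])
w2 = np.array([[0.4, 0.5], [0.45, 0.55]]); b2 = np.array([[0.6], [0.6]])

h = np.dot(w1.T, X) + b1  # [[0.3775], [0.3925]] -- C and D
o = np.dot(w2.T, h) + b2  # [[0.927625], [1.004625]] -- E and F
loss = np.sum((o - t) ** 2) / o.shape[1] / 2  # 0.421124765625
print(h, o, loss)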

4. Test the network: 2 input units, 2 hidden units, 2 output units, learning rate 0.5

w1, b1, w2 and b2 are overridden with fixed initial values

example from https://mattmazur.com/2015/03/17/a-step-by-step-backpropagation-example/

X = np.array([[0.05, 0.1]]).T
y = np.array([[0.01, 0.99]]).T

net = Net(2, 2, 2, 0.5)
net.fc1.w = np.array([[0.15, 0.25], [0.2, 0.3]])
net.fc1.b = np.array([[0.35], [0.35]])
net.fc2.w = np.array([[0.4, 0.5], [0.45, 0.55]])
net.fc2.b = np.array([[0.6], [0.6]])
net.train(X, y)
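
With these fixed weights, the iter = 0 prediction in the log below, y = [[0.75136507], [0.77292847]], matches the forward pass that Mazur works out by hand in the linked post.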

5. Output of the network with 2 inputs, 2 hidden units and 2 outputs

"""
The run above: 2 input units, 2 hidden units, 2 output units, learning rate 0.5
input_num: 2
hidden_num: 2
out_num: 2
lr: 0.5

iter = 0, loss =0.2983711087600027
=== Label vs Prediction ===
t=[[ 0.01][ 0.99]]
y=[[ 0.75136507][ 0.77292847]]

iter = 1000, loss =0.0003463797126897323
=== Label vs Prediction ===
t=[[ 0.01][ 0.99]]
y=[[ 0.02873778][ 0.97151609]]

iter = 2000, loss =0.00011952921590301041
=== Label vs Prediction ===
t=[[ 0.01][ 0.99]]
y=[[ 0.0210022 ][ 0.97913675]]

iter = 3000, loss =6.0258461253032044e-05
=== Label vs Prediction ===
t=[[ 0.01][ 0.99]]
y=[[ 0.01781127][ 0.98228631]]

iter = 4000, loss =3.549686832171309e-05
=== Label vs Prediction ===
t=[[ 0.01][ 0.99]]
y=[[ 0.01599555][ 0.98407994]]

iter = 5000, loss =2.2784906266629515e-05
=== Label vs Prediction ===
t=[[ 0.01][ 0.99]]
y=[[ 0.01480398][ 0.98525747]]

iter = 6000, loss =1.5445159008963103e-05
=== Label vs Prediction ===
t=[[ 0.01][ 0.99]]
y=[[ 0.01395575][ 0.98609585]]

iter = 7000, loss =1.0872148832092369e-05
=== Label vs Prediction ===
t=[[ 0.01][ 0.99]]
y=[[ 0.01331933][ 0.98672489]]

iter = 8000, loss =7.86715065902372e-06
=== Label vs Prediction ===
t=[[ 0.01][ 0.99]]
y=[[ 0.01282401][ 0.98721445]]

iter = 9000, loss =5.8135203632353775e-06
=== Label vs Prediction ===
t=[[ 0.01][ 0.99]]
y=[[ 0.01242797][ 0.98760584]]

=== Final ===
X=[[ 0.05][ 0.1 ]]
t=[[ 0.01][ 0.99]]
y=[[ 0.01210474][ 0.98792522]]
"""

6. Build a network with 2 inputs, 4 hidden units, 1 output and learning rate 0.1

Learning the logical AND operation:

X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]).T
y = np.array([[0], [0], [0], [1]]).T

net = Net(2, 4, 1, 0.1)
net.train(X, y)
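
Once training converges, the sigmoid output can be turned into a hard 0/1 prediction by thresholding. A small usage sketch (the 0.5 cutoff is a conventional choice of mine, not part of the original code):

pred = (net.fc2.forward(net.fc1.forward(X)) > 0.5).astype(int)
print(pred)  # expected [[0 0 0 1]], i.e. AND of the two input rows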

7. Output of the network with 2 inputs, 4 hidden units and 1 output:

"""
The run above: 2 input units, 4 hidden units, 1 output unit, learning rate 0.1
input_num: 2
hidden_num: 4
out_num: 1
lr: 0.1
self.loss: <__main__.SquareLoss object at 0x0000019AFED6AA90>

iter = 0, loss =0.11370495173847857
=== Label vs Prediction ===
t=[[0 0 0 1]]
y=[[ 0.44256396 0.40601667 0.43202629 0.39810258]]

iter = 1000, loss =0.019908498658312943
=== Label vs Prediction ===
t=[[0 0 0 1]]
y=[[ 0.01999465 0.18705608 0.17795415 0.6963381 ]]

iter = 2000, loss =0.004393130212643646
=== Label vs Prediction ===
t=[[0 0 0 1]]
y=[[ 0.00321885 0.08785577 0.08515639 0.85799847]]

iter = 3000, loss =0.0019684667494827383
=== Label vs Prediction ===
t=[[0 0 0 1]]
y=[[ 0.0016368 0.05869203 0.05678466 0.904733 ]]

iter = 4000, loss =0.0011840825820895176
=== Label vs Prediction ===
t=[[0 0 0 1]]
y=[[ 0.00113453 0.04538541 0.04395886 0.92597867]]

iter = 5000, loss =0.0008219418220594369
=== Label vs Prediction ===
t=[[0 0 0 1]]
y=[[ 8.91038477e-04 3.77127131e-02 3.65884739e-02 9.38244227e-01]]

iter = 6000, loss =0.0006196337637206246
=== Label vs Prediction ===
t=[[0 0 0 1]]
y=[[ 7.46561558e-04 3.26706508e-02 3.17487793e-02 9.46323596e-01]]

iter = 7000, loss =0.000492583134778812
=== Label vs Prediction ===
t=[[0 0 0 1]]
y=[[ 6.50227836e-04 2.90741376e-02 2.82955985e-02 9.52101191e-01]]

iter = 8000, loss =0.00040625946286834886
=== Label vs Prediction ===
t=[[0 0 0 1]]
y=[[ 5.80963111e-04 2.63613412e-02 2.56890084e-02 9.56469634e-01]]

iter = 9000, loss =0.0003442038001962817
=== Label vs Prediction ===
t=[[0 0 0 1]]
y=[[ 5.28476065e-04 2.42308265e-02 2.36400522e-02 9.59908030e-01]]

=== Final ===
X=[[0 0 1 1][0 1 0 1]]
t=[[0 0 0 1]]
y=[[ 4.87139729e-04 2.25058438e-02 2.19795167e-02 9.62697572e-01]]
"""


Reposted from blog.csdn.net/junchengberry/article/details/80873309