Neural network basics: an implementation with NumPy

In this post we use only NumPy to build a simple neural network with one hidden layer (an input layer, a hidden layer, and an output layer). We choose sigmoid as the activation function and mean squared error as the loss function, and finally train and test on the MNIST dataset.

1. Formula derivation

Forward propagation:
\[ z^l = W^l a^{l-1} + b^l \\ a^l = \sigma(z^l) \]
Back-propagation:
\[ \frac{\partial J}{\partial W^l} = \delta^l (a^{l-1})^T \\ \frac{\partial J}{\partial b^l} = \delta^l \\ \delta^l = \left[ (W^{l+1})^T \delta^{l+1} \right] \odot \sigma'(z^l) \\ \delta^L = (a^L - y) \odot \sigma'(z^L) \]
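These formulas can be checked numerically. Below is a minimal gradient check on a tiny 4-3-2 network with a single sample, written in the column-vector convention of the formulas; the variable names and the finite-difference comparison are illustrative and not part of the post's code:

import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Tiny network: 4 inputs -> 3 hidden units -> 2 outputs, one sample (column vectors)
rng = np.random.default_rng(0)
W1, b1 = rng.normal(size=(3, 4)), rng.normal(size=(3, 1))
W2, b2 = rng.normal(size=(2, 3)), rng.normal(size=(2, 1))
x, y = rng.normal(size=(4, 1)), np.array([[1.0], [0.0]])

def loss(W2_):
    a1 = sigmoid(W1 @ x + b1)
    a2 = sigmoid(W2_ @ a1 + b2)
    return 0.5 * np.sum((a2 - y) ** 2)

# Analytic gradient of J with respect to W^L, following the formulas above
a1 = sigmoid(W1 @ x + b1)
a2 = sigmoid(W2 @ a1 + b2)
delta2 = (a2 - y) * a2 * (1 - a2)      # delta^L = (a^L - y) * sigma'(z^L), elementwise
dW2 = delta2 @ a1.T                    # dJ/dW^L = delta^L (a^{L-1})^T

# Finite-difference estimate for one entry of W^L
eps = 1e-6
W2_plus = W2.copy()
W2_plus[0, 0] += eps
numeric = (loss(W2_plus) - loss(W2)) / eps
print(dW2[0, 0], numeric)              # the two numbers should agree to about 1e-5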

2. Utility functions

These include the sigmoid activation function, its first derivative, and a function that one-hot encodes the labels:

# One-hot encode the labels
def onehot(targets, num):
    result = np.zeros((num, 10))
    for i in range(num):
        result[i][targets[i]] = 1
    return result

# sigmoid
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# First derivative of sigmoid
def Dsigmoid(x):
    return sigmoid(x)*(1-sigmoid(x))
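A quick sanity check of these helpers (this snippet is only illustrative and assumes numpy has been imported as np):

labels = np.array([3, 0, 7])
print(onehot(labels, 3))      # 3 x 10 matrix with a single 1 in each row
print(sigmoid(0.0))           # 0.5
print(Dsigmoid(0.0))          # 0.25, the maximum value of the derivative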

3. Neural network implementation

In the code, d1 and self.d2 represent the \(\delta\) terms from the formulas above; the other variables are named directly after the formulas:

class NN(object):
    def __init__(self, l0, l1, l2):
        self.lr = 0.1                                        # learning rate
        self.W1 = np.random.randn(l0, l1) * 0.01             # weight initialization
        self.b1 = np.random.randn(l1) * 0.01
        self.W2 = np.random.randn(l1, l2) * 0.01
        self.b2 = np.random.randn(l2) * 0.01

    # Forward propagation
    def forward(self, X, y):
        self.X = X                                           # m x 784
        self.z1 = np.dot(X, self.W1) + self.b1               # m x 500, 500 is the hidden layer size
        self.a1 = sigmoid(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2         # m x 10
        self.a2 = sigmoid(self.z2)
        loss = np.sum((self.a2 - y) * (self.a2 - y)) / 2     # squared error loss
        self.d2 = (self.a2 - y) * Dsigmoid(self.z2)          # m x 10, used in back-propagation
        return loss, self.a2

    # Back-propagation
    def backward(self):
        dW2 = np.dot(self.a1.T, self.d2) / 3                  # 500 x 10, batchsize=3
        db2 = np.sum(self.d2, axis=0) / 3                     # 10
        d1 = np.dot(self.d2, self.W2.T) * Dsigmoid(self.z1)   # m x 500, used in back-propagation
        dW1 = np.dot(self.X.T, d1) / 3                        # 784 x 500
        db1 = np.sum(d1, axis=0) / 3                          # 500

        self.W2 -= self.lr * dW2
        self.b2 -= self.lr * db2
        self.W1 -= self.lr * dW1
        self.b1 -= self.lr * db1
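
Note that the batch size of 3 is hard-coded into backward() through the /3 divisions, so forward() must always be called with exactly 3 samples. As a sketch of how this could be generalized, the following standalone function (not part of the original class, and assuming the NN instance and helpers above are defined) performs the same update with an explicit batch size:

# Same update as NN.backward(), but dividing by the actual batch size instead of 3 (sketch)
def backward_step(nn, batch_size):
    dW2 = np.dot(nn.a1.T, nn.d2) / batch_size           # 500 x 10
    db2 = np.sum(nn.d2, axis=0) / batch_size            # 10
    d1 = np.dot(nn.d2, nn.W2.T) * Dsigmoid(nn.z1)       # m x 500
    dW1 = np.dot(nn.X.T, d1) / batch_size               # 784 x 500
    db1 = np.sum(d1, axis=0) / batch_size                # 500

    nn.W2 -= nn.lr * dW2
    nn.b2 -= nn.lr * db2
    nn.W1 -= nn.lr * dW1
    nn.b1 -= nn.lr * db1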

4. Training and testing

We use the MNIST dataset that ships with torchvision. After training, the weight parameters are saved to a file; during testing they are loaded back from that file. The final test accuracy is 96.48%.

def train():
    nn = NN(784, 500, 10)

    for epoch in range(10):
        for i in range(0, 60000, 3):
            X = train_data.data[i:i + 3]
            y = train_data.targets[i:i + 3]
            loss, _ = nn.forward(X, y)
            print("Epoch:", epoch, "-", i, ":", "{:.3f}".format(loss))
            nn.backward()
        np.savez("data.npz", w1=nn.W1, b1=nn.b1, w2=nn.W2, b2=nn.b2)

def test():
    r = np.load("data.npz")
    nn = NN(784, 500, 10)
    nn.W1 = r["w1"]
    nn.b1 = r["b1"]
    nn.W2 = r["w2"]
    nn.b2 = r["b2"]
    _, result = nn.forward(test_data.data, test_data.targets2)
    result = np.argmax(result, axis=1)
    precison = np.sum(result==test_data.targets) / 10000
    print("Precison:", precison)

# MNIST handwritten digit dataset
train_data = torchvision.datasets.MNIST(root='./mnist/', train=True, download=False)
test_data = torchvision.datasets.MNIST(root='./mnist/', train=False)
train_data.data = train_data.data.numpy()         # [60000,28,28]
train_data.targets = train_data.targets.numpy()   # [60000]
test_data.data = test_data.data.numpy()           # [10000,28,28]
test_data.targets = test_data.targets.numpy()     # [10000]

# Flatten and normalize the input vectors
train_data.data = train_data.data.reshape(60000, 28 * 28) / 255.  # (60000, 784)
test_data.data = test_data.data.reshape(10000, 28 * 28) / 255.

# One-hot encode the labels
train_data.targets = onehot(train_data.targets, 60000) # (60000, 10)
test_data.targets2 = onehot(test_data.targets, 10000)  # used in forward propagation

train()
#test()
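
After training, the same forward pass can classify a single image. A hypothetical helper (not in the original code) might look like this, assuming image is a flattened, normalized 784-vector and nn holds the trained weights:

def predict_one(nn, image):
    # Two-layer forward pass; returns the most likely digit for one image
    a1 = sigmoid(np.dot(image, nn.W1) + nn.b1)   # 500-dimensional hidden activation
    a2 = sigmoid(np.dot(a1, nn.W2) + nn.b2)      # 10 output scores
    return int(np.argmax(a2))

For example, predict_one(nn, test_data.data[0]) would return the predicted digit for the first test image.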

5. Complete code

import torchvision
import numpy as np

# One-hot encode the labels
def onehot(targets, num):
    result = np.zeros((num, 10))
    for i in range(num):
        result[i][targets[i]] = 1
    return result

# sigmoid
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# First derivative of sigmoid
def Dsigmoid(x):
    return sigmoid(x)*(1-sigmoid(x))


class NN(object):
    def __init__(self, l0, l1, l2):
        self.lr = 0.1                                        # learning rate
        self.W1 = np.random.randn(l0, l1) * 0.01             # weight initialization
        self.b1 = np.random.randn(l1) * 0.01
        self.W2 = np.random.randn(l1, l2) * 0.01
        self.b2 = np.random.randn(l2) * 0.01

    # Forward propagation
    def forward(self, X, y):
        self.X = X                                           # m x 784
        self.z1 = np.dot(X, self.W1) + self.b1               # m x 500, 500 is the hidden layer size
        self.a1 = sigmoid(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2         # m x 10
        self.a2 = sigmoid(self.z2)
        loss = np.sum((self.a2 - y) * (self.a2 - y)) / 2     # squared error loss
        self.d2 = (self.a2 - y) * Dsigmoid(self.z2)          # m x 10, used in back-propagation
        return loss, self.a2

    # Back-propagation
    def backward(self):
        dW2 = np.dot(self.a1.T, self.d2) / 3                  # 500 x 10, batchsize=3
        db2 = np.sum(self.d2, axis=0) / 3                     # 10
        d1 = np.dot(self.d2, self.W2.T) * Dsigmoid(self.z1)   # m x 500, used in back-propagation
        dW1 = np.dot(self.X.T, d1) / 3                        # 784 x 500
        db1 = np.sum(d1, axis=0) / 3                          # 500

        self.W2 -= self.lr * dW2
        self.b2 -= self.lr * db2
        self.W1 -= self.lr * dW1
        self.b1 -= self.lr * db1


def train():
    nn = NN(784, 500, 10)

    for epoch in range(10):
        for i in range(0, 60000, 3):
            X = train_data.data[i:i + 3]
            y = train_data.targets[i:i + 3]
            loss, _ = nn.forward(X, y)
            print("Epoch:", epoch, "-", i, ":", "{:.3f}".format(loss))
            nn.backward()
        np.savez("data.npz", w1=nn.W1, b1=nn.b1, w2=nn.W2, b2=nn.b2)

def test():
    r = np.load("data.npz")
    nn = NN(784, 500, 10)
    nn.W1 = r["w1"]
    nn.b1 = r["b1"]
    nn.W2 = r["w2"]
    nn.b2 = r["b2"]
    _, result = nn.forward(test_data.data, test_data.targets2)
    result = np.argmax(result, axis=1)
    precison = np.sum(result==test_data.targets) / 10000
    print("Precison:", precison)

if __name__ == '__main__':

    # MNIST handwritten digit dataset
    train_data = torchvision.datasets.MNIST(root='./mnist/', train=True, download=False)
    test_data = torchvision.datasets.MNIST(root='./mnist/', train=False)
    train_data.data = train_data.data.numpy()         # [60000,28,28]
    train_data.targets = train_data.targets.numpy()   # [60000]
    test_data.data = test_data.data.numpy()           # [10000,28,28]
    test_data.targets = test_data.targets.numpy()     # [10000]

    # Flatten and normalize the input vectors
    train_data.data = train_data.data.reshape(60000, 28 * 28) / 255.  # (60000, 784)
    test_data.data = test_data.data.reshape(10000, 28 * 28) / 255.

    # One-hot encode the labels
    train_data.targets = onehot(train_data.targets, 60000) # (60000, 10)
    test_data.targets2 = onehot(test_data.targets, 10000)  # used in forward propagation

    train()
    #test()
