In this post we use only NumPy to build a simple neural network with an input layer, one hidden layer, and an output layer. We use the sigmoid function as the activation, the mean squared error as the loss function, and finally train and test the network on the MNIST dataset.
1. Formula derivation
Forward propagation:
\[ z^l = W^l a^{l-1} + b^l \\ a^l = \sigma(z^l) \]
Back-propagation:
\[ \frac{\partial J}{\partial W^l} = \delta^l (a^{l-1})^T \\ \frac{\partial J}{\partial b^l} = \delta^l \\ \delta^l = [(W^{l+1})^T \delta^{l+1}] \odot \sigma'(z^l) \\ \delta^L = (a^L - y) \odot \sigma'(z^L) \]
2. Some utility functions
These comprise the sigmoid activation function, its first derivative, and a function that one-hot encodes the labels, as follows:
def onehot(targets, num, num_classes=10):
    """One-hot encode the first ``num`` integer class labels.

    Args:
        targets: 1-D sequence or array of integer class indices holding at
            least ``num`` entries.
        num: Number of labels to encode (number of rows in the result).
        num_classes: Width of each encoded row. Defaults to 10.

    Returns:
        A float array of shape ``(num, num_classes)`` that is zero everywhere
        except for a 1 at ``[i, targets[i]]`` in every row ``i``.

    Raises:
        IndexError: If ``targets`` holds fewer than ``num`` labels or a label
            lies outside ``num_classes``.
    """
    result = np.zeros((num, num_classes))
    labels = np.asarray(targets, dtype=np.intp)[:num]
    # Check the length explicitly: a single label would otherwise broadcast
    # silently across every row in the fancy-indexed assignment below.
    if len(labels) < num:
        raise IndexError("targets holds fewer than num labels")
    # One fancy-indexed assignment sets the hot entry of every row at once,
    # replacing a Python-level loop over the samples.
    result[np.arange(num), labels] = 1
    return result
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``, element-wise.

    The value is computed through the equivalent identity
    ``sigmoid(x) = (1 + tanh(x / 2)) / 2``.  ``tanh`` saturates instead of
    overflowing, so inputs of large magnitude yield 0 or 1 without the
    overflow RuntimeWarning that ``np.exp(-x)`` raises for very negative ``x``.

    Args:
        x: A scalar or NumPy array.

    Returns:
        The sigmoid of ``x`` with the same shape as ``x``.
    """
    return 0.5 * (1.0 + np.tanh(0.5 * x))
def Dsigmoid(x):
    """Return the first derivative of the sigmoid evaluated at ``x``.

    Uses the identity ``sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))``.

    Args:
        x: A scalar or NumPy array.

    Returns:
        The derivative values, element-wise, with the same shape as ``x``.
    """
    # Evaluate the sigmoid once and reuse it instead of computing it twice.
    s = sigmoid(x)
    return s * (1 - s)
3. Neural network implementation
In the code, `d1` and `self.d2` represent the \(\delta\) terms of the formulas; the other variables are named directly after the symbols in the formulas, as follows:
class NN(object):
    """Fully connected network with one hidden layer and sigmoid activations.

    The network is trained by plain gradient descent on a squared-error
    loss.  Batches are laid out one sample per row, so an input batch has
    shape ``(m, l0)``.  Relies on the module-level ``sigmoid`` function.
    """

    def __init__(self, l0, l1, l2, lr=0.1):
        """Create the parameters with small random initial values.

        Args:
            l0: Number of input features.
            l1: Number of hidden units.
            l2: Number of output units.
            lr: Learning rate, i.e. the gradient-descent step size.
        """
        self.lr = lr
        # Small random initial values: standard-normal draws scaled by 0.01.
        self.W1 = np.random.randn(l0, l1) * 0.01
        self.b1 = np.random.randn(l1) * 0.01
        self.W2 = np.random.randn(l1, l2) * 0.01
        self.b2 = np.random.randn(l2) * 0.01

    def forward(self, X, y):
        """Run a forward pass and cache the values ``backward`` needs.

        Args:
            X: Input batch of shape ``(m, l0)``.
            y: Target values, broadcastable against the ``(m, l2)`` output.

        Returns:
            A tuple ``(loss, a2)``.  ``loss`` is half the sum of squared
            errors over the whole batch (it is not averaged over samples);
            ``a2`` is the output-layer activation of shape ``(m, l2)``.
        """
        self.X = X                                      # m x l0
        self.z1 = np.dot(X, self.W1) + self.b1          # m x l1
        self.a1 = sigmoid(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2    # m x l2
        self.a2 = sigmoid(self.z2)
        err = self.a2 - y
        loss = np.sum(err * err) / 2
        # Output-layer delta.  sigmoid'(z2) equals a2 * (1 - a2), so the
        # cached activation is reused instead of re-evaluating the sigmoid.
        self.d2 = err * (self.a2 * (1 - self.a2))       # m x l2
        return loss, self.a2

    def backward(self):
        """Back-propagate the last ``forward`` pass and update the parameters.

        Performs one gradient-descent step in place on ``W1``, ``b1``, ``W2``
        and ``b2``.  Gradients are averaged over the number of samples in
        the batch most recently given to ``forward``, so any batch size
        works.  ``forward`` must have been called first.
        """
        m = self.X.shape[0]                             # batch size
        dW2 = np.dot(self.a1.T, self.d2) / m            # l1 x l2
        db2 = np.sum(self.d2, axis=0) / m               # l2
        # The hidden-layer delta must use W2 as it was during the forward
        # pass, so it is computed before W2 is updated below.
        d1 = np.dot(self.d2, self.W2.T) * (self.a1 * (1 - self.a1))  # m x l1
        dW1 = np.dot(self.X.T, d1) / m                  # l0 x l1
        db1 = np.sum(d1, axis=0) / m                    # l1

        self.W2 -= self.lr * dW2
        self.b2 -= self.lr * db2
        self.W1 -= self.lr * dW1
        self.b1 -= self.lr * db1
4. Training and testing
We use the MNIST dataset as packaged in torchvision. After training, the weight parameters are saved to a file; during testing they are read back from that file. The final test accuracy we obtained is 96.48%.
def train(epochs=10):
    """Train a 784-500-10 network and save its parameters to ``data.npz``.

    Reads the module-level ``train_data``: ``train_data.data`` supplies the
    input rows and ``train_data.targets`` the matching target rows.  The
    loss of every mini-batch is printed as training proceeds.

    Args:
        epochs: Number of full passes over the training data.
    """
    # Mini-batch size.  NN.backward must average its gradients over this
    # same number of samples.
    batch_size = 3
    nn = NN(784, 500, 10)
    # Take the sample count from the data instead of hard-coding 60000.
    num_samples = len(train_data.data)
    for epoch in range(epochs):
        for i in range(0, num_samples, batch_size):
            X = train_data.data[i:i + batch_size]
            y = train_data.targets[i:i + batch_size]
            loss, _ = nn.forward(X, y)
            print("Epoch:", epoch, "-", i, ":", "{:.3f}".format(loss))
            nn.backward()
    # Persist the trained parameters so test() can load them later.
    np.savez("data.npz", w1=nn.W1, b1=nn.b1, w2=nn.W2, b2=nn.b2)
def test():
    """Evaluate the parameters stored in ``data.npz`` and print the accuracy.

    Reads the module-level ``test_data``: ``test_data.data`` supplies the
    input rows, ``test_data.targets2`` the one-hot targets that ``forward``
    requires, and ``test_data.targets`` the integer labels the predictions
    are compared against.
    """
    nn = NN(784, 500, 10)
    # np.load returns an NpzFile that keeps the archive open; the with block
    # closes it as soon as the arrays have been read.
    with np.load("data.npz") as r:
        nn.W1 = r["w1"]
        nn.b1 = r["b1"]
        nn.W2 = r["w2"]
        nn.b2 = r["b2"]
    _, result = nn.forward(test_data.data, test_data.targets2)
    # The predicted class is the output unit with the largest activation.
    result = np.argmax(result, axis=1)
    # Fraction of correct predictions; the mean avoids hard-coding the
    # number of test samples.
    accuracy = np.mean(result == test_data.targets)
    print("Precison:", accuracy)
# MNIST handwritten-digit dataset, read from ./mnist/ (download=False for the
# training split, so the files must already be present).
train_data = torchvision.datasets.MNIST(root='./mnist/', train=True, download=False)
test_data = torchvision.datasets.MNIST(root='./mnist/', train=False)
train_data.data = train_data.data.numpy()        # [60000, 28, 28]
train_data.targets = train_data.targets.numpy()  # [60000]
test_data.data = test_data.data.numpy()          # [10000, 28, 28]
test_data.targets = test_data.targets.numpy()    # [10000]
# Input preprocessing: flatten every image into a 784-vector and divide by
# 255 (assumes 8-bit pixel values, giving inputs in [0, 1]).  The sample
# count is inferred by reshape instead of being hard-coded.
train_data.data = train_data.data.reshape(-1, 28 * 28) / 255.  # (60000, 784)
test_data.data = test_data.data.reshape(-1, 28 * 28) / 255.
# One-hot encode the labels.  The test set keeps its integer labels in
# `targets` for scoring; `targets2` is only used for the forward pass.
train_data.targets = onehot(train_data.targets, len(train_data.targets))  # (60000, 10)
test_data.targets2 = onehot(test_data.targets, len(test_data.targets))
train()
# After training has written data.npz, call test() instead to evaluate:
#test()
5. Complete code
import torchvision
import numpy as np
def onehot(targets, num, num_classes=10):
    """One-hot encode the first ``num`` integer class labels.

    Args:
        targets: 1-D sequence or array of integer class indices holding at
            least ``num`` entries.
        num: Number of labels to encode (number of rows in the result).
        num_classes: Width of each encoded row. Defaults to 10.

    Returns:
        A float array of shape ``(num, num_classes)`` that is zero everywhere
        except for a 1 at ``[i, targets[i]]`` in every row ``i``.

    Raises:
        IndexError: If ``targets`` holds fewer than ``num`` labels or a label
            lies outside ``num_classes``.
    """
    result = np.zeros((num, num_classes))
    labels = np.asarray(targets, dtype=np.intp)[:num]
    # Check the length explicitly: a single label would otherwise broadcast
    # silently across every row in the fancy-indexed assignment below.
    if len(labels) < num:
        raise IndexError("targets holds fewer than num labels")
    # One fancy-indexed assignment sets the hot entry of every row at once,
    # replacing a Python-level loop over the samples.
    result[np.arange(num), labels] = 1
    return result
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``, element-wise.

    The value is computed through the equivalent identity
    ``sigmoid(x) = (1 + tanh(x / 2)) / 2``.  ``tanh`` saturates instead of
    overflowing, so inputs of large magnitude yield 0 or 1 without the
    overflow RuntimeWarning that ``np.exp(-x)`` raises for very negative ``x``.

    Args:
        x: A scalar or NumPy array.

    Returns:
        The sigmoid of ``x`` with the same shape as ``x``.
    """
    return 0.5 * (1.0 + np.tanh(0.5 * x))
def Dsigmoid(x):
    """Return the first derivative of the sigmoid evaluated at ``x``.

    Uses the identity ``sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))``.

    Args:
        x: A scalar or NumPy array.

    Returns:
        The derivative values, element-wise, with the same shape as ``x``.
    """
    # Evaluate the sigmoid once and reuse it instead of computing it twice.
    s = sigmoid(x)
    return s * (1 - s)
class NN(object):
    """Fully connected network with one hidden layer and sigmoid activations.

    The network is trained by plain gradient descent on a squared-error
    loss.  Batches are laid out one sample per row, so an input batch has
    shape ``(m, l0)``.  Relies on the module-level ``sigmoid`` function.
    """

    def __init__(self, l0, l1, l2, lr=0.1):
        """Create the parameters with small random initial values.

        Args:
            l0: Number of input features.
            l1: Number of hidden units.
            l2: Number of output units.
            lr: Learning rate, i.e. the gradient-descent step size.
        """
        self.lr = lr
        # Small random initial values: standard-normal draws scaled by 0.01.
        self.W1 = np.random.randn(l0, l1) * 0.01
        self.b1 = np.random.randn(l1) * 0.01
        self.W2 = np.random.randn(l1, l2) * 0.01
        self.b2 = np.random.randn(l2) * 0.01

    def forward(self, X, y):
        """Run a forward pass and cache the values ``backward`` needs.

        Args:
            X: Input batch of shape ``(m, l0)``.
            y: Target values, broadcastable against the ``(m, l2)`` output.

        Returns:
            A tuple ``(loss, a2)``.  ``loss`` is half the sum of squared
            errors over the whole batch (it is not averaged over samples);
            ``a2`` is the output-layer activation of shape ``(m, l2)``.
        """
        self.X = X                                      # m x l0
        self.z1 = np.dot(X, self.W1) + self.b1          # m x l1
        self.a1 = sigmoid(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2    # m x l2
        self.a2 = sigmoid(self.z2)
        err = self.a2 - y
        loss = np.sum(err * err) / 2
        # Output-layer delta.  sigmoid'(z2) equals a2 * (1 - a2), so the
        # cached activation is reused instead of re-evaluating the sigmoid.
        self.d2 = err * (self.a2 * (1 - self.a2))       # m x l2
        return loss, self.a2

    def backward(self):
        """Back-propagate the last ``forward`` pass and update the parameters.

        Performs one gradient-descent step in place on ``W1``, ``b1``, ``W2``
        and ``b2``.  Gradients are averaged over the number of samples in
        the batch most recently given to ``forward``, so any batch size
        works.  ``forward`` must have been called first.
        """
        m = self.X.shape[0]                             # batch size
        dW2 = np.dot(self.a1.T, self.d2) / m            # l1 x l2
        db2 = np.sum(self.d2, axis=0) / m               # l2
        # The hidden-layer delta must use W2 as it was during the forward
        # pass, so it is computed before W2 is updated below.
        d1 = np.dot(self.d2, self.W2.T) * (self.a1 * (1 - self.a1))  # m x l1
        dW1 = np.dot(self.X.T, d1) / m                  # l0 x l1
        db1 = np.sum(d1, axis=0) / m                    # l1

        self.W2 -= self.lr * dW2
        self.b2 -= self.lr * db2
        self.W1 -= self.lr * dW1
        self.b1 -= self.lr * db1
def train(epochs=10):
    """Train a 784-500-10 network and save its parameters to ``data.npz``.

    Reads the module-level ``train_data``: ``train_data.data`` supplies the
    input rows and ``train_data.targets`` the matching target rows.  The
    loss of every mini-batch is printed as training proceeds.

    Args:
        epochs: Number of full passes over the training data.
    """
    # Mini-batch size.  NN.backward must average its gradients over this
    # same number of samples.
    batch_size = 3
    nn = NN(784, 500, 10)
    # Take the sample count from the data instead of hard-coding 60000.
    num_samples = len(train_data.data)
    for epoch in range(epochs):
        for i in range(0, num_samples, batch_size):
            X = train_data.data[i:i + batch_size]
            y = train_data.targets[i:i + batch_size]
            loss, _ = nn.forward(X, y)
            print("Epoch:", epoch, "-", i, ":", "{:.3f}".format(loss))
            nn.backward()
    # Persist the trained parameters so test() can load them later.
    np.savez("data.npz", w1=nn.W1, b1=nn.b1, w2=nn.W2, b2=nn.b2)
def test():
    """Evaluate the parameters stored in ``data.npz`` and print the accuracy.

    Reads the module-level ``test_data``: ``test_data.data`` supplies the
    input rows, ``test_data.targets2`` the one-hot targets that ``forward``
    requires, and ``test_data.targets`` the integer labels the predictions
    are compared against.
    """
    nn = NN(784, 500, 10)
    # np.load returns an NpzFile that keeps the archive open; the with block
    # closes it as soon as the arrays have been read.
    with np.load("data.npz") as r:
        nn.W1 = r["w1"]
        nn.b1 = r["b1"]
        nn.W2 = r["w2"]
        nn.b2 = r["b2"]
    _, result = nn.forward(test_data.data, test_data.targets2)
    # The predicted class is the output unit with the largest activation.
    result = np.argmax(result, axis=1)
    # Fraction of correct predictions; the mean avoids hard-coding the
    # number of test samples.
    accuracy = np.mean(result == test_data.targets)
    print("Precison:", accuracy)
if __name__ == '__main__':
    # MNIST handwritten-digit dataset, read from ./mnist/ (download=False
    # for the training split, so the files must already be present).
    train_data = torchvision.datasets.MNIST(root='./mnist/', train=True, download=False)
    test_data = torchvision.datasets.MNIST(root='./mnist/', train=False)
    train_data.data = train_data.data.numpy()        # [60000, 28, 28]
    train_data.targets = train_data.targets.numpy()  # [60000]
    test_data.data = test_data.data.numpy()          # [10000, 28, 28]
    test_data.targets = test_data.targets.numpy()    # [10000]
    # Input preprocessing: flatten every image into a 784-vector and divide
    # by 255 (assumes 8-bit pixel values, giving inputs in [0, 1]).  The
    # sample count is inferred by reshape instead of being hard-coded.
    train_data.data = train_data.data.reshape(-1, 28 * 28) / 255.  # (60000, 784)
    test_data.data = test_data.data.reshape(-1, 28 * 28) / 255.
    # One-hot encode the labels.  The test set keeps its integer labels in
    # `targets` for scoring; `targets2` is only used for the forward pass.
    train_data.targets = onehot(train_data.targets, len(train_data.targets))  # (60000, 10)
    test_data.targets2 = onehot(test_data.targets, len(test_data.targets))
    train()
    # After training has written data.npz, call test() instead to evaluate:
    #test()