Building a neural network with NumPy


My first neural network, built with NumPy

Foreword

Using pure NumPy to implement handwritten-digit recognition on MNIST. First comes the overall network structure, then the code. This is my first neural network.

Full code: GitHub

The network consists of an input layer, a hidden layer, and an output layer. We already know the output layer produces ten values: the probabilities of the ten digits. (figure omitted)
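Concretely, the forward pass that predict() implements below is h = tanh(x·W1 + b1) followed by y = softmax(h·W2 + b2), where x is the flattened 784-pixel image, h is the 100-unit hidden layer, and y is the 10-way probability vector.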

About the training set, validation set, and test set (figure omitted)

Hand-calculated gradient descent for the three parameters (figures omitted)

Detailed hand calculation of the gradient for the first parameter (figure omitted)

Code section
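The snippets below share a few imports; they are collected here so each block runs as shown (matplotlib and pylab are only needed for the plotting helpers):

import copy
import math
import struct
from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt
import pylab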

The activation functions and their derivatives

def tanh(x):
    return np.tanh(x)

def bypass(x):
    return x

def softmax(x):
    exp = np.exp(x - x.max())  # subtract the max to keep exp() from overflowing
    # Softmax is a ratio of exponentials, so shifting the input by a constant
    # leaves the result unchanged.
    return exp / exp.sum()

def d_softmax(data):
    sm = softmax(data)
    return np.diag(sm) - np.outer(sm, sm)  # full Jacobian matrix: diag(s) - s sᵀ

def d_tanh(data):  # elementwise derivative, returns a vector
    return 1 / (np.cosh(data)) ** 2

def d_bypass(x):
    return 1  # the scalar 1 broadcasts in the elementwise product

differential = {softmax: d_softmax, tanh: d_tanh, bypass: d_bypass}
d_type = {bypass: 'times', softmax: 'dot', tanh: 'times'}
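d_type records how each derivative combines with the upstream gradient during backpropagation: tanh and bypass have elementwise derivatives ('times'), while softmax has a full Jacobian, ∂softmax_i/∂x_j = s_i(δ_ij − s_j), which must be combined with a matrix product ('dot'). A quick shape check illustrates the difference:

x = np.random.rand(3)
print(d_tanh(x).shape)     # (3,)   -> combined with elementwise *
print(d_softmax(x).shape)  # (3, 3) -> combined with np.dot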

Initializing the parameters

dimensions = [28*28, 100, 10]  # 784 input neurons -> 100 hidden neurons -> 10 outputs
# The 784 inputs connect to the 100 hidden neurons, which connect to the 10 output neurons.
activation = [bypass, tanh, softmax]  # bypass on the input, then the two real activations
distribution = [  # initialization ranges per layer
    {},  # the input layer has no parameters
    # Xavier/Glorot uniform bounds: ±sqrt(6 / (fan_in + fan_out))
    {'b':[0,0],'w':[-math.sqrt(6/(dimensions[0]+dimensions[1])),math.sqrt(6/(dimensions[0]+dimensions[1]))]},
    {'b':[0,0],'w':[-math.sqrt(6/(dimensions[1]+dimensions[2])),math.sqrt(6/(dimensions[1]+dimensions[2]))]},
]

Parameter-initialization functions

def init_parameters_b(layer):  # initialize b for one layer
    dist = distribution[layer]['b']
    return np.random.rand(dimensions[layer])*(dist[1]-dist[0])+dist[0]

def init_parameters_w(layer):  # initialize w for one layer
    dist = distribution[layer]['w']
    return np.random.rand(dimensions[layer-1],dimensions[layer])*(dist[1]-dist[0])+dist[0]

def init_parameters():
    parameter = []  # one dict of parameters per layer
    for i in range(len(distribution)):
        layer_parameter = {}  # holds this layer's parameters
        for j in distribution[i].keys():
            if j == 'b':
                layer_parameter['b'] = init_parameters_b(i)
                continue
            if j == 'w':
                layer_parameter['w'] = init_parameters_w(i)
                continue
        parameter.append(layer_parameter)
    return parameter

Initialize the parameters (untrained)

parameters=init_parameters()

Prediction function

def predict(img, parameters):
    # arguments: image, parameters
    l_in = img
    l_out = activation[0](l_in)  # the input layer just passes the image through
    for layer in range(1, len(dimensions)):
        l_in = np.dot(l_out, parameters[layer]['w']) + parameters[layer]['b']  # affine step
        l_out = activation[layer](l_in)  # activation step
    return l_out

A first prediction. The model is freshly initialized and untrained, so the ten outputs are roughly uniform and the argmax is essentially a random guess. (train_img is loaded in the next section.)

predict(train_img[0],init_parameters())

# Result:
array([0.07210171, 0.07957606, 0.13152407, 0.05420442, 0.08498909,
       0.12788144, 0.14911174, 0.14570486, 0.08225591, 0.07265069])

Training set, validation set, test set

dataset_path=Path('D:/Desktop/MNIST')

train_img_path=dataset_path/'train-images-idx3-ubyte/train-images.idx3-ubyte'
train_lab_path=dataset_path/'train-labels-idx1-ubyte/train-labels.idx1-ubyte'
test_img_path=dataset_path/'t10k-images-idx3-ubyte/t10k-images.idx3-ubyte'
test_lab_path=dataset_path/'t10k-labels-idx1-ubyte/t10k-labels.idx1-ubyte'
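The MNIST IDX files start with a big-endian header: image files carry four 32-bit integers (magic number, image count, rows, columns) and label files carry two (magic number, label count). That is why the loaders below skip 16 and 8 bytes respectively before reading the raw bytes.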

Splitting the sets

train_num = 50000  # training
valid_num = 10000  # validation
test_num = 10000   # test

with open(train_img_path, 'rb') as f:
    struct.unpack('>4i', f.read(16))  # skip the 16-byte image header
    temp_img = np.fromfile(f, dtype=np.uint8).reshape(-1, 28 * 28) / 255
    train_img = temp_img[:train_num]  # carve 10k images off the 60k training set for validation
    valid_img = temp_img[train_num:]

with open(test_img_path, 'rb') as f:
    struct.unpack('>4i', f.read(16))  # skip the header
    test_img = np.fromfile(f, dtype=np.uint8).reshape(-1, 28 * 28) / 255

with open(train_lab_path, 'rb') as f:
    struct.unpack('>2i', f.read(8))  # skip the 8-byte label header
    temp_lab = np.fromfile(f, dtype=np.uint8)
    train_lab = temp_lab[:train_num]
    valid_lab = temp_lab[train_num:]

with open(test_lab_path, 'rb') as f:
    struct.unpack('>2i', f.read(8))
    test_lab = np.fromfile(f, dtype=np.uint8)
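A quick shape check confirms the split (MNIST ships 60,000 training and 10,000 test images):

print(train_img.shape, valid_img.shape, test_img.shape)
# (50000, 784) (10000, 784) (10000, 784)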

Showing an image and its label

def show_train(index):
    plt.imshow(train_img[index].reshape(28, 28), cmap='gray')
    pylab.show()
    print('label:{}'.format(train_lab[index]))


def show_valid(index):
    plt.imshow(valid_img[index].reshape(28, 28), cmap='gray')
    pylab.show()
    print('label:{}'.format(valid_lab[index]))


def show_test(index):
    plt.imshow(test_img[index].reshape(28, 28), cmap='gray')
    pylab.show()
    print('label:{}'.format(test_lab[index]))
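For example, show_train(0) displays the first training image and prints its label:

show_train(0)  # shows the digit and prints e.g. label:5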

Predicting on random input

predict(np.random.rand(784),parameters)

# Result:
array([0.0942381 , 0.11644771, 0.05850607, 0.23711087, 0.02732923,
       0.0176975 , 0.19317991, 0.14196864, 0.08510021, 0.02842176])

Verify that the derivatives are written correctly by comparing them against the finite-difference definition; if a derivative is wrong, all the training that follows is wasted.

h = 0.0001
func = softmax
input_len = 4
for i in range(input_len):  # compare two ways of computing the derivative:
    # the finite-difference definition versus the analytic derivative
    test_input = np.random.rand(input_len)
    derivative = differential[func](test_input)
    value1 = func(test_input)
    test_input[i] += h
    value2 = func(test_input)
    # print((value2 - value1) / h)
    # print(derivative[i] - (value2 - value1) / h)  # difference should be ~0

onehot = np.identity(dimensions[-1])  # one-hot encoding for the 10 digits


Gradient descent via backward passes (in simple terms, chain-rule differentiation). To make the loss as small as possible, the prediction y1 must get as close as possible to the true value y0, so each step computes the gradient of the loss with respect to every parameter, updates the parameters once against it, and repeats.
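In symbols, every update is parameter ← parameter − learn_rate · gradient. A minimal sketch of one such descent on a scalar function (the function and names here are made up purely for illustration):

# Minimize f(x) = (x - 3)^2 by gradient descent (illustrative sketch).
x = 0.0
learn_rate = 0.1
for _ in range(50):
    grad = 2 * (x - 3)       # analytic derivative of f
    x -= learn_rate * grad   # step against the gradient
print(x)  # converges toward 3, the minimizer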

def sqr_loss(img, lab, parameters):
    y_pred = predict(img, parameters)
    y = onehot[lab]
    diff = y - y_pred
    return np.dot(diff, diff)  # squared error between one-hot label and prediction

def grad_parameters(img, lab, parameters):
    # arguments: image, label, parameters
    l_in_list = [img]  # the input layer has no parameters, it receives the raw image
    l_out_list = [activation[0](l_in_list[0])]  # bypass, so the first l_out is the image too
    for layer in range(1, len(dimensions)):
        # forward pass, same as predict(); the input is the previous layer's output
        l_in = np.dot(l_out_list[layer-1], parameters[layer]['w']) + parameters[layer]['b']
        l_out = activation[layer](l_in)
        l_in_list.append(l_in)
        l_out_list.append(l_out)

    d_layer = -2*(onehot[lab] - l_out_list[-1])  # derivative of the squared loss
    grad_result = [None]*len(dimensions)
    for layer in range(len(dimensions)-1, 0, -1):  # backpropagation
        if d_type[activation[layer]] == 'times':
            d_layer = differential[activation[layer]](l_in_list[layer])*d_layer
        if d_type[activation[layer]] == 'dot':
            d_layer = np.dot(differential[activation[layer]](l_in_list[layer]), d_layer)
        grad_result[layer] = {}
        grad_result[layer]['b'] = d_layer
        grad_result[layer]['w'] = np.outer(l_out_list[layer-1], d_layer)  # previous layer's output
        d_layer = np.dot(parameters[layer]['w'], d_layer)  # propagate to the previous layer
    return grad_result

Gradients returned by backpropagation

grad_parameters(train_img[0],train_lab[0],init_parameters())

Verify that the backpropagated partial derivatives are correct; the differences printed below should be on the order of h or smaller.

# verify parameter b
h = 0.00001  # check that backpropagation computes the derivatives correctly
layer = 2
parameters = init_parameters()
pname = 'b'
for i in range(len(parameters[layer][pname])):  # compare two ways of computing the derivative:
    # the finite-difference definition versus backpropagation
    img_i = np.random.randint(train_num)  # pick a random digit image
    test_parameters = init_parameters()   # fresh random parameters
    derivative = grad_parameters(train_img[img_i], train_lab[img_i], test_parameters)[layer][pname]
    value1 = sqr_loss(train_img[img_i], train_lab[img_i], test_parameters)
    test_parameters[layer][pname][i] += h
    value2 = sqr_loss(train_img[img_i], train_lab[img_i], test_parameters)
    print(derivative[i]-(value2-value1)/h)

# verify parameter w
h = 0.00001  # check that backpropagation computes the derivatives correctly
layer = 1
parameters = init_parameters()
pname = 'w'
grad_list = []
for i in range(len(parameters[layer][pname])):
    for j in range(len(parameters[layer][pname][0])):
        img_i = np.random.randint(train_num)  # pick a random digit image
        test_parameters = init_parameters()   # fresh random parameters
        derivative = grad_parameters(train_img[img_i], train_lab[img_i], test_parameters)[layer][pname]
        value1 = sqr_loss(train_img[img_i], train_lab[img_i], test_parameters)
        test_parameters[layer][pname][i][j] += h
        value2 = sqr_loss(train_img[img_i], train_lab[img_i], test_parameters)
        grad_list.append(derivative[i][j]-(value2-value1)/h)
np.abs(grad_list).max()  # should be tiny if the gradients are right

Loss and accuracy functions

def valid_loss(parameters):  # loss on the validation set
    loss_accu = 0
    for img_i in range(valid_num):
        loss_accu += sqr_loss(valid_img[img_i], valid_lab[img_i], parameters)
    return loss_accu / (valid_num / 10000)  # normalize: loss per 10,000 images,
    # so training and validation losses are directly comparable


def valid_accuracy(parameters):  # accuracy on the validation set
    correct = [predict(valid_img[img_i], parameters).argmax() == valid_lab[img_i] for img_i in range(valid_num)]
    return correct.count(True) / len(correct)


def train_loss(parameters):  # loss on the training set
    loss_accu = 0
    for img_i in range(train_num):
        loss_accu += sqr_loss(train_img[img_i], train_lab[img_i], parameters)
    return loss_accu / (train_num / 10000)


def train_accuracy(parameters):  # accuracy on the training set
    correct = [predict(train_img[img_i], parameters).argmax() == train_lab[img_i] for img_i in range(train_num)]
    return correct.count(True) / len(correct)

def test_accuracy(parameters):  # accuracy on the test set
    correct = [predict(test_img[img_i], parameters).argmax() == test_lab[img_i] for img_i in range(test_num)]
    return correct.count(True) / len(correct)

Helper functions for accumulating, averaging, and applying gradients

def grad_add(grad1, grad2):  # accumulate grad2 into grad1, layer by layer
    for layer in range(1, len(grad1)):
        for pname in grad1[layer].keys():
            grad1[layer][pname] += grad2[layer][pname]
    return grad1

def grad_divide(grad, denominator):  # divide every gradient entry, for averaging
    for layer in range(1, len(grad)):
        for pname in grad[layer].keys():
            grad[layer][pname] /= denominator
    return grad

def combine_parameters(parameters, grad, learn_rate):  # form the new parameters
    parameter_tmp = copy.deepcopy(parameters)
    for layer in range(len(parameter_tmp)):
        for pname in parameter_tmp[layer].keys():
            parameter_tmp[layer][pname] -= learn_rate * grad[layer][pname]  # descent step
    return parameter_tmp

Training on a batch

batch_size = 100  # treat one hundred images as a unit:
                  # compute the gradient for each image in the batch,
                  # then train on the averaged result
def train_batch(current_batch, parameters):  # average the gradients over these hundred images
    grad_accu = grad_parameters(train_img[current_batch * batch_size], train_lab[current_batch * batch_size],
                                parameters)
    for img_i in range(1, batch_size):
        grad_temp = grad_parameters(train_img[current_batch * batch_size + img_i],
                                    train_lab[current_batch * batch_size + img_i], parameters)
        grad_add(grad_accu, grad_temp)  # accumulate the gradients
    grad_divide(grad_accu, batch_size)  # average them to get the descent direction
    return grad_accu

parameters = init_parameters()
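With the parameters initialized, a single hand-rolled update step looks like this (illustrative; the training loop below does exactly this for every batch):

grad = train_batch(0, parameters)                              # averaged gradient over batch 0
parameters = combine_parameters(parameters, grad, 10 ** -0.3)  # one descent step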

Training process

from tqdm import tqdm_notebook
current_epoch = 0
train_loss_list = []  # training-loss history
valid_loss_list = []  # validation-loss history
train_accu_list = []  # training-accuracy history
valid_accu_list = []  # validation-accuracy history
learn_rate = 10**-0.3  # the learning rate should shrink toward the end of training
epoch_num = 5  # number of passes over the training set; one full pass is one epoch
for epoch in tqdm_notebook(range(epoch_num)):
    for i in range(train_num // batch_size):
#         if i % 100 == 99:
#             print('running batch{}/{}'.format(i + 1, train_num // batch_size))
        grad_tmp = train_batch(i, parameters)
        parameters = combine_parameters(parameters, grad_tmp, learn_rate)
    current_epoch += 1
    train_loss_list.append(train_loss(parameters))
    train_accu_list.append(train_accuracy(parameters))
    valid_loss_list.append(valid_loss(parameters))
    valid_accu_list.append(valid_accuracy(parameters))


valid_accuracy(parameters)
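The comment on learn_rate says it should shrink toward the end of training, but the loop above keeps it constant. One simple way to add that is a step-decay helper (a hypothetical sketch, not part of the original code):

def decayed_lr(base_lr, epoch, drop=0.5, every=2):
    # Multiply the learning rate by `drop` every `every` epochs (hypothetical schedule).
    return base_lr * (drop ** (epoch // every))

# e.g. inside the epoch loop: learn_rate = decayed_lr(10**-0.3, epoch)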

Loss on the validation and training sets

lower = 0  # start index for the plot
plt.plot(valid_loss_list[lower:], color='black', label='validation loss')
plt.plot(train_loss_list[lower:], color='red', label='train loss')
plt.legend()
plt.show()

Accuracy on validation and training sets

plt.plot(valid_accu_list[lower:], color='black', label='validation accuracy')
plt.plot(train_accu_list[lower:], color='red', label='train accuracy')
plt.legend()
plt.show()

Finally

Xiaosheng Fanyi here; thanks for reading, and I look forward to your follow.


Origin juejin.im/post/6995728018748653604