Deep Learning Fundamentals Course 2 Notes - Handwritten Digit Recognition Source Code (MNIST)

1. network.py

import random   # used to shuffle the training data

import numpy as np


# Neural network class
class Network(object):
    # Constructor
    def __init__(self,sizes):
        self.num_layers=len(sizes) # number of layers in the network
        self.sizes=sizes           # number of neurons in each layer, e.g. net=Network([2,3,1])
        self.biases=[np.random.randn(y,1) for y in sizes[1:]]  # biases
        # np.random.randn(y, 1) draws from a standard normal distribution (mean 0, variance 1)
        self.weights=[np.random.randn(y,x)
                      for x,y in zip(sizes[:-1],sizes[1:])]    # weights
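        # Example: for Network([2,3,1]) the input layer has no parameters, so
        #   biases  have shapes [(3,1), (1,1)]
        #   weights have shapes [(3,2), (1,3)]  (rows = neurons in layer l, cols = neurons in layer l-1)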

    # Propagate the input from the input layer to the output layer
    def feedforward(self,a):
        for b,w in zip(self.biases,self.weights):
            a=sigmoid(np.dot(w,a)+b)  # a = sigmoid(w1*a1+w2*a2+...+b)
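        # Example: for Network([784,30,10]) this maps a (784,1) input column
        # vector to a (10,1) output column vector.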
        return a

    # Stochastic gradient descent
    #       self, training set, number of epochs, mini-batch size, learning rate, test set
    def SGD(self,training_data,epochs,mini_batch_size,eta,
            test_data=None):
        if test_data: n_test=len(test_data) # if a test set is provided, record its length
        n=len(training_data)   # length of the training set
        for j in range(epochs):  # loop over epochs
            random.shuffle(training_data) # shuffle the training set
            # Split the training set into mini-batches; e.g. 1000 examples with
            # mini_batch_size=100 gives 10 mini-batches of 100 examples each.

            # slices 0-99, 100-199, 200-299, ..., 900-999
            mini_batches=[training_data[k:k+mini_batch_size] for k in range(0,n,mini_batch_size)]
            # process the mini-batches one at a time
            for mini_batch in mini_batches:
                # key step
                self.update_mini_batch(mini_batch,eta) # update the parameters w and b
            if test_data: # j is the epoch index; self.evaluate(test_data) counts correct predictions on the test set
                print("Epoch{0}:{1}/{2}".format(
                    j,self.evaluate(test_data),n_test))
            else:
                print("Epoch{0} complete".format(j))

    # Update w and b from a single mini-batch
    def update_mini_batch(self,mini_batch,eta):

        # Initialize two zero matrices with the same shapes as b and w
        nabla_b=[np.zeros(b.shape) for b in self.biases]
        nabla_w=[np.zeros(w.shape) for w in self.weights]

        # Take each training example (x,y) in the mini-batch
        for x,y in mini_batch:
            # compute the partial derivatives of the cost with respect to w and b
            delta_nabla_b,delta_nabla_w=self.backprop(x,y)
            # accumulate the gradients over the mini-batch
            nabla_b=[nb+dnb for nb,dnb in zip(nabla_b,delta_nabla_b)]
            nabla_w=[nw+dnw for nw,dnw in zip(nabla_w,delta_nabla_w)]
        self.weights=[w-(eta/len(mini_batch))*nw
                      for w,nw in zip(self.weights,nabla_w)]
        self.biases=[b-(eta/len(mini_batch))*nb
                     for b,nb in zip(self.biases,nabla_b)]
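        # Gradient-descent step: w -> w - (eta/m) * sum_x dC_x/dw and
        # b -> b - (eta/m) * sum_x dC_x/db, where m = len(mini_batch).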

    # Compute the partial derivatives of the cost with respect to w and b
    def backprop(self,x,y):
        nabla_b=[np.zeros(b.shape) for b in self.biases]
        nabla_w=[np.zeros(w.shape) for w in self.weights]
        #feedforward
        activation=x
        activations=[x]
        zs=[]
        for b,w in zip(self.biases,self.weights):
            z=np.dot(w,activation)+b
            zs.append(z)
            activation=sigmoid(z)
            activations.append(activation)
        #backward pass
        delta=self.cost_derivative(activations[-1],y)*\
            sigmoid_prime(zs[-1])
        nabla_b[-1]=delta
        nabla_w[-1]=np.dot(delta,activations[-2].transpose())
        # Note: the variable l in the loop below counts layers from the back:
        # l=1 is the last layer, l=2 the second-to-last, and so on.
        for l in range(2,self.num_layers):
            z = zs[-l]  # the index is a lowercase letter L (the layer offset), not the digit 1
            sp=sigmoid_prime(z)
            delta=np.dot(self.weights[-l+1].transpose(),delta)*sp
            nabla_b[-l]=delta
            nabla_w[-l]=np.dot(delta,activations[-l-1].transpose())
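        # Backpropagation equations implemented above:
        #   output layer:  delta = (a_L - y) * sigmoid'(z_L)
        #   hidden layers: delta_l = (w_{l+1}^T . delta_{l+1}) * sigmoid'(z_l)
        #   gradients:     dC/db_l = delta_l,   dC/dw_l = delta_l . a_{l-1}^T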
        return (nabla_b,nabla_w)

    # Evaluate accuracy on the test set
    def evaluate(self, test_data):
        # argmax returns the index of the largest output activation
        test_results = [(np.argmax(self.feedforward(x)), y)
                        for (x, y) in test_data]
        # return the number of predictions that match the labels
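        # Note: test_data labels y are plain integers (see load_data_wrapper), so
        # the (10,1) network output is reduced with argmax before comparing.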
        return sum(int(x == y) for (x, y) in test_results)

    def cost_derivative(self, output_activations, y):
        """Return the vector of partial derivatives \partial C_x /
        \partial a for the output activations."""
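        # This is the derivative of the quadratic cost C_x = 0.5*||a-y||^2 with respect to the output a.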
        return (output_activations-y)

# Activation function
def sigmoid(z):
    """The sigmoid function."""
    return 1.0/(1.0+np.exp(-z))

def sigmoid_prime(z):
    """Derivative of the sigmoid function."""
    return sigmoid(z)*(1-sigmoid(z))
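
A quick sanity check, as a minimal sketch assuming the code above is saved as network.py: it builds a tiny Network([2,3,1]) and prints the parameter shapes and the shape of the forward-pass output.

import numpy as np
import network

net = network.Network([2, 3, 1])
x = np.random.randn(2, 1)               # one 2-dimensional input column vector
print([b.shape for b in net.biases])    # [(3, 1), (1, 1)]
print([w.shape for w in net.weights])   # [(3, 2), (1, 3)]
print(net.feedforward(x).shape)         # (1, 1)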

2. mnist_loader.py

# -*- coding: utf-8 -*-
# from __future__ import print_function,division
import pickle
import gzip
import numpy as np

# Load the raw dataset from disk
def load_data():
    file=gzip.open('../MNIST_data/MNIST_data/mnist.pkl.gz','rb')
    training_data,validation_data,test_data=pickle.load(file,encoding='bytes')
    file.close()
    return training_data,validation_data,test_data

# Reformat the dataset into the shapes the network expects
def load_data_wrapper():
    tr_d,va_d,te_d=load_data()
    # training set
    training_inputs=[np.reshape(x,(784,1)) for x in tr_d[0]]
    training_labels=[vectorized_label(x) for x in tr_d[1]]
    training_data=list(zip(training_inputs,training_labels))

    # validation set
    validation_inputs=[np.reshape(x,(784,1)) for x in va_d[0]]
    validation_data=list(zip(validation_inputs,va_d[1]))

    # test set
    test_inputs=[np.reshape(x,(784,1)) for x in te_d[0]]
    test_data=list(zip(test_inputs,te_d[1]))

    return (training_data, validation_data, test_data)

def vectorized_label(j):
    # one-hot vector with 10 rows and 1 column
    e=np.zeros((10,1))
    e[j]=1.0
    return e
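
A minimal sketch of the returned formats, assuming mnist.pkl.gz is present at the path used in load_data: training examples pair a (784,1) image vector with a (10,1) one-hot label, while test examples pair the image vector with a plain integer label, which is why evaluate in network.py compares argmax of the output against y directly.

import mnist_loader

training_data, validation_data, test_data = mnist_loader.load_data_wrapper()

x, y = training_data[0]
print(x.shape, y.shape)   # (784, 1) (10, 1)  -- one-hot training label

tx, ty = test_data[0]
print(tx.shape, ty)       # (784, 1) and an integer class label in 0-9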

3. demo.py

import mnist_loader
import network

# training set,  validation set, test set
training_data,validation_data,test_data=mnist_loader.load_data_wrapper()
# print("training_data")
# print(type(training_data))  # data type
# print(len(training_data))   # number of examples
# print(training_data[0][0].shape)   # each element is a tuple; index 0 is the input x
# print(training_data[0][1].shape)   # each element is a tuple; index 1 is the label y
# print(training_data[0])
#
# print("validation_data")
# print(len(validation_data))
#
# print("test_data")
# print(len(test_data))
#
                # 784 neurons in the input layer, 30 in the hidden layer, 10 in the output layer
net=network.Network([784,30,10])
#      training set, number of epochs, mini-batch size, learning rate, test set
net.SGD(training_data,30,10,3.0,test_data=test_data)
#
# net=network.Network([784,100,10])
# net.SGD(training_data,30,10,3.0,test_data=test_data)

# net=network.Network([784,50,60,10])
# net.SGD(training_data,30,10,3.0,test_data=test_data)

4. Results
(Screenshot of the training output.)

Reposted from blog.csdn.net/sxlsxl119/article/details/81027185