# encoding:utf-8
import numpy as np
import random
class Network(object):
    def __init__(self, sizes):
        self.num_layers = len(sizes)
        print("self.num_layers", self.num_layers)
        self.sizes = sizes
        # one bias column vector per layer, skipping the input layer
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        print("biases", self.biases)
        # biases [array([[-0.55676967],
        #        [-1.60486518],
        #        [ 0.28285971],
        #        [ 0.50856141]]), array([[-1.02563044]])]
        print("sizes[1:]", sizes[1:])
        # [4, 1]
        # one (y, x) weight matrix between each pair of adjacent layers
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]
        print("weights", self.weights)
        # weights [array([[-0.60968692,  2.86107499,  0.25978906],
        #        [ 0.20304469, -1.42226893,  1.66564988],
        #        [-0.73320158,  0.53635735,  1.49307876],
        #        [-0.27301879, -1.42973446, -0.17142598]]),
        #  array([[ 0.39620414, -1.0758598 ,  0.17447957, -0.67224015]])]
        print("sizes[:-1]", sizes[:-1])
        # [3, 4]
    def backprop(self, x, y):
        """Return a tuple ``(nabla_b, nabla_w)`` representing the
        gradient of the cost function C_x. ``nabla_b`` and ``nabla_w``
        are layer-by-layer lists of numpy arrays, similar to
        ``self.biases`` and ``self.weights``.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # feedforward
        activation = x
        activations = [x]  # stores the activations, layer by layer
        zs = []  # stores the z vectors, layer by layer
        # forward pass
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = self.sigmoid(z)
            activations.append(activation)
        # backward pass; note the output-layer error uses sigmoid_prime,
        # not sigmoid: delta = dC/da * sigma'(z)
        delta = self.cost_derivative(activations[-1], y) * self.sigmoid_prime(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
        # propagate the error backwards through the earlier layers
        for l in range(2, self.num_layers):
            z = zs[-l]
            sp = self.sigmoid_prime(z)
            delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())
        return (nabla_b, nabla_w)
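
    # A minimal sanity check for backprop (an addition, not part of the
    # original code): compare the analytic gradient for a single bias
    # against a centered finite difference of the quadratic cost
    # C = 0.5 * ||feedforward(x) - y||^2. The two values should agree to
    # several decimal places if backprop is correct.
    def gradient_check(self, x, y, eps=1e-5):
        nabla_b, _ = self.backprop(x, y)
        analytic = nabla_b[0][0, 0]

        def cost():
            return 0.5 * np.sum((self.feedforward(x) - y) ** 2)

        self.biases[0][0, 0] += eps
        c_plus = cost()
        self.biases[0][0, 0] -= 2 * eps
        c_minus = cost()
        self.biases[0][0, 0] += eps  # restore the original bias
        numeric = (c_plus - c_minus) / (2 * eps)
        return analytic, numeric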
    def evaluate(self, test_data):
        """Return the number of test inputs for which the network
        outputs the correct result; the output is taken to be the index
        of the output neuron with the highest activation.
        """
        test_results = [(np.argmax(self.feedforward(x)), y)
                        for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)

    def sigmoid(self, z):
        """The sigmoid function."""
        return 1.0 / (1.0 + np.exp(-z))

    def sigmoid_prime(self, z):
        """Derivative of the sigmoid: sigma'(z) = sigma(z) * (1 - sigma(z))."""
        return self.sigmoid(z) * (1 - self.sigmoid(z))

    def cost_derivative(self, output_activations, y):
        """Partial derivatives dC/da for the quadratic cost
        C = 0.5 * ||a - y||^2."""
        return (output_activations - y)

    def feedforward(self, a):
        """Return the output of the network if ``a`` is input."""
        for b, w in zip(self.biases, self.weights):
            a = self.sigmoid(np.dot(w, a) + b)
        return a
    def update_mini_batch(self, mini_batch, eta):
        """Update the network's weights and biases by applying gradient
        descent using backpropagation to a single mini-batch.
        ``mini_batch`` is a list of tuples ``(x, y)`` and ``eta`` is the
        learning rate.
        """
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        # step against the gradient, averaged over the mini-batch:
        # w -> w - (eta / m) * sum_x dC_x/dw, and likewise for b
        self.weights = [w - (eta / len(mini_batch)) * nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - (eta / len(mini_batch)) * nb
                       for b, nb in zip(self.biases, nabla_b)]
    def SGD(self, training_data, epochs, mini_batch_size, eta, test_data=None):
        """Train the neural network using mini-batch stochastic
        gradient descent. ``training_data`` is a list of tuples
        ``(x, y)`` representing the training inputs and the desired
        outputs. The other non-optional parameters are
        self-explanatory. If ``test_data`` is provided, the network is
        evaluated against it after each epoch.
        """
        # training_data must be a list, not a zip object; otherwise len()
        # raises: TypeError: object of type 'zip' has no len()
        if test_data:
            n_test = len(test_data)
        n = len(training_data)  # 50,000 for MNIST
        for j in range(epochs):
            random.shuffle(training_data)  # reshuffle before each epoch
            # slice the shuffled data into chunks of mini_batch_size,
            # stepping through it mini_batch_size items at a time
            mini_batches = [
                training_data[k:k + mini_batch_size] for k in range(0, n, mini_batch_size)
            ]
            for mini_batch in mini_batches:  # one gradient step per mini-batch
                self.update_mini_batch(mini_batch, eta)
            if test_data:
                print("Epoch {0}: {1} / {2}".format(j, self.evaluate(test_data), n_test))
            else:
                print("Epoch {0} complete".format(j))
if __name__ == "__main__":
    nn = Network([3, 4, 1])
    # range(start, stop, step)
    a = [k for k in range(0, 500, 50)]
    print("a", a)
    # a [0, 50, 100, 150, 200, 250, 300, 350, 400, 450]
    print("[np.zeros(b.shape) for b in nn.biases]:", [np.zeros(b.shape) for b in nn.biases])
    # [array([[0.],
    #        [0.],
    #        [0.],
    #        [0.]]), array([[0.]])]
    # walk through one forward pass by hand (the sample outputs below come
    # from separate runs, so the values vary)
    activation = np.random.randn(3, 1)
    activations = [activation]
    print("activations", activations)
    # [array([[ 0.33711679],
    #        [-0.69154523],
    #        [-0.1337307 ]])]
    zs = []
    for b, w in zip(nn.biases, nn.weights):
        z = np.dot(w, activation) + b
        print("z", z)
        # last layer: [[1.65554762]]
        zs.append(z)
        activation = nn.sigmoid(z)
        print(activation)
        # last layer: [[0.83963942]]
        activations.append(activation)
    print("zs", zs)
    # [array([[ 1.32353394],
    #        [ 0.01295639],
    #        [-2.3796809 ],
    #        [-0.97347364]]), array([[1.65554762]])]
    print("activ", activations)
    # [array([[-1.62859403],
    #        [ 0.35390197],
    #        [ 0.44954864]]), array([[0.78976906],
    #        [0.50323905],
    #        [0.08473531],
    #        [0.27418867]]), array([[0.83963942]])]
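
    # The manual loop above should reproduce feedforward exactly, since
    # feedforward applies the same sigmoid(w.a + b) at every layer; this
    # check (an addition) should always print True.
    print("match:", np.allclose(activations[-1], nn.feedforward(activations[0])))

    # A toy end-to-end smoke test (an addition): train the 3-4-1 net on
    # random inputs with random targets in [0, 1]. There is nothing to
    # learn here; it just exercises SGD/update_mini_batch/backprop and
    # should print "Epoch j complete" five times.
    toy_data = [(np.random.randn(3, 1), np.random.rand(1, 1)) for _ in range(200)]
    nn.SGD(toy_data, epochs=5, mini_batch_size=10, eta=3.0)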

    # Full MNIST run. This snippet assumes the Network class above is
    # saved as bp.py and that mnist_loader (the module below) is on the
    # path as its own file.
    import bp
    import mnist_loader
    net = bp.Network([784, 100, 10])
    training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
    net.SGD(training_data, 30, 10, 3.0, test_data=test_data)
"""
mnist_loader
A library to load the MNIST image data.
"""
# ### Libraries
# Standard library
# import cPickle
import pickle
import gzip
# Third-party libraries
import numpy as np


def load_data():
    """Return the MNIST data as a tuple containing the training data,
    the validation data, and the test data.

    The ``training_data`` is returned as a tuple with two entries.
    The first entry contains the actual training images. This is a
    numpy ndarray with 50,000 entries. Each entry is, in turn, a
    numpy ndarray with 784 values, representing the 28 * 28 = 784
    pixels in a single MNIST image.

    The second entry in the ``training_data`` tuple is a numpy ndarray
    containing 50,000 entries. Those entries are just the digit
    values (0...9) for the corresponding images contained in the first
    entry of the tuple.

    The ``validation_data`` and ``test_data`` are similar, except
    each contains only 10,000 images.

    This is a nice data format, but for use in neural networks it's
    helpful to modify the format of the ``training_data`` a little.
    That's done in the wrapper function ``load_data_wrapper()``, see
    below.
    """
    f = gzip.open('data/mnist.pkl.gz', 'rb')
    training_data, validation_data, test_data = pickle.load(f, encoding='bytes')
    f.close()
    return (training_data, validation_data, test_data)
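
# Example of what load_data returns (a sketch; it assumes
# data/mnist.pkl.gz is present at the path used above):
#
#     tr_d, va_d, te_d = load_data()
#     tr_d[0].shape   # (50000, 784) -- images as flat rows
#     tr_d[1].shape   # (50000,)     -- integer digit labels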


def load_data_wrapper():
    """Return a tuple containing ``(training_data, validation_data,
    test_data)``. Based on ``load_data``, but the format is more
    convenient for use in our implementation of neural networks.

    In particular, ``training_data`` is a list containing 50,000
    2-tuples ``(x, y)``. ``x`` is a 784-dimensional numpy.ndarray
    containing the input image. ``y`` is a 10-dimensional
    numpy.ndarray representing the unit vector corresponding to the
    correct digit for ``x``.

    ``validation_data`` and ``test_data`` are lists containing 10,000
    2-tuples ``(x, y)``. In each case, ``x`` is a 784-dimensional
    numpy.ndarray containing the input image, and ``y`` is the
    corresponding classification, i.e., the digit value (an integer)
    corresponding to ``x``.

    Obviously, this means we're using slightly different formats for
    the training data and the validation / test data. These formats
    turn out to be the most convenient for use in our neural network
    code."""
    tr_d, va_d, te_d = load_data()
    training_inputs = [np.reshape(x, (784, 1)) for x in tr_d[0]]
    training_results = [vectorized_result(y) for y in tr_d[1]]
    training_data = zip(training_inputs, training_results)
    validation_inputs = [np.reshape(x, (784, 1)) for x in va_d[0]]
    validation_data = zip(validation_inputs, va_d[1])
    test_inputs = [np.reshape(x, (784, 1)) for x in te_d[0]]
    test_data = zip(test_inputs, te_d[1])
    # materialize the zips as lists so len() and random.shuffle work
    return (list(training_data), list(validation_data), list(test_data))


def vectorized_result(j):
    """Return a 10-dimensional unit vector with a 1.0 in the jth
    position and zeroes elsewhere. This is used to convert a digit
    (0...9) into a corresponding desired output from the neural
    network."""
    e = np.zeros((10, 1))
    e[j] = 1.0
    return e
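

# A tiny deterministic self-test (an addition): vectorized_result(3)
# should be a one-hot column vector with a 1.0 in row 3.
if __name__ == "__main__":
    print(vectorized_result(3).ravel())
    # [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]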