"""NumPy implementation of a two-hidden-layer perceptron, with an MNIST demo script."""
import gzip
import pickle

import numpy as np

try:
    import matplotlib.pyplot as plt
except ImportError:  # plotting is optional; the MLP class itself only needs NumPy
    plt = None

# The training loss is recorded (and later plotted) once every this many iterations.
_LOG_INTERVAL = 100


class MLP:
    """Multi-layer perceptron with two sigmoid hidden layers and a softmax output.

    Every layer carries its bias as an extra weight row that is fed by a
    constant ``-1`` input column, so all weight matrices have ``fan_in + 1``
    rows.
    """

    def __init__(self, sizes, beta=1, momentum=0.9):
        """Create the network and randomly initialise its three weight matrices.

        Args:
            sizes: list of four ints ``[nin, nhidden1, nhidden2, nout]``: the
                number of features per sample (784 = 28*28 for MNIST), the
                number of neurons in the first and in the second hidden layer,
                and the number of output neurons (one per class). For example
                ``[784, 5, 7, 10]`` gives hidden layers of 5 and 7 neurons.
            beta: scalar gain used inside the sigmoid, ``1 / (1 + exp(-beta * x))``.
            momentum: scalar momentum factor applied to the previous weight update.
        """
        self.beta = beta
        self.momentum = momentum

        self.nin = sizes[0]       # number of features in each sample
        self.nhidden1 = sizes[1]  # number of neurons in the first hidden layer
        self.nhidden2 = sizes[2]  # number of neurons in the second hidden layer
        self.nout = sizes[3]      # number of classes / output neurons

        # Drawn in this order so a seeded run gives the same initial weights as before.
        self.weights1 = self._init_weights(self.nin, self.nhidden1)
        self.weights2 = self._init_weights(self.nhidden1, self.nhidden2)
        self.weights3 = self._init_weights(self.nhidden2, self.nout)

    @staticmethod
    def _init_weights(fan_in, fan_out):
        """Return a (fan_in + 1, fan_out) matrix uniform in +/- 1/sqrt(fan_in)."""
        return (np.random.rand(fan_in + 1, fan_out) - 0.5) * 2 / np.sqrt(fan_in)

    @staticmethod
    def _with_bias(activations):
        """Append the constant -1 bias column to a (num_samples, n) array."""
        bias = -np.ones((np.shape(activations)[0], 1))
        return np.concatenate((activations, bias), axis=1)

    def train(self, inputs, targets, eta, niterations):
        """Train with full-batch gradient descent with momentum.

        The sum-of-squares loss is appended to ``self.loss`` and printed on
        every 100th iteration (starting with iteration 0). ``self.outputs``
        holds the network outputs of the last forward pass.

        Args:
            inputs: array of shape (num_train, nin) with one sample per row.
            targets: array of shape (num_train, nout) with the 1-of-N encoded
                labels.
            eta: learning rate.
            niterations: number of weight updates to perform.
        """
        inputs = self._with_bias(inputs)

        # Previous updates, needed for the momentum term.
        updatew1 = np.zeros_like(self.weights1)
        updatew2 = np.zeros_like(self.weights2)
        updatew3 = np.zeros_like(self.weights3)

        self.loss = []

        for n in range(niterations):
            # Forward phase.
            self.outputs = self.forwardPass(inputs)

            if n % _LOG_INTERVAL == 0:
                # Sum-of-squares error; only needed when it is actually logged.
                loss = 0.5 * np.sum((self.outputs - targets) ** 2)
                self.loss.append(loss)
                print("Iteration: ", n, " Loss: ", loss)

            # Backward phase.
            # Output delta: error derivative times the diagonal term
            # y * (1 - y) of the softmax derivative.
            deltao = (self.outputs - targets) * self.outputs * (1 - self.outputs)
            # delta_sigmoid already contains the beta factor, so it must not be
            # multiplied in a second time here. The returned arrays still carry
            # a trailing bias column, which is sliced off before further use.
            deltah2 = self.delta_sigmoid(self.hidden2, deltao, self.weights3)
            deltah1 = self.delta_sigmoid(self.hidden1, deltah2[:, :-1], self.weights2)

            updatew1 = self.update_weights(updatew1, inputs, deltah1, eta, self.momentum)
            updatew2 = self.update_weights(updatew2, self.hidden1, deltah2, eta, self.momentum)
            # deltao has no bias column to drop, so update_weights cannot be used here.
            updatew3 = eta * np.dot(np.transpose(self.hidden2), deltao) + self.momentum * updatew3

            self.weights1 -= updatew1
            self.weights2 -= updatew2
            self.weights3 -= updatew3

    def forwardPass(self, inputs):
        """Run the network forward and return the softmax outputs.

        Stores the bias-augmented hidden activations in ``self.hidden1`` and
        ``self.hidden2`` for use by the backward phase.

        Args:
            inputs: array of shape (num_samples, nin + 1); the bias column must
                already be appended, as ``train`` and ``evaluate`` do.

        Returns:
            Array of shape (num_samples, nout) whose rows sum to 1.
        """
        # The bias column has to be -1 (not 0), otherwise the bias weights of
        # the next layer would never contribute to its activation.
        self.hidden1 = self._with_bias(self.sigmoid_fun(np.dot(inputs, self.weights1)))
        self.hidden2 = self._with_bias(self.sigmoid_fun(np.dot(self.hidden1, self.weights2)))
        return self.softmax_fun(np.dot(self.hidden2, self.weights3))

    def evaluate(self, X, y):
        """Evaluate the model: print and return the confusion matrix.

        Also prints the accuracy (in percent) and stores it in ``self.accuracy``.

        Args:
            X: array of shape (num_samples, nin) with one sample per row.
            y: array of shape (num_samples, nclasses) with the 1-of-N encoded
                labels.

        Returns:
            Float array of shape (nclasses, nclasses); entry ``[i, j]`` counts
            the samples predicted as class ``i`` whose true class is ``j``.
        """
        outputs = self.forwardPass(self._with_bias(X))
        nclasses = np.shape(y)[1]

        # Decode the 1-of-N encoding.
        predicted = np.argmax(outputs, 1)
        actual = np.argmax(y, 1)

        cm = np.zeros((nclasses, nclasses))
        # Unbuffered add, so repeated (predicted, actual) pairs are all counted.
        np.add.at(cm, (predicted, actual), 1)

        print("The confusion matrix is:")
        print(cm)
        self.accuracy = np.trace(cm) / np.sum(cm) * 100
        print("The accuracy is ", self.accuracy)
        return cm

    def sigmoid_fun(self, x):
        """Activation function: element-wise ``1 / (1 + exp(-beta * x))``."""
        return 1.0 / (1.0 + np.exp(-self.beta * x))

    def softmax_fun(self, x):
        """Row-wise softmax of a 2-D array.

        The row maximum is subtracted before exponentiating; this leaves the
        result unchanged mathematically but avoids overflow for large inputs.
        """
        exps = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exps / np.sum(exps, axis=1, keepdims=True)

    def delta_sigmoid(self, x, delta, w):
        """Back-propagate ``delta`` through weights ``w`` and a sigmoid layer.

        Args:
            x: sigmoid activations of the layer, shape (num_samples, n + 1).
            delta: deltas of the following layer, shape (num_samples, m).
            w: weights between the two layers, shape (n + 1, m).

        Returns:
            ``beta * x * (1 - x) * (delta . w^T)``, shape (num_samples, n + 1).
        """
        return self.beta * x * (1.0 - x) * np.dot(delta, np.transpose(w))

    def update_weights(self, updatew, x, delta, lr, momentum):
        """Return the momentum-smoothed weight update for one hidden layer.

        The last column of ``delta`` belongs to the bias unit, which has no
        incoming weights, and is therefore dropped.
        """
        return lr * np.dot(np.transpose(x), delta[:, :-1]) + momentum * updatew


def plot_error(niter, Y):
    """Plot the logged losses ``Y`` against the iterations they were logged at."""
    if plt is None:
        raise RuntimeError("matplotlib is required for plotting")
    X = list(range(0, int(niter), _LOG_INTERVAL))
    plt.plot(X, Y, 'g*-')


def save_params(file, params):
    """Pickle ``params`` (hyper-parameters and network weights) to ``file``."""
    with open(file, 'wb') as handle:
        pickle.dump(params, handle, protocol=pickle.HIGHEST_PROTOCOL)


def _one_hot(labels, nclasses=10):
    """Return the 1-of-N encoding of integer ``labels`` as a float array."""
    labels = np.asarray(labels).astype(int)
    encoded = np.zeros((len(labels), nclasses))
    encoded[np.arange(len(labels)), labels] = 1
    return encoded


def main():
    """Train on an MNIST subset, evaluate, plot the loss and save the parameters."""
    # Load the data.
    # NOTE: pickle.load can execute arbitrary code; only use a trusted dataset file.
    with gzip.open('mnist.pkl.gz', 'rb') as f:
        tset, vset, teset = pickle.load(f, encoding='latin1')
    print(tset[0].shape, vset[0].shape, teset[0].shape)

    # Just use the first 9000 images for training ...
    tread = 9000
    train_in = tset[0][:tread, :]
    train_tgt = _one_hot(tset[1][:tread])

    # ... and 1000 images for testing.
    teread = 1000
    test_in = teset[0][:teread, :]
    test_tgt = _one_hot(teset[1][:teread])

    # Set the hyper-parameters and train.
    best_sizes = [784, 50, 30, 10]
    best_beta = 2
    best_momentum = 0.5
    best_lr = 0.001  # best learning rate
    best_niterations = 1200
    best_classifier = MLP(sizes=best_sizes, beta=best_beta, momentum=best_momentum)
    best_classifier.train(train_in, train_tgt, best_lr, best_niterations)
    best_classifier.evaluate(test_in, test_tgt)

    # Plot the training loss curve.
    if plt is not None:
        plot_error(best_niterations, best_classifier.loss)
        plt.xlabel('the number of iterations')
        plt.ylabel('the errors')
        accuracy = round(best_classifier.accuracy, 2)
        plt.text(best_niterations / 2, 4000, rf'$accuracy:\ {accuracy}\%$',
                 fontdict={'size': '12', 'color': 'r'})
        plt.title(f'sizes:{best_sizes}, beta:{best_beta}, momentum:{best_momentum}, '
                  f'lr:{best_lr}, niter:{best_niterations}')
        plt.show()
    else:
        print("matplotlib is not installed; skipping the loss plot")

    # Save the hyper-parameters and the trained weights.
    best_parameters = {
        'sizes': best_sizes,
        'beta': best_beta,
        'momentum': best_momentum,
        'lr': best_lr,
        'niterations': best_niterations,
        'weights_1': best_classifier.weights1,
        'weights_2': best_classifier.weights2,
        'weights_3': best_classifier.weights3,
    }
    save_params("best_parameters.pkl", best_parameters)


if __name__ == '__main__':
    main()
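# [Machine Learning] NumPy implementation of a multi-layer perceptron (MLP) for MNIST recognition
# You may also like
# Reposted from blog.csdn.net/qq_45769063/article/details/121916828
# Today's recommendations
# Weekly ranking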