机器学习自学总结笔记——多分类逻辑回归

参考内容：https://blog.csdn.net/Cowry5/article/details/80367832

 1 import numpy as np
 2 import pandas as pd
 3 import matplotlib.pyplot as plt
 4 from scipy.io import loadmat
 5 from scipy.optimize import minimize
 6 def load_data(path):
 7     data=loadmat(path)
 8     X=data['X']
 9     y=data['y']
10     # print(type(X),type(y))
11     return X,y
# Location of the exercise-3 data set (a MATLAB .mat file) on the author's machine.
path = r'C:\Users\Earth\Desktop\Coursera-ML-using-matlab-python-master\machine-learning-ex3\ex3\ex3data1.mat'
X, y = load_data(path)

# Sanity checks: which labels occur, the array shapes, and the sample count.
print(np.unique(y))
print(X.shape, y.shape)
print(len(X))
17 
def plotanimage():
    """Display one randomly chosen sample as a 20x20 image and print its label.

    Reads the module-level arrays ``X`` (one sample per row) and ``y``
    (the matching labels).
    """
    # Draw the index from the actual number of rows instead of a hard-coded 5000.
    pick_one = np.random.randint(0, X.shape[0])
    # Take the last 400 columns of the chosen row so the reshape still works
    # after a bias column has been inserted at the front of X.
    image = X[pick_one, -400:]
    fig, ax = plt.subplots(figsize=(1, 1))
    ax.matshow(image.reshape((20, 20)), cmap='gray_r')
    # Hide the axis ticks; they carry no information for an image.
    plt.xticks([])
    plt.yticks([])
    plt.show()
    print('this should be {}'.format(y[pick_one]))
28 
29 # plotanimage()
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))``, element-wise.

    Args:
        z: A scalar or NumPy array.

    Returns:
        A value (or array of the same shape as ``z``) between 0 and 1.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator
32 
def costReg(theta, X, y, lam):
    """Regularized logistic-regression cost (mean cross-entropy plus L2 penalty).

    Args:
        theta: Parameter vector; ``theta[0]`` is the bias term.
        X: Design matrix with one sample per row (bias column included).
        y: Array of 0/1 targets, one per row of ``X``.
        lam: Regularization strength.

    Returns:
        The scalar cost.
    """
    z = X @ theta
    # -log(sigmoid(z)) == logaddexp(0, -z) and -log(1 - sigmoid(z)) == logaddexp(0, z).
    # Writing the loss this way avoids log(0) -> -inf and the resulting
    # 0 * inf = NaN when the sigmoid saturates for large |z|.
    first = np.mean(y * np.logaddexp(0, -z) + (1 - y) * np.logaddexp(0, z))
    # The bias term theta[0] is not penalized.
    theta1 = theta[1:]
    # theta1.T @ theta1 is the scalar sum of squares (theta1 * theta1 would
    # give a vector instead).
    second = theta1.T @ theta1 * lam / (2 * len(X))
    return first + second
def gradient(theta, X, y, lam):
    """Gradient of the regularized logistic-regression cost w.r.t. ``theta``.

    Args:
        theta: Parameter vector; ``theta[0]`` is the bias term.
        X: Design matrix with one sample per row (bias column included).
        y: Array of 0/1 targets, one per row of ``X``.
        lam: Regularization strength.

    Returns:
        An array with one partial derivative per entry of ``theta``.
    """
    m = len(X)
    residual = sigmoid(X @ theta) - y
    data_term = (X.T @ residual) / m
    # The bias term is not regularized, so its penalty gradient is zero.
    penalty_term = np.hstack(([0], lam * theta[1:] / m))
    return data_term + penalty_term
def one_vs_all(X, y, lam, K):
    """Train ``K`` one-vs-all regularized logistic-regression classifiers.

    Args:
        X: Design matrix with one sample per row (bias column included).
        y: Class labels taking values ``1..K``, one per row of ``X``.
        lam: Regularization strength passed on to the cost and gradient.
        K: Number of classes.

    Returns:
        Array of shape ``(K, X.shape[1])``; row ``i - 1`` holds the optimized
        parameters of the classifier for label ``i``.
    """
    n_params = X.shape[1]
    # Flatten once so a column-vector y is handled the same as a 1-D y.
    labels = np.ravel(y)
    all_theta = np.zeros((K, n_params))
    for i in range(1, K + 1):
        theta = np.zeros(n_params)
        # Binary targets for "class i versus the rest", built with one
        # vectorized comparison instead of a Python loop over every label.
        y_i = (labels == i).astype(int)
        ret = minimize(fun=costReg, x0=theta, args=(X, y_i, lam),
                       method='TNC', jac=gradient, options={'disp': True})
        # Store the optimized parameters in row i - 1 (labels start at 1).
        all_theta[i - 1, :] = ret.x
    return all_theta
def predict_all(X, all_theta):
    """Predict a class label (1-based) for every row of ``X``.

    Args:
        X: Design matrix with one sample per row (bias column included).
        all_theta: One row of classifier parameters per class.

    Returns:
        1-D integer array with the predicted label of each sample, where
        label ``k`` corresponds to row ``k - 1`` of ``all_theta``.
    """
    # The sigmoid is strictly increasing, so the class with the largest
    # logit is also the class with the largest probability. Comparing the
    # logits directly avoids spurious ties when several probabilities
    # round to exactly 1.0 in floating point.
    scores = X @ all_theta.T
    # argmax is 0-based while the labels start at 1.
    h_argmax = np.argmax(scores, axis=1) + 1
    print('the type of argmax is ', type(h_argmax))
    print('the shape of argmax is', h_argmax.shape)
    return h_argmax
raw_X, raw_y = load_data(path)

# np.insert(arr, obj, values, axis): insert the value 1 before index 0 of
# raw_X along axis=1, i.e. prepend a column of ones (axis=0 would insert a row).
X = np.insert(raw_X, 0, values=1, axis=1)
print('the shape of X is', X.shape)

# Collapse the label array to one dimension.
y = raw_y.flatten()
print('the shape of y is', y.shape)

# Train the 10 one-vs-all classifiers with regularization strength 1.
all_theta = one_vs_all(X, y, lam=1, K=10)
print('the shape of all theta is ', all_theta.shape)

# Predict a label for every training sample.
result = predict_all(X, all_theta)
print(result.shape)
def cal_accuracy(result, y):
    """Return the fraction of predictions that equal the true labels.

    Args:
        result: Predicted labels, one per sample.
        y: True labels, one per sample (a column vector is accepted too).

    Returns:
        The mean of the element-wise matches, a float between 0 and 1.

    Raises:
        ValueError: If ``result`` and ``y`` hold different numbers of labels.
    """
    # Flatten both so a (m, 1) label array compares element-wise with a
    # (m,) prediction array instead of broadcasting to (m, m).
    predicted = np.ravel(result)
    actual = np.ravel(y)
    if predicted.shape != actual.shape:
        raise ValueError(
            'result and y must contain the same number of labels, got '
            '{} and {}'.format(predicted.size, actual.size)
        )
    return np.mean(predicted == actual)
# Element-wise comparison of the predictions with the true labels.
print(result == y)

# Disabled scratch code kept from the original listing:
# print(cal_accuracy(result,y))
# theta2=np.zeros((10,X.shape[1]))
# theta2 = theta2[1:]
# print(theta2.shape)
# t=(theta2@theta2) / (2*len(X))
# print(t.shape)
# t1=theta2*theta2
# print(t1.shape)

思路总结：

1、首先从指定的数据中读取到X、y，根据需要初始化一个θ

2、写出假设函数和代价函数，其中需要注意的是正则化的代价函数不惩罚第一项θ₀，因此计算正则项时需要去掉θ的第一个元素（即只使用θ[1:]）

3、根据公式写出梯度函数

4、用高级优化算法对θ进行优化，需要注意的是多分类问题要针对每个类别分别训练一个分类器（用for循环实现），并把每次优化得到的θ存入all_theta的对应行

5、训练好θ以后将其代入假设函数中，对于多分类问题，取概率最大的那一类作为预测出的类别

6、对预测结果进行统计整理，再和实际的值进行比较，计算准确率

在练习过程中发现对numpy的一些常用函数掌握得还不够深刻，包括切片、降维等操作，另外对画图这一块更生疏，需要查漏补缺

套用公式矩阵相乘时，对于谁和谁相乘，是否需要转置有时候反应不过来，最好的办法就是打出它们的shape根据矩阵相乘的特点进行操作。

扫描二维码关注公众号，回复： 6382405 查看本文章

机器学习自学总结笔记——多分类逻辑回归

猜你喜欢