"Watermelon book" Chapter III, Linear Regression

▶ Using linear regression for classification, as a casual exercise.
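
The idea: fit an ordinary least-squares regression to 0/1 class labels, then threshold the continuous prediction at 0.5 to turn it into a classifier. A minimal sketch of just that idea (my own illustration, using np.linalg.lstsq and an arbitrary seed; it is not part of the original code below):

    import numpy as np

    rng = np.random.default_rng(0)                        # illustrative seed, not from the original post
    X = 2 * rng.random((200, 2))                          # 200 points in [0, 2)^2
    y = (X[:,1] > 0.5 * (X[:,0] + 1)).astype(float)       # same boundary as the dim == 2 case below

    Xe = np.hstack([X, np.ones((200, 1))])                # append a bias column
    w, *_ = np.linalg.lstsq(Xe, y, rcond=None)            # least-squares weights, last entry is the bias
    pred = (Xe @ w > 0.5).astype(int)                     # threshold the regression output at 0.5
    print("training error ratio:", np.mean(pred != y))

The full code below does the same thing with an explicit normal-equation solution and plots the results for 1, 2 and 3 dimensions.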

● Code

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
from matplotlib.patches import Rectangle

dataSize = 10000
trainRatio = 0.3
colors = [[0.5,0.25,0],[1,0,0],[0,0.5,0],[0,0,1],[1,0.5,0]]     # brown, red, green, blue, orange
trans = 0.5

def dataSplit(data, part):                                  # split the data set into a training set and a test set
    return data[0:part,:], data[part:,:]

def function(x, para):                                      # continuous regression function
    return np.sum(para[0] * x) + para[1]

def judge(x, para):                                         # classification function, thresholded at 0.5
    return int(function(x, para) > 0.5)

def createData(dim, len):                                   # generate test data
    np.random.seed(103)
    output = np.zeros([len, dim+1])

    if dim == 1:
        temp = 2 * np.random.rand(len)
        output[:,0] = temp
        output[:,1] = list(map(lambda x : int(x > 1), temp))
        #print(output, "\n", np.sum(output[:,-1])/len)
        return output
    if dim == 2:
        output[:,0] = 2 * np.random.rand(len)
        output[:,1] = 2 * np.random.rand(len)
        output[:,2] = list(map(lambda x,y : int(y > 0.5 * (x + 1)), output[:,0], output[:,1]))
        #print(output, "\n", np.sum(output[:,-1])/len)
        return output
    if dim == 3:
        output[:,0] = 2 * np.random.rand(len)
        output[:,1] = 2 * np.random.rand(len)
        output[:,2] = 2 * np.random.rand(len)
        output[:,3] = list(map(lambda x,y,z : int(-3 * x + 2 * y + 2 * z > 0), output[:,0], output[:,1], output[:,2]))
        #print(output, "\n", np.sum(output[:,-1])/len)
        return output
    else:
        for i in range(dim):
            output[:,i] = 2 * np.random.rand(len)
        output[:,dim] = list(map(lambda x : int(x > 1), (3 - 2 * dim)*output[:,0] + 2 * np.sum(output[:,1:dim], 1)))
        #print(output, "\n", np.sum(output[:,-1])/len)
        return output

def linearRegression(data):                                 # linear regression
    len = np.shape(data)[0]
    dim = np.shape(data)[1] - 1
    if dim == 1:                                            # single variable: closed-form simple regression
        sumX = np.sum(data[:,0])
        sumY = np.sum(data[:,1])
        sumXY = np.sum([x*y for x,y in data])
        sumXX = np.sum([x*x for x in data[:,0]])
        w = (sumXY * len - sumX * sumY) / (sumXX * len - sumX * sumX)
        b = (sumY - sumX * w) / len
        return (w, b)
    else:                                                   # two or more variables; rank deficiency is not handled for now
        dataE = np.concatenate((data[0:, :-1], np.ones(len)[:,np.newaxis]), axis = 1)
        w = np.matmul(np.matmul(np.linalg.inv(np.matmul(dataE.T, dataE)), dataE.T), data[:,-1])    # w = (X^T * X)^(-1) * X^T * y
        return (w[0:-1], w[-1])

def test(dim):                                              # test driver
    allData = createData(dim, dataSize)
    trainData, testData = dataSplit(allData, int(dataSize * trainRatio))

    para = linearRegression(trainData)

    myResult = [ judge(i[0:dim], para) for i in testData ]
    errorRatio = np.sum((np.array(myResult) - testData[:,-1].astype(int))**2) / (dataSize*(1-trainRatio))
    print("dim = " + str(dim) + ", errorRatio = " + str(round(errorRatio, 4)))
    if dim >= 4:                                            # no plot above 3 dimensions, only print the error ratio
        return

    errorP = []                                             # plotting section: split the test points into errors, class 1 and class 0
    class1 = []
    class0 = []
    for i in range(np.shape(testData)[0]):
        if myResult[i] != testData[i,-1]:
            errorP.append(testData[i])
        elif myResult[i] == 1:
            class1.append(testData[i])
        else:
            class0.append(testData[i])
    errorP = np.array(errorP)
    class1 = np.array(class1)
    class0 = np.array(class0)

    fig = plt.figure(figsize=(10, 8))

    if dim == 1:
        plt.xlim(0.0,2.0)
        plt.ylim(-0.5,1.25)
        plt.plot([1, 1], [-0.5, 1.25], color = colors[0], label = "realBoundary")
        xx = np.arange(0,2,0.2)
        plt.plot(xx, [function(i, para) for i in xx], color = colors[4], label = "myF")
        plt.scatter(class1[:,0], class1[:,1], color = colors[1], s = 2, label = "class1Data")
        plt.scatter(class0[:,0], class0[:,1], color = colors[2], s = 2, label = "class0Data")
        plt.scatter(errorP[:,0], errorP[:,1], color = colors[3], s = 16, label = "errorData")
        plt.text(0.4, 1.12, "realBoundary: 2x = 1\nmyF(x) = " + str(round(para[0],2)) + " x + " + str(round(para[1],2)) + "\n errorRatio = " + str(round(errorRatio,4)),\
            size=15, ha="center", va="center", bbox=dict(boxstyle="round", ec=(1., 0.5, 0.5), fc=(1., 1., 1.)))
        R = [Rectangle((0,0),0,0, color = colors[k]) for k in range(5)]
        plt.legend(R, ["realBoundary", "class1Data", "class0Data", "errorData", "myF"], loc=[0.81, 0.2], ncol=1, numpoints=1, framealpha = 1)

    if dim == 2:
        plt.xlim(0.0,2.0)
        plt.ylim(0.0,2.0)
        xx = np.arange(0, 2 + 0.2, 0.2)
        plt.plot(xx, [function(i,(0.5,0.5)) for i in xx], color = colors[0], label = "realBoundary")
        X,Y = np.meshgrid(xx, xx)
        contour = plt.contour(X, Y, [ [ function((X[i,j],Y[i,j]), para) for j in range(11)] for i in range(11) ])
        plt.clabel(contour, fontsize = 10, colors = 'k')
        plt.scatter(class1[:,0], class1[:,1], color = colors[1], s = 2, label = "class1Data")
        plt.scatter(class0[:,0], class0[:,1], color = colors[2], s = 2, label = "class0Data")
        plt.scatter(errorP[:,0], errorP[:,1], color = colors[3], s = 8, label = "errorData")
        plt.text(1.48, 1.85, "realBoundary: -x + 2y = 1\nmyF(x,y) = " + str(round(para[0][0],2)) + " x + " + str(round(para[0][1],2)) + " y + " + str(round(para[1],2)) + "\n errorRatio = " + str(round(errorRatio,4)), \
            size = 15, ha="center", va="center", bbox=dict(boxstyle="round", ec=(1., 0.5, 0.5), fc=(1., 1., 1.)))
        R = [Rectangle((0,0),0,0, color = colors[k]) for k in range(4)]
        plt.legend(R, ["realBoundary", "class1Data", "class0Data", "errorData"], loc=[0.81, 0.2], ncol=1, numpoints=1, framealpha = 1)

    if dim == 3:
        ax = Axes3D(fig)
        ax.set_xlim3d(0.0, 2.0)
        ax.set_ylim3d(0.0, 2.0)
        ax.set_zlim3d(0.0, 2.0)
        ax.set_xlabel('X', fontdict={'size': 15, 'color': 'k'})
        ax.set_ylabel('Y', fontdict={'size': 15, 'color': 'k'})
        ax.set_zlabel('W', fontdict={'size': 15, 'color': 'k'})
        v = [(0, 0, 0.5), (0, 0.5, 0), (1, 2, 0), (2, 2, 1.5), (2, 1.5, 2), (1, 0, 2)]      # vertices of the realBoundary plane clipped to the plotting cube
        f = [[0, 1, 2, 3, 4, 5]]
        poly3d = [[v[i] for i in j] for j in f]
        ax.add_collection3d(Poly3DCollection(poly3d, edgecolor = 'k', facecolors = colors[0]+[trans], linewidths = 1))
        ax.scatter(class1[:,0], class1[:,1], class1[:,2], color = colors[1], s = 2, label = "class1")
        ax.scatter(class0[:,0], class0[:,1], class0[:,2], color = colors[2], s = 2, label = "class0")
        ax.scatter(errorP[:,0], errorP[:,1], errorP[:,2], color = colors[3], s = 8, label = "errorData")
        ax.text3D(1.62, 2, 2.35, "realBoundary: -3x + 2y +2z = 1\nmyF(x,y,z) = " + str(round(para[0][0],2)) + " x + " + \
            str(round(para[0][1],2)) + " y + " + str(round(para[0][2],2)) + " z + " + str(round(para[1],2)) + "\n errorRatio = " + str(round(errorRatio,4)), \
            size = 12, ha="center", va="center", bbox=dict(boxstyle="round", ec=(1, 0.5, 0.5), fc=(1, 1, 1)))
        R = [Rectangle((0,0),0,0, color = colors[k]) for k in range(4)]
        plt.legend(R, ["realBoundary", "class1Data", "class0Data", "errorData"], loc=[0.83, 0.1], ncol=1, numpoints=1, framealpha = 1)

    fig.savefig("R:\\dim" + str(dim) + ".png")
    plt.close()

if __name__ == '__main__':
    test(1)
    test(2)
    test(3)
    test(4)
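
The multivariate branch of linearRegression solves the normal equation with an explicit matrix inverse and, as its comment says, sets aside the rank-deficient case in which X^T * X is singular and np.linalg.inv fails. A small drop-in alternative (my addition, not from the original post; it reuses the numpy import above) that sidesteps the explicit inverse:

    def linearRegressionLstsq(data):                        # least squares via np.linalg.lstsq instead of an explicit inverse
        n = np.shape(data)[0]
        dataE = np.concatenate((data[:, :-1], np.ones(n)[:, np.newaxis]), axis = 1)
        w, *_ = np.linalg.lstsq(dataE, data[:, -1], rcond = None)   # minimum-norm least-squares solution
        return (w[0:-1], w[-1])

np.linalg.lstsq (or np.linalg.pinv(dataE) @ data[:,-1]) returns the minimum-norm solution even when dataE is rank-deficient, so it should behave the same as the closed-form version on this data while being numerically more robust.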

● Output

dim = 1, errorRatio = 0.003
dim = 2, errorRatio = 0.0307
dim = 3, errorRatio = 0.0186
dim = 4, errorRatio = 0.0349
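
For scale: with dataSize = 10000 and trainRatio = 0.3 the test set holds 7000 points, so these ratios correspond to roughly 21, 215, 130 and 244 misclassified test points for dim = 1 through 4 (e.g. 0.003 × 7000 = 21).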

Origin: www.cnblogs.com/cuancuancuanhao/p/11111014.html