import numpy as np
import matplotlib.pyplot as plt
def loadDataSet(fileName='testSet.txt'):
    """Load a whitespace-delimited data set with two features and a label.

    Every non-blank line must hold at least three fields: two values
    parsed as floats followed by a class label parsed as an int.

    Args:
        fileName: Path of the text file to read. Defaults to
            ``'testSet.txt'`` in the current working directory.

    Returns:
        A ``(dataMat, labelMat)`` tuple. ``dataMat`` is a list of
        ``[1.0, x1, x2]`` rows (a constant 1.0 is prepended to each
        sample) and ``labelMat`` is the list of integer labels.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be parsed as a number.
        IndexError: If a non-blank line has fewer than three fields.
    """
    dataMat = []
    labelMat = []
    # The context manager closes the file even if parsing raises.
    with open(fileName) as fr:
        for line in fr:
            lineArr = line.split()
            if not lineArr:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
            labelMat.append(int(lineArr[2]))
    return dataMat, labelMat
def sigmoid(inX):
    """Return the logistic function ``1 / (1 + exp(-inX))`` element-wise.

    Args:
        inX: A scalar or NumPy array/matrix of real values.

    Returns:
        Values in ``[0, 1]`` with the same shape (and array subclass)
        as ``inX``.
    """
    # 1 / (1 + exp(-x)) == exp(-log(exp(0) + exp(-x))).  logaddexp evaluates
    # the inner log without ever forming exp(-x) on its own, so large
    # negative inputs no longer overflow (and warn) on the way to 0.
    return np.exp(-np.logaddexp(0, -inX))
def gradAscent(dataMatIn, classLabels, alpha=0.001, maxCycles=500):
    """Fit logistic-regression weights by batch gradient ascent.

    Starting from all-ones weights, each cycle moves the weights by
    ``alpha * X^T * (labels - sigmoid(X * weights))``.

    Args:
        dataMatIn: 2-D sequence of samples, one row per sample and one
            column per feature.
        classLabels: 1-D sequence with one numeric label per sample.
        alpha: Step size applied to every update. Defaults to 0.001.
        maxCycles: Number of full-batch updates. Defaults to 500.

    Returns:
        An ``(n, 1)`` ``numpy.matrix`` of weights, where ``n`` is the
        number of feature columns.
    """
    # np.asmatrix rather than np.mat: the np.mat alias was removed in
    # NumPy 2.0, while asmatrix is available in both 1.x and 2.x.
    dataMatrix = np.asmatrix(dataMatIn)
    labelMat = np.asmatrix(classLabels).transpose()
    n = np.shape(dataMatrix)[1]
    # The transpose does not change between cycles, so take it once.
    dataTransposed = dataMatrix.transpose()
    weights = np.asmatrix(np.ones((n, 1)))
    for _ in range(maxCycles):
        # On matrices `*` is the matrix product, giving an (m, 1) column.
        h = sigmoid(dataMatrix * weights)
        error = labelMat - h
        weights = weights + alpha * dataTransposed * error
    return weights
def plotBestFit(weights):
    """Scatter-plot the data set and draw the fitted decision boundary.

    The samples come from ``loadDataSet()``. Samples labelled 1 are
    drawn as orange squares, all others with matplotlib's default marker.
    The line drawn satisfies ``w0 + w1*x1 + w2*x2 = 0`` for ``x1`` in
    ``[-3.0, 3.0)``.

    Args:
        weights: The three weights ``(w0, w1, w2)``, given as any
            array-like that flattens to three numbers (for example the
            ``(3, 1)`` matrix returned by ``gradAscent``).

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    dataMat, labelMat = loadDataSet()
    # reshape keeps the array 2-D even when the data set is empty.
    dataArr = np.array(dataMat, dtype=float).reshape(-1, 3)
    isPositive = np.array(labelMat, dtype=int) == 1

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(dataArr[isPositive, 1], dataArr[isPositive, 2],
               s=30, c='orange', marker='s')
    ax.scatter(dataArr[~isPositive, 1], dataArr[~isPositive, 2], s=30)

    # Flatten to a plain 1-D array so indexing yields scalars whether the
    # caller passed a matrix, a column vector or a flat sequence.  This
    # also avoids np.mat, which was removed in NumPy 2.0.
    w = np.asarray(weights, dtype=float).ravel()
    x = np.arange(-3.0, 3.0, 0.1)
    # Solve w0 + w1*x1 + w2*x2 = 0 for x2.
    y = (-w[0] - w[1] * x) / w[2]
    ax.plot(x, y)
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.show()
if __name__ == '__main__':
    # Script entry point: load the samples, fit the weights, show the plot.
    samples, labels = loadDataSet()
    fittedWeights = gradAscent(samples, labels)
    plotBestFit(fittedWeights)
# 最佳拟合直线如下所示: (The best-fit line is shown below.)