版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/Elvirangel/article/details/84196304
问题:编程实现对数几率回归,并给出西瓜数据集3.0a上的结果
方法一:使用sklearn实现对数几率回归
import numpy as np
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
# Watermelon dataset 3.0a: one row per melon, columns are
# [density, sugar_content, label] with label 1 = good melon, 0 = bad melon.
data = np.array([
    [0.697, 0.460, 1], [0.774, 0.376, 1], [0.634, 0.264, 1], [0.608, 0.318, 1],
    [0.556, 0.215, 1], [0.403, 0.237, 1], [0.481, 0.149, 1], [0.437, 0.211, 1],
    [0.666, 0.091, 0], [0.243, 0.267, 0], [0.245, 0.057, 0], [0.343, 0.099, 0],
    [0.639, 0.161, 0], [0.657, 0.198, 0], [0.360, 0.370, 0], [0.593, 0.042, 0],
    [0.719, 0.103, 0],
])
X = data[:, 0:2]   # feature matrix: density, sugar content
Y = data[:, -1]    # 0/1 labels
# Scatter plot of the raw dataset: black = bad melons (0), green = good (1).
fig_data = plt.figure(1)
plt.title("watermelon_3a")
plt.xlabel("密度")
plt.ylabel("含糖量")
# bad class is drawn first, then good, as in the original ordering
for lbl, colour, flag in (('bad', 'k', 0), ('good', 'g', 1)):
    mask = Y == flag
    plt.scatter(X[mask, 0], X[mask, 1], marker='o', color=colour, s=100, label=lbl)
plt.legend(loc='upper right')
plt.show()
# Method 1: sklearn LogisticRegression.
# Evaluate on a 50/50 train/test split, then refit on all samples and
# report training-set accuracy.
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    X, Y, test_size=0.5, random_state=0)
log_model = LogisticRegression()
log_model.fit(X_train, Y_train)
Y_pred = log_model.predict(X_test)
print(metrics.confusion_matrix(Y_test, Y_pred))
print(metrics.classification_report(Y_test, Y_pred))
# Refit on the full dataset and measure accuracy there.
log_model.fit(X, Y)
Y_pred = log_model.predict(X)
print(Y_pred)
# Vectorized count instead of a manual loop with the hard-coded sample
# count 17; also fixes the "accuect" typo in the printed message.
right = int(np.sum(Y_pred == Y))
print("accuracy is :", right / len(Y) * 100)
方法二:自己用python实现对数几率回归
import numpy as np
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
# Watermelon dataset 3.0a: each row is [density, sugar_content, label],
# label 1 = good melon, 0 = bad melon (first 8 rows good, last 9 bad).
data = np.array([[0.697, 0.460, 1],
[0.774, 0.376, 1],
[0.634, 0.264, 1],
[0.608, 0.318, 1],
[0.556, 0.215, 1],
[0.403, 0.237, 1],
[0.481, 0.149, 1],
[0.437, 0.211, 1],
[0.666, 0.091, 0],
[0.243, 0.267, 0],
[0.245, 0.057, 0],
[0.343, 0.099, 0],
[0.639, 0.161, 0],
[0.657, 0.198, 0],
[0.360, 0.370, 0],
[0.593, 0.042, 0],
[0.719, 0.103, 0]])
X=data[:,0:2]  # feature matrix: density and sugar content
Y=data[:,-1]  # 0/1 labels
# Visualize the dataset before training: black = bad (0), green = good (1).
fig1 = plt.figure(1)
plt.title("watermelon_3a")
plt.xlabel("密度")
plt.ylabel("含糖量")
# keep the original draw order: bad first, then good
for label_text, col, cls in (('bad', 'k', 0), ('good', 'g', 1)):
    sel = Y == cls
    plt.scatter(X[sel, 0], X[sel, 1], marker='o', color=col, s=100, label=label_text)
plt.legend(loc='upper right')
plt.show()
# Batch gradient descent (finite-difference variant).
def logit(beta, x, y):
    """Per-sample negative log-likelihood of logistic regression.

    beta: 1-D parameter vector (w1, w2, b); x: augmented feature vector
    (density, sugar, 1); y: 0/1 label.  Implements
    -y * beta^T x + log(1 + exp(beta^T x)).
    """
    z = np.dot(beta, x)  # beta is 1-D, so the original beta.T was a no-op
    # np.logaddexp(0, z) == log(1 + exp(z)) without overflow for large z;
    # the original used np.math.exp, and np.math was removed in NumPy 1.25.
    return -y * z + np.logaddexp(0, z)
def logitFunction(beta, X, Y):
    """Total negative log-likelihood of the dataset under parameters beta.

    Sums logit(beta, X[i], Y[i]) over all m rows.  The original
    accumulated into a variable named `sum`, shadowing the builtin.
    """
    m = np.shape(X)[0]
    return sum(logit(beta, X[i], Y[i]) for i in range(m))
def gradDescent(X, Y):
    """Coordinate-wise finite-difference gradient descent on the logistic loss.

    X: (m, n) augmented design matrix (last column is ones for the bias).
    Y: (m,) array of 0/1 labels.
    Returns (beta, old_beta): the final parameter vector and the stacked
    history of beta, one row per iteration (row 0 is the all-zero start).
    """
    h = 0.1                  # learning-rate factor on the difference quotient
    max_iters = 500
    m, n = np.shape(X)
    beta = np.zeros(n)
    deta_beta = np.ones(n)   # per-coordinate probe size, reused as the step
    llh = 0                  # loss from the previous iteration (starts at 0 as
                             # in the original scheme, so the first step is biased)
    old_beta = beta
    for _ in range(max_iters):
        # BUG FIX: the original `beta_temp = beta` only aliased the array, so
        # the restore below was a no-op and every probe perturbation leaked
        # into beta.  Taking a real copy makes the restore effective.
        beta_temp = beta.copy()
        for j in range(n):
            beta[j] += deta_beta[j]                # probe coordinate j
            llh_temp = logitFunction(beta, X, Y)
            # step = -h * (finite-difference slope along coordinate j)
            # NOTE(review): if a step ever becomes exactly 0 the next probe
            # divides by zero — kept as in the original scheme.
            deta_beta[j] = -h * (llh_temp - llh) / deta_beta[j]
            beta[j] = beta_temp[j]                 # undo the probe
        beta += deta_beta
        llh = logitFunction(beta, X, Y)
        old_beta = np.vstack((old_beta, beta))     # np.row_stack is deprecated
    return beta, old_beta
if __name__ == '__main__':
    # Augment X with a bias column so that beta = (w1, w2, b).
    X = np.column_stack((X, np.ones(np.shape(X)[0])))
    beta, old_beta = gradDescent(X, Y)
    print(beta)
    # Plot the trajectory of beta[0], beta[1] and beta[2] (= b) over the iterations.
    f2 = plt.figure(2)
    m, n = np.shape(old_beta)
    print(m, n)
    print(old_beta)
    iters = np.arange(0, m - 1, 1)
    for idx, (sub, name) in enumerate(((311, "deta1"), (312, "deta2"), (313, "b"))):
        plt.subplot(sub)
        plt.plot(iters, old_beta[1:, idx])
        # BUG FIX: the axis labels were swapped — the x axis is the iteration
        # count, the y axis is the parameter value.
        plt.xlabel("iters")
        plt.ylabel(name)
    plt.show()
# Test the learned logistic regression model on the training set and
# report accuracy (threshold the sigmoid at 0.5).
z = np.dot(X, beta)                  # X already carries the bias column
# Vectorized sigmoid + threshold; the original per-element np.math.exp no
# longer works (np.math was removed in NumPy 1.25) and hard-coded 17 samples.
Y_pre = (1.0 / (1.0 + np.exp(-z)) > 0.5).astype(float).reshape(-1, 1)
print(Y_pre)
right = int(np.sum(Y_pre.ravel() == Y))
# also fixes the "accuect" typo in the printed message
print("accuracy is :", right / len(Y) * 100)
# Plot the decision boundary (best-fit separating line).
# Setting beta[0]*x1 + beta[1]*x2 + b = 0 gives x2 as a function of x1;
# by the sigmoid 1/(1+exp(-z)), z = 0 is where the two classes split.
f3 = plt.figure(3)
plt.title("watermelon_3a")
plt.xlabel("密度")
plt.ylabel("含糖量")
plt.scatter(X[Y == 0, 0], X[Y == 0, 1], marker = 'o', color = 'k', s = 100, label = 'bad')
plt.scatter(X[Y == 1, 0], X[Y == 1, 1], marker = 'o', color = 'g', s = 100, label = 'good')
plt.legend(loc = 'upper right')
xcord = np.arange(0, 1, 0.001)
# BUG FIX: the original computed ycord from the data matrix X and plotted
# plt.plot(X, ycord); the boundary must be evaluated on the xcord grid.
ycord = (-beta[0] * xcord - beta[2]) / beta[1]
plt.plot(xcord, ycord)
plt.xlabel("x1")
plt.ylabel("x2")
plt.show()
方法二的运行结果:
beta的值如下:beta[0]、beta[1]、beta[2](=参数b)