Python实现LogisticRegression

数据集:

链接:https://pan.baidu.com/s/1Hp8MYa6Q6QBzexlJ2z9mDw&shfl=sharepset 
提取码:azbe 

引入库,加载文件

 1 import matplotlib.pyplot as plt
 2 import numpy as np
 3 import pandas as pd
 4 from scipy import optimize
 5 
 6 def loadtxtAndcsv(fileName,split,datatype):
 7     return np.loadtxt(fileName,delimiter=split,dtype= datatype)
 8 
 9 def loadnpy(fileName):
10     return np.load(fileName)

画图函数

def plotData(X, y):
    """Scatter-plot the two classes: '+' markers for y == 1, 'o' for y == 0."""
    positives = np.where(y == 1)
    negatives = np.where(y == 0)

    plt.plot(X[positives, 0], X[positives, 1], '+')
    plt.plot(X[negatives, 0], X[negatives, 1], 'o')
    plt.title("Two kinds of data's comparison")
    plt.show()

逻辑回归Sigmoid函数,后面在神经网络中,作为激活函数:

def sigmoid(z):
    """Logistic sigmoid, 1 / (1 + e^-z); also used later as a NN activation."""
    exp_neg = np.exp(-z)
    return 1.0 / (1.0 + exp_neg)

代价函数和梯度的计算:

def costFunction(initial_theta, X, y, inital_lambda):
    """Regularized logistic-regression cost J(theta).

    initial_theta: parameter vector (theta[0] is the bias term).
    X: design matrix whose first column is expected to be all ones.
    y: 0/1 labels. inital_lambda: regularization strength (0 disables it).
    Returns the scalar-like cost (a 0-d / 1-element ndarray from the dot products).
    """
    m = len(y)
    h = sigmoid(np.dot(X, initial_theta))      # h(z) = model probability
    theta1 = initial_theta.copy()
    theta1[0] = 0                              # bias is not regularized (j starts at 1)
    reg = np.dot(np.transpose(theta1), theta1) * inital_lambda / 2
    # Cross-entropy loss plus the L2 penalty, averaged over m examples.
    J = (-np.dot(np.transpose(y), np.log(h))
         - np.dot(np.transpose(1 - y), np.log(1 - h))
         + reg) / m
    return J

def gradient(initial_theta, X, y, inital_lambda):
    """Gradient of the regularized cost with respect to theta.

    Same arguments as costFunction; returns a vector shaped like initial_theta.
    """
    m = len(y)
    h = sigmoid(np.dot(X, initial_theta))      # h(z)
    theta1 = initial_theta.copy()
    theta1[0] = 0                              # do not regularize the bias term
    grad = np.dot(np.transpose(X), h - y) / m + inital_lambda / m * theta1
    return grad

画出边界曲线:

def plotDecisionBoundary(theta, X, y):
    """Plot the data and the linear decision boundary theta^T [1, x1, x2] = 0.

    theta: fitted 3-element parameter vector [bias, w1, w2].
    X: raw two-feature data (no leading ones column). y: 0/1 labels.
    """
    pos = np.where(y == 1)
    neg = np.where(y == 0)

    plt.plot(X[pos, 0], X[pos, 1], '+')
    plt.plot(X[neg, 0], X[neg, 1], 'o')
    plt.title("DecisionBoundary")
    # Two points are enough to define a line; take x just beyond the data range.
    plot_x = np.array([np.min(X[:, 1]) - 2, np.max(X[:, 1]) + 2])
    # theta0 + theta1*x + theta2*y = 0  =>  y = -(theta1*x + theta0) / theta2
    plot_y = (-1 / theta[2]) * (theta[1] * plot_x + theta[0])
    plt.plot(plot_x, plot_y)
    plt.show()

预测:

def predict(X, theta):
    """Predict 0/1 class labels for each row of X under parameters theta.

    Returns a float array shaped like sigmoid(X @ theta): 1.0 where the
    predicted probability is strictly greater than 0.5, else 0.0.
    """
    # Probability of the positive class for every example.
    probs = sigmoid(np.dot(X, theta))
    # Vectorized threshold replaces the original per-row Python loop
    # (which also pre-allocated a zeros array it immediately overwrote).
    return np.where(probs > 0.5, 1.0, 0.0)

主函数:

def LogisticRegression():
    """End-to-end demo: load ex2data1.txt, fit theta with BFGS, report accuracy."""
    data = loadtxtAndcsv("ex2data1.txt", ",", np.float64)
    X = data[:, 0:-1]
    y = data[:, -1]
    y = np.transpose(y)
    print("得到数据的图像")
    plotData(X, y)

    # Prepend the bias column of ones and start from an all-zero theta.
    m, n = np.shape(X)
    X = np.hstack((np.ones((m, 1)), X))
    initial_theta = np.zeros((n + 1, 1))
    cost = costFunction(initial_theta, X, y, 0)
    grad = gradient(initial_theta, X, y, 0)
    # lambda = 0: unregularized fit via BFGS with an analytic gradient.
    result = optimize.fmin_bfgs(costFunction, initial_theta, fprime=gradient, args=(X, y, 0))

    p = predict(X, result)
    print(u'在训练集上的准确度为%f%%' % np.mean(np.float64(p == y) * 100))

    # Re-load the raw features (without the ones column) to draw the boundary.
    X = data[:, 0:-1]
    y = data[:, -1]
    plotDecisionBoundary(result, X, y)
    
# Script entry point: run the full logistic-regression demo.
if __name__ == "__main__":
    LogisticRegression()

猜你喜欢

转载自www.cnblogs.com/boniface/p/11705049.html
今日推荐