[Machine Learning] Solving for Logistic Regression Parameters (Three Methods, with Code)

Quick Review

  1. What is logistic regression?
  2. What is logistic regression used for?
  3. How do we solve for its parameters?

Links:
The logistic regression model
Solving for the logistic regression parameters

Data Preparation

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

np.random.seed(12)
num_observations = 5000

# Generate data from the given means and covariances
# np.random.multivariate_normal draws samples from a multivariate normal distribution
x1 = np.random.multivariate_normal([0, 0], [[1, .75], [.75, 1]], num_observations)
x2 = np.random.multivariate_normal([1, 4], [[1, .75], [.75, 1]], num_observations)


simulated_separableish_features = np.vstack((x1, x2)).astype(np.float32)
simulated_labels = np.hstack((np.zeros(num_observations),np.ones(num_observations)))

plt.figure(figsize=(12,8))
plt.scatter(simulated_separableish_features[:, 0], simulated_separableish_features[:, 1],
            c = simulated_labels, alpha = .4)

(Figure: scatter plot of the two simulated classes, colored by label.)

Method 1: Solving with sklearn's Built-in LogisticRegression

# 1. Import
from sklearn.linear_model import LogisticRegression

# 2. Configure the model
# A very large C effectively turns off L2 regularization,
# so the result is comparable to plain maximum likelihood
clf = LogisticRegression(fit_intercept=True, C=1e15)

# 3. Fit / train
clf.fit(simulated_separableish_features, simulated_labels)

# 4. Predict
## No prediction is needed here

print(clf.coef_, clf.intercept_)

(Output: the fitted coefficients clf.coef_ and the intercept clf.intercept_.)
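
As a quick sanity check (not in the original post), you can also ask sklearn for the mean accuracy on the training data via its built-in score method:

# Mean training accuracy of the fitted model (a quick sanity check)
train_acc = clf.score(simulated_separableish_features, simulated_labels)
print("training accuracy: %.4f" % train_acc)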

Method 2: Solving with Gradient Descent
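
For reference, what gradient_eg and GradDe_eg below actually compute is a gradient ascent step on the mean log-likelihood. With $z = \theta_1 x_1 + \theta_2 x_2 + \theta_3$ and $\sigma(z) = 1/(1+e^{-z})$, each iteration updates

$$
\theta_j \leftarrow \theta_j + \alpha \cdot \frac{1}{n}\sum_{i=1}^{n}\left(y^{(i)} - \sigma\!\left(z^{(i)}\right)\right)x_j^{(i)}, \qquad x_3^{(i)} \equiv 1 \text{ (for the intercept)}
$$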

# Sigmoid of the linear model z = theta_1*x1 + theta_2*x2 + theta_3
def sigmoid_eg(x1, x2, theta_1, theta_2, theta_3):
    z = (theta_1*x1 + theta_2*x2 + theta_3).astype("float64")
    return 1.0 / (1.0 + np.exp(-z))

# Gradient of the mean log-likelihood with respect to each parameter
def gradient_eg(x1, x2, y, theta_1, theta_2, theta_3):
    sigmoid_probs = sigmoid_eg(x1, x2, theta_1, theta_2, theta_3)
    return (np.mean((y - sigmoid_probs)*x1),
            np.mean((y - sigmoid_probs)*x2),
            np.mean(y - sigmoid_probs))

# Gradient ascent: move the parameters along the gradient for Max_Loop steps
def GradDe_eg(x1, x2, y, Max_Loop=20, alpha=0.1):
    # Initial values
    theta_1 = 0.1
    theta_2 = -0.4
    theta_3 = 0.56
    for l in range(Max_Loop):
        delta1, delta2, delta3 = gradient_eg(x1, x2, y, theta_1, theta_2, theta_3)
        theta_1 = theta_1 + alpha*delta1
        theta_2 = theta_2 + alpha*delta2
        theta_3 = theta_3 + alpha*delta3
        if l % 1000 == 0:
            print('delta%d =' % l, [delta1, delta2, delta3])
            print('theta%d =' % l, [theta_1, theta_2, theta_3], '\n')
    return [theta_1, theta_2, theta_3]

weights = GradDe_eg(simulated_separableish_features[:, 0],
                    simulated_separableish_features[:, 1],
                    simulated_labels, 200000, 0.9)
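
As an aside (not in the original post), the same update can be written in vectorized form, which avoids handling each parameter by hand. X_aug and grad_ascent_vec below are illustrative names; this is a minimal sketch using the same initial values and step size:

# Illustrative vectorized variant of the same gradient ascent
X_aug = np.hstack([simulated_separableish_features,
                   np.ones((len(simulated_labels), 1), dtype=np.float32)])  # bias column

def grad_ascent_vec(X, y, n_steps=200000, alpha=0.9):
    theta = np.array([0.1, -0.4, 0.56])            # same initial values as GradDe_eg
    for _ in range(n_steps):
        p = 1.0 / (1.0 + np.exp(-(X @ theta)))     # predicted probabilities
        theta += alpha * (X.T @ (y - p)) / len(y)  # mean log-likelihood gradient
    return theta

# weights_vec = grad_ascent_vec(X_aug, simulated_labels)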

Method 3: Newton's Method
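
The Newton step that gradient_nt, hessian, and newtons_method below implement is

$$
\theta \leftarrow \theta + H^{-1}g, \qquad
g = \sum_{i}\left(y^{(i)} - \sigma^{(i)}\right)x^{(i)}, \qquad
H = \sum_{i}\sigma^{(i)}\left(1-\sigma^{(i)}\right)x^{(i)}x^{(i)\top}
$$

with $x^{(i)} = (x_1^{(i)}, x_2^{(i)}, 1)^\top$. Note that $H$ here is the negative of the log-likelihood's Hessian (it is positive semi-definite), which is why the step is added rather than subtracted.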

# Same sigmoid as in Method 2
def sigmoid_eg(x1, x2, theta_1, theta_2, theta_3):
    z = (theta_1*x1 + theta_2*x2 + theta_3).astype("float64")
    return 1.0 / (1.0 + np.exp(-z))

# Cross-entropy cost (negative mean log-likelihood)
def Cost(x1, x2, y, theta_1, theta_2, theta_3):
    sigmoid_probs = sigmoid_eg(x1, x2, theta_1, theta_2, theta_3)
    return -np.mean(y * np.log(sigmoid_probs) + (1 - y) * np.log(1 - sigmoid_probs))

# Gradient of the log-likelihood (summed, not averaged)
def gradient_nt(x1, x2, y, theta_1, theta_2, theta_3):
    sigmoid_probs = sigmoid_eg(x1, x2, theta_1, theta_2, theta_3)
    return np.array([np.sum((y - sigmoid_probs) * x1),
                     np.sum((y - sigmoid_probs) * x2),
                     np.sum(y - sigmoid_probs)])

# 3x3 Hessian; the matrix is symmetric, so only the upper triangle is computed
def hessian(x1, x2, y, theta_1, theta_2, theta_3):
    sigmoid_probs = sigmoid_eg(x1, x2, theta_1, theta_2, theta_3)
    w = sigmoid_probs * (1 - sigmoid_probs)
    d1 = np.sum(w * x1 * x1)
    d2 = np.sum(w * x1 * x2)
    d3 = np.sum(w * x1)
    d5 = np.sum(w * x2 * x2)
    d6 = np.sum(w * x2)
    d9 = np.sum(w)
    H = np.array([[d1, d2, d3], [d2, d5, d6], [d3, d6, d9]])
    return H

# Sanity check: evaluate the sigmoid at the initial parameters
theta_1 = 0.001
theta_2 = -0.4
theta_3 = 0.6
sigmoid_probs = sigmoid_eg(simulated_separableish_features[:, 0],
                           simulated_separableish_features[:, 1],
                           theta_1, theta_2, theta_3)
sigmoid_probs  # notebook-style display of the probabilities

def newtons_method(x1, x2, y):
    # Initialize the cost and the parameters
    theta_1 = 0.001
    theta_2 = -0.4
    theta_3 = 0.6
    delta_l = np.inf
    l = Cost(x1, x2, y, theta_1, theta_2, theta_3)
    # Convergence conditions
    tol = 1e-10
    max_iterations = 15
    i = 0
    while abs(delta_l) > tol and i < max_iterations:
        i += 1
        g = gradient_nt(x1, x2, y, theta_1, theta_2, theta_3)
        hess = hessian(x1, x2, y, theta_1, theta_2, theta_3)
        H_inv = np.linalg.inv(hess)
        # @ is shorthand for np.dot; this is the Newton step H^{-1} g
        delta = H_inv @ g
        delta_theta_1 = delta[0]
        delta_theta_2 = delta[1]
        delta_theta_3 = delta[2]
        print(theta_1, theta_2, theta_3, l, g)

        # Perform the update step
        theta_1 += delta_theta_1
        theta_2 += delta_theta_2
        theta_3 += delta_theta_3

        # Recompute the cost and check how much it changed
        l_new = Cost(x1, x2, y, theta_1, theta_2, theta_3)
        delta_l = l - l_new
        l = l_new
    return np.array([theta_1, theta_2, theta_3])

weights_nt = newtons_method(simulated_separableish_features[:, 0],
                            simulated_separableish_features[:, 1],
                            simulated_labels)

Comparison of the Parameters from the Three Methods

(Figure: the parameter values obtained by the three methods, shown side by side.)
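
If you prefer text output to a screenshot, here is a minimal sketch that prints the three parameter sets side by side, using the variables defined above:

# Print the parameters from all three methods for comparison
print("sklearn:          ", list(clf.coef_[0]) + list(clf.intercept_))
print("gradient descent: ", weights)
print("Newton's method:  ", list(weights_nt))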

As the results above show, all three methods arrive at essentially the same parameters. If you want to see which solver is faster, you can adapt the code and time each method, as sketched below.
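
For instance, a rough timing comparison (not in the original post) could use time.perf_counter; exact numbers will depend on your machine and on the iteration counts chosen above:

import time

# Rough, illustrative timing of two of the solvers; results vary by machine.
# GradDe_eg with 200,000 iterations is omitted since it runs far longer;
# add it back if you want the full comparison.
timed_solvers = [
    ("sklearn", lambda: LogisticRegression(fit_intercept=True, C=1e15)
                        .fit(simulated_separableish_features, simulated_labels)),
    ("newton", lambda: newtons_method(simulated_separableish_features[:, 0],
                                      simulated_separableish_features[:, 1],
                                      simulated_labels)),
]
for name, solve in timed_solvers:
    start = time.perf_counter()
    solve()
    print("%s took %.3f s" % (name, time.perf_counter() - start))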

Note:
I have not annotated every function definition, but they all follow directly from the formulas; you can read them alongside the formulas in the linked posts.
There may well be mistakes in this article; please bear with me, as I am still learning.


Reposted from blog.csdn.net/daycym/article/details/80627682