# Hand-written logistic regression algorithm

import numpy as np
# Dataset loading function
def get_data(file_name,sep='\t'):
    """Read a delimited text file of numbers into a 2-D NumPy array.

    Each non-blank line becomes one row; its fields, split on *sep*, are
    converted with ``float``.

    Args:
        file_name: Path of the text file to read.
        sep: Field separator used on every line (tab by default).

    Returns:
        ``np.ndarray`` of floats with one row per non-blank line.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be parsed as a float.
    """
    rows = []
    # The context manager guarantees the handle is closed even when a
    # malformed field makes float() raise half-way through the file.
    with open(file_name) as data_file:
        for line in data_file:
            line = line.rstrip('\r\n')
            # Skip blank lines (e.g. an empty line at the end of the file)
            # instead of failing on float('').
            if not line.strip():
                continue
            rows.append([float(value) for value in line.split(sep)])
    return np.array(rows)
# Load the training set: the last column is used as the label vector and
# every preceding column as a feature.
horse_train = get_data(u'...\\horseColicTraining.txt')
horse_train_x = horse_train[:, range(horse_train.shape[1] - 1)]
horse_train_y = horse_train[:, -1]
# Load the test set and split it the same way.
horse_test = get_data(u'...\\horseColicTest.txt')
horse_test_x = horse_test[:, range(horse_test.shape[1] - 1)]
horse_test_y = horse_test[:, -1]

# Solve logistic regression (LR) by gradient ascent.
# Log-likelihood: L = Y*ln(1/(1+exp(-XK))) + (1-Y)*ln(1 - 1/(1+exp(-XK)))
# Gradient:       d(L)/d(K) = X.T • (Y - 1/(1+exp(-XK)))
# Update rule:    K_{n+1} = K_n + d(L)/d(K_n) * s
class Logistic_Regression():
    """Binary logistic regression fitted by batch gradient ascent.

    The model maximises the log-likelihood
    ``L = y*ln(p) + (1-y)*ln(1-p)`` with ``p = sigmoid(X.K)``. Its gradient
    is ``X.T.(y - p)`` and every iteration applies ``K <- K + s * gradient``.

    After ``fit_gd`` the coefficients are stored in ``self.a`` as a column of
    shape ``(n_features + 1, 1)`` whose first row is the intercept.
    """

    def __init__(self):
        # Coefficient column; stays None until fit_gd() has been called.
        self.a = None

    def log_fun(self,x):
        """Return the sigmoid ``1 / (1 + exp(-x))`` of *x*, element-wise.

        Evaluated through the identity ``sigmoid(x) = 0.5 * (1 + tanh(x/2))``
        so that large negative inputs do not overflow ``exp``. Extremely
        small probabilities round to exactly 0.
        """
        return 0.5 * (1.0 + np.tanh(0.5 * x))

    def fit_gd(self,x,y,iter=1000,s=0.01,seed=None):
        """Estimate the coefficients by gradient ascent.

        No convergence test is performed; exactly ``iter`` updates are run.

        Args:
            x: 2-D array-like of shape ``(n_samples, n_features)``.
            y: Array-like of ``n_samples`` 0/1 labels (flat or column).
            iter: Number of gradient-ascent iterations. The name shadows the
                builtin but is kept so keyword callers keep working.
            s: Learning rate (step size).
            seed: Optional seed for the random initial coefficients. When
                ``None`` the global NumPy random state is used.

        Returns:
            The fitted coefficients as an ``np.matrix`` column of shape
            ``(n_features + 1, 1)``, intercept first. Also stored in
            ``self.a``.

        Raises:
            ValueError: If ``x`` is not 2-D or ``y`` has a different number
                of samples than ``x``.
        """
        features = np.asarray(x, dtype=float)
        if features.ndim != 2:
            raise ValueError("x must be a 2-D array of shape (n_samples, n_features)")
        # Force a column so both flat and column-shaped labels are accepted.
        targets = np.asarray(y, dtype=float).reshape(-1, 1)
        if targets.shape[0] != features.shape[0]:
            raise ValueError("x and y must contain the same number of samples")
        # Prepend a column of ones so the first coefficient is the intercept.
        design = np.hstack((np.ones((features.shape[0], 1)), features))
        rng = np.random if seed is None else np.random.RandomState(seed)
        # Initial coefficients drawn uniformly from [-0.5, 0.5).
        coef = (rng.rand(design.shape[1]) - 0.5).reshape(-1, 1)
        for _ in range(int(iter)):
            gradient = design.T.dot(targets - self.log_fun(design.dot(coef)))
            coef = coef + s * gradient
        # Kept as np.matrix so the stored/returned type matches what existing
        # callers of this method receive.
        self.a = np.asmatrix(coef)
        return self.a

    def _coefficients(self):
        """Return the fitted coefficients as a flat array, intercept first."""
        coef = getattr(self, "a", None)
        if coef is None:
            raise AttributeError("model is not fitted yet; call fit_gd() first")
        return np.asarray(coef, dtype=float).ravel()

    def predict_prob(self,x):
        """Return the predicted probability of class 1 for each row of *x*.

        Args:
            x: Array-like of shape ``(n_samples, n_features)``; a single
                1-D sample is treated as one row.

        Returns:
            1-D ``np.ndarray`` with one probability per sample.

        Raises:
            AttributeError: If the model has not been fitted.
        """
        coef = self._coefficients()
        features = np.atleast_2d(np.asarray(x, dtype=float))
        return self.log_fun(features.dot(coef[1:]) + coef[0])

    def predict_type(self,x,thre_var=0.5):
        """Return the predicted class (1.0 or 0.0) for each row of *x*.

        A sample is labelled 1 when its probability is strictly greater than
        ``thre_var`` and 0 otherwise.
        """
        return (self.predict_prob(x) > thre_var).astype(float)

    def predict_accuracy(self,x,y,thre_var=0.5):
        """Return the fraction of rows of *x* whose predicted class equals *y*.

        Args:
            x: Array-like of shape ``(n_samples, n_features)``.
            y: Array-like of ``n_samples`` true labels (flat or column).
            thre_var: Probability threshold forwarded to ``predict_type``.

        Raises:
            ValueError: If ``y`` and the predictions differ in length.
        """
        # Flatten so a column-shaped y cannot broadcast into an (n, n) grid.
        y_true = np.asarray(y).ravel()
        y_pre = self.predict_type(x,thre_var)
        if y_true.shape[0] != y_pre.shape[0]:
            raise ValueError("x and y must contain the same number of samples")
        return np.mean(y_pre == y_true)


# Create the model, train it on the training set, then predict and score
# the test set.
logistic_model = Logistic_Regression()
logistic_model.fit_gd(x=horse_train_x, y=horse_train_y, iter=10000, s=0.001)
horse_pred = logistic_model.predict_type(x=horse_test_x)
# The accuracy value is computed here but neither stored nor printed.
logistic_model.predict_accuracy(x=horse_test_x, y=horse_test_y)
# Hand-written logistic regression algorithm

# You may also like