# 一元线性回归实例 (simple one-variable linear regression example)

 
 
#coding:utf-8

#一元线性回归实例

import sys
import numpy as np
from sklearn import linear_model
import matplotlib.pyplot as plt
import sklearn.metrics as sm #计算误差的模块
import cPickle as pickle

# Load the raw data
def getXY(filename="D:\\develop\\python_workspace\\test\\datas\\data_singlevar.txt"):
    """Read comma-separated ``x,y`` pairs from a text file.

    Each non-blank line must hold exactly two comma-separated values that
    ``float()`` can parse. Blank lines (for example a trailing empty line at
    the end of the file) are skipped instead of aborting the load.

    Args:
        filename: Path of the data file. Defaults to the location that was
            previously hard-coded, so existing ``getXY()`` calls still work.

    Returns:
        A tuple ``(X, Y)`` of two lists of floats, in file order.

    Raises:
        ValueError: If a non-blank line does not contain exactly two values
            or a value cannot be converted to ``float``.
    """
    X = []
    Y = []
    with open(filename, 'r') as f:
        # Iterate the file object directly so lines are read lazily.
        for line in f:
            line = line.strip()
            if not line:
                continue
            xt, yt = [float(i) for i in line.split(',')]
            X.append(xt)
            Y.append(yt)
    return X, Y

# Use the leading portion (80% by default) of the data as the training set
def getTrainData(X,Y,train_ratio=0.8):
    """Return the leading ``train_ratio`` share of the samples as arrays.

    Args:
        X: Sequence of feature values (must support ``len()`` and slicing).
        Y: Sequence of target values, sliced at the same position as ``X``.
        train_ratio: Fraction of the samples used for training, between 0
            and 1 inclusive. Defaults to 0.8, the previously fixed split.

    Returns:
        A tuple ``(X_training, Y_training)``: ``X_training`` is a NumPy array
        reshaped to ``(num_training, 1)`` and ``Y_training`` is a NumPy array
        built from the first ``num_training`` targets.

    Raises:
        ValueError: If ``train_ratio`` is outside ``[0, 1]``.
    """
    if not 0 <= train_ratio <= 1:
        raise ValueError("train_ratio must be between 0 and 1")
    # Truncate toward zero so the split point matches getTestData.
    num_training = int(train_ratio * len(X))
    X_training = np.array(X[:num_training]).reshape((num_training, 1))
    Y_training = np.array(Y[:num_training])
    return X_training, Y_training

# Use the trailing portion (20% by default) of the data as the test set
def getTestData(X,Y,train_ratio=0.8):
    """Return the samples that follow the training split as arrays.

    Args:
        X: Sequence of feature values (must support ``len()`` and slicing).
        Y: Sequence of target values, sliced at the same position as ``X``.
        train_ratio: Fraction of the samples reserved for training, between
            0 and 1 inclusive; everything after that point is returned.
            Defaults to 0.8, the previously fixed split.

    Returns:
        A tuple ``(X_test, Y_test)``: ``X_test`` is a NumPy array reshaped to
        ``(num_test, 1)`` and ``Y_test`` is a NumPy array of the remaining
        targets.

    Raises:
        ValueError: If ``train_ratio`` is outside ``[0, 1]``.
    """
    if not 0 <= train_ratio <= 1:
        raise ValueError("train_ratio must be between 0 and 1")
    # Same split-point computation as the training helper.
    num_training = int(train_ratio * len(X))
    num_test = len(X) - num_training
    X_test = np.array(X[num_training:]).reshape((num_test, 1))
    Y_test = np.array(Y[num_training:])
    return X_test, Y_test

# Fit the model and predict on the training inputs
def getPredict(X_train,Y_train):
    """Fit a ``LinearRegression`` model and predict on its own training data.

    Args:
        X_train: Training features passed to ``fit`` and ``predict``.
        Y_train: Training targets passed to ``fit``.

    Returns:
        A tuple ``(model, predictions)`` holding the fitted regressor and its
        predictions for ``X_train``.
    """
    model = linear_model.LinearRegression()
    model.fit(X_train, Y_train)
    fitted_values = model.predict(X_train)
    return model, fitted_values

# Plot the regression line over the training data
def showLR(X_train,Y_train,Y_train_predict):
    """Open a new figure showing training points and the predicted line.

    Args:
        X_train: x-coordinates for both the scatter points and the line.
        Y_train: Observed targets, drawn as green scatter points.
        Y_train_predict: Predicted targets, drawn as a red line.
    """
    point_style = {"color": "green"}
    line_style = {"color": "red", "linewidth": 4}

    plt.figure()
    plt.scatter(X_train, Y_train, **point_style)
    plt.plot(X_train, Y_train_predict, **line_style)
    plt.title("Training data")
    plt.show()

# Run the model on the test inputs
def getTestPredict(linear_regressor,X_test):
    """Return the regressor's predictions for ``X_test``.

    Args:
        linear_regressor: Object exposing a ``predict`` method.
        X_test: Inputs forwarded unchanged to ``predict``.

    Returns:
        Whatever ``linear_regressor.predict(X_test)`` returns.
    """
    return linear_regressor.predict(X_test)

# Plot the regression line over the test data
def showTestLR(X_test,Y_test,Y_test_predict):
    """Draw test points and the predicted line, then show the plot.

    Args:
        X_test: x-coordinates for both the scatter points and the line.
        Y_test: Observed targets, drawn as green scatter points.
        Y_test_predict: Predicted targets, drawn as a red line.
    """
    point_style = {"color": "green"}
    line_style = {"color": "red", "linewidth": 4}

    plt.scatter(X_test, Y_test, **point_style)
    plt.plot(X_test, Y_test_predict, **line_style)
    plt.title("Test Data")
    plt.show()

# Measure regression accuracy by printing several error metrics
def getError(Y_test,Y_test_predict):
    """Print five regression metrics, each rounded to two decimals.

    The metrics are computed and printed one after another in this order:
    mean absolute error, mean squared error, median absolute error,
    explained variance score and R2 score.

    Args:
        Y_test: True target values.
        Y_test_predict: Predicted target values.
    """
    # (label, metric function) pairs, in output order.
    report = (
        ("mean_absolute_error : ", sm.mean_absolute_error),
        ("mean_squared_error : ", sm.mean_squared_error),
        ("median_absolute_error : ", sm.median_absolute_error),
        ("explained_variance_score : ", sm.explained_variance_score),
        ("R2_score : ", sm.r2_score),
    )
    for label, metric in report:
        score = round(metric(Y_test, Y_test_predict), 2)
        print(label, score)

# Save the model
def saveLR(linear_regressor,out_model_file="D:\\develop\\python_workspace\\test\\datas\\saved_model.pkl"):
    """Pickle the given model object to a file.

    Args:
        linear_regressor: The object to serialize.
        out_model_file: Destination path. Defaults to the location that was
            previously hard-coded, so existing one-argument calls still work.
    """
    # Pickle data is bytes, so the file must be opened in binary mode:
    # text mode raises TypeError on Python 3 and can corrupt data on Windows.
    with open(out_model_file, 'wb') as f:
        pickle.dump(linear_regressor, f)

# Load the model
def loadLR(model_file="D:\\develop\\python_workspace\\test\\datas\\saved_model.pkl"):
    """Load a pickled model object from a file.

    Args:
        model_file: Path of the pickle file. Defaults to the location that
            was previously hard-coded, so existing ``loadLR()`` calls still
            work.

    Returns:
        The object stored in the pickle file.
    """
    # Pickle data is bytes, so the file must be opened in binary mode.
    with open(model_file, 'rb') as f:
        # SECURITY: unpickling can execute arbitrary code; only load model
        # files that come from a trusted source.
        model_linregr = pickle.load(f)
    return model_linregr


if __name__ == "__main__":
    # Load the samples and split them into training and test portions.
    all_x, all_y = getXY()
    train_x, train_y = getTrainData(all_x, all_y)
    test_x, test_y = getTestData(all_x, all_y)

    # Fit on the training portion; the training predictions are only needed
    # for the optional plot: showLR(train_x, train_y, train_fit)
    regressor, train_fit = getPredict(train_x, train_y)

    # Predict on the held-out portion and report the error metrics.
    # Optional plot: showTestLR(test_x, test_y, test_fit)
    test_fit = getTestPredict(regressor, test_x)
    getError(test_y, test_fit)

    # Write the fitted model to disk, then read it back.
    saveLR(regressor)
    reloaded_model = loadLR()








 

# 猜你喜欢 (site footer: "You may also like")
#
# 转载自 (reposted from) blog.csdn.net/u012592062/article/details/78377866