菜鸟,刚入机器学习不久,数据是导师给的
import numpy as np
import pandas as pd


def mape_cal(originalValue, predictValue, length):
    """Return the mean absolute percentage error (MAPE) as a fraction.

    Args:
        originalValue: 1-D array-like of observed target values.
        predictValue: 1-D array-like of predictions aligned element-wise
            with ``originalValue``.
        length: Divisor applied to the summed relative errors; callers pass
            the number of samples.

    Returns:
        ``sum(|original - predicted| / |original|) / length``. The result is
        a fraction (0.18 means 18 %), not a percentage.

    Note:
        Zeros in ``originalValue`` are not filtered and yield ``inf``/``nan``.
    """
    # Coerce to float arrays so plain lists/tuples work as well as
    # ndarray/Series inputs.
    original = np.asarray(originalValue, dtype=float)
    predicted = np.asarray(predictValue, dtype=float)
    relative_error = (original - predicted) / original
    return np.abs(relative_error).sum() / length


def main():
    """Fit 3-fold linear models on the training CSV and score the test CSV.

    One ``LinearRegression`` is fitted per fold on that fold's training
    rows; the test-set predictions of the three models are averaged. The
    script then prints MSE, RMSE, MAE and MAPE, writes the averaged
    predictions to ``test2.csv`` and shows two diagnostic plots.
    """
    # Imported here so that importing this module just for `mape_cal` does
    # not require matplotlib or scikit-learn.
    import matplotlib.pyplot as plt
    from sklearn import metrics
    from sklearn.linear_model import LinearRegression
    # `sklearn.cross_validation` was removed in scikit-learn 0.20.
    from sklearn.model_selection import KFold

    data = pd.read_csv("east训练集.csv")
    data2 = pd.read_csv("测试集(1).csv")

    predictors = ['x1', 'x2', 'x3', 'x4', 'x5']
    cols = data.shape[1]  # number of columns in the training set

    # The test target is taken by position (last column index of the
    # training set), as in the original script; presumably this is the
    # 'y' column -- confirm against the CSV layout.
    y_test = data2.iloc[:, cols - 1].to_numpy()
    n_test = data2.shape[0]

    # Unshuffled, contiguous folds. `random_state` is omitted on purpose:
    # it has no effect without shuffling and current scikit-learn rejects
    # that combination.
    kf = KFold(n_splits=3)

    fold_predictions = []
    for train_idx, _ in kf.split(data):
        # KFold yields positional indices, hence `.iloc` rather than `.loc`.
        train_features = data[predictors].iloc[train_idx]
        train_target = data['y'].iloc[train_idx]

        model = LinearRegression()
        model.fit(train_features, train_target)
        fold_predictions.append(model.predict(data2[predictors]))

    # Average the per-fold models' predictions on the test set.
    y_predict = np.mean(fold_predictions, axis=0)
    print(y_predict)

    # Evaluate the averaged predictions against the test targets.
    print("-----预测结果-----")
    mse = metrics.mean_squared_error(y_test, y_predict)
    print("均方差MSE:", mse)
    print("均根方差RMSE:", np.sqrt(mse))
    print("MAE:", metrics.mean_absolute_error(y_test, y_predict))
    print("mape值:", mape_cal(y_test, y_predict, n_test))

    pd.Series(y_predict).to_csv('test2.csv')

    fig = plt.figure(figsize=(15, 7))

    # Top panel: measured vs. predicted, with the y = x reference line.
    ax1 = fig.add_subplot(2, 1, 1)
    ax1.scatter(y_test, y_predict, linewidths=4, c='blue')
    value_range = [y_test.min(), y_test.max()]
    # The original passed both 'k--' and c='red'; the keyword wins, so the
    # line is drawn red and dashed.
    ax1.plot(value_range, value_range, linestyle='--', lw=4, c='red')
    ax1.set_xlabel('Measured')
    ax1.set_ylabel('Predicted')

    # Bottom panel: predicted and measured series over the sample index.
    ax3 = fig.add_subplot(2, 1, 2)
    sample_index = range(n_test)
    ax3.plot(sample_index, y_predict, c='red', label='predictValue', lw=2)
    ax3.plot(sample_index, y_test, c='blue', label='originalValue', lw=2)
    ax3.legend(loc='upper left')

    plt.show()


if __name__ == "__main__":
    main()
预测结果:
-----预测结果-----
均方差MSE: 1413.927330961531
均根方差RMSE: 37.60222507992753
MAE: 30.085778687356473
mape值: 0.18161724802211374