基于线性回归的交叉路口车流量数据预测模型

本人是刚接触机器学习不久的新手,文中使用的数据由导师提供。完整代码如下:

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.linear_model import LinearRegression

try:
    # Legacy module: deprecated in scikit-learn 0.18 and removed in 0.20.
    from sklearn.cross_validation import KFold
except ImportError:
    from sklearn.model_selection import KFold


def mape_cal(originalValue, predictValue, length):
    """Compute the mean absolute percentage error (MAPE) as a fraction.

    Args:
        originalValue: Observed values; must support element-wise
            subtraction and division (e.g. a NumPy array or pandas Series).
        predictValue: Predicted values, aligned element-wise with
            ``originalValue``.
        length: Divisor applied to the summed absolute relative errors
            (callers pass the number of samples).

    Returns:
        ``sum(|(original - predicted) / original|) / length``. The value is
        not multiplied by 100.
    """
    relative_error = (originalValue - predictValue) / originalValue
    absolute_error = pd.Series(relative_error).abs()
    return sum(absolute_error) / length


# ---------------------------------------------------------------------------
# Fit linear-regression models on the training CSV, average their predictions
# for the test CSV, report error metrics, save the predictions and plot them.
# ---------------------------------------------------------------------------

# KFold lives in sklearn.model_selection since scikit-learn 0.18; the old
# sklearn.cross_validation module (with its KFold(n, n_folds=...) signature)
# was removed in 0.20. Importing it here keeps this section self-contained.
from sklearn.model_selection import KFold

data = pd.read_csv("east训练集.csv")
data2 = pd.read_csv("测试集(1).csv")

cols = data.shape[1]   # number of columns in the training set
row = data.shape[0]    # number of rows (samples) in the training set

predictors = ['x1', 'x2', 'x3', 'x4', 'x5']

# Positional split: every column except the last one vs. the last column.
X_train = data.values[:, 0:cols-1]
y_train = data.values[:, cols-1:cols]
X_test = data2.values[:, 0:cols-1]
y_test = data2.values[:, cols-1:cols]

lr = LinearRegression()

# Three contiguous (unshuffled) folds, exactly as before. random_state is
# omitted on purpose: it has no effect without shuffling, and recent
# scikit-learn releases raise ValueError when it is set with shuffle=False.
kf = KFold(n_splits=3)
predictions = []
for train, _ in kf.split(data):
    # KFold yields positional indices, hence .iloc rather than .loc.
    train_predictors = data[predictors].iloc[train]
    train_target = data['y'].iloc[train]
    # Fit on this fold's training portion only ...
    lr.fit(train_predictors, train_target)
    # ... and predict the external test set; the held-out fold is not used.
    test_predictions = lr.predict(data2[predictors])
    predictions.append(test_predictions)

# Final prediction is the element-wise mean of the per-fold models' outputs.
y_predict = np.array(predictions).mean(axis=0)
print(y_predict)

# Error metrics on the test set.
print("-----预测结果-----")
mse = metrics.mean_squared_error(y_test, y_predict)
print("均方差MSE:", mse)
# RMSE is the square root of the MSE computed above.
print("均根方差RMSE:", np.sqrt(mse))
print("MAE:", metrics.mean_absolute_error(y_test, y_predict))


mapeValue = mape_cal(y_test.ravel(), y_predict.ravel(), X_test.shape[0])
print("mape值:", mapeValue)

# Persist the averaged predictions.
temp = pd.Series(y_predict.ravel())
temp.to_csv('test2.csv')

fig = plt.figure(figsize=(15, 7))

# Top panel: measured vs. predicted, with the y = x reference line.
ax1 = fig.add_subplot(2, 1, 1)
ax1.scatter(y_test, y_predict, linewidths=4, c='blue')
# The format string carries only the dash style; the colour comes from c=.
# The original 'k--' also requested black, which conflicted with c='red'.
ax1.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], '--', lw=4, c='red')
ax1.set_xlabel('Measured')
ax1.set_ylabel('Predicted')

# Bottom panel: predicted and original series plotted against sample index.
ax3 = fig.add_subplot(2, 1, 2)
ax3.plot(range(X_test.shape[0]), y_predict, c='red', label='predictValue', lw=2)
ax3.plot(range(X_test.shape[0]), y_test, c='blue', label='originalValue', lw=2)
ax3.legend(loc='upper left')

plt.show()

  预测结果:

-----预测结果-----
均方差MSE: 1413.927330961531
均根方差RMSE: 37.60222507992753
MAE: 30.085778687356473
mape值: 0.18161724802211374

  

猜你喜欢

转载自www.cnblogs.com/octopuszy/p/9657171.html
今日推荐