线性回归模型总结

先插入代码

 
 
 1 import numpy as np
 2 import matplotlib.pyplot as plt
 3 import pandas as pd
 4 from sklearn.model_selection import train_test_split
 5 from sklearn.linear_model import Lasso, Ridge
 6 from sklearn.model_selection import GridSearchCV
 7 
 8 
 9 if __name__ == "__main__":
10     # pandas读入
11     data = pd.read_csv('8.Advertising.csv')    # TV、Radio、Newspaper、Sales
12     x = data[['TV', 'Radio', 'Newspaper']]
13     # x = data[['TV', 'Radio']]
14     y = data['Sales']
15     print x
16     print y
17 
18     x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1)
19     # print x_train, y_train
20     model = Lasso()
21     # model = Ridge()
22 
23     alpha_can = np.logspace(-3, 2, 10)
24     lasso_model = GridSearchCV(model, param_grid={'alpha': alpha_can}, cv=5)
25     lasso_model.fit(x, y)
26     print '验证参数:\n', lasso_model.best_params_
27 
28     y_hat = lasso_model.predict(np.array(x_test))
29     mse = np.average((y_hat - np.array(y_test)) ** 2)  # Mean Squared Error
30     rmse = np.sqrt(mse)  # Root Mean Squared Error
31     print mse, rmse
32 
33     t = np.arange(len(x_test))
34     plt.plot(t, y_test, 'r-', linewidth=2, label='Test')
35     plt.plot(t, y_hat, 'g-', linewidth=2, label='Predict')
36     plt.legend(loc='upper right')
37     plt.grid()
38     plt.show()

代码解析(以行号为基准)

  11行:读取csv数据,n行4列,(4列分别为TV、Radio、Newspaper、Sales)

  12行:选取 TV、Radio、Newspaper 这三列数据作为特征量。

  14行:Sales 为对应的目标值(公式:$y_{\text{sales}} = \theta_0 x_0 + \theta_1 x_{\text{TV}} + \theta_2 x_{\text{Radio}} + \theta_3 x_{\text{Newspaper}}$,其中 $x_0 = 1$ 对应截距项)

  

猜你喜欢

转载自www.cnblogs.com/bianjing/p/10039306.html