In [8]:
# Importing the libraries 导入库
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# 使图像能够调整
%matplotlib notebook
#中文字体显示
plt.rc('font', family='SimHei', size=8)
In [38]:
# Goal: predict Profit from the various expense categories
dataset = pd.read_csv('./50_Startups.csv')
y = dataset.iloc[:, 4].values    # dependent variable: Profit (5th column)
X = dataset.iloc[:, :-1].values  # independent variables: spends + State
dataset
Out[38]:
R&D Spend | Administration | Marketing Spend | State | Profit | |
---|---|---|---|---|---|
0 | 165349.20 | 136897.80 | 471784.10 | New York | 192261.83 |
1 | 162597.70 | 151377.59 | 443898.53 | California | 191792.06 |
2 | 153441.51 | 101145.55 | 407934.54 | Florida | 191050.39 |
3 | 144372.41 | 118671.85 | 383199.62 | New York | 182901.99 |
4 | 142107.34 | 91391.77 | 366168.42 | Florida | 166187.94 |
5 | 131876.90 | 99814.71 | 362861.36 | New York | 156991.12 |
6 | 134615.46 | 147198.87 | 127716.82 | California | 156122.51 |
7 | 130298.13 | 145530.06 | 323876.68 | Florida | 155752.60 |
8 | 120542.52 | 148718.95 | 311613.29 | New York | 152211.77 |
9 | 123334.88 | 108679.17 | 304981.62 | California | 149759.96 |
10 | 101913.08 | 110594.11 | 229160.95 | Florida | 146121.95 |
11 | 100671.96 | 91790.61 | 249744.55 | California | 144259.40 |
12 | 93863.75 | 127320.38 | 249839.44 | Florida | 141585.52 |
13 | 91992.39 | 135495.07 | 252664.93 | California | 134307.35 |
14 | 119943.24 | 156547.42 | 256512.92 | Florida | 132602.65 |
15 | 114523.61 | 122616.84 | 261776.23 | New York | 129917.04 |
16 | 78013.11 | 121597.55 | 264346.06 | California | 126992.93 |
17 | 94657.16 | 145077.58 | 282574.31 | New York | 125370.37 |
18 | 91749.16 | 114175.79 | 294919.57 | Florida | 124266.90 |
19 | 86419.70 | 153514.11 | 0.00 | New York | 122776.86 |
20 | 76253.86 | 113867.30 | 298664.47 | California | 118474.03 |
21 | 78389.47 | 153773.43 | 299737.29 | New York | 111313.02 |
22 | 73994.56 | 122782.75 | 303319.26 | Florida | 110352.25 |
23 | 67532.53 | 105751.03 | 304768.73 | Florida | 108733.99 |
24 | 77044.01 | 99281.34 | 140574.81 | New York | 108552.04 |
25 | 64664.71 | 139553.16 | 137962.62 | California | 107404.34 |
26 | 75328.87 | 144135.98 | 134050.07 | Florida | 105733.54 |
27 | 72107.60 | 127864.55 | 353183.81 | New York | 105008.31 |
28 | 66051.52 | 182645.56 | 118148.20 | Florida | 103282.38 |
29 | 65605.48 | 153032.06 | 107138.38 | New York | 101004.64 |
30 | 61994.48 | 115641.28 | 91131.24 | Florida | 99937.59 |
31 | 61136.38 | 152701.92 | 88218.23 | New York | 97483.56 |
32 | 63408.86 | 129219.61 | 46085.25 | California | 97427.84 |
33 | 55493.95 | 103057.49 | 214634.81 | Florida | 96778.92 |
34 | 46426.07 | 157693.92 | 210797.67 | California | 96712.80 |
35 | 46014.02 | 85047.44 | 205517.64 | New York | 96479.51 |
36 | 28663.76 | 127056.21 | 201126.82 | Florida | 90708.19 |
37 | 44069.95 | 51283.14 | 197029.42 | California | 89949.14 |
38 | 20229.59 | 65947.93 | 185265.10 | New York | 81229.06 |
39 | 38558.51 | 82982.09 | 174999.30 | California | 81005.76 |
40 | 28754.33 | 118546.05 | 172795.67 | California | 78239.91 |
41 | 27892.92 | 84710.77 | 164470.71 | Florida | 77798.83 |
42 | 23640.93 | 96189.63 | 148001.11 | California | 71498.49 |
43 | 15505.73 | 127382.30 | 35534.17 | New York | 69758.98 |
44 | 22177.74 | 154806.14 | 28334.72 | California | 65200.33 |
45 | 1000.23 | 124153.04 | 1903.93 | New York | 64926.08 |
46 | 1315.46 | 115816.21 | 297114.46 | Florida | 49490.75 |
47 | 0.00 | 135426.92 | 0.00 | California | 42559.73 |
48 | 542.05 | 51743.15 | 0.00 | New York | 35673.41 |
49 | 0.00 | 116983.80 | 45173.06 | California | 14681.40 |
In [39]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the categorical 'State' column (index 3).
# The original `OneHotEncoder(categorical_features=[3])` was deprecated in
# scikit-learn 0.20 and removed in 0.22; ColumnTransformer is the supported
# replacement.  It also makes the intermediate LabelEncoder step unnecessary.
# Column layout matches the old output: the encoded dummies come first
# (categories in alphabetical order: California, Florida, New York),
# followed by the passthrough numeric columns.
ct = ColumnTransformer(
    transformers=[('state', OneHotEncoder(), [3])],
    remainder='passthrough'
)
X = np.array(ct.fit_transform(X), dtype=float)
X
Out[39]:
array([[ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.65349200e+05, 1.36897800e+05, 4.71784100e+05], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.62597700e+05, 1.51377590e+05, 4.43898530e+05], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 1.53441510e+05, 1.01145550e+05, 4.07934540e+05], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.44372410e+05, 1.18671850e+05, 3.83199620e+05], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 1.42107340e+05, 9.13917700e+04, 3.66168420e+05], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.31876900e+05, 9.98147100e+04, 3.62861360e+05], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.34615460e+05, 1.47198870e+05, 1.27716820e+05], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 1.30298130e+05, 1.45530060e+05, 3.23876680e+05], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.20542520e+05, 1.48718950e+05, 3.11613290e+05], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.23334880e+05, 1.08679170e+05, 3.04981620e+05], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 1.01913080e+05, 1.10594110e+05, 2.29160950e+05], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00671960e+05, 9.17906100e+04, 2.49744550e+05], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 9.38637500e+04, 1.27320380e+05, 2.49839440e+05], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 9.19923900e+04, 1.35495070e+05, 2.52664930e+05], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 1.19943240e+05, 1.56547420e+05, 2.56512920e+05], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.14523610e+05, 1.22616840e+05, 2.61776230e+05], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 7.80131100e+04, 1.21597550e+05, 2.64346060e+05], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 9.46571600e+04, 1.45077580e+05, 2.82574310e+05], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 9.17491600e+04, 1.14175790e+05, 2.94919570e+05], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 8.64197000e+04, 1.53514110e+05, 0.00000000e+00], [ 
1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 7.62538600e+04, 1.13867300e+05, 2.98664470e+05], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 7.83894700e+04, 1.53773430e+05, 2.99737290e+05], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 7.39945600e+04, 1.22782750e+05, 3.03319260e+05], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 6.75325300e+04, 1.05751030e+05, 3.04768730e+05], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 7.70440100e+04, 9.92813400e+04, 1.40574810e+05], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 6.46647100e+04, 1.39553160e+05, 1.37962620e+05], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 7.53288700e+04, 1.44135980e+05, 1.34050070e+05], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 7.21076000e+04, 1.27864550e+05, 3.53183810e+05], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 6.60515200e+04, 1.82645560e+05, 1.18148200e+05], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 6.56054800e+04, 1.53032060e+05, 1.07138380e+05], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 6.19944800e+04, 1.15641280e+05, 9.11312400e+04], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 6.11363800e+04, 1.52701920e+05, 8.82182300e+04], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 6.34088600e+04, 1.29219610e+05, 4.60852500e+04], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 5.54939500e+04, 1.03057490e+05, 2.14634810e+05], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 4.64260700e+04, 1.57693920e+05, 2.10797670e+05], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 4.60140200e+04, 8.50474400e+04, 2.05517640e+05], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 2.86637600e+04, 1.27056210e+05, 2.01126820e+05], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 4.40699500e+04, 5.12831400e+04, 1.97029420e+05], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 2.02295900e+04, 6.59479300e+04, 1.85265100e+05], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 3.85585100e+04, 8.29820900e+04, 1.74999300e+05], [ 1.00000000e+00, 
0.00000000e+00, 0.00000000e+00, 2.87543300e+04, 1.18546050e+05, 1.72795670e+05], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 2.78929200e+04, 8.47107700e+04, 1.64470710e+05], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 2.36409300e+04, 9.61896300e+04, 1.48001110e+05], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.55057300e+04, 1.27382300e+05, 3.55341700e+04], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 2.21777400e+04, 1.54806140e+05, 2.83347200e+04], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.00023000e+03, 1.24153040e+05, 1.90393000e+03], [ 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 1.31546000e+03, 1.15816210e+05, 2.97114460e+05], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.35426920e+05, 0.00000000e+00], [ 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 5.42050000e+02, 5.17431500e+04, 0.00000000e+00], [ 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.16983800e+05, 4.51730600e+04]])
In [40]:
# Drop the first dummy column to avoid the dummy-variable trap:
# the three State dummies sum to 1 and are perfectly collinear
# with the regression intercept.
X = X[:,1:]
X
Out[40]:
array([[ 0.00000000e+00, 1.00000000e+00, 1.65349200e+05, 1.36897800e+05, 4.71784100e+05], [ 0.00000000e+00, 0.00000000e+00, 1.62597700e+05, 1.51377590e+05, 4.43898530e+05], [ 1.00000000e+00, 0.00000000e+00, 1.53441510e+05, 1.01145550e+05, 4.07934540e+05], [ 0.00000000e+00, 1.00000000e+00, 1.44372410e+05, 1.18671850e+05, 3.83199620e+05], [ 1.00000000e+00, 0.00000000e+00, 1.42107340e+05, 9.13917700e+04, 3.66168420e+05], [ 0.00000000e+00, 1.00000000e+00, 1.31876900e+05, 9.98147100e+04, 3.62861360e+05], [ 0.00000000e+00, 0.00000000e+00, 1.34615460e+05, 1.47198870e+05, 1.27716820e+05], [ 1.00000000e+00, 0.00000000e+00, 1.30298130e+05, 1.45530060e+05, 3.23876680e+05], [ 0.00000000e+00, 1.00000000e+00, 1.20542520e+05, 1.48718950e+05, 3.11613290e+05], [ 0.00000000e+00, 0.00000000e+00, 1.23334880e+05, 1.08679170e+05, 3.04981620e+05], [ 1.00000000e+00, 0.00000000e+00, 1.01913080e+05, 1.10594110e+05, 2.29160950e+05], [ 0.00000000e+00, 0.00000000e+00, 1.00671960e+05, 9.17906100e+04, 2.49744550e+05], [ 1.00000000e+00, 0.00000000e+00, 9.38637500e+04, 1.27320380e+05, 2.49839440e+05], [ 0.00000000e+00, 0.00000000e+00, 9.19923900e+04, 1.35495070e+05, 2.52664930e+05], [ 1.00000000e+00, 0.00000000e+00, 1.19943240e+05, 1.56547420e+05, 2.56512920e+05], [ 0.00000000e+00, 1.00000000e+00, 1.14523610e+05, 1.22616840e+05, 2.61776230e+05], [ 0.00000000e+00, 0.00000000e+00, 7.80131100e+04, 1.21597550e+05, 2.64346060e+05], [ 0.00000000e+00, 1.00000000e+00, 9.46571600e+04, 1.45077580e+05, 2.82574310e+05], [ 1.00000000e+00, 0.00000000e+00, 9.17491600e+04, 1.14175790e+05, 2.94919570e+05], [ 0.00000000e+00, 1.00000000e+00, 8.64197000e+04, 1.53514110e+05, 0.00000000e+00], [ 0.00000000e+00, 0.00000000e+00, 7.62538600e+04, 1.13867300e+05, 2.98664470e+05], [ 0.00000000e+00, 1.00000000e+00, 7.83894700e+04, 1.53773430e+05, 2.99737290e+05], [ 1.00000000e+00, 0.00000000e+00, 7.39945600e+04, 1.22782750e+05, 3.03319260e+05], [ 1.00000000e+00, 0.00000000e+00, 6.75325300e+04, 1.05751030e+05, 3.04768730e+05], 
[ 0.00000000e+00, 1.00000000e+00, 7.70440100e+04, 9.92813400e+04, 1.40574810e+05], [ 0.00000000e+00, 0.00000000e+00, 6.46647100e+04, 1.39553160e+05, 1.37962620e+05], [ 1.00000000e+00, 0.00000000e+00, 7.53288700e+04, 1.44135980e+05, 1.34050070e+05], [ 0.00000000e+00, 1.00000000e+00, 7.21076000e+04, 1.27864550e+05, 3.53183810e+05], [ 1.00000000e+00, 0.00000000e+00, 6.60515200e+04, 1.82645560e+05, 1.18148200e+05], [ 0.00000000e+00, 1.00000000e+00, 6.56054800e+04, 1.53032060e+05, 1.07138380e+05], [ 1.00000000e+00, 0.00000000e+00, 6.19944800e+04, 1.15641280e+05, 9.11312400e+04], [ 0.00000000e+00, 1.00000000e+00, 6.11363800e+04, 1.52701920e+05, 8.82182300e+04], [ 0.00000000e+00, 0.00000000e+00, 6.34088600e+04, 1.29219610e+05, 4.60852500e+04], [ 1.00000000e+00, 0.00000000e+00, 5.54939500e+04, 1.03057490e+05, 2.14634810e+05], [ 0.00000000e+00, 0.00000000e+00, 4.64260700e+04, 1.57693920e+05, 2.10797670e+05], [ 0.00000000e+00, 1.00000000e+00, 4.60140200e+04, 8.50474400e+04, 2.05517640e+05], [ 1.00000000e+00, 0.00000000e+00, 2.86637600e+04, 1.27056210e+05, 2.01126820e+05], [ 0.00000000e+00, 0.00000000e+00, 4.40699500e+04, 5.12831400e+04, 1.97029420e+05], [ 0.00000000e+00, 1.00000000e+00, 2.02295900e+04, 6.59479300e+04, 1.85265100e+05], [ 0.00000000e+00, 0.00000000e+00, 3.85585100e+04, 8.29820900e+04, 1.74999300e+05], [ 0.00000000e+00, 0.00000000e+00, 2.87543300e+04, 1.18546050e+05, 1.72795670e+05], [ 1.00000000e+00, 0.00000000e+00, 2.78929200e+04, 8.47107700e+04, 1.64470710e+05], [ 0.00000000e+00, 0.00000000e+00, 2.36409300e+04, 9.61896300e+04, 1.48001110e+05], [ 0.00000000e+00, 1.00000000e+00, 1.55057300e+04, 1.27382300e+05, 3.55341700e+04], [ 0.00000000e+00, 0.00000000e+00, 2.21777400e+04, 1.54806140e+05, 2.83347200e+04], [ 0.00000000e+00, 1.00000000e+00, 1.00023000e+03, 1.24153040e+05, 1.90393000e+03], [ 1.00000000e+00, 0.00000000e+00, 1.31546000e+03, 1.15816210e+05, 2.97114460e+05], [ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.35426920e+05, 0.00000000e+00], [ 
0.00000000e+00, 1.00000000e+00, 5.42050000e+02, 5.17431500e+04, 0.00000000e+00], [ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.16983800e+05, 4.51730600e+04]])
In [41]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; a fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0)
In [42]:
from sklearn.linear_model import LinearRegression

# Fit an ordinary-least-squares model on the training split
regressor = LinearRegression().fit(X_train, y_train)
regressor  # display the fitted estimator, as the original cell did
Out[42]:
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)
In [43]:
# Predict profit for the held-out test rows
y_pred = regressor.predict(X_test)
In [44]:
# NOTE: `sm.OLS` lives in `statsmodels.api`; it was removed from
# `statsmodels.formula.api` in statsmodels 0.10, so the original
# `import statsmodels.formula.api as sm` breaks on current versions.
import statsmodels.api as sm

# Prepend an intercept column of ones — statsmodels' OLS does not add a
# constant automatically.  Using shape[0] instead of the hard-coded 40
# keeps this correct for any training-set size.  (Beware: re-running this
# cell appends another column of ones; run the notebook top-to-bottom.)
X_train = np.append(arr = np.ones((X_train.shape[0], 1)), values = X_train, axis = 1)

# Backward elimination, step 0: fit with every predictor included
X_opt = X_train[:,[0,1,2,3,4,5]]
regressor_OLS = sm.OLS(endog = y_train, exog = X_opt).fit()
regressor_OLS.summary()
Out[44]:
Dep. Variable: | y | R-squared: | 0.950 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.943 |
Method: | Least Squares | F-statistic: | 129.7 |
Date: | Sat, 14 Apr 2018 | Prob (F-statistic): | 3.91e-21 |
Time: | 23:08:24 | Log-Likelihood: | -421.10 |
No. Observations: | 40 | AIC: | 854.2 |
Df Residuals: | 34 | BIC: | 864.3 |
Df Model: | 5 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
const | 4.255e+04 | 8358.538 | 5.091 | 0.000 | 2.56e+04 | 5.95e+04 |
x1 | -959.2842 | 4038.108 | -0.238 | 0.814 | -9165.706 | 7247.138 |
x2 | 699.3691 | 3661.563 | 0.191 | 0.850 | -6741.822 | 8140.560 |
x3 | 0.7735 | 0.055 | 14.025 | 0.000 | 0.661 | 0.886 |
x4 | 0.0329 | 0.066 | 0.495 | 0.624 | -0.102 | 0.168 |
x5 | 0.0366 | 0.019 | 1.884 | 0.068 | -0.003 | 0.076 |
Omnibus: | 15.823 | Durbin-Watson: | 2.468 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 23.231 |
Skew: | -1.094 | Prob(JB): | 9.03e-06 |
Kurtosis: | 6.025 | Cond. No. | 1.49e+06 |
In [45]:
# Backward elimination, step 1: drop column 2 (the dummy with the
# highest p-value in the previous fit, p = 0.850)
X_opt = X_train[:, [0, 1, 3, 4, 5]]
ols_step1 = sm.OLS(endog=y_train, exog=X_opt)
regressor_OLS = ols_step1.fit()
regressor_OLS.summary()
Out[45]:
Dep. Variable: | y | R-squared: | 0.950 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.944 |
Method: | Least Squares | F-statistic: | 166.7 |
Date: | Sat, 14 Apr 2018 | Prob (F-statistic): | 2.87e-22 |
Time: | 23:08:48 | Log-Likelihood: | -421.12 |
No. Observations: | 40 | AIC: | 852.2 |
Df Residuals: | 35 | BIC: | 860.7 |
Df Model: | 4 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
const | 4.292e+04 | 8020.397 | 5.352 | 0.000 | 2.66e+04 | 5.92e+04 |
x1 | -1272.1608 | 3639.780 | -0.350 | 0.729 | -8661.308 | 6116.986 |
x2 | 0.7754 | 0.053 | 14.498 | 0.000 | 0.667 | 0.884 |
x3 | 0.0319 | 0.065 | 0.488 | 0.629 | -0.101 | 0.165 |
x4 | 0.0363 | 0.019 | 1.902 | 0.065 | -0.002 | 0.075 |
Omnibus: | 16.074 | Durbin-Watson: | 2.467 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 24.553 |
Skew: | -1.086 | Prob(JB): | 4.66e-06 |
Kurtosis: | 6.164 | Cond. No. | 1.43e+06 |
In [46]:
# Backward elimination, step 2: drop column 1 (the remaining dummy,
# highest p-value in the previous fit, p = 0.729)
X_opt = X_train[:, [0, 3, 4, 5]]
ols_step2 = sm.OLS(endog=y_train, exog=X_opt)
regressor_OLS = ols_step2.fit()
regressor_OLS.summary()
Out[46]:
Dep. Variable: | y | R-squared: | 0.950 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.946 |
Method: | Least Squares | F-statistic: | 227.8 |
Date: | Sat, 14 Apr 2018 | Prob (F-statistic): | 1.85e-23 |
Time: | 23:08:50 | Log-Likelihood: | -421.19 |
No. Observations: | 40 | AIC: | 850.4 |
Df Residuals: | 36 | BIC: | 857.1 |
Df Model: | 3 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
const | 4.299e+04 | 7919.773 | 5.428 | 0.000 | 2.69e+04 | 5.91e+04 |
x1 | 0.7788 | 0.052 | 15.003 | 0.000 | 0.674 | 0.884 |
x2 | 0.0294 | 0.064 | 0.458 | 0.650 | -0.101 | 0.160 |
x3 | 0.0347 | 0.018 | 1.896 | 0.066 | -0.002 | 0.072 |
Omnibus: | 15.557 | Durbin-Watson: | 2.481 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 22.539 |
Skew: | -1.081 | Prob(JB): | 1.28e-05 |
Kurtosis: | 5.974 | Cond. No. | 1.43e+06 |
In [47]:
# Backward elimination, step 3: drop column 4 (Administration spend,
# highest p-value in the previous fit, p = 0.650)
X_opt = X_train[:, [0, 3, 5]]
ols_step3 = sm.OLS(endog=y_train, exog=X_opt)
regressor_OLS = ols_step3.fit()
regressor_OLS.summary()
Out[47]:
Dep. Variable: | y | R-squared: | 0.950 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.947 |
Method: | Least Squares | F-statistic: | 349.0 |
Date: | Sat, 14 Apr 2018 | Prob (F-statistic): | 9.65e-25 |
Time: | 23:08:53 | Log-Likelihood: | -421.30 |
No. Observations: | 40 | AIC: | 848.6 |
Df Residuals: | 37 | BIC: | 853.7 |
Df Model: | 2 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
const | 4.635e+04 | 2971.236 | 15.598 | 0.000 | 4.03e+04 | 5.24e+04 |
x1 | 0.7886 | 0.047 | 16.846 | 0.000 | 0.694 | 0.883 |
x2 | 0.0326 | 0.018 | 1.860 | 0.071 | -0.003 | 0.068 |
Omnibus: | 14.666 | Durbin-Watson: | 2.518 |
---|---|---|---|
Prob(Omnibus): | 0.001 | Jarque-Bera (JB): | 20.582 |
Skew: | -1.030 | Prob(JB): | 3.39e-05 |
Kurtosis: | 5.847 | Cond. No. | 4.97e+05 |
In [48]:
# Backward elimination, step 4: drop column 5 (Marketing spend,
# p = 0.071 in the previous fit, above the 5% significance level).
# Final model: intercept + R&D spend only.
X_opt = X_train[:, [0, 3]]
ols_step4 = sm.OLS(endog=y_train, exog=X_opt)
regressor_OLS = ols_step4.fit()
regressor_OLS.summary()
Out[48]:
Dep. Variable: | y | R-squared: | 0.945 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.944 |
Method: | Least Squares | F-statistic: | 652.4 |
Date: | Sat, 14 Apr 2018 | Prob (F-statistic): | 1.56e-25 |
Time: | 23:08:55 | Log-Likelihood: | -423.09 |
No. Observations: | 40 | AIC: | 850.2 |
Df Residuals: | 38 | BIC: | 853.6 |
Df Model: | 1 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
const | 4.842e+04 | 2842.717 | 17.032 | 0.000 | 4.27e+04 | 5.42e+04 |
x1 | 0.8516 | 0.033 | 25.542 | 0.000 | 0.784 | 0.919 |
Omnibus: | 13.132 | Durbin-Watson: | 2.325 |
---|---|---|---|
Prob(Omnibus): | 0.001 | Jarque-Bera (JB): | 16.254 |
Skew: | -0.991 | Prob(JB): | 0.000295 |
Kurtosis: | 5.413 | Cond. No. | 1.57e+05 |