版权声明:此文为笔者原创,如需转载请联系笔者:[email protected] https://blog.csdn.net/Scc_hy/article/details/82813148
1、数据准备
数据源自《应用回归分析》(第四版)
# Fire-loss data set (one entry per recorded fire).
# Distance from the fire station, in kilometres.
x = [
    3.4, 1.8, 4.6, 2.3, 3.1,
    5.5, 0.7, 3.0, 2.6, 4.3,
    2.1, 1.1, 6.1, 4.8, 3.8,
]
# Fire damage, in thousands of yuan.
y = [
    26.2, 17.8, 31.3, 23.1, 27.5,
    36.0, 14.1, 22.3, 19.6, 31.3,
    24.0, 17.3, 43.2, 36.4, 26.1,
]
绘制散点图观察基本趋势:
import os

import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties

plt.style.use('ggplot')

# Matplotlib's default fonts have no CJK glyphs, so the Chinese labels need an
# explicit font.  Prefer the SimSun file shipped with Windows; when that file
# is absent (Linux, macOS, non-standard installs) fall back to a list of
# commonly installed CJK families instead of failing at draw time.
_SIMSUN_PATH = r"c:\windows\fonts\simsun.ttc"
if os.path.exists(_SIMSUN_PATH):
    font = FontProperties(fname=_SIMSUN_PATH, size=12)
else:
    font = FontProperties(
        family=[
            'SimSun',
            'SimHei',
            'Microsoft YaHei',
            'Noto Sans CJK SC',
            'WenQuanYi Micro Hei',
            'sans-serif',
        ],
        size=12,
    )

# Scatter plot of damage against distance to inspect the overall trend.
plt.scatter(x, y)
plt.xlabel('距离消防站/千米', fontproperties=font)
plt.ylabel("火灾损失/千元", fontproperties=font)
plt.title('火灾损失', fontproperties=font)
plt.show()
2、sklearn
大材小用线性回归
import numpy as np
from sklearn.linear_model import LinearRegression

# The estimator is fed column vectors, so both series are reshaped to (n, 1).
x_in = np.reshape(np.array(x), (-1, 1))
y_in = np.reshape(np.array(y), (-1, 1))

# fit() returns the estimator itself, so construction and fitting can be chained.
lreg = LinearRegression().fit(x_in, y_in)

# In-sample predictions, reused by the plots further down.
y_prd = lreg.predict(x_in)
输出相关统计参数
#### 统计量参数
def get_lr_stats(x, y, model):
    """Print the fitted line and significance statistics of a simple linear regression.

    Parameters
    ----------
    x : array-like
        Predictor values, in whatever layout ``model.predict`` accepts
        (a column vector of shape ``(n, 1)`` in this file).
    y : array-like
        Observed responses, shape ``(n,)`` or ``(n, 1)``.
    model : object
        Fitted one-variable linear model exposing ``intercept_``, ``coef_``
        and ``predict``.

    Returns
    -------
    None
        The report (equation, R^2, SSR, SSE, F with its p-value, t with its
        p-value) is written to stdout.
    """
    from scipy import stats

    # Flatten everything so the arithmetic below works on plain 1-D vectors
    # whether the model was fitted on 1-D or column-vector targets.
    x_flat = np.asarray(x, dtype=float).ravel()
    y_flat = np.asarray(y, dtype=float).ravel()
    y_hat = np.asarray(model.predict(x), dtype=float).ravel()
    intercept = np.ravel(model.intercept_)[0]
    slope = np.ravel(model.coef_)[0]

    n = len(x_flat)
    dof = n - 2  # residual degrees of freedom: two parameters are estimated

    message0 = ('一元线性回归方程为: ' + '\ty' + '=' + str(intercept)
                + ' + ' + str(slope) + '*x')

    Regression = np.sum((y_hat - y_flat.mean()) ** 2)  # regression sum of squares (SSR)
    Residual = np.sum((y_flat - y_hat) ** 2)           # residual sum of squares (SSE)
    R_square = Regression / (Regression + Residual)    # coefficient of determination
    F = (Regression / 1) / (Residual / dof)            # F statistic on (1, n-2) dof
    pf = stats.f.sf(F, 1, dof)
    message1 = ('相关系数(R^2): ' + str(R_square) + ';' + '\n' +
                '回归分析(SSR): ' + str(Regression) + ';' + '\t残差(SSE): ' + str(Residual) + ';' + '\n' +
                ' F : ' + str(F) + ';' + '\t' + 'pf : ' + str(pf))

    # t test for the slope.  The error standard deviation must be estimated
    # with n-2 degrees of freedom; with that estimate t**2 equals F, which is
    # the identity that holds for one-predictor regression.
    L_xx = np.sum((x_flat - x_flat.mean()) ** 2)
    sigma = np.sqrt(Residual / dof)
    t = slope * np.sqrt(L_xx) / sigma
    # Two-sided p-value, so a negative slope is judged by its magnitude too.
    pt = 2 * stats.t.sf(abs(t), dof)
    message2 = ' t : ' + str(t) + ';' + '\t' + 'pt : ' + str(pt)

    print(message0 + '\n' + message1 + '\n' + message2)
# Print the fitted equation and its significance statistics for the fire-loss model.
get_lr_stats(x_in, y_in, lreg)
结果如下
>>> get_lr_stats(x_in, y_in, lreg)
一元线性回归方程为: y=10.277928549524688 + 4.91933072677093*x
相关系数(R^2): 0.9234781689805285;
回归分析(SSR): 841.7663579806808; 残差(SSE): 69.75097535265259;
F : 156.88615963321718; pf : 1.2478000079204313e-08
t : 13.45445992541066; pt : 2.6193260491226123e-09
3、scipy.interpolate
图示线性回归效果
from scipy.interpolate import interp1d  # interpolation used to draw the fitted line

# Build a piecewise-linear interpolant through the predicted points and
# evaluate it on an evenly spaced grid spanning the observed distances.
linear_interp = interp1d(x, y_prd[:, 0], kind='linear')
computed = np.linspace(min(x), max(x), 50)
linear_results = linear_interp(computed)

# Observed data, in-sample predictions, and the interpolated line.
plt.scatter(x, y, s=8, label='orignal data')
plt.scatter(x_in, y_prd, c='green', s=9, label='predict data')
plt.plot(
    computed,
    linear_results,
    label='linear_interp',
    alpha=0.7,
    c='orange',
)

# Axis decoration (Chinese labels rendered with the CJK font set up earlier).
plt.xlabel('距离消防站/千米', fontproperties=font)
plt.ylabel("火灾损失/千元", fontproperties=font)
plt.title('火灾损失', fontproperties=font)
plt.ylim(0, 50)
plt.legend(loc='upper left')
plt.show()
4、残差图
#### 残差图
def Residual_plot(x, y, model):
    """Plot the residuals of a simple linear regression and report outliers.

    Observations whose internally studentized residual exceeds 3 in absolute
    value are reported as outliers.  The plot shows the raw residuals against
    ``x`` together with dashed reference lines at +/-2 and +/-3 estimated
    standard deviations.

    Parameters
    ----------
    x : array-like
        Predictor values, in whatever layout ``model.predict`` accepts
        (a column vector of shape ``(n, 1)`` in this file).
    y : array-like
        Observed responses, shape ``(n,)`` or ``(n, 1)``.
    model : object
        Fitted one-variable linear model exposing ``predict``.

    Returns
    -------
    None
        The figure is shown and the outlier summary is printed to stdout.
    """
    # Work on flat vectors so masks and arithmetic do not depend on whether
    # the caller passed 1-D data or column vectors.
    x_flat = np.asarray(x, dtype=float).ravel()
    y_flat = np.asarray(y, dtype=float).ravel()
    n = len(x_flat)

    # Use the model and data that were passed in, not module-level globals.
    y_prd = np.asarray(model.predict(x), dtype=float).ravel()
    e = y_flat - y_prd

    # Error standard deviation estimated with n-2 degrees of freedom
    # (two regression parameters are fitted).
    sigama = np.sqrt(np.sum(e ** 2) / (n - 2))

    # SRE: studentized residuals.  The leverage of observation i is
    # 1/n + (x_i - mean)^2 / L_xx; dividing by sqrt(1 - h_ii) compensates
    # for the unequal variances of the raw residuals.
    deviation = x_flat - x_flat.mean()
    L_xx = np.sum(deviation ** 2)
    hii = 1 / n + deviation ** 2 / L_xx
    sre = e / (sigama * np.sqrt(1 - hii))

    # Large negative residuals are just as anomalous as large positive ones.
    is_outlier = np.abs(sre) > 3
    if is_outlier.any():
        ycout = x_flat[is_outlier], y_flat[is_outlier]
        message = '异常值: ' + str(ycout)
    else:
        message = '异常值: ' + '不存在异常值'

    # Plot the raw residuals with +/-2 sigma and +/-3 sigma guide lines.
    mx = x_flat.max() + 1
    plt.scatter(x_flat, e, c='red', s=6)
    for multiple, colour in ((2, 'green'), (3, 'orange')):
        level = multiple * sigama
        plt.plot([0, mx], [level, level], linestyle='--', color=colour)
        plt.plot([0, mx], [-level, -level], linestyle='--', color=colour)
    plt.annotate(message, xy=(0, np.ceil(3 * sigama + 1)), xycoords='data',
                 fontproperties=font)
    plt.xlim(0, mx)
    plt.ylim(-np.ceil(3 * sigama + 2), np.ceil(3 * sigama + 2))
    plt.show()
    print(message)
# Draw the residual plot for the fitted fire-loss model and print the outlier summary.
Residual_plot(x_in, y_in, lreg)