MSE:均方误差,mean_squared_error
RMSE:均方根误差,root_mean_squared_error
MAE:平均绝对误差,mean_absolute_error
05 衡量回归算法的标准,MSE vs MAE
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
波士顿房产数据
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2 per the
# scikit-learn release notes; confirm the installed version still provides it.
boston = datasets.load_boston()
boston.keys()
dict_keys(['data', 'target', 'feature_names', 'DESCR'])
特征向量名称:
boston.feature_names
array(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
'TAX', 'PTRATIO', 'B', 'LSTAT'],
dtype='<U7')
x = boston.data[:,5] # use only the room-count feature: all rows, column index 5 ('RM', i.e. the sixth column)
x.shape
(506,)
y = boston.target
y.shape
(506,)
plt.scatter(x, y)
plt.show()
np.max(y)
50.0
# Drop samples whose target equals the maximum (50.0, see np.max(y) above); those points
# may not be real measurements. x is filtered first, while y still has its original
# length, so the boolean mask lines up with x.
x = x[y < 50.0]
y = y[y < 50.0]
x.shape
(490,)
y.shape
(490,)
plt.scatter(x, y)
plt.show()
使用简单线性回归法
from playML.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, seed=666)
x_train.shape
(392,)
y_train.shape
(392,)
x_test.shape
(98,)
y_test.shape
(98,)
from playML.SimpleLinearRegression import SimpleLinearRegression
reg = SimpleLinearRegression()
reg.fit(x_train, y_train)
SimpleLinearRegression()
reg.a_  # slope
7.8608543562689555
reg.b_  # intercept
-27.459342806705543
plt.scatter(x_train, y_train)
plt.plot(x_train, reg.predict(x_train), color='r')
plt.show()
plt.scatter(x_train, y_train)
plt.scatter(x_test, y_test, color="c")
plt.plot(x_train, reg.predict(x_train), color='r')
plt.show()
y_predict = reg.predict(x_test)
MSE
# Mean of the squared residuals on the test set.
mse_test = np.sum((y_predict - y_test)**2) / len(y_test)
mse_test  # MSE is in squared units of y, so it is not on the same scale as y itself
24.156602134387438
RMSE
from math import sqrt
# Taking the square root brings the error back to the units of y.
rmse_test = sqrt(mse_test)
rmse_test  # the error is about 4.91 on average
4.914936635846635
MAE
# Mean of the absolute residuals on the test set.
mae_test = np.sum(np.absolute(y_predict - y_test))/len(y_test)
mae_test  # MAE comes out smaller than RMSE here: the squaring inside RMSE amplifies large errors
3.5430974409463873
封装我们自己的评测函数
代码为:
import numpy as np
from math import sqrt
def accuracy_score(y_true, y_predict):
    """Return the fraction of positions where y_true equals y_predict.

    Args:
        y_true: Sequence or array of ground-truth labels.
        y_predict: Sequence or array of predicted labels, same length as
            y_true.

    Returns:
        The number of element-wise matches divided by len(y_true).

    Raises:
        AssertionError: If the two inputs differ in length.
    """
    # Convert up front so plain lists are compared element-wise; a bare
    # `list == list` collapses to a single bool and yields a wrong count.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    assert len(y_true) == len(y_predict), \
        "the size of y_true must be equal to the size of y_predict"
    return np.sum(y_true == y_predict) / len(y_true)
# Compute MSE
def mean_squared_error(y_true, y_predict):
    """Return the mean squared error between y_true and y_predict.

    Args:
        y_true: Sequence or array of ground-truth values.
        y_predict: Sequence or array of predicted values, same length as
            y_true.

    Returns:
        The sum of squared differences divided by len(y_true).

    Raises:
        AssertionError: If the two inputs differ in length.
    """
    # Convert up front so plain Python lists support element-wise subtraction.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    assert len(y_true) == len(y_predict), \
        "the size of y_true must be equal to the size of y_predict"
    return np.sum((y_true - y_predict) ** 2) / len(y_true)
# Compute RMSE
def root_mean_squared_error(y_true, y_predict):
    """Return the root mean squared error between y_true and y_predict.

    Args:
        y_true: Ground-truth values.
        y_predict: Predicted values, same length as y_true.

    Returns:
        The square root of mean_squared_error(y_true, y_predict).
    """
    # Length validation is delegated to mean_squared_error.
    return sqrt(mean_squared_error(y_true, y_predict))
# Compute MAE
def mean_absolute_error(y_true, y_predict):
    """Return the mean absolute error between y_true and y_predict.

    Args:
        y_true: Sequence or array of ground-truth values.
        y_predict: Sequence or array of predicted values, same length as
            y_true.

    Returns:
        The sum of absolute differences divided by len(y_true).

    Raises:
        AssertionError: If the two inputs differ in length.
    """
    # Convert up front so plain Python lists support element-wise subtraction.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    assert len(y_true) == len(y_predict), \
        "the size of y_true must be equal to the size of y_predict"
    return np.sum(np.absolute(y_true - y_predict)) / len(y_true)
测试前面写的算法:
from playML.metrics import mean_squared_error
from playML.metrics import root_mean_squared_error
from playML.metrics import mean_absolute_error
mean_squared_error(y_test, y_predict)
24.156602134387438
root_mean_squared_error(y_test, y_predict)
4.914936635846635
mean_absolute_error(y_test, y_predict)
3.5430974409463873
scikit-learn中的MSE和MAE
没有RMSE,对MSE开平方根即可得到
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
mean_squared_error(y_test, y_predict)
24.156602134387438
mean_absolute_error(y_test, y_predict)
3.5430974409463873
R Squared
R^2 越靠近1算法精确度越好
MSE
mse_test=np.sum((y_predict-y_test)**2)/len(y_test)
RMSE
rmse_test=sqrt(mse_test)
MAE
mae_test=np.sum((np.absolute(y_predict-y_test))/len(y_test))
R Square
from playML.metrics import mean_squared_error
1 - mean_squared_error(y_test, y_predict)/np.var(y_test)
封装我们自己的 R Squared(r2_score)
def r2_score(y_true, y_predict):
    """Return the R Squared score between y_true and y_predict.

    Args:
        y_true: Ground-truth values.
        y_predict: Predicted values, same length as y_true.

    Returns:
        1 minus the ratio of the mean squared error to the variance of
        y_true.
    """
    return 1 - mean_squared_error(y_true, y_predict) / np.var(y_true)
from playML.metrics import r2_score
r2_score(y_test, y_predict)
0.61293168039373225
scikit-learn中的 r2_score
from sklearn.metrics import r2_score
r2_score(y_test, y_predict)
0.61293168039373236
scikit-learn中的LinearRegression中的score返回r2_score:http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html
在我们的SimpleLinearRegression中添加score
reg.score(x_test, y_test)
0.61293168039373225