机器学习之路:python 集成回归模型 随机森林回归RandomForestRegressor 极端随机森林回归ExtraTreesRegressor 梯度提升回归GradientBoostingRegressor 预测波士顿房价

python3 学习机器学习api

使用了三种集成回归模型(随机森林回归、极端随机森林回归、梯度提升回归)来预测波士顿房价,并对比它们的评估结果

git: https://github.com/linyi0604/MachineLearning

代码:

  1 from sklearn.datasets import load_boston
  2 from sklearn.cross_validation import train_test_split
  3 from sklearn.preprocessing import StandardScaler
  4 from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor
  5 from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
  6 import numpy as np
  7 
  8 '''
  9 随机森林回归
 10 极端随机森林回归
 11 梯度提升回归
 12 
 13 通常集成模型能够取得非常好的表现
 14 '''
 15 
 16 # 1 准备数据
 17 # 读取波士顿地区房价信息
 18 boston = load_boston()
 19 # 查看数据描述
 20 # print(boston.DESCR)   # 共506条波士顿地区房价信息,每条13项数值特征描述和目标房价
 21 # 查看数据的差异情况
 22 # print("最大房价:", np.max(boston.target))   # 50
 23 # print("最小房价:",np.min(boston.target))    # 5
 24 # print("平均房价:", np.mean(boston.target))   # 22.532806324110677
 25 
 26 x = boston.data
 27 y = boston.target
 28 
 29 # 2 分割训练数据和测试数据
 30 # 随机采样25%作为测试 75%作为训练
 31 x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=33)
 32 
 33 # 3 训练数据和测试数据进行标准化处理
 34 ss_x = StandardScaler()
 35 x_train = ss_x.fit_transform(x_train)
 36 x_test = ss_x.transform(x_test)
 37 
 38 ss_y = StandardScaler()
 39 y_train = ss_y.fit_transform(y_train.reshape(-1, 1))
 40 y_test = ss_y.transform(y_test.reshape(-1, 1))
 41 
 42 # 4 三种集成回归模型进行训练和预测
 43 # 随机森林回归
 44 rfr = RandomForestRegressor()
 45 # 训练
 46 rfr.fit(x_train, y_train)
 47 # 预测 保存预测结果
 48 rfr_y_predict = rfr.predict(x_test)
 49 
 50 # 极端随机森林回归
 51 etr = ExtraTreesRegressor()
 52 # 训练
 53 etr.fit(x_train, y_train)
 54 # 预测 保存预测结果
 55 etr_y_predict = rfr.predict(x_test)
 56 
 57 # 梯度提升回归
 58 gbr = GradientBoostingRegressor()
 59 # 训练
 60 gbr.fit(x_train, y_train)
 61 # 预测 保存预测结果
 62 gbr_y_predict = rfr.predict(x_test)
 63 
 64 # 5 模型评估
 65 # 随机森林回归模型评估
 66 print("随机森林回归的默认评估值为:", rfr.score(x_test, y_test))
 67 print("随机森林回归的R_squared值为:", r2_score(y_test, rfr_y_predict))
 68 print("随机森林回归的均方误差为:", mean_squared_error(ss_y.inverse_transform(y_test),
 69                                           ss_y.inverse_transform(rfr_y_predict)))
 70 print("随机森林回归的平均绝对误差为:", mean_absolute_error(ss_y.inverse_transform(y_test),
 71                                              ss_y.inverse_transform(rfr_y_predict)))
 72 
 73 # 极端随机森林回归模型评估
 74 print("极端随机森林回归的默认评估值为:", etr.score(x_test, y_test))
 75 print("极端随机森林回归的R_squared值为:", r2_score(y_test, gbr_y_predict))
 76 print("极端随机森林回归的均方误差为:", mean_squared_error(ss_y.inverse_transform(y_test),
 77                                             ss_y.inverse_transform(gbr_y_predict)))
 78 print("极端随机森林回归的平均绝对误差为:", mean_absolute_error(ss_y.inverse_transform(y_test),
 79                                                ss_y.inverse_transform(gbr_y_predict)))
 80 
 81 # 梯度提升回归模型评估
 82 print("梯度提升回归回归的默认评估值为:", gbr.score(x_test, y_test))
 83 print("梯度提升回归回归的R_squared值为:", r2_score(y_test, etr_y_predict))
 84 print("梯度提升回归回归的均方误差为:", mean_squared_error(ss_y.inverse_transform(y_test),
 85                                             ss_y.inverse_transform(etr_y_predict)))
 86 print("梯度提升回归回归的平均绝对误差为:", mean_absolute_error(ss_y.inverse_transform(y_test),
 87                                                ss_y.inverse_transform(etr_y_predict)))
 88 
 89 '''
 90 随机森林回归的默认评估值为: 0.8391590262557747
 91 随机森林回归的R_squared值为: 0.8391590262557747
 92 随机森林回归的均方误差为: 12.471817322834646
 93 随机森林回归的平均绝对误差为: 2.4255118110236227
 94 
 95 极端随机森林回归的默认评估值为: 0.783339502805047
 96 极端随机森林回归的R_squared值为: 0.8391590262557747
 97 极端随机森林回归的均方误差为: 12.471817322834646
 98 极端随机森林回归的平均绝对误差为: 2.4255118110236227
 99 
100 GradientBoostingRegressor回归的默认评估值为: 0.8431187344932869
101 GradientBoostingRegressor回归的R_squared值为: 0.8391590262557747
102 GradientBoostingRegressor回归的均方误差为: 12.471817322834646
103 GradientBoostingRegressor回归的平均绝对误差为: 2.4255118110236227
104 '''

猜你喜欢

转载自www.cnblogs.com/Lin-Yi/p/8972051.html