Learning machine learning APIs with Python 3
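Three ensemble regression models are used: random forest regression, extremely randomized trees (extra-trees) regression, and gradient boosting regression.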
git: https://github.com/linyi0604/MachineLearning
Code:
"""Compare three ensemble regressors on the Boston housing data set.

Models:
    * random forest regression
    * extremely randomized trees (extra-trees) regression
    * gradient boosting regression

Ensemble models usually achieve very good performance.
"""
# NOTE: load_boston was removed in scikit-learn 1.2, so this script needs an
# older scikit-learn release to run as written.
from sklearn.datasets import load_boston

try:
    from sklearn.model_selection import train_test_split
except ImportError:  # very old scikit-learn releases only ship this module
    from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, GradientBoostingRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import numpy as np

# 1 Prepare data
# Read the housing price information for the Boston area.
boston = load_boston()
# boston.DESCR describes the data: 506 samples, each with 13 numerical
# features and the target housing price.
# Target statistics: max 50, min 5, mean 22.532806324110677.

x = boston.data
y = boston.target

# 2 Split training data and test data
# Randomly sample 25% as the test set and 75% as the training set.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=33)

# 3 Standardize training data and test data
# The scalers are fitted on the training split only and then reused on the
# test split, so no test-set statistics leak into training.
ss_x = StandardScaler()
x_train = ss_x.fit_transform(x_train)
x_test = ss_x.transform(x_test)

# StandardScaler only accepts 2-D input, hence the single-column reshape.
ss_y = StandardScaler()
y_train = ss_y.fit_transform(y_train.reshape(-1, 1))
y_test = ss_y.transform(y_test.reshape(-1, 1))


def _to_original_scale(scaled_values):
    """Map standardized target values back to the original price scale.

    ``predict`` returns a 1-D array while ``StandardScaler`` works on 2-D
    arrays, so the values are reshaped into a single column first.

    Returns an array of shape (n_samples, 1).
    """
    return ss_y.inverse_transform(np.reshape(scaled_values, (-1, 1)))


# 4 Train the three ensemble regression models and predict
# The estimators expect a 1-D target, so the column vector is flattened.

# Random forest regression
rfr = RandomForestRegressor()
rfr.fit(x_train, y_train.ravel())
rfr_y_predict = rfr.predict(x_test)

# Extremely randomized trees regression
etr = ExtraTreesRegressor()
etr.fit(x_train, y_train.ravel())
# Predict with the extra-trees model itself (previously rfr was used here).
etr_y_predict = etr.predict(x_test)

# Gradient boosting regression
gbr = GradientBoostingRegressor()
gbr.fit(x_train, y_train.ravel())
# Predict with the gradient boosting model itself (previously rfr was used here).
gbr_y_predict = gbr.predict(x_test)

# 5 Model evaluation
# R^2 is computed on the standardized targets; MSE and MAE are computed after
# mapping both the truth and the predictions back to the original scale.

# Random forest regression model evaluation
print(" The default evaluation value of random forest regression is: ", rfr.score(x_test, y_test))
print(" The R_squared value of random forest regression is: ", r2_score(y_test, rfr_y_predict))
print(" The mean squared error of random forest regression is: ",
      mean_squared_error(_to_original_scale(y_test), _to_original_scale(rfr_y_predict)))
print(" The mean absolute error of random forest regression is: ",
      mean_absolute_error(_to_original_scale(y_test), _to_original_scale(rfr_y_predict)))

# Extremely randomized trees regression model evaluation
print(" The default evaluation value of extreme random forest regression is: ", etr.score(x_test, y_test))
print("The R_squared value of extreme random forest regression is: ", r2_score(y_test, etr_y_predict))
print(" The mean squared error of extreme random forest regression is: ",
      mean_squared_error(_to_original_scale(y_test), _to_original_scale(etr_y_predict)))
print(" The mean absolute error of extreme random forest regression is: ",
      mean_absolute_error(_to_original_scale(y_test), _to_original_scale(etr_y_predict)))

# Gradient boosting regression model evaluation
print(" gradient boosting The default evaluation value for regression regression is: ", gbr.score(x_test, y_test))
print(" The R_squared value of gradient boosting regression is: ", r2_score(y_test, gbr_y_predict))
print(" The mean squared error of gradient boosting regression is: ",
      mean_squared_error(_to_original_scale(y_test), _to_original_scale(gbr_y_predict)))
print(" The mean absolute error of gradient boosted regression is: ",
      mean_absolute_error(_to_original_scale(y_test), _to_original_scale(gbr_y_predict)))

# Output recorded from the ORIGINAL version of this script. In that version the
# extra-trees and gradient boosting predictions were both produced by the
# random forest model, which is why the R_squared / MSE / MAE values below are
# identical for all three models. Only the "default evaluation value" lines
# (model.score) reflected the individual models. Re-running the corrected
# script will give different, model-specific numbers (and they vary from run
# to run because the estimators are not seeded).
#
# The default evaluation value for random forest regression is: 0.8391590262557747
# The R_squared value for random forest regression is: 0.8391590262557747
# The mean squared error for random forest regression is: 12.471817322834646
# The mean absolute error for random forest regression is: 2.4255118110236227
# The default evaluation value for extreme random forest regression is: 0.783339502805047
# The R_squared value of the extreme random forest regression is: 0.8391590262557747
# The mean squared error of the extreme random forest regression is: 12.471817322834646
# The mean absolute error of the extreme random forest regression is: 2.4255118110236227
# The default evaluation value for gradient boosting regression is: 0.8431187344932869
# The R_squared value of the GradientBoostingRegressor regression is: 0.8391590262557747
# The mean squared error of the GradientBoostingRegressor regression is: 12.471817322834646
# The mean absolute error of GradientBoostingRegressor regression is: 2.4255118110236227