Python 3 — learning the scikit-learn API: regression-tree example
git: https://github.com/linyi0604/MachineLearning
Code:
"""Regression-tree demo: predict Boston housing prices with a DecisionTreeRegressor.

Regression tree notes:
    Strictly speaking, a regression tree does not perform regression in the
    classical sense: each leaf holds the mean target of a group of training
    samples, so predictions are piecewise-constant rather than continuous.

    Pros:
        - Handles nonlinear feature/target relationships.
        - Does not require feature standardization or unified scaling.
    Cons:
        - Easily grows too complex and loses generalization ability (overfits).
        - Unstable: slight data changes can lead to large changes in tree structure.
"""
# NOTE(review): `load_boston` is deprecated (sklearn 1.0) and removed in
# sklearn 1.2 for ethical reasons — for newer versions use an alternative
# dataset such as `fetch_california_housing`.
from sklearn.datasets import load_boston
# FIX: `sklearn.cross_validation` was removed in scikit-learn 0.20;
# `train_test_split` now lives in `sklearn.model_selection`.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import numpy as np

# 1 Prepare data: read the housing-price information for the Boston area.
boston = load_boston()
# View the data description:
# print(boston.DESCR)  # 506 samples, each with 13 numeric features and a target price
# View the spread of the target:
# print("Maximum house price:", np.max(boston.target))   # 50
# print("Minimum house price:", np.min(boston.target))   # 5
# print("Average house price:", np.mean(boston.target))  # 22.532806324110677

x = boston.data
y = boston.target

# 2 Split into training and test data:
# randomly sample 25% as test, 75% as training.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=33)

# 3 Standardize (zero mean, unit variance) training and test data.
# The scalers are fit on the training split only, then applied to test data.
ss_x = StandardScaler()
x_train = ss_x.fit_transform(x_train)
x_test = ss_x.transform(x_test)

ss_y = StandardScaler()
# StandardScaler requires 2-D input, hence the reshape to a column vector.
y_train = ss_y.fit_transform(y_train.reshape(-1, 1))
y_test = ss_y.transform(y_test.reshape(-1, 1))

# 4 Train a regression tree and predict.
dtr = DecisionTreeRegressor()
# FIX: ravel the column-vector y to 1-D; passing a (n, 1) array to fit()
# raises a DataConversionWarning in modern scikit-learn.
dtr.fit(x_train, y_train.ravel())
# predict() returns a 1-D array of standardized prices.
dtr_y_predict = dtr.predict(x_test)

# 5 Model evaluation.
# FIX: inverse_transform requires 2-D input in modern scikit-learn, so the
# 1-D prediction array is reshaped to a column vector before un-scaling.
y_test_orig = ss_y.inverse_transform(y_test)
y_predict_orig = ss_y.inverse_transform(dtr_y_predict.reshape(-1, 1))

print(" The default evaluation value of the regression tree is: ",
      dtr.score(x_test, y_test))
print(" The R_squared value of the flat regression tree is: ",
      r2_score(y_test, dtr_y_predict))
print(" The mean squared error of the regression tree is: ",
      mean_squared_error(y_test_orig, y_predict_orig))
print(" The mean absolute error of the regression tree is: ",
      mean_absolute_error(y_test_orig, y_predict_orig))

'''
Example output (tree construction is not seeded, so exact values vary):
The default evaluation value of the regression tree is: 0.7066505912533438
The R_squared value of the flat regression tree is: 0.7066505912533438
The mean squared error of the regression tree is: 22.746692913385836
The mean absolute error of the regression tree is: 3.08740157480315
'''