The Road to Machine Learning: Predicting Boston Housing Prices with Python's DecisionTreeRegressor (Regression Tree)

 

Learning the use of the API with Python 3.

git: https://github.com/linyi0604/MachineLearning

Code:

from sklearn.datasets import load_boston
# cross_validation was renamed to model_selection in newer scikit-learn releases
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import numpy as np

'''
Regression tree:
    Strictly speaking, a regression tree does not do regression in the usual sense:
    each leaf node holds a group of training samples, and the prediction is their mean,
    not a continuously varying value.

    Pros:
    Handles nonlinear relationships between the features and the target
    Does not require feature standardization or a unified scale
    (see the no-scaling sketch after the output below)

    Cons:
    Easily grows too complex and loses the ability to generalize (overfitting)
    Poor stability: slight changes in the data can lead to major changes in the tree structure
    (see the max_depth sketch after the output below)
'''

# 1 Prepare the data
# Read the Boston housing price data
# Note: load_boston was removed in scikit-learn 1.2+, so this script needs an older version
boston = load_boston()
# View the data description
# print(boston.DESCR)  # 506 housing records from the Boston area, each with 13 numerical features and a target price
# View the spread of the target values
# print("Maximum house price:", np.max(boston.target))   # 50
# print("Minimum house price:", np.min(boston.target))   # 5
# print("Average house price:", np.mean(boston.target))  # 22.532806324110677

x = boston.data
y = boston.target

# 2 Split into training and test data
# Randomly sample 25% as the test set, 75% as the training set
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=33)

# 3 Standardize the training and test data
ss_x = StandardScaler()
x_train = ss_x.fit_transform(x_train)
x_test = ss_x.transform(x_test)

ss_y = StandardScaler()
y_train = ss_y.fit_transform(y_train.reshape(-1, 1))
y_test = ss_y.transform(y_test.reshape(-1, 1))

# 4 Train the regression tree and predict
# Initialize the regression tree model
dtr = DecisionTreeRegressor()
# Train
dtr.fit(x_train, y_train.ravel())
# Predict and keep the prediction result
dtr_y_predict = dtr.predict(x_test)

# 5 Model evaluation
print("The default evaluation score of the regression tree is:", dtr.score(x_test, y_test))
print("The R-squared value of the regression tree is:", r2_score(y_test, dtr_y_predict))
print("The mean squared error of the regression tree is:",
      mean_squared_error(ss_y.inverse_transform(y_test),
                         ss_y.inverse_transform(dtr_y_predict.reshape(-1, 1))))
print("The mean absolute error of the regression tree is:",
      mean_absolute_error(ss_y.inverse_transform(y_test),
                          ss_y.inverse_transform(dtr_y_predict.reshape(-1, 1))))

'''
The default evaluation score of the regression tree is: 0.7066505912533438
The R-squared value of the regression tree is: 0.7066505912533438
The mean squared error of the regression tree is: 22.746692913385836
The mean absolute error of the regression tree is: 3.08740157480315
'''
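For reference, the default score reported by DecisionTreeRegressor is the coefficient of determination R-squared, which is why it matches the r2_score line exactly. A minimal sketch of what that number measures; the helper name r_squared is purely illustrative and not part of scikit-learn:

import numpy as np

def r_squared(y_true, y_pred):
    # R^2 = 1 - (residual sum of squares) / (total sum of squares)
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    ss_res = np.sum((y_true - y_pred) ** 2)
    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
    return 1.0 - ss_res / ss_tot

# Should agree with dtr.score(x_test, y_test) and r2_score(y_test, dtr_y_predict):
# print(r_squared(y_test, dtr_y_predict))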

 
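As the note at the top of the script says, a regression tree does not actually need standardized features or targets. Below is a minimal sketch of the same experiment without StandardScaler, assuming the same scikit-learn environment as the script above; the _raw variable names are just for illustration. The advantage is that the errors come out directly in the original price units, with no inverse_transform step:

from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error

boston = load_boston()
x_train_raw, x_test_raw, y_train_raw, y_test_raw = train_test_split(
    boston.data, boston.target, test_size=0.25, random_state=33)

# No scaling of the features or the target
dtr_raw = DecisionTreeRegressor()
dtr_raw.fit(x_train_raw, y_train_raw)
y_pred_raw = dtr_raw.predict(x_test_raw)

# Errors are already in thousands of dollars, no inverse_transform needed
print("MSE without scaling:", mean_squared_error(y_test_raw, y_pred_raw))
print("MAE without scaling:", mean_absolute_error(y_test_raw, y_pred_raw))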

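The cons listed in the script (a fully grown tree overfits easily and is unstable) are usually handled by limiting the depth of the tree. A minimal sketch that searches over max_depth with cross-validation, assuming the standardized x_train, y_train, x_test, y_test from the script above; the grid values are only an example:

from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor

param_grid = {"max_depth": [2, 3, 4, 5, 6, 8, 10, None]}
grid = GridSearchCV(DecisionTreeRegressor(random_state=33),
                    param_grid, cv=5, scoring="r2")
grid.fit(x_train, y_train.ravel())

print("Best max_depth:", grid.best_params_["max_depth"])
print("Best cross-validated R-squared:", grid.best_score_)
print("Test-set R-squared at that depth:", grid.best_estimator_.score(x_test, y_test))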