利用决策树预测房价
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
# Dataset loading utilities
import sklearn.datasets as sd
# Shuffling / small helpers
import sklearn.utils as su
import sklearn.tree as st
import sklearn.ensemble as se
# Model evaluation metrics
import sklearn.metrics as sm

# Load the Boston housing dataset.
# NOTE(review): sklearn.datasets.load_boston was deprecated in 1.0 and removed
# in scikit-learn 1.2 (ethical concerns over the 'B' feature). On modern
# versions use sklearn.datasets.fetch_california_housing() instead — confirm
# which scikit-learn version this tutorial targets.
housing = sd.load_boston()
print(housing.data.shape)
# Target: median house price
print(housing.target.shape)
# Feature names
print(housing.feature_names)
# Feature meanings: CRIM (crime rate), ZN (residential land zoning),
# INDUS (non-retail business acres), CHAS (bounds Charles River),
# NOX (nitric-oxide concentration), RM (rooms per dwelling), AGE (unit age),
# DIS (distance to employment centers), RAD (highway accessibility),
# TAX (property-tax rate), PTRATIO (pupil-teacher ratio),
# B (proportion of Black residents), LSTAT (% lower-status population)

# Shuffle the samples so the train/test split is random but reproducible.
x, y = su.shuffle(housing.data, housing.target, random_state=7)
# Use 80% of the data for training.
train_size = int(len(x) * 0.8)
# Split into training and test sets.
train_x, test_x = x[:train_size], x[train_size:]
train_y, test_y = y[:train_size], y[train_size:]

# Single decision-tree regressor, depth capped at 4 to limit overfitting.
model = st.DecisionTreeRegressor(max_depth=4)
model.fit(train_x, train_y)
pred_test_y = model.predict(test_x)
# R^2 score: 1.0 is a perfect fit.
print(sm.r2_score(test_y, pred_test_y))

# Boosted ensemble (AdaBoost) of 400 depth-4 trees for comparison.
model = se.AdaBoostRegressor(
    st.DecisionTreeRegressor(max_depth=4), n_estimators=400, random_state=7)
model.fit(train_x, train_y)
pred_test_y = model.predict(test_x)
# R^2 score of the boosted model.
print(sm.r2_score(test_y, pred_test_y))

# Show actual vs. predicted prices side by side.
for test, pred_test in zip(test_y, pred_test_y):
    print(test, '->', pred_test)
特征值的重要性排序
不同的算法对特征的认识不一样,下面对比一下决策树和正向激励(AdaBoost)两种模型给出的特征重要性排序。