模型任务
接受波士顿地区的房价特征(共13个),
根据这些特征预测房子售价。
数据载入
import sklearn.datasets as sd
# Load the Boston housing dataset through scikit-learn's dataset helpers.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 — confirm the pinned scikit-learn version still provides it.
housing = sd.load_boston()
测试集和训练集划分
# Shuffle features and targets together so the split below does not depend on
# the dataset's original row order. random_state=7 is a fixed seed (not a
# time-based one), so the shuffle is reproducible from run to run.
# NOTE(review): `su` is not imported in the visible code — presumably
# sklearn.utils; confirm.
x, y = su.shuffle(housing.data, housing.target, random_state=7)

# The first 80% of the shuffled samples train the models; the rest is held
# out for testing.
train_size = int(len(x) * 0.8)
train_x, test_x = x[:train_size], x[train_size:]
train_y, test_y = y[:train_size], y[train_size:]
构建一般决策树模型&训练
# Baseline model: a single regression tree capped at depth 4, fitted on the
# training split.
# NOTE(review): `st` and `sm` are not imported in the visible code —
# presumably sklearn.tree and sklearn.metrics; confirm.
model = st.DecisionTreeRegressor(max_depth=4).fit(train_x, train_y)
pred_test_y = model.predict(test_x)

# Print the R^2 score on the held-out split. This is a goodness-of-fit score
# (higher is better), not an error value.
print(sm.r2_score(y_true=test_y, y_pred=pred_test_y))
构建正向激励(AdaBoost)的决策树模型
# Boosted model: AdaBoost over depth-4 regression trees, with the number of
# boosting rounds capped at 400 and a fixed seed for reproducibility.
# NOTE(review): `se` is not imported in the visible code — presumably
# sklearn.ensemble; confirm.
model_se = se.AdaBoostRegressor(
    st.DecisionTreeRegressor(max_depth=4),
    n_estimators=400,
    random_state=7,
)
model_se.fit(train_x, train_y)

# Rebinds pred_test_y: from here on it holds the AdaBoost predictions.
pred_test_y = model_se.predict(test_x)
print(sm.r2_score(y_true=test_y, y_pred=pred_test_y))

# List every held-out target next to its predicted value.
for test, pred_test in zip(test_y, pred_test_y):
    print(test, '->', pred_test)
查看特征影响
# Per-feature importances learned by the single decision tree and by the
# AdaBoost ensemble. (A random forest exposes the same feature_importances_
# attribute.)
fi_dt = model.feature_importances_
fi_ab = model_se.feature_importances_

# Indices that order the importances from largest to smallest.
# NOTE: sorted_indexs is bound twice; after this section it holds the
# AdaBoost ordering only, since the decision-tree ordering is overwritten.
sorted_indexs = fi_dt.argsort()[::-1]
sorted_indexs = fi_ab.argsort()[::-1]
绘制特征重要性排序
# Draw the two feature-importance rankings as stacked bar charts: the single
# decision tree on top, the AdaBoost ensemble below.
# NOTE(review): `mp` is not imported in the visible code — presumably
# matplotlib.pyplot; confirm.

# The tick labels come from the dataset itself. np.asarray guarantees the
# names can be reordered with the index arrays computed below.
feature_names = np.asarray(housing.feature_names)

mp.figure('Feature Importance', facecolor='lightgray')

# --- Top panel: single decision tree ---
mp.subplot(211)
mp.title('Decision tree', fontsize=16)
mp.ylabel('Importance', fontsize=12)
mp.tick_params(labelsize=10)
mp.grid(axis='y', linestyle=':')
# Order the bars from most to least important feature.
sorted_indexs = fi_dt.argsort()[::-1]
pos = np.arange(sorted_indexs.size)
mp.bar(pos, fi_dt[sorted_indexs],
       facecolor='deepskyblue',
       edgecolor='steelblue')
mp.xticks(pos, feature_names[sorted_indexs], rotation=30)

# --- Bottom panel: AdaBoost ensemble ---
mp.subplot(212)
mp.title('AdaBoost Decision Tree', fontsize=16)
mp.xlabel('Feature', fontsize=12)
mp.ylabel('Importance', fontsize=12)
mp.tick_params(labelsize=10)
mp.grid(axis='y', linestyle=':')
# Each panel uses its own ordering, so the x-axis feature order may differ
# between the two charts.
sorted_indexs = fi_ab.argsort()[::-1]
pos = np.arange(sorted_indexs.size)
mp.bar(pos, fi_ab[sorted_indexs],
       facecolor='lightcoral',
       edgecolor='indianred')
mp.xticks(pos, feature_names[sorted_indexs], rotation=30)

mp.tight_layout()
mp.show()