ML12:特征重要性 选取

1.学习模型对象的feature_importances_属性,表示各个特征的相对重要性。不同的模型针对特征所认为的重要性不尽相同,但应该大体符合实际业务逻辑。
2.由学习模型输出的特征相对重要性与算法和特征集紧密相关,从不同的算法模型或特征集合中可能会得出完全不同的重要性排列。

from __future__ import unicode_literals
import numpy as np
import sklearn.datasets as sd
import sklearn.utils as su
import sklearn.tree as st
import sklearn.ensemble as se
import matplotlib.pyplot as mp
def _plot_importances(position, title, importances, names,
                      facecolor, edgecolor, xlabel=None):
    """Draw one bar chart of feature importances, largest first.

    Args:
        position: Subplot specifier passed to ``mp.subplot`` (e.g. 211).
        title: Title shown above the chart.
        importances: 1-D array of per-feature importance scores.
        names: Array of feature names, indexable by an integer array and
            aligned with ``importances``.
        facecolor: Fill colour of the bars.
        edgecolor: Outline colour of the bars.
        xlabel: Optional x-axis label; omitted when ``None``.
    """
    mp.subplot(position)
    mp.title(title, fontsize=16)
    if xlabel is not None:
        mp.xlabel(xlabel, fontsize=12)
    mp.ylabel('Importance', fontsize=12)
    mp.tick_params(labelsize=10)
    mp.grid(axis='y', linestyle=':')
    # argsort is ascending; reverse it so the most important feature is first.
    order = importances.argsort()[::-1]
    pos = np.arange(order.size)
    mp.bar(pos, importances[order],
           facecolor=facecolor, edgecolor=edgecolor)
    mp.xticks(pos, names[order], rotation=30)


# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this script needs an older scikit-learn release to run as written.
housing = sd.load_boston()
feature_names = housing.feature_names

# Shuffle with a fixed seed so the train/test split is reproducible.
x, y = su.shuffle(housing.data, housing.target, random_state=7)

# First 80% of the shuffled samples for training, the rest held out.
# The held-out part is not used further in this script.
train_size = int(len(x) * 0.8)
train_x, test_x = x[:train_size], x[train_size:]
train_y, test_y = y[:train_size], y[train_size:]

# Single decision tree, depth limited to 4.
dt_model = st.DecisionTreeRegressor(max_depth=4)
dt_model.fit(train_x, train_y)
feature_importances_dt = dt_model.feature_importances_

# AdaBoost ensemble of 400 depth-4 decision trees.
ab_model = se.AdaBoostRegressor(
    st.DecisionTreeRegressor(max_depth=4),
    n_estimators=400, random_state=7)
ab_model.fit(train_x, train_y)
feature_importances_ab = ab_model.feature_importances_

# Two stacked charts, one per model, for a side-by-side comparison of
# how each model ranks the same features.
mp.figure(num='Feature Importance', facecolor='lightgray')
_plot_importances(211, 'Decision Tree',
                  feature_importances_dt, feature_names,
                  facecolor='deepskyblue', edgecolor='steelblue')
_plot_importances(212, 'AdaBoost Decision Tree',
                  feature_importances_ab, feature_names,
                  facecolor='lightcoral', edgecolor='indianred',
                  xlabel='Feature')
mp.tight_layout()
mp.show()

猜你喜欢

转载自blog.csdn.net/weixin_38246633/article/details/80590533