随机森林特征选择

from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Random-forest feature selection.
#
# Expects two names to already be defined by the surrounding code:
#   X -- feature table exposing `.columns` (presumably a pandas DataFrame)
#   y -- target labels accepted by RandomForestClassifier.fit
#
# Produces:
#   selected_feat_names -- set of column names whose importance reached the
#                          threshold in at least one training round
#   imp_fea             -- pandas Series (name -> importance) of the features
#                          kept in the LAST round, in descending importance

# Minimum feature importance for a column to be kept in a given round.
IMPORTANCE_THRESHOLD = 0.0001
# Number of independently trained forests whose selections are merged.
N_ROUNDS = 10

selected_feat_names = set()
for _ in range(N_ROUNDS):
    # NOTE(review): the original comment said the rounds take the
    # *intersection*, but the code merges with `|=`, i.e. the UNION of the
    # per-round selections. The union behaviour is preserved here.
    tmp = set()
    rfc = RandomForestClassifier(n_jobs=-1)
    rfc.fit(X, y)

    importances = rfc.feature_importances_
    indices = np.argsort(importances)[::-1]  # column positions, most important first

    S = {}
    for idx in indices:
        importance = importances[idx]
        if importance >= IMPORTANCE_THRESHOLD:
            name = X.columns[idx]
            tmp.add(name)
            S[name] = importance

    selected_feat_names |= tmp

# S still holds the selection of the final round at this point.
imp_fea = pd.Series(S)
print(len(selected_feat_names), "features are selected")

猜你喜欢

转载自www.cnblogs.com/bonelee/p/9083841.html