from __future__ import division import tensorflow as tf import math import csv from sklearn import metrics import numpy as np from pylab import* from sklearn import cross_validation from sklearn.ensemble import RandomForestRegressor from sklearn.utils import shuffle from sklearn import preprocessing import matplotlib.pyplot as plt i=0 j=[] data = [] X = [] indicess = [] file = open("E:/predict_rf_para.txt", 'a') with open(r'D:\夏季.csv') as f: reader = csv.reader(f) for row in reader: if i == 0: i += 1 continue else: data.append(row[:]) print("data",data[1]) data = np.array(data) print("the shape of data",np.shape(data)) m,n = np.shape(data) print("the shape of data",m,n) for i in range(m): for j in range(n): data[i][j] = data[i][j].astype('float64') y = data[:,-1] y1 = data[:,-1] set1 = data[:,:-1] print("set1",set1) set2 = data[:,-1] def create_interval_dataset(dataset1, dataset2, xback, yback): dataX, dataY = [], [] for i in range(0, len(dataset1) - xback - yback, 1): dataX.append(dataset1[i:i+xback]) dataY.append(dataset2[i+xback:i+xback+yback]) return np.asarray(dataX), np.asarray(dataY) dataX, dataY = create_interval_dataset(set1, set2, xback=1, yback=1) print("dataY",np.shape(dataY)) print("dataX",np.shape(dataX)) for i in range(1): X_train, X_test, y_train, y_test = cross_validation.train_test_split(dataX, dataY, test_size=0.7) X_test = np.reshape(X_test, [-1, 7]) print("*********************") y_test = y_test.astype('float64') clf = RandomForestRegressor(oob_score=True, n_estimators=1500) clf = clf.fit(X_train, y_train.ravel()) predicted = clf.predict(X_test) predicted = predicted.astype('float64') X, y = X_train, y_train importances = clf.feature_importances_ std = np.std([tree.feature_importances_ for tree in clf.estimators_], axis=0) list=['0','1','2','3','4','5','6'] indices = np.argsort(importances)[::] dict = dict(map(lambda x,y:[x,y], list,importances)) dict1 = sorted(dict.items(),key = lambda item:item[1],reverse = True) 
# ---- Print, persist, and plot the feature-importance ranking ------------
print("importances", importances)
indices = np.argsort(importances)[::-1]  # feature indices, best first

# Feature names in the same descending-importance order as `indices`.
indicess = [name for name, _ in dict1]

print("Feature ranking:")
file.write("\n" + "\n")
file.write(" 雨天天气下随机森林算法中各个特征的排序 " + "\n")
file.write("———————————————————————————————————" + "\n")
for f in range(X.shape[1]):
    print("%d. feature %s (%f)" % (f + 1, dict1[f][0], importances[indices[f]]))
    file.write(str(f + 1) + " ")
    file.write(str(dict1[f][0]) + ": ")
    file.write(str(importances[indices[f]]))
    file.write("\n")
file.close()

plt.figure()
plt.title("Feature importances")
plt.bar(range(X.shape[1]), importances[indices],
        color="r", yerr=std[indices], align="center")
# BUG FIX: plt.xticks needs tick *positions* plus labels; the original
# passed only the label strings, so the labels were not placed under
# the corresponding bars.
plt.xticks(range(X.shape[1]), indicess)
plt.show()
# Python: ranking feature importances with a random forest.
# Reposted from blog.csdn.net/pwtd_huran/article/details/79729144