# 机器学习-构建机器学习流水线 (Machine learning: building a machine-learning pipeline)

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.feature_selection import f_classif
from sklearn.pipeline import Pipeline

# The private module ``sklearn.datasets.samples_generator`` was deprecated in
# scikit-learn 0.22 and removed in 0.24. Keep the name importable for any
# remaining users by falling back to the public package, which exposes the
# same generator functions.
try:
    from sklearn.datasets import samples_generator
except ImportError:
    from sklearn import datasets as samples_generator

# Generate a synthetic classification dataset; change n_features to alter the
# dimensionality of the feature vectors. The public make_classification
# function is used instead of the removed private samples_generator module.
X, y = make_classification(
    n_informative=4, n_features=20, n_redundant=0, random_state=5)

# Feature selector: keep the 10 highest-scoring features. The target is a
# class label, so features are scored with the classification F-test
# (f_classif) rather than the regression one (f_regression).
selector_k_best = SelectKBest(f_classif, k=10)
# Random forest classifier.
classifier = RandomForestClassifier(n_estimators=50, max_depth=4)
# Chain the selector and the classifier into a single pipeline.
pipelineClassifier = Pipeline([
    ('selector', selector_k_best),
    ('rf', classifier),
])
# Parameters of individual steps can be updated afterwards with the
# "<step name>__<parameter>" form (note the double underscore), e.g.:
# pipelineClassifier.set_params(selector__k=6, rf__n_estimators=25)
# Train the pipeline.
pipelineClassifier.fit(X, y)
# Predict the labels of the training samples.
predict = pipelineClassifier.predict(X)
print('Predictions:\n', predict)
# Evaluate the classifier. NOTE: the score is computed on the same data the
# pipeline was fitted on, so it is an optimistic estimate of performance.
print(pipelineClassifier.score(X, y))

# 猜你喜欢 (You may also like)

# 转载自 (reposted from): https://blog.csdn.net/u012967763/article/details/79239979