sklearn Official Documentation Study Notes: Pipeline and FeatureUnion — Combining Estimators

sklearn official documentation study notes pipeline and feature union: combined estimator

Pipeline and FeatureUnion: combining estimators

Pipeline: Chaining Estimators

from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.decomposition import PCA
estimators=[('reduce_dim',PCA()),('clf',SVC())]#其中的'reduce_dim'是自定义的步骤名字
pipe=Pipeline(estimators)
pipe
Pipeline(steps=[('reduce_dim', PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('clf', SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False))])
from sklearn.pipeline import make_pipeline
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import Binarizer
make_pipeline(Binarizer(),MultinomialNB())#make_pipeline是上面代码的一种简写形式
Pipeline(steps=[('binarizer', Binarizer(copy=True, threshold=0.0)), ('multinomialnb', MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True))])
pipe.steps[0]#steps属性里以列表形式存着管道中的估计器
('reduce_dim',
 PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
   svd_solver='auto', tol=0.0, whiten=False))
pipe.named_steps['reduce_dim']#在named_steps属性中以dict形式存着步骤
PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)
pipe.set_params(clf__C=10)#以这种形式给指定名字的估计器(clf)的参数(C)赋值 <estimator>__<parameter>
Pipeline(steps=[('reduce_dim', PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('clf', SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False))])

This step__parameter naming syntax is especially important for grid searches, where it identifies which step's parameter should be varied:

from sklearn.model_selection import GridSearchCV
params=dict(reduce_dim__n_components=[2,5,10],#设置reduce_dim的n_components参数为多个值以供选取模型最优值
            clf__C=[0.1,10,100])#在模型选择过程中可以设置参数列表
grid_search=GridSearchCV(pipe,param_grid=params)

Individual steps can also be replaced as parameters of the grid search, and non-final steps can be skipped by setting them to None:

from sklearn.linear_model import LogisticRegression
params=dict(reduce_dim=[None,PCA(5),PCA(10)],clf=[SVC(),LogisticRegression()],clf__C=[0.1,10,100])
grid_search=GridSearchCV(pipe,param_grid=params)

FeatureUnion: Composite Feature Spaces

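A FeatureUnion combines several transformer objects into a new transformer that merges their outputs. During fitting, each transformer is fit to the data independently; when transforming, the transformers are applied in parallel and the sample vectors they output are concatenated end to end into larger vectors.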
FeatureUnion serves the same purposes as the Pipeline in the previous section: convenience, and joint parameter estimation and validation. The two can be combined to build more complex models.

from sklearn.pipeline import FeatureUnion
from sklearn.decomposition import PCA
from sklearn.decomposition import KernelPCA
estimators=[('linear_pca',PCA()),('kernel_pca',KernelPCA())]
combined=FeatureUnion(estimators)
combined
FeatureUnion(n_jobs=1,
       transformer_list=[('linear_pca', PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('kernel_pca', KernelPCA(alpha=1.0, coef0=1, copy_X=True, degree=3, eigen_solver='auto',
     fit_inverse_transform=False, gamma=None, kernel='linear',
     kernel_params=None, max_iter=None, n_components=None, n_jobs=1,
     random_state=None, remove_zero_eig=False, tol=0))],
       transformer_weights=None)

Like Pipeline, FeatureUnion has a shorthand constructor called make_union that does not require explicitly naming the components, e.g. make_union(PCA(), KernelPCA()).

combined.set_params(kernel_pca=None)#Pipeline一样,可以使用set_params替换单独的步骤,并通过设置为None来忽略
FeatureUnion(n_jobs=1,
       transformer_list=[('linear_pca', PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('kernel_pca', None)],
       transformer_weights=None)

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=326029482&siteId=291194637