機械学習：スタッキングレビュー

これはオリジナルの記事ではありません。コードは葁サーによるものです。

import numpy as np
import pandas as pd
from pandas import Series,DataFrame

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# Import the stacking ensemble estimators
from sklearn.ensemble import StackingClassifier,StackingRegressor

# StackingClassifier(estimators=...)
# `estimators` is a list of (str, estimator) pairs:
# [(str, estimator),(str, estimator),(str, estimator),(str, estimator)……]
# str: a user-defined label describing a model that takes part in training
# estimator: an instantiated algorithm object; any algorithm object is allowed (including ensemble-learning objects)

from sklearn.datasets import make_gaussian_quantiles
# make_gaussian_quantiles can produce complex data that is not linearly separable

# Build a synthetic dataset from two Gaussian-quantile clouds
X1, y1 = make_gaussian_quantiles(
    n_samples=500,
    n_features=2,
    n_classes=2,
    cov=1.0,
)
X2, y2 = make_gaussian_quantiles(
    mean=[3, 3],
    n_samples=500,
    n_features=2,
    n_classes=2,
    cov=2.0,
)

# Stack both clouds into one dataset. The second cloud's labels are
# flipped (1 - y2) so the combined problem is harder to separate.
X = np.concatenate([X1, X2], axis=0)
y = np.concatenate([y1, 1 - y2], axis=0)

sns.set()

plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.rainbow)

<matplotlib.collections.PathCollection at 0x2562906fa30>

画像の説明を追加してください

# A complex, linearly non-separable problem

from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier

# Base learners, given as (name, estimator) pairs
estimators = [
    ("knn", KNeighborsClassifier()),
    ("LR", LogisticRegression()),
    ("GNB", GaussianNB()),
    ("DT", DecisionTreeClassifier()),
]

# Instantiate the stacking ensemble from the base learners above
sc = StackingClassifier(estimators=estimators)

# Train the ensemble on the full dataset
sc.fit(X, y)

StackingClassifier(estimators=[('knn', KNeighborsClassifier()),
                               ('LR', LogisticRegression()),
                               ('GNB', GaussianNB()),
                               ('DT', DecisionTreeClassifier())])

# Print each fitted base learner's class name and its predictions
# for the first ten samples
for model in sc.estimators_:
    print(type(model).__name__, model.predict(X[:10]))

KNeighborsClassifier [1 0 0 0 1 0 1 1 1 0]
LogisticRegression [0 0 0 1 1 0 1 0 0 1]
GaussianNB [0 1 0 1 0 1 0 0 0 0]
DecisionTreeClassifier [1 0 0 0 1 0 1 1 1 0]

def show_edge(model, X, y):
    """Fit ``model`` on ``(X, y)`` and plot its predictions over a 2-D grid.

    A 200 x 200 grid is laid over the range of the first two columns of
    ``X``. Every grid point is classified by the fitted model and drawn as
    a background scatter; the training samples are drawn on top, coloured
    by ``y``.

    Args:
        model: Object exposing ``fit(X, y)`` and ``predict(X)``. It is
            (re)fitted in place by this call.
        X: Array indexed as ``X[:, 0]`` and ``X[:, 1]`` for the two plotted
            features.
        y: Labels passed to ``model.fit`` and used to colour the samples.
    """
    feat0, feat1 = X[:, 0], X[:, 1]

    # 200 evenly spaced coordinates per axis, spanning the observed range.
    grid_x, grid_y = np.meshgrid(
        np.linspace(feat0.min(), feat0.max(), 200),
        np.linspace(feat1.min(), feat1.max(), 200),
    )
    # Flatten the mesh into one (x, y) row per grid point.
    grid_points = np.column_stack((grid_x.ravel(), grid_y.ravel()))

    model.fit(X, y)
    grid_labels = model.predict(grid_points)

    # Background: predicted label at every grid point.
    plt.scatter(
        grid_points[:, 0],
        grid_points[:, 1],
        c=grid_labels,
        cmap=plt.cm.Blues_r,
    )
    # Foreground: the training samples, coloured by their labels.
    plt.scatter(feat0, feat1, c=y, cmap=plt.cm.Accent, alpha=0.7)
    plt.show()

# Plot the stacking classifier's predictions over the grid
# (show_edge fits the model again before predicting)
show_edge(sc,X,y)

画像の説明を追加してください

ランダムフォレスト

from sklearn.ensemble import RandomForestClassifier

# Random forest classifier configured with n_estimators=500
rfc = RandomForestClassifier(n_estimators=500)

# show_edge fits the forest on (X, y) and plots its predictions over the grid
show_edge(rfc,X,y)

画像の説明を追加してください

GBDT

from sklearn.ensemble import GradientBoostingClassifier

# Gradient boosting classifier configured with n_estimators=300
GBC = GradientBoostingClassifier(n_estimators=300)

# show_edge fits the booster on (X, y) and plots its predictions over the grid
show_edge(GBC, X,y)

画像の説明を追加してください

機械学習：スタッキングレビュー

おすすめ