Model selection

通常,机器学习中最难的部分是选择合适的 estimator(估计器):不同的 estimator 适用于不同的数据集和问题。



def gen_estimators():
    """Build the list of candidate linear models to compare.

    Returns:
        A new list of ``(label, estimator, flag)`` tuples. ``label`` is the
        display name, ``estimator`` is a freshly constructed, unfitted
        scikit-learn estimator, and ``flag`` is ``True`` for every entry
        (its meaning is not defined in this file).
    """
    # Imported here so the function does not rely on a module-level
    # ``linear_model`` binding being present.
    from sklearn import linear_model

    return [
        ('Lasso regression', linear_model.Lasso(alpha=0.1), True),
        ('Ridge regression', linear_model.Ridge(alpha=0.1), True),
        # scikit-learn has no ``linear_model.Hinge``; hinge is a loss function,
        # exposed through SGDClassifier(loss='hinge').
        # NOTE(review): this entry and LogisticRegression are classifiers while
        # the other entries are regressors -- confirm the mix suits the target.
        ('Hinge regression', linear_model.SGDClassifier(loss='hinge'), True),
        ('Lassolars regression', linear_model.LassoLars(alpha=0.1), True),
        (
            'OrthogonalMatchingPursuitCV regression',
            linear_model.OrthogonalMatchingPursuitCV(),
            True,
        ),
        ('BayesianRidge regression', linear_model.BayesianRidge(), True),
        (
            'PassiveAggressiveRegressor regression',
            linear_model.PassiveAggressiveRegressor(),
            True,
        ),
        # The class is named HuberRegressor; ``linear_model.Huber`` does not exist.
        ('HuberRegressor regression', linear_model.HuberRegressor(), True),
        ('LogisticRegression regression', linear_model.LogisticRegression(), True),
    ]
遍历所有 estimator,观察各自的交叉验证得分:
def cross_validate():
    """Fit each candidate estimator and print its coefficients and CV scores.

    Reads the module-level feature matrix ``X`` and target ``y``. For every
    entry returned by ``gen_estimators()`` it:

    1. fits the estimator on the training part of a fixed 60/40 split,
    2. prints the label and the fitted ``coef_``,
    3. prints the 5-fold ``cross_val_score`` on the full ``X``/``y`` using
       ROC AUC scoring.

    Returns:
        None. Results are written to stdout only.
    """
    # Imported here so the function does not rely on module-level bindings.
    from sklearn.model_selection import cross_val_score, train_test_split

    # The split is seeded and independent of the estimator, so compute it once
    # instead of once per loop iteration. The test part is not used here.
    x_train, _x_test, y_train, _y_test = train_test_split(
        X, y, test_size=0.4, random_state=0
    )

    for name, clf, _flag in gen_estimators():
        clf.fit(x_train, y_train)
        print(name, '\n', clf.coef_)
        # 'roc_aus' is not a registered scorer name; the ROC AUC scorer is
        # spelled 'roc_auc'.
        # NOTE(review): 'roc_auc' expects a binary target; a multiclass ``y``
        # needs e.g. 'roc_auc_ovr' -- confirm against the data in use.
        scores = cross_val_score(clf, X, y, cv=5, scoring='roc_auc')
        print(scores)

scikit-learn交叉验证
hold-out 测试(留出法):将数据划分为训练集和测试集

from sklearn.datasets import load_iris
# ``sklearn.cross_validation`` was deprecated in scikit-learn 0.18 and removed
# in 0.20; ``train_test_split`` now lives in ``sklearn.model_selection``.
from sklearn.model_selection import train_test_split

# Load the iris data set: X holds the feature matrix, y the class labels.
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 40% of the samples for testing. ``random_state`` is the random seed:
# if it is not set, the split differs on every run.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=0
)
print(y_train)
print(y_test)


猜你喜欢

转载自blog.csdn.net/lengxiaomo123/article/details/68924759