python 自动调参工具 hyperopt 教程

一、介绍

hyperopt 是一个自动调参工具，与 sklearn 的 GridSearchCV 相比，hyperopt 具有更加完善的功能，且模型不必符合 sklearn 接口规范。

1.1. 项目地址

1.2. 安装方法

pip install hyperopt

二、使用

2.1. 官方样例解读

# 定义一个需要优化的目标函数，比如可以封装成xgb的一个评估函数，以给定超参训练完xgb后，对回归问题返回 MAE 的值
def objective(args):
    """Return the loss for one sampled point of the search space.

    ``args`` is a ``(case, val)`` pair. For ``'case 1'`` the loss is ``val``
    itself; for any other case it is ``val`` squared.
    """
    branch, value = args
    return value if branch == 'case 1' else value ** 2

# Everything the search needs from hyperopt.
from hyperopt import fmin, hp, space_eval, tpe

# Search space: a categorical choice 'a' between two branches, each branch
# carrying its own continuous hyperparameter.
space = hp.choice(
    'a',
    [
        ('case 1', 1 + hp.lognormal('c1', 0, 1)),
        ('case 2', hp.uniform('c2', -10, 10)),
    ],
)

# Search the space for the point that minimizes the objective, using the
# tpe.suggest algorithm for at most 100 evaluations.
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=100)

print(best)
# -> {'a': 1, 'c2': 0.01420615366247227}
print(space_eval(space, best))
# -> ('case 2', 0.01420615366247227)

2.2. 搜索空间设置

hp.choice(label,[options])
- 返回 options 中的一个选项，options 可以是 list 或者 tuple。选项本身可以是嵌套的表达式，用于组成条件参数。
hp.pchoice(label,p_options)
- 按给定的概率返回 p_options 中的一个选项。p_options 是由 (概率, 选项) 二元组组成的列表，因此在搜索过程中各个选项被选中的可能性可以不均等。
hp.uniform(label,low,high)
- 参数在low和high之间均匀分布。
hp.quniform(label,low,high,q)
- 参数的取值是round(uniform(low,high)/q)*q，适用于那些离散的取值。
hp.loguniform(label,low,high)
- 返回exp(uniform(low,high))，即返回值的对数服从均匀分布，变量的取值范围是[exp(low),exp(high)]
hp.randint(label,upper)
- 返回一个在[0,upper)前闭后开的区间内的随机整数。

2.3. XGBoost Hyperopt Demo

代码样例来自《python调参神器hyperopt》

#coding:utf-8
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import xgboost as xgb
from random import shuffle
from xgboost.sklearn import XGBClassifier
# sklearn.cross_validation was removed in scikit-learn 0.20; prefer its
# replacement and fall back to the old module only on legacy installs.
try:
    from sklearn.model_selection import cross_val_score
except ImportError:
    from sklearn.cross_validation import cross_val_score
import pickle
import time
from hyperopt import fmin, tpe, hp,space_eval,rand,Trials,partial,STATUS_OK

def loadFile(fileName = "E://zalei//browsetop200Pca.csv"):
    """Read a header-less CSV file and return its contents as a NumPy array.

    fileName: path of the CSV file; the first row is treated as data because
    the file is read with ``header=None``.
    """
    return pd.read_csv(fileName, header=None).values

# Load the table and split it into features and labels: the last column is
# the label, every other column is a feature.
data = loadFile()
label = data[:,-1]
attrs = data[:,:-1]
labels = label.reshape((1,-1))  # labels as one row: shape (1, n_samples)
label = labels.tolist()[0]

# Rescale the feature columns with MinMaxScaler (default settings).
minmaxscaler = MinMaxScaler()
attrs = minmaxscaler.fit_transform(attrs)

# Shuffle the sample indices and cut them 70% / 30% into train and test.
# random.shuffle works in place and needs a mutable sequence, so the range is
# materialized as a list first (range() is not a list on Python 3).
index = list(range(0,len(label)))
shuffle(index)
trainIndex = index[:int(len(label)*0.7)]
print(len(trainIndex))
testIndex = index[int(len(label)*0.7):]
print(len(testIndex))
attr_train = attrs[trainIndex,:]
print(attr_train.shape)
attr_test = attrs[testIndex,:]
print(attr_test.shape)
label_train = labels[:,trainIndex].tolist()[0]
print(len(label_train))
label_test = labels[:,testIndex].tolist()[0]
print(len(label_test))
print(np.mat(label_train).reshape((-1,1)).shape)


def GBM(argsDict):
    """Objective function: cross-validated ROC AUC of an XGBoost classifier.

    argsDict holds the raw integer samples for "max_depth", "n_estimators",
    "learning_rate", "subsample" and "min_child_weight"; each one is mapped
    onto the actual hyperparameter value before the model is built.  The
    model is scored on the module-level ``attr_train`` / ``label_train``.

    Returns the negated mean ROC AUC over 5 folds, so that minimizing the
    return value maximizes the AUC.
    """
    # Map the sampled integers onto the actual hyperparameter values.
    max_depth = argsDict["max_depth"] + 5
    n_estimators = argsDict['n_estimators'] * 5 + 50
    learning_rate = argsDict["learning_rate"] * 0.02 + 0.05
    subsample = argsDict["subsample"] * 0.1 + 0.7
    min_child_weight = argsDict["min_child_weight"]+1
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("max_depth:" + str(max_depth))
    print("n_estimator:" + str(n_estimators))
    print("learning_rate:" + str(learning_rate))
    print("subsample:" + str(subsample))
    print("min_child_weight:" + str(min_child_weight))

    gbm = xgb.XGBClassifier(nthread=4,    # number of threads
                            max_depth=max_depth,  # maximum tree depth
                            n_estimators=n_estimators,   # number of trees
                            learning_rate=learning_rate, # learning rate
                            subsample=subsample,      # fraction of rows sampled per tree
                            min_child_weight=min_child_weight,   # minimum sum of instance weight needed in a child
                            max_delta_step = 10,  # cap on each leaf's weight update (this is not early stopping)
                            objective="binary:logistic")

    # attr_train / label_train are only read here, so no `global` is needed.
    metric = cross_val_score(gbm,attr_train,label_train,cv=5,scoring="roc_auc").mean()
    print(metric)
    return -metric

# Every dimension is sampled as a small integer; GBM() maps it onto the
# actual hyperparameter value (the resulting ranges are noted per line).
space = {"max_depth":hp.randint("max_depth",15),  # 0..14 -> 5..19
         "n_estimators":hp.randint("n_estimators",10),  # 0..9 -> 50, 55, ..., 95
         "learning_rate":hp.randint("learning_rate",6),  # 0..5 -> 0.05, 0.07, ..., 0.15
         "subsample":hp.randint("subsample",4),  # 0..3 -> 0.7, 0.8, 0.9, 1.0
         "min_child_weight":hp.randint("min_child_weight",5),  # 0..4 -> 1..5
        }
algo = partial(tpe.suggest,n_startup_jobs=1)
# max_evals is the maximum number of models to train; the larger it is, the
# more likely the search is to find the optimum.
best = fmin(GBM,space,algo=algo,max_evals=4)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(best)
print(GBM(best))

三、调参经验

一般来说，算法模型会对重要的超参进行适度搜索。比如xgb的learning_rate，最大深度，损失函数等。

为什么要做超参搜索？我认为对于“炼丹”新人，或对数据缺乏足够了解的情况，这类工具可以帮忙规避非常不合理的参数设置。

但特征决定了 " \(天花板_1\) "，模型结构决定了 " \(天花板_2\) "，调参只能在训练集内逼近 " \(天花板_3\) "，调参实在落了下乘。所以像是 hyperopt 这类自动调参工具，不一定仅仅对learning_rate这种超参做搜索，比如在 cnn 中我们可以对模型结构做搜索，可能会有更高的提升空间。其实这也是近几年很火的 NAS 方向 (Neural Architecture Search)。