I wrote a simple tuning script as the program entry point:
from hyperopt import hp, STATUS_OK, Trials, fmin, tpe
import hyperopt
from sklearn.model_selection import cross_val_score
from sklearn import svm
from sklearn.datasets import load_iris
import numpy as np

iris = load_iris()

space = {
    'C': hp.uniform('C', 0.001, 1000),
    'shrinking': hp.choice('shrinking', [True, False]),
    'kernel': hp.choice('kernel', [
        {
            'name': 'rbf',
            'gamma': hp.choice('rbf_gamma', ['auto', hp.uniform('rbf_gamma_uniform', 0.0001, 8)])
        },
        {
            'name': 'linear',
        },
        {
            'name': 'sigmoid',
            'gamma': hp.choice('sigmoid_gamma', ['auto', hp.uniform('sigmoid_gamma_uniform', 0.0001, 8)]),
            'coef0': hp.uniform('sigmoid_coef0', 0, 10)
        },
        {
            'name': 'poly',
            'gamma': hp.choice('poly_gamma', ['auto', hp.uniform('poly_gamma_uniform', 0.0001, 8)]),
            'coef0': hp.uniform('poly_coef0', 0, 10),
            'degree': hp.uniformint('poly_degree', 1, 5),
        }
    ])
}

def svm_from_cfg(cfg):
    # flatten the nested kernel sub-config into plain SVC keyword arguments
    kernel_cfg = cfg.pop('kernel')
    kernel = kernel_cfg.pop('name')
    cfg.update(kernel_cfg)
    cfg['kernel'] = kernel
    clf = svm.SVC(**cfg)
    scores = cross_val_score(clf, iris.data, iris.target, cv=5)
    return {
        'loss': 1 - np.mean(scores),
        'status': STATUS_OK
    }

trials = Trials()
best = fmin(svm_from_cfg,
            space=space,
            algo=tpe.suggest,
            max_evals=200,
            trials=trials)
print(best)
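One thing worth flagging before stepping in: for hp.choice parameters, the best dict that fmin returns holds the chosen indices rather than the values themselves. hyperopt's space_eval maps it back to a concrete configuration; a minimal sketch:

from hyperopt import space_eval

# `best` stores indices for hp.choice entries; space_eval resolves them
# back into the actual parameter values defined in `space`.
print(space_eval(space, best))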
Note that fmin is the program entry point. This fmin actually routes through hyperopt.base.Trials#fmin; debugging into that entry point, there is yet another fmin, which takes us into hyperopt.fmin.fmin:
domain = base.Domain(fn, space, pass_expr_memo_ctrl=pass_expr_memo_ctrl)
domain is the hyperparameter space:
domain.params
Out[4]:
{'C': <hyperopt.pyll.base.Apply at 0x7f9b75511940>,
'kernel': <hyperopt.pyll.base.Apply at 0x7f9b6529c1d0>,
'rbf_gamma': <hyperopt.pyll.base.Apply at 0x7f9b652946a0>,
...
The source comments are conscientious; this should be the real entry point where the optimization algorithm executes:
# next line is where the fmin is actually executed
rval.exhaust()
Step into hyperopt.fmin.FMinIter#exhaust, and from there into hyperopt.fmin.FMinIter#run:
# Based on existing trials and the domain, use `algo` to probe in
# new hp points. Save the results of those inspections into
# `new_trials`. This is the core of `run`, all the rest is just
# processes orchestration
new_trials = algo(
    new_ids, self.domain, trials, self.rstate.randint(2 ** 31 - 1)
)
Here algo refers to the suggestion algorithm (see the printout below); trials, as I understand it, is the history of past evaluations, i.e., the material TPE learns from; domain is the search space.
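You can verify the "history" reading by poking at trials after a run; a small sketch using the Trials API:

print(len(trials.trials))            # number of recorded evaluations
print(trials.losses()[:5])           # losses in evaluation order
print(trials.best_trial['result'])   # result dict of the best trial so far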
algo
Out[10]:
<function hyperopt.tpe.suggest(new_ids, domain, trials, seed, prior_weight=1.0, n_startup_jobs=20, n_EI_candidates=24, gamma=0.25, verbose=True)>
Now look at hyperopt/tpe.py:837 for the definition of the suggest function:
def suggest(
    new_ids,
    domain,
    trials,
    seed,
    prior_weight=_default_prior_weight,
    n_startup_jobs=_default_n_startup_jobs,
    n_EI_candidates=_default_n_EI_candidates,
    gamma=_default_gamma,
    verbose=True,
):
The algo object above might also be a partial function (uncertain; the printout earlier shows the plain tpe.suggest in this run). Setting that aside for now, step into suggest.
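On the partial-function question: fmin does accept a functools.partial that pre-binds the keyword arguments in the signature above, and that is the usual way to tune them (the values below are illustrative):

from functools import partial

algo = partial(tpe.suggest, n_startup_jobs=10, gamma=0.25, n_EI_candidates=24)
best = fmin(svm_from_cfg, space=space, algo=algo, max_evals=200, trials=Trials())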
# use build_posterior_wrapper to create the pyll nodes
observed, observed_loss, posterior = build_posterior_wrapper(
    domain, prior_weight, gamma
)
The comment mentions pyll nodes; this likely touches the tree-structured hyperparameter space, so keep an eye on it.
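One quick way to see the tree structure before diving into the graph code: sample a point from the prior with hyperopt.pyll.stochastic.sample. The nested kernel dict in the output shows which branch of the tree was taken:

import hyperopt.pyll.stochastic

# prints something like {'C': 412.3, 'kernel': {'gamma': 'auto', 'name': 'rbf'}, 'shrinking': True}
print(hyperopt.pyll.stochastic.sample(space))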
def build_posterior_wrapper(domain, prior_weight, gamma):
    """
    prior_weight (float): smoothing factor for counts, to avoid having 0 prob
    gamma (float): the threshold to split between l(x) and g(x), see eq. 2
    """
gamma here is the $\gamma$ from the paper: $\gamma = p(y < y^{*})$. Printing gamma confirms it: sure enough, 0.25, the quantile used for the split.
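As a plain-numpy illustration of that equation (not hyperopt's exact code; we will see below that it scales the count by sqrt(N)): y* is the γ-quantile of the observed losses, so a fraction γ of trials falls below it.

import numpy as np

losses = np.array([0.12, 0.30, 0.08, 0.25, 0.40, 0.18, 0.22, 0.35])
gamma = 0.25
y_star = np.quantile(losses, gamma)      # threshold with p(y < y*) ≈ gamma
print(y_star, (losses < y_star).mean())  # -> 0.165 0.25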
# -- these dummy values will be replaced in build_posterior() and never used
observed = {"idxs": pyll.Literal(), "vals": pyll.Literal()}
observed_loss = {"idxs": pyll.Literal(), "vals": pyll.Literal()}
observed and observed_loss should correspond to the observations in the paper: X and y, respectively.
Step into hyperopt.tpe.build_posterior. Its parameters:
def build_posterior(
    specs,
    prior_idxs,
    prior_vals,
    obs_idxs,
    obs_vals,
    obs_loss_idxs,
    obs_loss_vals,
    oloss_gamma,
    prior_weight,
):
The docstring:
This method clones a posterior inference graph by iterating forward in topological order, and replacing prior random-variables (prior_idxs, prior_vals) with new posterior distributions (post_specs, post_idxs, post_vals) that make use of observations (obs_idxs, obs_vals).
From the docstring it is easy to see that prior_idxs and prior_vals are the prior random variables, while obs_idxs, obs_vals, obs_loss_idxs, and obs_loss_vals are the observations. Scrolling down, the function ends with return post_idxs, post_vals, corresponding to the posterior distributions the docstring mentions.
expr = pyll.as_apply([specs, prior_idxs, prior_vals])
This presumably converts structures defined with plain Python types (dict, list, tuple) into hyperopt's own Apply-node representation.
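A tiny sketch to confirm that reading (purely illustrative):

from hyperopt import pyll

expr = pyll.as_apply({'a': 1, 'b': [2, 3]})
print(type(expr))  # <class 'hyperopt.pyll.base.Apply'>
print(expr)        # the dict/list rewritten as a graph of Apply nodes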
nodes = pyll.dfs(expr)
Printing things out, inputs() seems to return something remarkable. Presumably it represents the branches of the tree:
expr.inputs()
Out[29]:
[<hyperopt.pyll.base.Apply at 0x7f9b93599cc0>,
<hyperopt.pyll.base.Apply at 0x7f9b6473d898>,
<hyperopt.pyll.base.Apply at 0x7f9b6473dac8>]
The expression can be pictured as a tree; here the root has 3 branches, one for each element of [specs, prior_idxs, prior_vals] passed to as_apply.
dfs is depth-first search. Since the function is short, here is the full code:
def dfs(aa, seq=None, seqset=None):
    if seq is None:
        assert seqset is None
        seq = []
        seqset = {}
    # -- seqset is the set of all nodes we have seen (which may be still on
    #    the stack)
    # N.B. it used to be a stack, but now it's a dict mapping to inputs
    # because that's an optimization saving us from having to call inputs
    # so often.
    if aa in seqset:  # this node has already been visited; stop here
        return
    assert isinstance(aa, Apply)
    seqset[aa] = aa.inputs()  # aa.inputs() gives aa's branches
    for ii in seqset[aa]:
        dfs(ii, seq, seqset)
    seq.append(aa)
    return seq
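Note that dfs appends a node only after recursing into all of its inputs, so seq comes out in topological order with the root last; a quick check:

from hyperopt import pyll

root = pyll.as_apply([1, 2])
seq = pyll.dfs(root)
assert seq[-1] is root  # the two literals come first, the root last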
In[36]: len(nodes)
Out[36]:
261
Why are there so many nodes? Unresolved, to be investigated (a guess is sketched below).
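A plausible explanation (my assumption, not verified against the source): every hp.* call expands into several Apply nodes (the label literal, the distribution node, a hyperopt_param wrapper, a cast), and build_posterior's graph references all of them, so a nested space like ours quickly reaches hundreds of nodes. You can get a feel for the blow-up on a single scalar:

from hyperopt import hp, pyll

node = hp.uniform('x', 0, 1)
print(len(pyll.dfs(node)))  # already several nodes for one parameter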
# prior_vals:
# {'C': <hyperopt.pyll.base.Apply at 0x7f9b6529c978>,
# 'kernel': <hyperopt.pyll.base.Apply at 0x7f9b935a3160>,
for nid in prior_vals:  # nid is a parameter label, e.g. 'C'
    # construct the leading args for each call to adaptive_parzen_sampler
    # which will permit the "adaptive parzen samplers" to adapt to the
    # correct samples.
    obs_below, obs_above = scope.ap_split_trials(
        obs_idxs[nid], obs_vals[nid], obs_loss_idxs, obs_loss_vals, oloss_gamma
    )
Step into hyperopt.tpe.ap_split_trials:
@scope.define_info(o_len=2)
def ap_split_trials(o_idxs, o_vals, l_idxs, l_vals, gamma, gamma_cap=DEFAULT_LF):
    """Split the elements of `o_vals` (observations values) into two groups: those for
    trials whose losses (`l_vals`) were above gamma, and those below gamma. Note that
    only unique elements are returned, so the total number of returned elements might
    be lower than `len(o_vals)`
    """
    o_idxs, o_vals, l_idxs, l_vals = list(
        map(np.asarray, [o_idxs, o_vals, l_idxs, l_vals])
    )
    # XXX if this is working, refactor this sort for efficiency
    # Splitting is done this way to cope with duplicate loss values.
    n_below = min(int(np.ceil(gamma * np.sqrt(len(l_vals)))), gamma_cap)
    l_order = np.argsort(l_vals)
    keep_idxs = set(l_idxs[l_order[:n_below]])
    below = [v for i, v in zip(o_idxs, o_vals) if i in keep_idxs]
    keep_idxs = set(l_idxs[l_order[n_below:]])
    above = [v for i, v in zip(o_idxs, o_vals) if i in keep_idxs]
    return np.asarray(below), np.asarray(above)
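To make the split concrete, here is the same n_below arithmetic on toy numbers (DEFAULT_LF is 25 in the version I am reading, so the cap rarely matters on short runs):

import numpy as np

gamma, gamma_cap = 0.25, 25
l_vals = np.array([0.30, 0.10, 0.50, 0.20, 0.40, 0.25, 0.35, 0.15, 0.45])
# ceil(0.25 * sqrt(9)) = 1: only the single best trial lands in the below group
n_below = min(int(np.ceil(gamma * np.sqrt(len(l_vals)))), gamma_cap)
order = np.argsort(l_vals)
print(n_below, l_vals[order[:n_below]], l_vals[order[n_below:]])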