Python 和 HyperOpt : How to make multi-process grid searching?-6ren

Python 和 HyperOpt : How to make multi-process grid searching?

转载作者：行者123 更新时间：2023-11-30 08:36:04

我正在尝试调整一些参数，但搜索空间非常大。目前我有 5 个维度，以后可能会增加到大约 10 个。我认为如果能弄清楚如何对其进行多进程处理，就可以获得显著的加速，但我找不到任何好的方法来做到这一点。我正在使用 hyperopt，但不知道如何让它使用超过 1 个核心。下面是我的代码，已去掉所有不相关的内容:

from numpy    import random
from pandas   import DataFrame
from hyperopt import fmin, tpe, hp, Trials





def calc_result(x, n_rows=100000):
    """Objective function for hyperopt: negated weighted sum of random data.

    A fresh frame of ``n_rows`` rows and five columns (A-E) is filled with
    samples from ``numpy.random.randn``.  Each row is weighted column by
    column with the matching ``adjustment_N`` entry of ``x`` and the
    weighted row sums are accumulated one row at a time.

    Args:
        x: Mapping holding the keys ``adjustment_1`` through
            ``adjustment_5``; each value is the multiplier for columns
            A through E respectively.
        n_rows: Number of random rows to generate.  Defaults to 100000,
            the size that used to be hard-coded.

    Returns:
        The accumulated total multiplied by -1.  The value is also printed.
    """
    huge_df = DataFrame(random.randn(n_rows, 5), columns=['A', 'B', 'C', 'D', 'E'])

    total = 0

    # Assume that I MUST iterate
    for _, row in huge_df.iterrows():
        # Assume there is no way to optimize here
        total += (row['A'] * x['adjustment_1'] +
                  row['B'] * x['adjustment_2'] +
                  row['C'] * x['adjustment_3'] +
                  row['D'] * x['adjustment_4'] +
                  row['E'] * x['adjustment_5'])

    # In real life I want the total as high as possible, but the optimizer
    # is a minimizer, so it has to be handed the negated value.
    total_as_neg = total * -1

    print(total_as_neg)

    return total_as_neg


# Search space: every adjustment is drawn with hp.quniform using the
# arguments (0, 1, 0.001), and each dict key doubles as the hyperopt label.
space = {
    label: hp.quniform(label, 0, 1, 0.001)
    for label in ('adjustment_1',
                  'adjustment_2',
                  'adjustment_3',
                  'adjustment_4',
                  'adjustment_5')
}

# Trials instance handed to fmin below via the `trials` argument.
trials = Trials()

# Minimize calc_result over `space` using the TPE suggestion algorithm,
# capped at 20000 evaluations.
best = fmin(
    fn=calc_result,
    space=space,
    algo=tpe.suggest,
    max_evals=20000,
    trials=trials,
)

目前我有 4 个核心，但基本上需要多少就能得到多少。如何让 hyperopt 使用超过 1 个核心？或者是否有支持多进程的库？

最佳答案

如果您使用 Mac 或 Linux(或 Windows 的 Linux 子系统)，只需添加大约 10 行代码，就可以借助 ray 并行执行此操作。通过最新的 wheels(latest wheels)安装 ray 之后，只需对脚本做最少的修改(如下所示)，即可使用 HyperOpt 进行并行/分布式网格搜索。从整体上看，它使用 tpe.suggest 运行 fmin，并以并行方式在内部创建 Trials 对象。

from numpy    import random
from pandas   import DataFrame
from hyperopt import fmin, tpe, hp, Trials


def calc_result(x, reporter, n_rows=100000):  # add a reporter param here
    """Objective function that reports its score through ``reporter``.

    A fresh frame of ``n_rows`` rows and five columns (A-E) is filled with
    samples from ``numpy.random.randn``.  Each row is weighted column by
    column with the matching ``adjustment_N`` entry of ``x`` and the
    weighted row sums are accumulated one row at a time.

    Args:
        x: Mapping holding the keys ``adjustment_1`` through
            ``adjustment_5``; each value is the multiplier for columns
            A through E respectively.
        reporter: Callable invoked exactly once with the keyword arguments
            ``timesteps_total=1`` and ``episode_reward_mean=<total>``.
        n_rows: Number of random rows to generate.  Defaults to 100000,
            the size that used to be hard-coded.

    Returns:
        The negated total, matching what the non-Ray version of this
        function returns.  (Previously this line raised ``NameError``
        because ``total_as_neg`` was never assigned.)
    """
    huge_df = DataFrame(random.randn(n_rows, 5), columns=['A', 'B', 'C', 'D', 'E'])

    total = 0

    # Assume that I MUST iterate
    for _, row in huge_df.iterrows():
        # Assume there is no way to optimize here
        total += (row['A'] * x['adjustment_1'] +
                  row['B'] * x['adjustment_2'] +
                  row['C'] * x['adjustment_3'] +
                  row['D'] * x['adjustment_4'] +
                  row['E'] * x['adjustment_5'])

    # The un-negated total is reported here:
    # Ray will negate this by itself to feed into HyperOpt
    reporter(timesteps_total=1, episode_reward_mean=total)

    return -total


# Search space: every adjustment is drawn with hp.quniform using the
# arguments (0, 1, 0.001), and each dict key doubles as the hyperopt label.
space = {
    label: hp.quniform(label, 0, 1, 0.001)
    for label in ('adjustment_1',
                  'adjustment_2',
                  'adjustment_3',
                  'adjustment_4',
                  'adjustment_5')
}

import ray
import ray.tune as tune
from ray.tune.hpo_scheduler import HyperOptScheduler

ray.init()

# Register calc_result under the string name that the experiment's "run"
# entry below refers to.
tune.register_trainable("calc_result", calc_result)

# Single experiment named "experiment"; the hyperopt search space is passed
# inside its "config" entry and a HyperOptScheduler instance is supplied as
# the scheduler.
tune.run_experiments(
    {
        "experiment": {
            "run": "calc_result",
            "repeat": 20000,
            "config": {"space": space},
        },
    },
    scheduler=HyperOptScheduler(),
)