ValueError: ctypes objects containing pointers cannot be pickled

Hi!
I’m trying to use Tune to search for the optimal combination of parameters in a model. When I run Tune, the following message appears in the terminal:

2022-08-04 14:11:20,580 INFO services.py:1470 -- View the Ray dashboard at http://127.0.0.1:8265
2022-08-04 14:11:23,292 WARNING function_runner.py:603 -- Function checkpointing is disabled. This may result in unexpected behavior when using checkpointing features or certain schedulers. To enable, set the train function arguments to be `func(config, checkpoint_dir=None)`.
Traceback (most recent call last):
  File "e:\Usuario\Cliope\Documents\GitHub\EP_model_validation_program\EnergyPlusModelValidation_v3.py", line 165, in <module>   
    analysis = tune.run(
  File "C:\Users\Usuario\AppData\Local\Programs\Python\Python39\lib\site-packages\ray\tune\tune.py", line 515, in run
    experiments[i] = Experiment(
  File "C:\Users\Usuario\AppData\Local\Programs\Python\Python39\lib\site-packages\ray\tune\experiment.py", line 164, in __init__ 
    self._run_identifier = Experiment.register_if_needed(run)
  File "C:\Users\Usuario\AppData\Local\Programs\Python\Python39\lib\site-packages\ray\tune\experiment.py", line 353, in register_if_needed
    register_trainable(name, run_object)
  File "C:\Users\Usuario\AppData\Local\Programs\Python\Python39\lib\site-packages\ray\tune\registry.py", line 96, in register_trainable
    _global_registry.register(TRAINABLE_CLASS, name, trainable)
  File "C:\Users\Usuario\AppData\Local\Programs\Python\Python39\lib\site-packages\ray\tune\registry.py", line 178, in register   
    self._to_flush[(category, key)] = pickle.dumps_debug(value)
  File "C:\Users\Usuario\AppData\Local\Programs\Python\Python39\lib\site-packages\ray\cloudpickle\__init__.py", line 39, in dumps_debug
    return dumps(obj, *args, **kwargs)
  File "C:\Users\Usuario\AppData\Local\Programs\Python\Python39\lib\site-packages\ray\cloudpickle\cloudpickle_fast.py", line 73, in dumps
    cp.dump(obj)
  File "C:\Users\Usuario\AppData\Local\Programs\Python\Python39\lib\site-packages\ray\cloudpickle\cloudpickle_fast.py", line 620, in dump
    return Pickler.dump(self, obj)
ValueError: ctypes objects containing pointers cannot be pickled

A simplified version of my code is as follows:

from ray import tune
import numpy as np
import funciones as fn

def function(config, config_var):
    """Run one simulation for a Tune trial and return its R2 score.

    Args:
        config: Hyperparameters sampled by Tune; only ``config['a']`` is read.
        config_var: Fixed settings dict. ``'epJSON_file'`` is overwritten in
            place with the result of ``fn.parameter_variation``, and
            ``'real_temperature'`` is used as the prefix of the measured-data
            file (``"anual.txt"`` is appended to it).

    Returns:
        dict: ``{"R2": errors[3]}``, where ``errors`` is the value returned by
        ``fn.Errors`` for the measured and simulated temperatures.
    """
    # NOTE(review): `simulator_exchange` and `get_temperature` are not defined
    # in this snippet. Presumably they are module-level simulator objects; if
    # they wrap ctypes handles, pickling this trainable could produce the
    # reported "ctypes objects containing pointers cannot be pickled" error.
    # Confirm, and consider creating them inside this function instead.

    # ================================ #
    def exchange_function():
        # temperature calculation from simulator
        Ti = simulator_exchange(get_temperature)
        # append the temperature value
        variables['Ti'].append(Ti)
    # ================================ #
    # define a dict with variables
    variables = {
        "Ti": []
    }
    # configure new hyperparameters to input in the simulator
    config_var['epJSON_file'] = fn.parameter_variation(config_var['epJSON_file'], config['a'])
    # calculate variables in a configuration of hyperparameters
    # NOTE(review): this loop has no exit condition in the simplified version,
    # so everything below it is unreachable as written. The real stopping
    # criterion (e.g. end of the simulation run) needs to be restored here.
    while True:
        exchange_function()
    # transform the list into a numpy array
    Ts = np.array(variables['Ti'])
    # load an external file with the measured temperatures
    Tm_cocina = np.loadtxt(config_var['real_temperature'] + "anual.txt")
    # calculate the errors between simulated and measured variables
    # (the measured series is `Tm_cocina`; the name `Tm` was never defined)
    errors = fn.Errors(Tm_cocina, Ts)
    # return the R2
    return {"R2": errors[3]}

if __name__ == '__main__':

    # Fixed (non-tuned) settings forwarded unchanged to every trial.
    config_var = {
        "epJSON_file": "a path",
        # `function` reads this key and appends "anual.txt" to it, so it must
        # be present; replace the placeholder with the real location.
        "real_temperature": "a path",
    }
    # Hyperparameters sampled by Tune; `function` reads config['a'].
    search_space = {
        'a': tune.quniform(0.2, 0.9, 0.05)
    }

    # configure how checkpoints are sync'd to the scheduler/sampler
    sync_config = tune.SyncConfig()  # the default mode is to use rsync

    analysis = tune.run(
        # `function` is the trainable defined in this file; `RunEP` does not
        # exist in this snippet and would raise a NameError.
        tune.with_parameters(function, config_var=config_var),
        stop={"training_iteration": 1},
        config=search_space,
        # name of your experiment
        name="experiment_2022-08-04",
        # a directory where results are stored before being
        # sync'd to head node/cloud storage
        local_dir="my path for results",
        # sync our checkpoints via rsync
        # you don't have to pass an empty sync config - but we
        # do it here for clarity and comparison
        sync_config=sync_config,
        metric="R2",
        mode="max"
    )

    print("Best config is:", analysis.best_config)

Does anyone know why this code doesn’t work?
Thanks in advance!!

The error occurs because Ray fails when it tries to pickle the function in order to launch it in a Ray actor.
Browsing through `function`, I assume the problem is with how you simulate the temperature. Can you say more about what you are trying to do?
Thanks!!

I received the same error attempting to tune an xgboost model (omitting imports and data prep):

# Training split: rows before `train_cutoff`. Features are the columns from
# position 9 up to (but excluding) the last two, cast to float16; the label
# is the column at position 8, cast to int.
# NOTE(review): these matrices are later handed to `tune.with_parameters`.
# Presumably a QuantileDMatrix wraps a native (ctypes) handle, which would
# explain the reported pickling error — confirm against the xgboost docs.
train_dm = xgb.QuantileDMatrix(
    train_df.iloc[:train_cutoff, 9:-2].to_numpy(dtype="float16"),
    label=train_df.iloc[:train_cutoff, 8].to_numpy(dtype="int")
)

# Validation split: the remaining rows, with the same column selection.
# `ref=train_dm` ties this matrix to the training one (presumably so it reuses
# the training quantile information — confirm against the xgboost docs).
val_dm = xgb.QuantileDMatrix(
    train_df.iloc[train_cutoff:, 9:-2].to_numpy(dtype="float16"),
    label=train_df.iloc[train_cutoff:, 8].to_numpy(dtype="int"),
    ref=train_dm
)

def train_xgb(config, train_dm=None, val_dm=None):
    """Train one gamma-regression booster for a Tune trial and report its score.

    Args:
        config: Sampled hyperparameters. They are merged over the fixed
            xgboost parameters, so a sampled key overrides a fixed one.
        train_dm: Matrix used as ``dtrain`` and evaluated under the name
            ``"train"``.
        val_dm: Matrix evaluated under the name ``"validation"``.

    Returns:
        dict: ``{"gamma-deviance": model.best_score}``.
    """
    # Fixed settings first; values sampled by Tune are layered on top.
    fixed_params = {
        "objective": "reg:gamma",
        "tree_method": "hist",
        "booster": "gbtree",
        "eval_metric": "gamma-deviance",
    }
    booster_params = {**fixed_params, **config}

    eval_history = {}
    watchlist = [(train_dm, "train"), (val_dm, "validation")]

    # `max_trees` is expected to exist at module level (not shown here).
    model = xgb.train(
        params=booster_params,
        dtrain=train_dm,
        num_boost_round=max_trees,
        evals=watchlist,
        evals_result=eval_history,
        early_stopping_rounds=20,
    )

    best_deviance = model.best_score

    # Report the final score to Tune.
    session.report({
        "gamma-deviance": best_deviance,
        "iterations": model.best_iteration,
    })

    return {"gamma-deviance": best_deviance}

# Hyperparameter ranges passed to the Tuner as `param_space`.
search_space = {
    "learning_rate": tune.loguniform(1e-2, 5e-1),
    "max_depth": tune.randint(4, 64),
    "subsample": tune.quniform(0.5, 1.0, 0.01),
    "colsample_bytree": tune.quniform(0.5, 1.0, 0.01),
    "colsample_bylevel": tune.quniform(0.5, 1.0, 0.01),
}

# Search algorithm and scheduler, both constructed with default arguments.
algo = HyperOptSearch()
scheduler = AsyncHyperBandScheduler()

# NOTE(review): `tune.with_parameters` receives the two QuantileDMatrix
# objects here. Presumably this is where Ray tries to serialize them and the
# reported ctypes pickling error is raised — confirm, e.g. by passing plain
# arrays instead and building the matrices inside `train_xgb`.
# `num_samples` and `n_jobs` are not defined in this snippet.
tuner = tune.Tuner(
    tune.with_parameters(train_xgb, train_dm=train_dm, val_dm=val_dm),
    param_space=search_space,
    tune_config=tune.TuneConfig(
        metric="gamma-deviance",  # same key that train_xgb reports
        mode="min",
        search_alg=algo,
        scheduler=scheduler,
        num_samples=num_samples,
        max_concurrent_trials=n_jobs
    )
)

Given numerous examples in the docs, I would assume the model function would be serializable… am I missing something? Or does it have something to do with trying to use QuantileDMatrix datasets in the object store? Any insight is appreciated!