Hi there,
I have been attempting to train a binary classification model using Ray Tune and XGBoost with HyperOpt hyperparameter search. When I begin the parameter search, it quickly fails with the following error:
Traceback (most recent call last):
File "help_train.py", line 68, in <module>
tune_search = tune_xgboost()
File "help_train.py", line 52, in tune_xgboost
tune_search = tune.run(
File "C:\Users\Joe\anaconda3\lib\site-packages\ray\tune\tune.py", line 394, in run
experiments[i] = Experiment(
File "C:\Users\Joe\anaconda3\lib\site-packages\ray\tune\experiment.py", line 152, in __init__
self._run_identifier = Experiment.register_if_needed(run)
File "C:\Users\Joe\anaconda3\lib\site-packages\ray\tune\experiment.py", line 298, in register_if_needed
register_trainable(name, run_object)
File "C:\Users\Joe\anaconda3\lib\site-packages\ray\tune\registry.py", line 73, in register_trainable
_global_registry.register(TRAINABLE_CLASS, name, trainable)
File "C:\Users\Joe\anaconda3\lib\site-packages\ray\tune\registry.py", line 145, in register
self._to_flush[(category, key)] = pickle.dumps_debug(value)
File "C:\Users\Joe\anaconda3\lib\site-packages\ray\cloudpickle\__init__.py", line 39, in dumps_debug
return dumps(obj, *args, **kwargs)
File "C:\Users\Joe\anaconda3\lib\site-packages\ray\cloudpickle\cloudpickle_fast.py", line 73, in dumps
cp.dump(obj)
File "C:\Users\Joe\anaconda3\lib\site-packages\ray\cloudpickle\cloudpickle_fast.py", line 580, in dump
return Pickler.dump(self, obj)
ValueError: ctypes objects containing pointers cannot be pickled
Below is a minimal reproducible example.
The data I use to recreate this issue can be found here:
https://easyupload.io/m/41oeo1 (available until 11 September 2021)
If you would like to access this data after that date, please message me.
System versions:
Windows 10 Pro
Python - 3.8.5
Hyperopt - 0.2.5
XGBoost - 1.3.3
Ray - 1.5.1
Script (run from the command line):
from ray import tune
from ray.tune.schedulers import ASHAScheduler
from ray.tune.suggest.hyperopt import HyperOptSearch
import xgboost as xgb
import pandas as pd
import os
from datetime import datetime
# download data from here: https://easyupload.io/m/41oeo1 up until 11 September 2021
print('Importing Data:')
X_train = pd.read_excel('X_train.xlsx').to_numpy()
y_train = pd.read_excel('y_train.xlsx').to_numpy()
print('Completed importing data:')
train_set = xgb.DMatrix(X_train, label=y_train)
RANDOMSTATE = 42
def train_test_xgb(config):
# run cross validation
cv_results = xgb.cv(
config,
train_set,
num_boost_round=150,
seed=RANDOMSTATE,
nfold=4,
metrics='rmse',
early_stopping_rounds=30,
)
score = cv_results['test-rmse-mean'].min()
result = {
'eval' : {'rmse' : score}
}
return result
def tune_xgboost():
search_space = {
'learning_rate' : [0.1, 0.15, 0.2, 0.25, 0.3, 0.4],
'max_depth' : [4, 5, 6, 7, 8]
}
# This will enable aggressive early stopping of bad trials.
scheduler = ASHAScheduler(
max_t=10, # 10 training iterations
grace_period=1,
reduction_factor=2)
algo=HyperOptSearch(random_state_seed=RANDOMSTATE)
tune_search = tune.run(
train_test_xgb,
metric="eval-rmse",
mode="min",
# You can add "gpu": 0.1 to allocate GPUs
# resources_per_trial={"cpu": 1, "gpu" : 0.1},
config=search_space,
num_samples=10,
scheduler=scheduler,
search_alg=algo,
verbose=1)
return tune_search
if __name__ == "__main__":
print('\n\n\n\n===============\nSearch started at {0}!\n\n\n\n================='.format(datetime.now()))
tune_search = tune_xgboost()
print('\n\n\n\n===============\nCompleted search at {0}!\n\n\n\n================='.format(datetime.now()))
Is there some issue with the way I have setup my search?
Are you able to replicate this error?
How can it be resolved?
Thank you very much for your help
Joe