How severe does this issue affect your experience of using Ray?
- High: It blocks me to complete my task.
I’m getting a similar error.
config = {
'max_depth': 6,
'gamma': 5,
'reg_alpha': 5,
'reg_lambda': 1,
'colsample_bytree': 0.7,
'min_child_weight': 2.1,
'eval_metric': 'rmse',
'n_estimators': 180,
'early_stopping_rounds': 30
}
class ParquetToRayDMatrixDataLoader:
def load_data(self, path: str, label_prefix: str = DEFAULT_LABEL_PREFIX) -> RayDMatrix:
logger.info(f"Loading data from parquet to RayDMatrix: {path}")
# This is required since we need to give label column name to RayDMatrix
ds = ray.data.read_parquet(path)
label_cols = self.get_labels_by_prefix(columns=ds.schema().names, label_prefix=label_prefix)
logger.info(f"Found label columns: {label_cols}, type of label_cols: {type(label_cols)}, length: {len(label_cols)}")
return RayDMatrix(path, label= label_cols, filetype=RayFileType.PARQUET)
@staticmethod
def get_labels_by_prefix(columns: list, label_prefix: str) -> list:
return [col for col in columns if col.startswith(label_prefix)]
data = ParquetToRayDMatrixDataLoader().load_data("my_gcp_path", label_prefix='residual')
result = {}
model = train(params=config, dtrain=data, early_stopping_rounds=int(config['early_stopping_rounds']),
num_boost_round=int(config['n_estimators']), evals=[(data, 'train')],
evals_result=result, verbose_eval=True, ray_params=RayParams(num_actors=2, cpus_per_actor=1))
print(model)
And I’m getting:
e[2me[36m(TrainingPipeline pid=75, ip=10.52.10.3)e[0m INFO:ray_job_submitter.data_loaders.data_loader:Found label columns: ['residual'], type of label_cols: <class 'list'>, length: 1
Traceback (most recent call last):
File "/tmp/ray/session_2023-07-13_02-47-54_417851_8/runtime_resources/working_dir_files/_ray_pkg_098209d84788cc6b/./ray_job_submitter/ray_driver.py", line 65, in <module>
run_remote()
File "/home/ray/anaconda3/lib/python3.10/site-packages/click/core.py", line 1130, in __call__
return self.main(*args, **kwargs)
File "/home/ray/anaconda3/lib/python3.10/site-packages/click/core.py", line 1055, in main
rv = self.invoke(ctx)
File "/home/ray/anaconda3/lib/python3.10/site-packages/click/core.py", line 1404, in invoke
return ctx.invoke(self.callback, **ctx.params)
File "/home/ray/anaconda3/lib/python3.10/site-packages/click/core.py", line 760, in invoke
return __callback(*args, **kwargs)
File "/tmp/ray/session_2023-07-13_02-47-54_417851_8/runtime_resources/working_dir_files/_ray_pkg_098209d84788cc6b/./ray_job_submitter/ray_driver.py", line 58, in run_remote
metadata = ray.get(training_result_ref)
File "/home/ray/anaconda3/lib/python3.10/site-packages/ray/_private/auto_init_hook.py", line 18, in auto_init_wrapper
return fn(*args, **kwargs)
File "/home/ray/anaconda3/lib/python3.10/site-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
return func(*args, **kwargs)
File "/home/ray/anaconda3/lib/python3.10/site-packages/ray/_private/worker.py", line 2540, in get
raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(IndexError): e[36mray::TrainingPipeline.start_training()e[39m (pid=75, ip=10.52.10.3, actor_id=fdaa8f2f48a604b31916c62c02000000, repr=<ewx_training.training.TrainingPipeline object at 0x7f972b6054b0>)
File "/home/ray/anaconda3/lib/python3.10/site-packages/pandas/core/series.py", line 976, in _take_with_is_copy
return self.take(indices=indices, axis=axis)
File "/home/ray/anaconda3/lib/python3.10/site-packages/pandas/core/series.py", line 961, in take
new_index = self.index.take(indices)
File "/home/ray/anaconda3/lib/python3.10/site-packages/pandas/core/indexes/base.py", line 1090, in take
taken = algos.take(
File "/home/ray/anaconda3/lib/python3.10/site-packages/pandas/core/algorithms.py", line 1257, in take
result = arr.take(indices, axis=axis)
IndexError: index 2 is out of bounds for axis 0 with size 1
The above exception was the direct cause of the following exception:
e[36mray::TrainingPipeline.start_training()e[39m (pid=75, ip=10.52.10.3, actor_id=fdaa8f2f48a604b31916c62c02000000, repr=<ewx_training.training.TrainingPipeline object at 0x7f972b6054b0>)
File "/app/src/ewx-training/ewx_training/training.py", line 216, in start_training
trained_model: Any = self.trainer.train_loop(model, df)
File "/tmp/ray/session_2023-07-13_02-47-54_417851_8/runtime_resources/working_dir_files/_ray_pkg_098209d84788cc6b/ray_job_submitter/custom_trainer/main.py", line 57, in train_loop
best_model, result = new_model.single_xgb_train(self.config, model, train_func=self.train_func)
File "/tmp/ray/session_2023-07-13_02-47-54_417851_8/runtime_resources/working_dir_files/_ray_pkg_098209d84788cc6b/ray_job_submitter/custom_trainer/main.py", line 15, in single_xgb_train
xgb_reg_model = train_func(param, self.dtrain, early_stopping_rounds=int(param['early_stopping_rounds']),
File "/home/ray/anaconda3/lib/python3.10/site-packages/xgboost_ray/main.py", line 1525, in train
dtrain.load_data(ray_params.num_actors)
File "/home/ray/anaconda3/lib/python3.10/site-packages/xgboost_ray/matrix.py", line 913, in load_data
refs, self.n = self.loader.load_data(
File "/home/ray/anaconda3/lib/python3.10/site-packages/xgboost_ray/matrix.py", line 460, in load_data
"label": ray.put(y.iloc[indices] if y is not None else None),
File "/home/ray/anaconda3/lib/python3.10/site-packages/pandas/core/indexing.py", line 1103, in __getitem__
return self._getitem_axis(maybe_callable, axis=axis)
File "/home/ray/anaconda3/lib/python3.10/site-packages/pandas/core/indexing.py", line 1647, in _getitem_axis
return self._get_list_axis(key, axis=axis)
File "/home/ray/anaconda3/lib/python3.10/site-packages/pandas/core/indexing.py", line 1621, in _get_list_axis
raise IndexError("positional indexers are out-of-bounds") from err
IndexError: positional indexers are out-of-bounds
Although right now I only have one label column, so the label_cols is just [‘residual’], this issue is fixed if I hard code the label to be a single string, instead of a list… Does it mean that xgboost ray doesn’t support multi output models?
Seems related to : Xgboost_ray crashes when used for multiclass text classification - #5 by Y_C