Hello - I am new to Ray. I created train_set = RayDMatrix(ray_dataset, "rz_flag")
and passed it to train() as follows:
bst = train(
params= xgboost_params | hyper_params,
dtrain=train_set,
evals=[(train_set, "train")],
evals_result=evals_result,
ray_params=RayParams(
cpus_per_actor=15,
num_actors=10,
),
verbose_eval=False,
num_boost_round=10,
# callbacks = [TqdmCallback(10)]
)
The training process runs fine, but I get the following error during prediction. What is the issue here? Is it caused by creating a RayDMatrix from ray_dataset? ray_dataset is created using ray.data.read_parquet().
pred_set = RayDMatrix(data = ray_dataset,
label = "rz_flag",
)
bst = xgb.Booster(model_file="model.xgb")
pred_ray = predict(bst, pred_set, ray_params=RayParams(num_actors=10, cpus_per_actor=15))
---------------------------------------------------------------------------
RayTaskError(TypeError) Traceback (most recent call last)
File <command-4498259778197045>:9
4 pred_set = RayDMatrix(data = ray_dataset,
5 label = "rz_flag",
6 )
8 bst = xgb.Booster(model_file="model.xgb")
----> 9 pred_ray = predict(bst, pred_set, ray_params=RayParams(num_actors=10, cpus_per_actor=15))
11 print(pred_ray)
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-9a4cb759-9e6b-43cf-ab41-7ce3b0f8e0cd/lib/python3.9/site-packages/xgboost_ray/main.py:1850, in predict(model, data, ray_params, _remote, **kwargs)
1848 while tries <= max_actor_restarts:
1849 try:
-> 1850 return _predict(model, data, ray_params=ray_params, **kwargs)
1851 except RayActorError:
1852 if tries + 1 <= max_actor_restarts:
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-9a4cb759-9e6b-43cf-ab41-7ce3b0f8e0cd/lib/python3.9/site-packages/xgboost_ray/main.py:1752, in _predict(model, data, ray_params, **kwargs)
1749 wait_load.extend(_trigger_data_load(actor, data, []))
1751 try:
-> 1752 ray.get(wait_load)
1753 except Exception as exc:
1754 logger.warning(f"Caught an error during prediction: {str(exc)}")
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-9a4cb759-9e6b-43cf-ab41-7ce3b0f8e0cd/lib/python3.9/site-packages/ray/_private/auto_init_hook.py:18, in wrap_auto_init.<locals>.auto_init_wrapper(*args, **kwargs)
15 @wraps(fn)
16 def auto_init_wrapper(*args, **kwargs):
17 auto_init_ray()
---> 18 return fn(*args, **kwargs)
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-9a4cb759-9e6b-43cf-ab41-7ce3b0f8e0cd/lib/python3.9/site-packages/ray/_private/client_mode_hook.py:103, in client_mode_hook.<locals>.wrapper(*args, **kwargs)
101 if func.__name__ != "init" or is_client_mode_enabled_by_default:
102 return getattr(ray, func.__name__)(*args, **kwargs)
--> 103 return func(*args, **kwargs)
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-9a4cb759-9e6b-43cf-ab41-7ce3b0f8e0cd/lib/python3.9/site-packages/ray/_private/worker.py:2540, in get(object_refs, timeout)
2538 worker.core_worker.dump_object_store_memory_usage()
2539 if isinstance(value, RayTaskError):
-> 2540 raise value.as_instanceof_cause()
2541 else:
2542 raise value
RayTaskError(TypeError): ray::_RemoteRayXGBoostActor.load_data() (pid=22141, ip=10.24.105.11, actor_id=5a1eaafcaa0216f3622eb4e302000000, repr=<xgboost_ray.main._RemoteRayXGBoostActor object at 0x7fafe8fe6d30>)
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-9a4cb759-9e6b-43cf-ab41-7ce3b0f8e0cd/lib/python3.9/site-packages/xgboost_ray/main.py", line 639, in load_data
param = data.get_data(self.rank, self.num_actors)
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-9a4cb759-9e6b-43cf-ab41-7ce3b0f8e0cd/lib/python3.9/site-packages/xgboost_ray/matrix.py", line 928, in get_data
self.load_data(num_actors=num_actors, rank=rank)
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-9a4cb759-9e6b-43cf-ab41-7ce3b0f8e0cd/lib/python3.9/site-packages/xgboost_ray/matrix.py", line 913, in load_data
refs, self.n = self.loader.load_data(
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-9a4cb759-9e6b-43cf-ab41-7ce3b0f8e0cd/lib/python3.9/site-packages/xgboost_ray/matrix.py", line 651, in load_data
local_df = data_source.load_data(
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-9a4cb759-9e6b-43cf-ab41-7ce3b0f8e0cd/lib/python3.9/site-packages/xgboost_ray/data_sources/ray_dataset.py", line 61, in load_data
data = [data[i] for i in indices]
File "/local_disk0/.ephemeral_nfs/envs/pythonEnv-9a4cb759-9e6b-43cf-ab41-7ce3b0f8e0cd/lib/python3.9/site-packages/xgboost_ray/data_sources/ray_dataset.py", line 61, in <listcomp>
data = [data[i] for i in indices]
TypeError: 'Dataset' object is not subscriptable