When I run the Ray example below, I still get the following error. Does anyone have any idea what is going wrong? Thanks.
RayTaskError(ValueError) Traceback (most recent call last)
/tmp/ipykernel_46448/1110740239.py in
8
9 # Train the classifier
—> 10 bst = train(
11 params=xgboost_params,
12 dtrain=train_set,
/opt/conda/lib/python3.9/site-packages/xgboost_ray/main.py in train(params, dtrain, num_boost_round, evals, evals_result, additional_results, ray_params, _remote, *args, **kwargs)
1284 _wrapped = force_on_current_node(_wrapped)
1285
→ 1286 bst, train_evals_result, train_additional_results = ray.get(
1287 _wrapped.remote(
1288 params,
/opt/conda/lib/python3.9/site-packages/ray/_private/client_mode_hook.py in wrapper(*args, **kwargs)
102 # we only convert init function if RAY_CLIENT_MODE=1
103 if func.__name__ != "init" or is_client_mode_enabled_by_default:
→ 104 return getattr(ray, func.__name__)(*args, **kwargs)
105 return func(*args, **kwargs)
106
/opt/conda/lib/python3.9/site-packages/ray/util/client/api.py in get(self, vals, timeout)
42 timeout: Optional timeout in milliseconds
43 """
—> 44 return self.worker.get(vals, timeout=timeout)
45
46 def put(self, *args, **kwargs):
/opt/conda/lib/python3.9/site-packages/ray/util/client/worker.py in get(self, vals, timeout)
436 op_timeout = max_blocking_operation_time
437 try:
→ 438 res = self._get(to_get, op_timeout)
439 break
440 except GetTimeoutError:
/opt/conda/lib/python3.9/site-packages/ray/util/client/worker.py in _get(self, ref, timeout)
464 logger.exception("Failed to deserialize {}".format(chunk.error))
465 raise
→ 466 raise err
467 if chunk.total_size > OBJECT_TRANSFER_WARNING_SIZE and log_once(
468 “client_object_transfer_size_warning”
RayTaskError(ValueError): ray::_wrapped() (pid=3279436, ip=192.168.156.43)
File “/opt/conda/lib/python3.9/site-packages/xgboost_ray/main.py”, line 1275, in _wrapped
File “/tmp/ray/session_2022-08-23_22-10-24_470493_112/runtime_resources/pip/a8e57680f27af79b38868e663e15b85d89590602/virtualenv/lib/python3.9/site-packages/xgboost_ray/main.py”, line 1453, in train
bst, train_evals_result, train_additional_results = _train(
File “/tmp/ray/session_2022-08-23_22-10-24_470493_112/runtime_resources/pip/a8e57680f27af79b38868e663e15b85d89590602/virtualenv/lib/python3.9/site-packages/xgboost_ray/main.py”, line 1011, in _train
dtrain.assert_enough_shards_for_actors(num_actors=ray_params.num_actors)
File “/tmp/ray/session_2022-08-23_22-10-24_470493_112/runtime_resources/pip/a8e57680f27af79b38868e663e15b85d89590602/virtualenv/lib/python3.9/site-packages/xgboost_ray/matrix.py”, line 748, in assert_enough_shards_for_actors
self.loader.assert_enough_shards_for_actors(num_actors=num_actors)
File “/tmp/ray/session_2022-08-23_22-10-24_470493_112/runtime_resources/pip/a8e57680f27af79b38868e663e15b85d89590602/virtualenv/lib/python3.9/site-packages/xgboost_ray/matrix.py”, line 450, in assert_enough_shards_for_actors
data_source = self.get_data_source()
File “/tmp/ray/session_2022-08-23_22-10-24_470493_112/runtime_resources/pip/a8e57680f27af79b38868e663e15b85d89590602/virtualenv/lib/python3.9/site-packages/xgboost_ray/matrix.py”, line 436, in get_data_source
raise ValueError(
ValueError: Invalid data source type: <class 'modin.pandas.dataframe.DataFrame'> with FileType: None for a distributed dataset.
FIX THIS by passing a supported data type. Supported data types for distributed datasets are a list of CSV or Parquet sources. If using Modin, Dask, or Petastorm, make sure the library is installed.