Hi,
I am trying to use gpu to accelerate the training process. And here is some parts of my codes.
ray.init(
num_cpus=16,
num_gpus=1,
include_dashboard=False,
ignore_reinit_error=True,
log_to_driver=False,
)
and
config = (
PPOConfig()
.environment(env="mobile-medium-ma-v0")
.framework("torch")
.resources(num_cpus_per_worker=1, num_gpus_per_worker=1/16)
.rollouts(num_rollout_workers=15)
)
And it shows me the following bugs. I am wondering why?
Thanks for replying in advance.
2023-08-03 14:51:47,125 ERROR tune_controller.py:873 -- Trial task failed for trial PPO_mobile-medium-ma-v0_cd219_00000
Traceback (most recent call last):
File "c:\Users\18406\anaconda3\envs\rayenvtest\lib\site-packages\ray\air\execution\_internal\event_manager.py", line 110, in resolve_future
result = ray.get(future)
File "c:\Users\18406\anaconda3\envs\rayenvtest\lib\site-packages\ray\_private\auto_init_hook.py", line 18, in auto_init_wrapper
return fn(*args, **kwargs)
File "c:\Users\18406\anaconda3\envs\rayenvtest\lib\site-packages\ray\_private\client_mode_hook.py", line 103, in wrapper
return func(*args, **kwargs)
File "c:\Users\18406\anaconda3\envs\rayenvtest\lib\site-packages\ray\_private\worker.py", line 2542, in get
raise value
ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, ray::PPO.__init__() (pid=30316, ip=127.0.0.1, actor_id=c243566c84a3d139d0151cba01000000, repr=PPO)
File "c:\Users\18406\anaconda3\envs\rayenvtest\lib\site-packages\ray\rllib\evaluation\worker_set.py", line 242, in _setup
self.add_workers(
File "c:\Users\18406\anaconda3\envs\rayenvtest\lib\site-packages\ray\rllib\evaluation\worker_set.py", line 635, in add_workers
raise result.get()
File "c:\Users\18406\anaconda3\envs\rayenvtest\lib\site-packages\ray\rllib\utils\actor_manager.py", line 488, in __fetch_result
result = ray.get(r)
File "c:\Users\18406\anaconda3\envs\rayenvtest\lib\site-packages\ray\_private\auto_init_hook.py", line 18, in auto_init_wrapper
return fn(*args, **kwargs)
File "c:\Users\18406\anaconda3\envs\rayenvtest\lib\site-packages\ray\_private\client_mode_hook.py", line 103, in wrapper
return func(*args, **kwargs)
File "c:\Users\18406\anaconda3\envs\rayenvtest\lib\site-packages\ray\_private\worker.py", line 2542, in get
raise value
ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, ray::RolloutWorker.__init__() (pid=30360, ip=127.0.0.1, actor_id=440aa6f3c23f53fbb9850e2601000000, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x000001FF094645B0>)
File "python\ray\_raylet.pyx", line 1434, in ray._raylet.execute_task
File "python\ray\_raylet.pyx", line 1438, in ray._raylet.execute_task
File "python\ray\_raylet.pyx", line 1378, in ray._raylet.execute_task.function_executor
File "c:\Users\18406\anaconda3\envs\rayenvtest\lib\site-packages\ray\_private\function_manager.py", line 724, in actor_method_executor
return method(__ray_actor, *args, **kwargs)
File "c:\Users\18406\anaconda3\envs\rayenvtest\lib\site-packages\ray\util\tracing\tracing_helper.py", line 464, in _resume_span
return method(self, *_args, **_kwargs)
File "c:\Users\18406\anaconda3\envs\rayenvtest\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 715, in __init__
raise RuntimeError(
RuntimeError: Found 0 GPUs on your machine (GPU devices found: [])! If your
machine does not have any GPUs, you should set the config keys `num_gpus` and
`num_gpus_per_worker` to 0 (they may be set to 1 by default for your
particular RL algorithm).
To change the config for the `rllib train|rollout` command, use
`--config={'[key]': '[value]'}` on the command line.
To change the config for `tune.Tuner().fit()` in a script: Modify the python dict
passed to `tune.Tuner(param_space=[...]).fit()`.
To change the config for an RLlib Algorithm instance: Modify the python dict
passed to the Algorithm's constructor, e.g. `PPO(config=[...])`.
During handling of the above exception, another exception occurred:
ray::PPO.__init__() (pid=30316, ip=127.0.0.1, actor_id=c243566c84a3d139d0151cba01000000, repr=PPO)
File "python\ray\_raylet.pyx", line 1431, in ray._raylet.execute_task
File "python\ray\_raylet.pyx", line 1510, in ray._raylet.execute_task
File "python\ray\_raylet.pyx", line 1434, in ray._raylet.execute_task
File "python\ray\_raylet.pyx", line 1438, in ray._raylet.execute_task
File "python\ray\_raylet.pyx", line 1378, in ray._raylet.execute_task.function_executor
File "c:\Users\18406\anaconda3\envs\rayenvtest\lib\site-packages\ray\_private\function_manager.py", line 724, in actor_method_executor
return method(__ray_actor, *args, **kwargs)
File "c:\Users\18406\anaconda3\envs\rayenvtest\lib\site-packages\ray\util\tracing\tracing_helper.py", line 464, in _resume_span
return method(self, *_args, **_kwargs)
File "c:\Users\18406\anaconda3\envs\rayenvtest\lib\site-packages\ray\rllib\algorithms\algorithm.py", line 475, in __init__
super().__init__(
File "c:\Users\18406\anaconda3\envs\rayenvtest\lib\site-packages\ray\tune\trainable\trainable.py", line 170, in __init__
self.setup(copy.deepcopy(self.config))
File "c:\Users\18406\anaconda3\envs\rayenvtest\lib\site-packages\ray\util\tracing\tracing_helper.py", line 464, in _resume_span
return method(self, *_args, **_kwargs)
File "c:\Users\18406\anaconda3\envs\rayenvtest\lib\site-packages\ray\rllib\algorithms\algorithm.py", line 601, in setup
self.workers = WorkerSet(
File "c:\Users\18406\anaconda3\envs\rayenvtest\lib\site-packages\ray\rllib\evaluation\worker_set.py", line 194, in __init__
raise e.args[0].args[2]
RuntimeError: Found 0 GPUs on your machine (GPU devices found: [])! If your
machine does not have any GPUs, you should set the config keys `num_gpus` and
`num_gpus_per_worker` to 0 (they may be set to 1 by default for your
particular RL algorithm).
To change the config for the `rllib train|rollout` command, use
`--config={'[key]': '[value]'}` on the command line.
To change the config for `tune.Tuner().fit()` in a script: Modify the python dict
passed to `tune.Tuner(param_space=[...]).fit()`.
To change the config for an RLlib Algorithm instance: Modify the python dict
passed to the Algorithm's constructor, e.g. `PPO(config=[...])`.