When I wrap the call in tune.run():
# same as above until the trainer constructor.
# ...
stop = {"timesteps_total": 50000}
results = tune.run(PPOTrainer, config=config2, stop=stop)
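For reference, a minimal self-contained sketch of what the full call looks like on my end (the environment name and hyperparameters below are placeholders; my real config2 is built earlier in the script, as noted above):

import ray
from ray import tune
from ray.rllib.agents.ppo import PPOTrainer

ray.init()

# Placeholder config -- my actual config2 is constructed earlier in the script.
config2 = {
    "env": "CartPole-v0",   # placeholder environment
    "framework": "torch",
    "num_workers": 2,       # placeholder value
    "num_gpus": 1,          # setting this to 1 is what triggers the error below
}

stop = {"timesteps_total": 50000}
results = tune.run(PPOTrainer, config=config2, stop=stop)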
I either get the following error (when I set num_gpus to 1 in the config):
Failure # 1 (occurred at 2021-09-01_19-28-18)
Traceback (most recent call last):
File "D:\miniconda\envs\enigma\lib\site-packages\ray\tune\trial_runner.py", line 739, in _process_trial
results = self.trial_executor.fetch_result(trial)
File "D:\miniconda\envs\enigma\lib\site-packages\ray\tune\ray_trial_executor.py", line 729, in fetch_result
result = ray.get(trial_future[0], timeout=DEFAULT_GET_TIMEOUT)
File "D:\miniconda\envs\enigma\lib\site-packages\ray\_private\client_mode_hook.py", line 82, in wrapper
return func(*args, **kwargs)
File "D:\miniconda\envs\enigma\lib\site-packages\ray\worker.py", line 1566, in get
raise value
ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, ray::PPO.__init__() (pid=37420, ip=192.168.86.99)
File "python\ray\_raylet.pyx", line 534, in ray._raylet.execute_task
File "python\ray\_raylet.pyx", line 484, in ray._raylet.execute_task.function_executor
File "D:\miniconda\envs\enigma\lib\site-packages\ray\_private\function_manager.py", line 563, in actor_method_executor
return method(__ray_actor, *args, **kwargs)
File "D:\miniconda\envs\enigma\lib\site-packages\ray\rllib\agents\trainer_template.py", line 123, in __init__
Trainer.__init__(self, config, env, logger_creator)
File "D:\miniconda\envs\enigma\lib\site-packages\ray\rllib\agents\trainer.py", line 584, in __init__
super().__init__(config, logger_creator)
File "D:\miniconda\envs\enigma\lib\site-packages\ray\tune\trainable.py", line 103, in __init__
self.setup(copy.deepcopy(self.config))
File "D:\miniconda\envs\enigma\lib\site-packages\ray\rllib\agents\trainer.py", line 731, in setup
self._init(self.config, self.env_creator)
File "D:\miniconda\envs\enigma\lib\site-packages\ray\rllib\agents\trainer_template.py", line 152, in _init
num_workers=self.config["num_workers"])
File "D:\miniconda\envs\enigma\lib\site-packages\ray\rllib\agents\trainer.py", line 819, in _make_workers
logdir=self.logdir)
File "D:\miniconda\envs\enigma\lib\site-packages\ray\rllib\evaluation\worker_set.py", line 86, in __init__
lambda p, pid: (pid, p.observation_space, p.action_space)))
File "D:\miniconda\envs\enigma\lib\site-packages\ray\_private\client_mode_hook.py", line 82, in wrapper
return func(*args, **kwargs)
File "D:\miniconda\envs\enigma\lib\site-packages\ray\worker.py", line 1566, in get
raise value
ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, ray::RolloutWorker.__init__() (pid=37272, ip=192.168.86.99)
File "python\ray\_raylet.pyx", line 534, in ray._raylet.execute_task
File "python\ray\_raylet.pyx", line 484, in ray._raylet.execute_task.function_executor
File "D:\miniconda\envs\enigma\lib\site-packages\ray\_private\function_manager.py", line 563, in actor_method_executor
return method(__ray_actor, *args, **kwargs)
File "D:\miniconda\envs\enigma\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 557, in __init__
policy_dict, policy_config)
File "D:\miniconda\envs\enigma\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 1342, in _build_policy_map
policy_map[name] = cls(obs_space, act_space, merged_conf)
File "D:\miniconda\envs\enigma\lib\site-packages\ray\rllib\policy\policy_template.py", line 267, in __init__
get_batch_divisibility_req=get_batch_divisibility_req,
File "D:\miniconda\envs\enigma\lib\site-packages\ray\rllib\policy\torch_policy.py", line 157, in __init__
self.device = self.devices[0]
IndexError: list index out of range
or (when I leave num_gpus unspecified) training runs but simply does not use my GPU.
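As a separate sanity check (this diagnostic snippet is not part of my training script), something along these lines should show whether PyTorch and Ray can see the GPU at all:

import ray
import torch

print(torch.cuda.is_available())   # does PyTorch detect a CUDA device?
print(torch.cuda.device_count())   # how many CUDA devices PyTorch sees

ray.init()
print(ray.cluster_resources())     # does Ray report a "GPU" resource?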