Hi everyone,
I defined a custom environment and trained a multi-agent PPO with Ray Tune; it trained fine without any errors. However, when I switch PPO to APPO or IMPALA (changing both the config and the Tune trainable), I get the error below after one or two training episodes.
ERROR tune_controller.py:1374 -- Trial task failed for trial APPO_multiAgent_env_77c02_00000
Traceback (most recent call last):
File "/python3.8/site-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
result = ray.get(future)
File "python3.8/site-packages/ray/_private/auto_init_hook.py", line 22, in auto_init_wrapper
return fn(*args, **kwargs)
File "python3.8/site-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
return func(*args, **kwargs)
File "python3.8/site-packages/ray/_private/worker.py", line 2624, in get
raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(RuntimeError): ray::APPO.train() (pid=11259, ip=127.0.0.1, actor_id=fe74a48b015ee2ab21d5abb001000000, repr=APPO)
File "python3.8/site-packages/ray/tune/trainable/trainable.py", line 342, in train
raise skipped from exception_cause(skipped)
File "python3.8/site-packages/ray/tune/trainable/trainable.py", line 339, in train
result = self.step()
File "python3.8/site-packages/ray/rllib/algorithms/algorithm.py", line 852, in step
results, train_iter_ctx = self._run_one_training_iteration()
File "python3.8/site-packages/ray/rllib/algorithms/algorithm.py", line 3042, in _run_one_training_iteration
results = self.training_step()
File "python3.8/site-packages/ray/rllib/algorithms/appo/appo.py", line 363, in training_step
train_results = super().training_step()
File "python3.8/site-packages/ray/rllib/algorithms/impala/impala.py", line 735, in training_step
train_results = self.learn_on_processed_samples()
File "python3.8/site-packages/ray/rllib/algorithms/impala/impala.py", line 953, in learn_on_processed_samples
result = self.learner_group.update(
File "python3.8/site-packages/ray/rllib/core/learner/learner_group.py", line 186, in update
self._learner.update(
File "python3.8/site-packages/ray/rllib/core/learner/learner.py", line 1303, in update
) = self._update(nested_tensor_minibatch)
File "python3.8/site-packages/ray/rllib/core/learner/torch/torch_learner.py", line 365, in _update
return self._possibly_compiled_update(batch)
File "python3.8/site-packages/ray/rllib/core/learner/torch/torch_learner.py", line 123, in _uncompiled_update
loss_per_module = self.compute_loss(fwd_out=fwd_out, batch=batch)
File "python3.8/site-packages/ray/rllib/core/learner/learner.py", line 1023, in compute_loss
loss = self.compute_loss_for_module(
File "python3.8/site-packages/ray/rllib/algorithms/appo/torch/appo_torch_learner.py", line 62, in compute_loss_for_module
behaviour_actions_logp_time_major = make_time_major(
File "python3.8/site-packages/ray/rllib/algorithms/impala/torch/vtrace_torch_v2.py", line 48, in make_time_major
***rs = torch.reshape(tensor, [B, T] + list(tensor.shape[1:]))***
***RuntimeError: shape '[9, 50]' is invalid for input of size 499***
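If it helps, the failing call at the bottom of the traceback boils down to this reshape mismatch. B=9 and T=50 are taken from the error message; the 1-D tensor of size 499 is my reconstruction of the flattened batch:

import torch

# make_time_major tries to fold the flat [B*T] batch into [B, T, ...],
# but 9 * 50 = 450 does not match the 499 timesteps in the batch.
tensor = torch.zeros(499)
B, T = 9, 50
rs = torch.reshape(tensor, [B, T] + list(tensor.shape[1:]))
# RuntimeError: shape '[9, 50]' is invalid for input of size 499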
This is the code I'm running:
from ray import air, tune
from ray.rllib.algorithms.appo import APPOConfig
from ray.rllib.core.rl_module.marl_module import MultiAgentRLModuleSpec
from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec

# "my_env" is registered earlier; policies and policy_mapping_fn come from
# my custom multi-agent environment setup.
config = (
    APPOConfig()
    .environment("my_env")
    .experimental(_enable_new_api_stack=True)
    .rollouts(num_rollout_workers=0, enable_connectors=True)
    .framework("torch")
    .rl_module(
        rl_module_spec=MultiAgentRLModuleSpec(
            module_specs={p: SingleAgentRLModuleSpec() for p in policies},
        ),
    )
    .multi_agent(
        policies=policies,
        policy_mapping_fn=policy_mapping_fn,
    )
)

results = tune.Tuner(
    "APPO",
    param_space=config.to_dict(),
    run_config=air.RunConfig(stop={"training_iteration": 15}, verbose=1),
).fit()
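For comparison, the PPO version that trains without this error is (reconstructed from memory, not verbatim) the same script with only the config class and the trainable name changed:

from ray import air, tune
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.core.rl_module.marl_module import MultiAgentRLModuleSpec
from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec

# Same env, policies, and rl_module setup; only PPOConfig and "PPO" differ.
config = (
    PPOConfig()
    .environment("my_env")
    .experimental(_enable_new_api_stack=True)
    .rollouts(num_rollout_workers=0, enable_connectors=True)
    .framework("torch")
    .rl_module(
        rl_module_spec=MultiAgentRLModuleSpec(
            module_specs={p: SingleAgentRLModuleSpec() for p in policies},
        ),
    )
    .multi_agent(policies=policies, policy_mapping_fn=policy_mapping_fn)
)

results = tune.Tuner(
    "PPO",
    param_space=config.to_dict(),
    run_config=air.RunConfig(stop={"training_iteration": 15}, verbose=1),
).fit()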
I have no idea why this happens; any help would be appreciated.
P.S. I also tried SAC, which fails with a NotImplementedError from get_default_rl_module_spec inside get_marl_module_spec.