Hi, what solution did you find to this problem? I’m currently facing the same issue.
My custom multi-agent environment trains fine with PPO but not with APPO: after ~5 minutes of training I get a similar error. I checked the rewards and observations, and they are never NaN. I also tried setting grad_clip to 2, which didn't help either.
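For context, here is a rough sketch of the kind of finiteness check I call from my env's step() and of the relevant config bits. It's only illustrative: the helper name `assert_finite`, the registered env name "flight", and the exact lr value are placeholders, not my real code.

import numpy as np

from ray.rllib.algorithms.appo import APPOConfig


def assert_finite(name, value):
    # Fail fast if the env ever emits a non-finite value. `value` may be a
    # per-agent dict (multi-agent obs/reward dicts) or a plain array/scalar;
    # this assumes each per-agent entry is array-like.
    items = value.values() if isinstance(value, dict) else [value]
    for v in items:
        arr = np.asarray(v, dtype=np.float64)
        if not np.all(np.isfinite(arr)):
            raise ValueError(f"Non-finite {name} from env: {v!r}")


# Roughly the APPO config I'm running; "flight" is my registered env name
# and lr=1e-4 is just an example value.
config = (
    APPOConfig()
    .environment(env="flight")
    .framework("torch")
    .training(
        lr=1e-4,        # learning rate (example value)
        grad_clip=2.0,  # the grad_clip value mentioned above
    )
    .resources(num_gpus=1)
)

With this in place the checks never trigger, yet the learner thread still dies with the NaN error below: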
(APPO pid=16752) Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/David/ray_results/APPO_2024-03-17_17-23-13/APPO_flight_aafd8_00000_0_2024-03-17_17-23-20/checkpoint_000002)
(APPO pid=16752) Exception in thread Thread-1:
(APPO pid=16752) Traceback (most recent call last):
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\policy\torch_policy_v2.py", line 1369, in _worker
(APPO pid=16752) self.loss(model, self.dist_class, sample_batch)
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\algorithms\appo\appo_torch_policy.py", line 134, in loss
(APPO pid=16752) action_dist = dist_class(model_out, model)
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\models\torch\torch_action_dist.py", line 250, in __init__
(APPO pid=16752) self.dist = torch.distributions.normal.Normal(mean, torch.exp(log_std))
(APPO pid=16752) File "C:\Users\David\AppData\Roaming\Python\Python39\site-packages\torch\distributions\normal.py", line 56, in __init__
(APPO pid=16752) super().__init__(batch_shape, validate_args=validate_args)
(APPO pid=16752) File "C:\Users\David\AppData\Roaming\Python\Python39\site-packages\torch\distributions\distribution.py", line 68, in __init__
(APPO pid=16752) raise ValueError(
(APPO pid=16752) ValueError: Expected parameter loc (Tensor of shape (550, 2)) of distribution Normal(loc: torch.Size([550, 2]), scale: torch.Size([550, 2])) to satisfy the constraint Real(), but found invalid values:
(APPO pid=16752) tensor([[nan, nan],
(APPO pid=16752) [nan, nan],
(APPO pid=16752) [nan, nan],
(APPO pid=16752) ...,
(APPO pid=16752) [nan, nan],
(APPO pid=16752) [nan, nan],
(APPO pid=16752) [nan, nan]], device='cuda:0', grad_fn=<SplitBackward0>)
(APPO pid=16752)
(APPO pid=16752) The above exception was the direct cause of the following exception:
(APPO pid=16752)
(APPO pid=16752) Traceback (most recent call last):
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\threading.py", line 980, in _bootstrap_inner
(APPO pid=16752) self.run()
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\execution\learner_thread.py", line 76, in run
(APPO pid=16752) self.step()
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\execution\learner_thread.py", line 93, in step
(APPO pid=16752) multi_agent_results = self.local_worker.learn_on_batch(batch)
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 815, in learn_on_batch
(APPO pid=16752) info_out[pid] = policy.learn_on_batch(batch)
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\utils\threading.py", line 24, in wrapper
(APPO pid=16752) return func(self, *a, **k)
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\policy\torch_policy_v2.py", line 730, in learn_on_batch
(APPO pid=16752) grads, fetches = self.compute_gradients(postprocessed_batch)
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\utils\threading.py", line 24, in wrapper
(APPO pid=16752) return func(self, *a, **k)
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\policy\torch_policy_v2.py", line 946, in compute_gradients
(APPO pid=16752) tower_outputs = self._multi_gpu_parallel_grad_calc([postprocessed_batch])
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\policy\torch_policy_v2.py", line 1454, in _multi_gpu_parallel_grad_calc
(APPO pid=16752) raise last_result[0] from last_result[1]
(APPO pid=16752) ValueError: Expected parameter loc (Tensor of shape (550, 2)) of distribution Normal(loc: torch.Size([550, 2]), scale: torch.Size([550, 2])) to satisfy the constraint Real(), but found invalid values:
(APPO pid=16752) tensor([[nan, nan],
(APPO pid=16752) [nan, nan],
(APPO pid=16752) [nan, nan],
(APPO pid=16752) ...,
(APPO pid=16752) [nan, nan],
(APPO pid=16752) [nan, nan],
(APPO pid=16752) [nan, nan]], device='cuda:0', grad_fn=<SplitBackward0>)
(APPO pid=16752) tracebackTraceback (most recent call last):
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\policy\torch_policy_v2.py", line 1369, in _worker
(APPO pid=16752) self.loss(model, self.dist_class, sample_batch)
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\algorithms\appo\appo_torch_policy.py", line 134, in loss
(APPO pid=16752) action_dist = dist_class(model_out, model)
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\models\torch\torch_action_dist.py", line 250, in __init__
(APPO pid=16752) self.dist = torch.distributions.normal.Normal(mean, torch.exp(log_std))
(APPO pid=16752) File "C:\Users\David\AppData\Roaming\Python\Python39\site-packages\torch\distributions\normal.py", line 56, in __init__
(APPO pid=16752) super().__init__(batch_shape, validate_args=validate_args)
(APPO pid=16752) File "C:\Users\David\AppData\Roaming\Python\Python39\site-packages\torch\distributions\distribution.py", line 68, in __init__
(APPO pid=16752) raise ValueError(
(APPO pid=16752) ValueError: Expected parameter loc (Tensor of shape (550, 2)) of distribution Normal(loc: torch.Size([550, 2]), scale: torch.Size([550, 2])) to satisfy the constraint Real(), but found invalid values:
(APPO pid=16752) tensor([[nan, nan],
(APPO pid=16752) [nan, nan],
(APPO pid=16752) [nan, nan],
(APPO pid=16752) ...,
(APPO pid=16752) [nan, nan],
(APPO pid=16752) [nan, nan],
(APPO pid=16752) [nan, nan]], device='cuda:0', grad_fn=<SplitBackward0>)
(APPO pid=16752)
(APPO pid=16752) In tower 0 on device cuda:0
2024-03-17 17:31:36,097 ERROR tune_controller.py:1374 -- Trial task failed for trial APPO_flight_aafd8_00000
Traceback (most recent call last):
File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\air\execution\_internal\event_manager.py", line 110, in resolve_future
result = ray.get(future)
File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\_private\auto_init_hook.py", line 22, in auto_init_wrapper
return fn(*args, **kwargs)
File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\_private\client_mode_hook.py", line 103, in wrapper
return func(*args, **kwargs)
File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\_private\worker.py", line 2624, in get
raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(RuntimeError): ray::APPO.train() (pid=16752, ip=127.0.0.1, actor_id=e95f1d1a63644982222642f601000000, repr=APPO)
File "python\ray\_raylet.pyx", line 1813, in ray._raylet.execute_task
File "python\ray\_raylet.pyx", line 1754, in ray._raylet.execute_task.function_executor
File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\_private\function_manager.py", line 726, in actor_method_executor
return method(__ray_actor, *args, **kwargs)
File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span
return method(self, *_args, **_kwargs)
File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\tune\trainable\trainable.py", line 342, in train
raise skipped from exception_cause(skipped)
File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\tune\trainable\trainable.py", line 339, in train
result = self.step()
File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span
return method(self, *_args, **_kwargs)
File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\algorithms\algorithm.py", line 852, in step
results, train_iter_ctx = self._run_one_training_iteration()
File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span
return method(self, *_args, **_kwargs)
File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\algorithms\algorithm.py", line 3042, in _run_one_training_iteration
results = self.training_step()
File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span
return method(self, *_args, **_kwargs)
File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\algorithms\appo\appo.py", line 363, in training_step
train_results = super().training_step()
File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\algorithms\impala\impala.py", line 698, in training_step
raise RuntimeError("The learner thread died while training!")
RuntimeError: The learner thread died while training!
2024-03-17 17:31:36,659 ERROR tune.py:1038 -- Trials did not complete: [APPO_flight_aafd8_00000]
2024-03-17 17:31:36,660 INFO tune.py:1042 -- Total run time: 496.57 seconds (495.95 seconds for the tuning loop).
(APPO pid=16752) Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::RolloutWorker.apply() (pid=27664, ip=127.0.0.1, actor_id=2d3e472c64004a0f0721650a01000000, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x000001FEF99E3E50>)
(APPO pid=16752) File "python\ray\_raylet.pyx", line 1813, in ray._raylet.execute_task
(APPO pid=16752) File "python\ray\_raylet.pyx", line 1754, in ray._raylet.execute_task.function_executor
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\_private\function_manager.py", line 726, in actor_method_executor
(APPO pid=16752) return method(__ray_actor, *args, **kwargs)
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span
(APPO pid=16752) return method(self, *_args, **_kwargs)
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\utils\actor_manager.py", line 189, in apply
(APPO pid=16752) raise e
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\utils\actor_manager.py", line 178, in apply
(APPO pid=16752) return func(self, *args, **kwargs)
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\algorithms\impala\impala.py", line 912, in <lambda>
(APPO pid=16752) lambda worker: worker.sample(),
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span
(APPO pid=16752) return method(self, *_args, **_kwargs)
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 694, in sample
(APPO pid=16752) batches = [self.input_reader.next()]
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\evaluation\sampler.py", line 91, in next
(APPO pid=16752) batches = [self.get_data()]
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\evaluation\sampler.py", line 276, in get_data
(APPO pid=16752) item = next(self._env_runner)
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 344, in run
(APPO pid=16752) outputs = self.step()
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 382, in step
(APPO pid=16752) eval_results = self._do_policy_eval(to_eval=to_eval)
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 1082, in _do_policy_eval
(APPO pid=16752) eval_results[policy_id] = policy.compute_actions_from_input_dict(
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\policy\torch_policy_v2.py", line 572, in compute_actions_from_input_dict
(APPO pid=16752) return self._compute_action_helper(
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\utils\threading.py", line 24, in wrapper
(APPO pid=16752) return func(self, *a, **k)
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\policy\torch_policy_v2.py", line 1305, in _compute_action_helper
(APPO pid=16752) action_dist = dist_class(dist_inputs, self.model)
(APPO pid=16752) File "c:\Users\David\.conda\envs\rllib\lib\site-packages\ray\rllib\models\torch\torch_action_dist.py", line 250, in __init__
(APPO pid=16752) self.dist = torch.distributions.normal.Normal(mean, torch.exp(log_std))
(APPO pid=16752) File "C:\Users\David\AppData\Roaming\Python\Python39\site-packages\torch\distributions\normal.py", line 56, in __init__
(APPO pid=16752) super().__init__(batch_shape, validate_args=validate_args)
(APPO pid=16752) File "C:\Users\David\AppData\Roaming\Python\Python39\site-packages\torch\distributions\distribution.py", line 68, in __init__
(APPO pid=16752) raise ValueError(
(APPO pid=16752) ValueError: Expected parameter loc (Tensor of shape (1, 2)) of distribution Normal(loc: torch.Size([1, 2]), scale: torch.Size([1, 2])) to satisfy the constraint Real(), but found invalid values:
(APPO pid=16752) tensor([[nan, nan]])
(APPO pid=16752) [identical "Unhandled error ... ray::RolloutWorker.apply()" tracebacks follow for the remaining sampling workers (pid=26308, pid=27040, pid=18652), each ending in the same ValueError about NaN values in the Normal distribution's loc parameter]
...
...
Clipped to adhere to max post length