@mannyv Did it. I took my setup exactly as it was and only changed the environment to the StatelessCartPole example environment (and deleted my own eval function, etc.). I didn't change anything that affects the training cycle; I only made the code executable. Same problem as before: I get NaNs.
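For reference, this is roughly how the run is wired up (only a minimal sketch; the actual config is the one from my previous post, and StatelessCartPole is the example env that ships with RLlib):

```python
import ray
from ray import tune
from ray.rllib.examples.env.stateless_cartpole import StatelessCartPole

if __name__ == "__main__":
    ray.init()

    # Placeholder config: the real one is the PPO config from my previous post,
    # unchanged except for the env swap below.
    config = {
        "env": StatelessCartPole,  # only change: use the RLlib example env
        "framework": "torch",      # the failing policy is the torch PPO policy
        # ... all other keys exactly as in my previous post ...
    }

    tune.run("PPO", config=config, stop={"training_iteration": 100})
```

Here is the full failure reported by Tune: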
Failure # 1 (occurred at 2021-11-18_01-37-38)
Traceback (most recent call last):
File "/opt/conda/lib/python3.7/site-packages/ray/tune/trial_runner.py", line 890, in _process_trial
results = self.trial_executor.fetch_result(trial)
File "/opt/conda/lib/python3.7/site-packages/ray/tune/ray_trial_executor.py", line 788, in fetch_result
result = ray.get(trial_future[0], timeout=DEFAULT_GET_TIMEOUT)
File "/opt/conda/lib/python3.7/site-packages/ray/_private/client_mode_hook.py", line 105, in wrapper
return func(*args, **kwargs)
File "/opt/conda/lib/python3.7/site-packages/ray/worker.py", line 1625, in get
raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(ValueError): ray::PPO.train_buffered() (pid=95, ip=10.1.8.250, repr=PPO)
File "/opt/conda/lib/python3.7/site-packages/ray/rllib/agents/ppo/ppo_torch_policy.py", line 46, in ppo_surrogate_loss
curr_action_dist = dist_class(logits, model)
File "/opt/conda/lib/python3.7/site-packages/ray/rllib/models/torch/torch_action_dist.py", line 73, in __init__
logits=self.inputs)
File "/opt/conda/lib/python3.7/site-packages/torch/distributions/categorical.py", line 64, in __init__
super(Categorical, self).__init__(batch_shape, validate_args=validate_args)
File "/opt/conda/lib/python3.7/site-packages/torch/distributions/distribution.py", line 56, in __init__
f"Expected parameter {param} "
ValueError: Expected parameter logits (Tensor of shape (80, 2)) of distribution Categorical(logits: torch.Size([80, 2])) to satisfy the constraint IndependentConstraint(Real(), 1), but found invalid values:
tensor([[nan, nan],
        [nan, nan],
        [nan, nan],
        ...,
        [nan, nan],
        [nan, nan],
        [nan, nan]], grad_fn=<SubBackward0>)
The above exception was the direct cause of the following exception:
ray::PPO.train_buffered() (pid=95, ip=10.1.8.250, repr=PPO)
File "/opt/conda/lib/python3.7/site-packages/ray/tune/trainable.py", line 224, in train_buffered
result = self.train()
File "/opt/conda/lib/python3.7/site-packages/ray/rllib/agents/trainer.py", line 682, in train
raise e
File "/opt/conda/lib/python3.7/site-packages/ray/rllib/agents/trainer.py", line 668, in train
result = Trainable.train(self)
File "/opt/conda/lib/python3.7/site-packages/ray/tune/trainable.py", line 283, in train
result = self.step()
File "/opt/conda/lib/python3.7/site-packages/ray/rllib/agents/trainer_template.py", line 206, in step
step_results = next(self.train_exec_impl)
File "/opt/conda/lib/python3.7/site-packages/ray/util/iter.py", line 756, in __next__
return next(self.built_iterator)
File "/opt/conda/lib/python3.7/site-packages/ray/util/iter.py", line 783, in apply_foreach
for item in it:
File "/opt/conda/lib/python3.7/site-packages/ray/util/iter.py", line 783, in apply_foreach
for item in it:
File "/opt/conda/lib/python3.7/site-packages/ray/util/iter.py", line 843, in apply_filter
for item in it:
File "/opt/conda/lib/python3.7/site-packages/ray/util/iter.py", line 843, in apply_filter
for item in it:
File "/opt/conda/lib/python3.7/site-packages/ray/util/iter.py", line 783, in apply_foreach
for item in it:
File "/opt/conda/lib/python3.7/site-packages/ray/util/iter.py", line 783, in apply_foreach
for item in it:
File "/opt/conda/lib/python3.7/site-packages/ray/util/iter.py", line 791, in apply_foreach
result = fn(item)
File "/opt/conda/lib/python3.7/site-packages/ray/rllib/execution/train_ops.py", line 69, in __call__
}, lw, self.num_sgd_iter, self.sgd_minibatch_size, [])
File "/opt/conda/lib/python3.7/site-packages/ray/rllib/utils/sgd.py", line 108, in do_minibatch_sgd
}, minibatch.count)))[policy_id]
File "/opt/conda/lib/python3.7/site-packages/ray/rllib/evaluation/rollout_worker.py", line 958, in learn_on_batch
info_out[pid] = policy.learn_on_batch(batch)
File "/opt/conda/lib/python3.7/site-packages/ray/rllib/utils/threading.py", line 21, in wrapper
return func(self, *a, **k)
File "/opt/conda/lib/python3.7/site-packages/ray/rllib/policy/torch_policy.py", line 507, in learn_on_batch
grads, fetches = self.compute_gradients(postprocessed_batch)
File "/opt/conda/lib/python3.7/site-packages/ray/rllib/policy/policy_template.py", line 336, in compute_gradients
return parent_cls.compute_gradients(self, batch)
File "/opt/conda/lib/python3.7/site-packages/ray/rllib/utils/threading.py", line 21, in wrapper
return func(self, *a, **k)
File "/opt/conda/lib/python3.7/site-packages/ray/rllib/policy/torch_policy.py", line 679, in compute_gradients
[postprocessed_batch])
File "/opt/conda/lib/python3.7/site-packages/ray/rllib/policy/torch_policy.py", line 1052, in _multi_gpu_parallel_grad_calc
raise last_result[0] from last_result[1]
ValueError: Expected parameter logits (Tensor of shape (80, 2)) of distribution Categorical(logits: torch.Size([80, 2])) to satisfy the constraint IndependentConstraint(Real(), 1), but found invalid values:
tensor([[nan, nan],
        [nan, nan],
        [nan, nan],
        ...,
        [nan, nan],
        [nan, nan],
        [nan, nan]], grad_fn=<SubBackward0>)
In tower 0 on device cpu
The configuration is the same one I posted in my previous post.