Hi There,
Would you be able to provide an example? I am using a custom Gym environment and am seeing this error appear on almost all workers after some time while using the Population Based Training (PBT) scheduler:
Failure # 1 (occurred at 2022-01-20_08-33-33)
Traceback (most recent call last):
File "/home/samthomas/miniconda3/envs/profit-ray/lib/python3.9/site-packages/ray/tune/trial_runner.py", line 924, in _process_trial
results = self.trial_executor.fetch_result(trial)
File "/home/samthomas/miniconda3/envs/profit-ray/lib/python3.9/site-packages/ray/tune/ray_trial_executor.py", line 787, in fetch_result
result = ray.get(trial_future[0], timeout=DEFAULT_GET_TIMEOUT)
File "/home/samthomas/miniconda3/envs/profit-ray/lib/python3.9/site-packages/ray/_private/client_mode_hook.py", line 105, in wrapper
return func(*args, **kwargs)
File "/home/samthomas/miniconda3/envs/profit-ray/lib/python3.9/site-packages/ray/worker.py", line 1713, in get
raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(ValueError): ray::PPO.train_buffered() (pid=797698, ip=192.168.86.90, repr=PPO)
File "/home/samthomas/miniconda3/envs/profit-ray/lib/python3.9/site-packages/ray/rllib/agents/ppo/ppo_torch_policy.py", line 82, in loss
curr_action_dist = dist_class(logits, model)
File "/home/samthomas/miniconda3/envs/profit-ray/lib/python3.9/site-packages/ray/rllib/models/torch/torch_action_dist.py", line 71, in __init__
self.dist = torch.distributions.categorical.Categorical(
File "/home/samthomas/miniconda3/envs/profit-ray/lib/python3.9/site-packages/torch/distributions/categorical.py", line 64, in __init__
super(Categorical, self).__init__(batch_shape, validate_args=validate_args)
File "/home/samthomas/miniconda3/envs/profit-ray/lib/python3.9/site-packages/torch/distributions/distribution.py", line 55, in __init__
raise ValueError(
ValueError: Expected parameter logits (Tensor of shape (1540, 3)) of distribution Categorical(logits: torch.Size([1540, 3])) to satisfy the constraint IndependentConstraint(Real(), 1), but found invalid values:
tensor([[nan, nan, nan],
[nan, nan, nan],
[nan, nan, nan],
...,
[nan, nan, nan],
[nan, nan, nan],
[nan, nan, nan]], grad_fn=<SubBackward0>)
The above exception was the direct cause of the following exception:
ray::PPO.train_buffered() (pid=797698, ip=192.168.86.90, repr=PPO)
File "/home/samthomas/miniconda3/envs/profit-ray/lib/python3.9/site-packages/ray/tune/trainable.py", line 255, in train_buffered
result = self.train()
File "/home/samthomas/miniconda3/envs/profit-ray/lib/python3.9/site-packages/ray/tune/trainable.py", line 314, in train
result = self.step()
File "/home/samthomas/miniconda3/envs/profit-ray/lib/python3.9/site-packages/ray/rllib/agents/trainer.py", line 885, in step
raise e
File "/home/samthomas/miniconda3/envs/profit-ray/lib/python3.9/site-packages/ray/rllib/agents/trainer.py", line 867, in step
result = self.step_attempt()
File "/home/samthomas/miniconda3/envs/profit-ray/lib/python3.9/site-packages/ray/rllib/agents/trainer.py", line 925, in step_attempt
step_results = next(self.train_exec_impl)
File "/home/samthomas/miniconda3/envs/profit-ray/lib/python3.9/site-packages/ray/util/iter.py", line 756, in __next__
return next(self.built_iterator)
File "/home/samthomas/miniconda3/envs/profit-ray/lib/python3.9/site-packages/ray/util/iter.py", line 783, in apply_foreach
for item in it:
File "/home/samthomas/miniconda3/envs/profit-ray/lib/python3.9/site-packages/ray/util/iter.py", line 783, in apply_foreach
for item in it:
File "/home/samthomas/miniconda3/envs/profit-ray/lib/python3.9/site-packages/ray/util/iter.py", line 843, in apply_filter
for item in it:
File "/home/samthomas/miniconda3/envs/profit-ray/lib/python3.9/site-packages/ray/util/iter.py", line 843, in apply_filter
for item in it:
File "/home/samthomas/miniconda3/envs/profit-ray/lib/python3.9/site-packages/ray/util/iter.py", line 783, in apply_foreach
for item in it:
File "/home/samthomas/miniconda3/envs/profit-ray/lib/python3.9/site-packages/ray/util/iter.py", line 783, in apply_foreach
for item in it:
File "/home/samthomas/miniconda3/envs/profit-ray/lib/python3.9/site-packages/ray/util/iter.py", line 791, in apply_foreach
result = fn(item)
File "/home/samthomas/miniconda3/envs/profit-ray/lib/python3.9/site-packages/ray/rllib/execution/train_ops.py", line 197, in __call__
results = policy.learn_on_loaded_batch(
File "/home/samthomas/miniconda3/envs/profit-ray/lib/python3.9/site-packages/ray/rllib/policy/torch_policy.py", line 534, in learn_on_loaded_batch
return self.learn_on_batch(batch)
File "/home/samthomas/miniconda3/envs/profit-ray/lib/python3.9/site-packages/ray/rllib/utils/threading.py", line 21, in wrapper
return func(self, *a, **k)
File "/home/samthomas/miniconda3/envs/profit-ray/lib/python3.9/site-packages/ray/rllib/policy/torch_policy.py", line 434, in learn_on_batch
grads, fetches = self.compute_gradients(postprocessed_batch)
File "/home/samthomas/miniconda3/envs/profit-ray/lib/python3.9/site-packages/ray/rllib/utils/threading.py", line 21, in wrapper
return func(self, *a, **k)
File "/home/samthomas/miniconda3/envs/profit-ray/lib/python3.9/site-packages/ray/rllib/policy/torch_policy.py", line 605, in compute_gradients
tower_outputs = self._multi_gpu_parallel_grad_calc(
File "/home/samthomas/miniconda3/envs/profit-ray/lib/python3.9/site-packages/ray/rllib/policy/torch_policy.py", line 1085, in _multi_gpu_parallel_grad_calc
raise last_result[0] from last_result[1]
ValueError: Expected parameter logits (Tensor of shape (1540, 3)) of distribution Categorical(logits: torch.Size([1540, 3])) to satisfy the constraint IndependentConstraint(Real(), 1), but found invalid values:
tensor([[nan, nan, nan],
[nan, nan, nan],
[nan, nan, nan],
...,
[nan, nan, nan],
[nan, nan, nan],
[nan, nan, nan]], grad_fn=<SubBackward0>)
In tower 0 on device cpu
I have attempted to catch non-finite observations and invalid actions in my step method:
def step(self, action):
    """Advance the data source one step and return (observation, reward, done, info).

    If ``action`` is not contained in ``self.action_space``, or the
    observation has any non-finite entry (NaN or +/-inf), the observation is
    replaced with zeros of the same shape, the reward is set to ``-1`` and
    the episode is flagged as done.
    """
    observation, done = self.data_source.take_step()
    # Guard: never hand an invalid action's outcome or a non-finite
    # observation back to the caller; end the episode with a penalty instead.
    if not self.action_space.contains(action) or not np.isfinite(observation).all():
        observation = np.zeros_like(observation)
        reward = -1
        done = True
    # NOTE(review): as posted, ``reward`` is only assigned inside the guard
    # above; the normal-path reward computation is presumably elided from the
    # snippet -- confirm, and check that value for NaN/inf as well.
    info = {}
    return observation, reward, done, info
However, I am still seeing the error.
Any help would be much appreciated.