How severe does this issue affect your experience of using Ray?
- High: It blocks me to complete my task.
Hi,
I’m trying to train a QMix algorithm using a custom environment. When I start the training everything goes well, but at a certain point this error appears:
2023-08-25 13:12:19,048 ERROR tune_controller.py:911 -- Trial task failed for trial QMIX_EPEnv_a2374_00000
Traceback (most recent call last):
File "C:\Users\grhen\AppData\Roaming\Python\Python39\site-packages\ray\air\execution\_internal\event_manager.py", line 110, in resolve_future
result = ray.get(future)
File "C:\Users\grhen\AppData\Roaming\Python\Python39\site-packages\ray\_private\auto_init_hook.py", line 24, in auto_init_wrapper
return fn(*args, **kwargs)
File "C:\Users\grhen\AppData\Roaming\Python\Python39\site-packages\ray\_private\client_mode_hook.py", line 103, in wrapper
return func(*args, **kwargs)
File "C:\Users\grhen\AppData\Roaming\Python\Python39\site-packages\ray\_private\worker.py", line 2493, in get
raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(ValueError): ray::QMix.train() (pid=24624, ip=127.0.0.1, actor_id=7061c53d6556519d949f97a901000000, repr=QMix)
File "python\ray\_raylet.pyx", line 1424, in ray._raylet.execute_task
File "python\ray\_raylet.pyx", line 1364, in ray._raylet.execute_task.function_executor
File "C:\Users\grhen\AppData\Roaming\Python\Python39\site-packages\ray\_private\function_manager.py", line 726, in actor_method_executor
return method(__ray_actor, *args, **kwargs)
File "C:\Users\grhen\AppData\Roaming\Python\Python39\site-packages\ray\util\tracing\tracing_helper.py", line 464, in _resume_span
return method(self, *_args, **_kwargs)
File "C:\Users\grhen\AppData\Roaming\Python\Python39\site-packages\ray\tune\trainable\trainable.py", line 375, in train
raise skipped from exception_cause(skipped)
File "C:\Users\grhen\AppData\Roaming\Python\Python39\site-packages\ray\tune\trainable\trainable.py", line 372, in train
result = self.step()
File "C:\Users\grhen\AppData\Roaming\Python\Python39\site-packages\ray\util\tracing\tracing_helper.py", line 464, in _resume_span
return method(self, *_args, **_kwargs)
File "C:\Users\grhen\AppData\Roaming\Python\Python39\site-packages\ray\rllib\algorithms\algorithm.py", line 851, in step
results, train_iter_ctx = self._run_one_training_iteration()
File "C:\Users\grhen\AppData\Roaming\Python\Python39\site-packages\ray\util\tracing\tracing_helper.py", line 464, in _resume_span
return method(self, *_args, **_kwargs)
File "C:\Users\grhen\AppData\Roaming\Python\Python39\site-packages\ray\rllib\algorithms\algorithm.py", line 2835, in _run_one_training_iteration
results = self.training_step()
File "C:\Users\grhen\AppData\Roaming\Python\Python39\site-packages\ray\util\tracing\tracing_helper.py", line 464, in _resume_span
return method(self, *_args, **_kwargs)
File "C:\Users\grhen\AppData\Roaming\Python\Python39\site-packages\ray\rllib\algorithms\qmix\qmix.py", line 275, in training_step
new_sample_batches = synchronous_parallel_sample(
File "C:\Users\grhen\AppData\Roaming\Python\Python39\site-packages\ray\rllib\execution\rollout_ops.py", line 82, in synchronous_parallel_sample
sample_batches = [worker_set.local_worker().sample()]
File "C:\Users\grhen\AppData\Roaming\Python\Python39\site-packages\ray\rllib\evaluation\rollout_worker.py", line 696, in sample
batches = [self.input_reader.next()]
File "C:\Users\grhen\AppData\Roaming\Python\Python39\site-packages\ray\rllib\evaluation\sampler.py", line 92, in next
batches = [self.get_data()]
File "C:\Users\grhen\AppData\Roaming\Python\Python39\site-packages\ray\rllib\evaluation\sampler.py", line 277, in get_data
item = next(self._env_runner)
File "C:\Users\grhen\AppData\Roaming\Python\Python39\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 344, in run
outputs = self.step()
File "C:\Users\grhen\AppData\Roaming\Python\Python39\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 382, in step
eval_results = self._do_policy_eval(to_eval=to_eval)
File "C:\Users\grhen\AppData\Roaming\Python\Python39\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 1081, in _do_policy_eval
eval_results[policy_id] = policy.compute_actions_from_input_dict(
File "C:\Users\grhen\AppData\Roaming\Python\Python39\site-packages\ray\rllib\algorithms\qmix\qmix_policy.py", line 319, in compute_actions_from_input_dict
action_distribution=TorchCategorical(masked_q_values_folded),
File "C:\Users\grhen\AppData\Roaming\Python\Python39\site-packages\ray\rllib\models\torch\torch_action_dist.py", line 91, in __init__
self.dist = torch.distributions.categorical.Categorical(logits=self.inputs)
File "C:\Users\grhen\anaconda3\envs\ep_rllib261\lib\site-packages\torch\distributions\categorical.py", line 66, in __init__
super(Categorical, self).__init__(batch_shape, validate_args=validate_args)
File "C:\Users\grhen\anaconda3\envs\ep_rllib261\lib\site-packages\torch\distributions\distribution.py", line 56, in __init__
raise ValueError(
ValueError: Expected parameter logits (Tensor of shape (4, 2)) of distribution Categorical(logits: torch.Size([4, 2])) to satisfy the constraint IndependentConstraint(Real(), 1), but found invalid values:
tensor([[nan, nan],
[nan, nan],
[nan, nan],
[nan, nan]])
This does not always occur at the same point in training, but I cannot finish the training because of this error. I tried both a simple training loop and Tune, and I get the same error with both.
Does anyone have an idea of how to fix it? Thanks.