How severe does this issue affect your experience of using Ray?
- High: It blocks me from completing my task.
Hi all,
It seems like there is a bug in the attention wrapper: when providing prev_action or prev_reward, it crashes, as per the log below:
ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, ray::RolloutWorker.__init__() (pid=3744, ip=127.0.0.1, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x000002BB8F98F790>)
File "python\ray\_raylet.pyx", line 875, in ray._raylet.execute_task
File "python\ray\_raylet.pyx", line 879, in ray._raylet.execute_task
File "python\ray\_raylet.pyx", line 819, in ray._raylet.execute_task.function_executor
File "C:\personal\ai\ray_venv\lib\site-packages\ray\_private\function_manager.py", line 674, in actor_method_executor
return method(__ray_actor, *args, **kwargs)
File "C:\personal\ai\ray_venv\lib\site-packages\ray\util\tracing\tracing_helper.py", line 460, in _resume_span
return method(self, *_args, **_kwargs)
File "C:\personal\ai\ray_venv\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 738, in __init__
self._update_policy_map(policy_dict=self.policy_dict)
File "C:\personal\ai\ray_venv\lib\site-packages\ray\util\tracing\tracing_helper.py", line 460, in _resume_span
return method(self, *_args, **_kwargs)
File "C:\personal\ai\ray_venv\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 1985, in _update_policy_map
self._build_policy_map(
File "C:\personal\ai\ray_venv\lib\site-packages\ray\util\tracing\tracing_helper.py", line 460, in _resume_span
return method(self, *_args, **_kwargs)
File "C:\personal\ai\ray_venv\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 2097, in _build_policy_map
new_policy = create_policy_for_framework(
File "C:\personal\ai\ray_venv\lib\site-packages\ray\rllib\utils\policy.py", line 142, in create_policy_for_framework
return policy_class(observation_space, action_space, merged_config)
File "C:\personal\ai\ray_venv\lib\site-packages\ray\rllib\algorithms\ppo\ppo_torch_policy.py", line 67, in __init__
self._initialize_loss_from_dummy_batch()
File "C:\personal\ai\ray_venv\lib\site-packages\ray\rllib\policy\policy.py", line 1401, in _initialize_loss_from_dummy_batch
actions, state_outs, extra_outs = self.compute_actions_from_input_dict(
File "C:\personal\ai\ray_venv\lib\site-packages\ray\rllib\policy\torch_policy_v2.py", line 518, in compute_actions_from_input_dict
return self._compute_action_helper(
File "C:\personal\ai\ray_venv\lib\site-packages\ray\rllib\utils\threading.py", line 24, in wrapper
return func(self, *a, **k)
File "C:\personal\ai\ray_venv\lib\site-packages\ray\rllib\policy\torch_policy_v2.py", line 1133, in _compute_action_helper
dist_inputs, state_out = self.model(input_dict, state_batches, seq_lens)
File "C:\personal\ai\ray_venv\lib\site-packages\ray\rllib\models\modelv2.py", line 259, in __call__
res = self.forward(restored, state or [], seq_lens)
File "C:\personal\ai\ray_venv\lib\site-packages\ray\rllib\models\torch\attention_net.py", line 406, in forward
one_hot(
File "C:\personal\ai\ray_venv\lib\site-packages\ray\rllib\utils\torch_utils.py", line 465, in one_hot
[nn.functional.one_hot(x[:, i].long(), n) for i, n in enumerate(nvec)],
File "C:\personal\ai\ray_venv\lib\site-packages\ray\rllib\utils\torch_utils.py", line 465, in <listcomp>
[nn.functional.one_hot(x[:, i].long(), n) for i, n in enumerate(nvec)],
IndexError: index 2 is out of bounds for dimension 1 with size 2
During handling of the above exception, another exception occurred:
Versions / Dependencies
OS: Win 11
Ray: Nightly Wheel
Python: 3.10
Reproduction script
import ray
from ray.rllib.env import PolicyServerInput
from ray.rllib.algorithms.ppo import PPOConfig
import numpy as np
from gymnasium.spaces import MultiDiscrete, Box
def _input(ioctx):
# We are remote worker, or we are local worker with num_workers=0:
# Create a PolicyServerInput.
if ioctx.worker_index > 0 or ioctx.worker.num_workers == 0:
return PolicyServerInput(
ioctx,
'127.0.0.1',
55556 + ioctx.worker_index - (1 if ioctx.worker_index > 0 else 0),
)
# No InputReader (PolicyServerInput) needed.
else:
return None
# Build a PPO config that reproduces the crash: the attention (GTrXL)
# model wrapper with "attention_use_n_prev_actions" > 0 combined with a
# MultiDiscrete action space triggers the IndexError in one_hot()
# (see traceback above).
ppo_config = PPOConfig()
ppo_config.model = {
    "vf_share_layers": True,
    # The attention wrapper must be enabled to hit the bug.
    'use_attention': True,
    "max_seq_len": 64,
    "attention_num_transformer_units": 1,
    "attention_dim": 256,
    "attention_memory_inference": 64,
    "attention_memory_training": 64,
    "attention_num_heads": 8,
    "attention_head_dim": 32,
    "attention_position_wise_mlp_dim": 128,
    # Feeding previous actions into the attention net is what crashes
    # with a MultiDiscrete action space (the commented-out prev_rewards
    # setting below reportedly triggers the same failure).
    "attention_use_n_prev_actions": 2,
    # "attention_use_n_prev_rewards": 64,
    "attention_init_gru_gate_bias": 2.0,
    # Custom conv stack (the observation space, set below, is 240x320x1).
    "conv_filters": [
        [64, [12, 16], [7, 9]],
        [128, [6, 6], 4],
        [256, [9, 9], 1]
    ],
    "conv_activation": "relu",
}
# Two external rollout workers fed by PolicyServerInput; connectors are
# disabled for the external-env / policy-server setup.
ppo_config.rollouts(num_rollout_workers=2, enable_connectors=False)
ppo_config.offline_data(input_=_input)
ppo_config.framework_str = 'torch'
ppo_config.log_sys_usage = False
ppo_config.compress_observations = True
ppo_config.shuffle_sequences = False
# No local env: observation and action spaces must be given explicitly.
ppo_config.env = None
ppo_config.observation_space = Box(low=0, high=1, shape=(240, 320, 1), dtype=np.float32)
# 9 independent binary buttons — the MultiDiscrete space involved in
# the IndexError.
ppo_config.action_space = MultiDiscrete(
    [
        2, # W
        2, # A
        2, # S
        2, # D
        2, # Space
        2, # H
        2, # J
        2, # K
        2 # L
    ]
)
from ray import tune
name = "Attention_Repro1"
print(f"Starting: {name}")
tempyy = ppo_config.to_dict()
# Launch via Tune; the crash occurs during RolloutWorker creation,
# before any training step runs.
tune.run("PPO",
resume='AUTO',
config=tempyy,
name=name,
keep_checkpoints_num=20, checkpoint_score_attr="episode_reward_mean", mode='max',
checkpoint_freq=1,
metric="episode_reward_mean",
max_failures=10,
checkpoint_at_end=True)
Edit:
Also opened issue at: [RLlib] Using Attention_Net + Prev_Action or Prev_Reward gives: Index error · Issue #35334 · ray-project/ray · GitHub