How severely does this issue affect your experience of using Ray?
- High: It blocks me from completing my task.
Hi, I’m trying to run sumo-rl with Ray. I have no issues with its “grid4x4”, “arterial4x4”, “ingolstadt1”, and “cologne1” environments, because every agent in those environments has the same observation and action space (a quick way to check this is sketched right after my code). But with environments like “ingolstadt21”, my code crashes. Here is my code:
import os
import ray
from ray import tune
from ray.rllib.algorithms.dqn import DQNConfig
from ray.rllib.env.wrappers.pettingzoo_env import ParallelPettingZooEnv
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.tune.registry import register_env
from sumo_rl import ingolstadt21
from torch import nn
env = ingolstadt21(parallel=True, use_gui=True, yellow_time=2, render_mode='human')
class CNNModelV2(TorchModelV2, nn.Module):
    def __init__(self, obs_space, act_space, num_outputs, *args, **kwargs):
        TorchModelV2.__init__(self, obs_space, act_space, num_outputs, *args, **kwargs)
        nn.Module.__init__(self)
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, [8, 8], stride=(4, 4)),
            nn.ReLU(),
            nn.Conv2d(32, 64, [4, 4], stride=(2, 2)),
            nn.ReLU(),
            nn.Conv2d(64, 64, [3, 3], stride=(1, 1)),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(3136, 512),
            nn.ReLU(),
        )
        self.policy_fn = nn.Linear(512, num_outputs)
        self.value_fn = nn.Linear(512, 1)

    def forward(self, input_dict, state, seq_lens):
        model_out = self.model(input_dict["obs"].permute(0, 3, 1, 2))
        self._value_out = self.value_fn(model_out)
        return self.policy_fn(model_out), state

    def value_function(self):
        return self._value_out.flatten()
if __name__ == "__main__":
    ray.init()

    env_name = "ingolstad21"

    register_env(env_name, lambda config: ParallelPettingZooEnv(env))
    ModelCatalog.register_custom_model("CNNModelV2", CNNModelV2)

    config = (
        DQNConfig()
        .environment(env=env_name, clip_actions=True)
        .rollouts(num_rollout_workers=1, rollout_fragment_length=128)
        .training(
            train_batch_size=512,
            lr=2e-5,
            gamma=0.99,
            grad_clip=None,
        )
        .debugging(log_level="ERROR")
        .framework(framework="torch")
        .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0")))
    )

    tune.run(
        "DQN",
        name="DQN",
        stop={"timesteps_total": 5000000 if not os.environ.get("CI") else 50000},
        checkpoint_freq=10,
        local_dir="~/ray_results/" + env_name,
        config=config.to_dict(),
    )
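To make my assumption about the spaces concrete, here is a quick sketch of how the per-agent spaces can be inspected. It assumes the PettingZoo parallel API (possible_agents plus the observation_space(agent) and action_space(agent) methods); I have not double-checked every detail against sumo-rl:

# Sketch: group agents by their (observation space, action space) signature.
# Assumes the PettingZoo parallel API; use_gui is off so no SUMO window opens.
from sumo_rl import ingolstadt21

check_env = ingolstadt21(parallel=True, use_gui=False, yellow_time=2)

groups = {}
for agent in check_env.possible_agents:
    key = (str(check_env.observation_space(agent)), str(check_env.action_space(agent)))
    groups.setdefault(key, []).append(agent)

for (obs, act), agents in groups.items():
    print(f"{len(agents)} agent(s): obs={obs}, act={act}")

On the maps that work for me I would expect a single group; on “ingolstadt21” I expect several, which matches the crash below.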
The error is as follows:
(DQN pid=62553) Retrying in 1 seconds
(DQN pid=62553) Step #0.00 (0ms ?*RT. ?UPS, TraCI: 44ms, vehicles TOT 0 ACT 0 BUF 0)
(RolloutWorker pid=62612) Retrying in 1 seconds
(RolloutWorker pid=62612) Retrying in 1 seconds
2024-05-11 01:46:23,324 ERROR tune_controller.py:1331 -- Trial task failed for trial DQN_ingolstad21_1a43f_00000
Traceback (most recent call last):
  File ".cache/pypoetry/virtualenvs/project_env--pKdqVlZ-py3.10/lib/python3.10/site-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
    result = ray.get(future)
  File ".cache/pypoetry/virtualenvs/project_env--pKdqVlZ-py3.10/lib/python3.10/site-packages/ray/_private/auto_init_hook.py", line 21, in auto_init_wrapper
    return fn(*args, **kwargs)
  File ".cache/pypoetry/virtualenvs/project_env--pKdqVlZ-py3.10/lib/python3.10/site-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
    return func(*args, **kwargs)
  File ".cache/pypoetry/virtualenvs/project_env--pKdqVlZ-py3.10/lib/python3.10/site-packages/ray/_private/worker.py", line 2623, in get
    values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
  File ".cache/pypoetry/virtualenvs/project_env--pKdqVlZ-py3.10/lib/python3.10/site-packages/ray/_private/worker.py", line 863, in get_objects
    raise value
ray.exceptions.ActorDiedError: The actor died because of an error raised in its creation task, ray::DQN.__init__() (pid=62553, ip=192.168.0.28, actor_id=a72bef1689b2fe121ee6654301000000, repr=DQN)
  File ".cache/pypoetry/virtualenvs/project_env--pKdqVlZ-py3.10/lib/python3.10/site-packages/ray/rllib/env/env_runner_group.py", line 239, in _setup
    self.add_workers(
  File ".cache/pypoetry/virtualenvs/project_env--pKdqVlZ-py3.10/lib/python3.10/site-packages/ray/rllib/env/env_runner_group.py", line 754, in add_workers
    raise result.get()
  File ".cache/pypoetry/virtualenvs/project_env--pKdqVlZ-py3.10/lib/python3.10/site-packages/ray/rllib/utils/actor_manager.py", line 497, in _fetch_result
    result = ray.get(r)
ray.exceptions.ActorDiedError: The actor died because of an error raised in its creation task, ray::RolloutWorker.__init__() (pid=62612, ip=192.168.0.28, actor_id=5586410b26a8872903a6855601000000, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7a9553e423b0>)
  File ".cache/pypoetry/virtualenvs/project_env--pKdqVlZ-py3.10/lib/python3.10/site-packages/ray/rllib/evaluation/rollout_worker.py", line 477, in __init__
    self.policy_dict, self.is_policy_to_train = self.config.get_multi_agent_setup(
  File ".cache/pypoetry/virtualenvs/project_env--pKdqVlZ-py3.10/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm_config.py", line 3302, in get_multi_agent_setup
    raise ValueError(
ValueError: Two agents in your environment map to the same policyID (as per your `policy_mapping_fn`), however, these agents also have different observation spaces!

During handling of the above exception, another exception occurred:

ray::DQN.__init__() (pid=62553, ip=192.168.0.28, actor_id=a72bef1689b2fe121ee6654301000000, repr=DQN)
  File ".cache/pypoetry/virtualenvs/project_env--pKdqVlZ-py3.10/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 554, in __init__
    super().__init__(
  File ".cache/pypoetry/virtualenvs/project_env--pKdqVlZ-py3.10/lib/python3.10/site-packages/ray/tune/trainable/trainable.py", line 158, in __init__
    self.setup(copy.deepcopy(self.config))
  File ".cache/pypoetry/virtualenvs/project_env--pKdqVlZ-py3.10/lib/python3.10/site-packages/ray/rllib/algorithms/algorithm.py", line 640, in setup
    self.workers = EnvRunnerGroup(
  File ".cache/pypoetry/virtualenvs/project_env--pKdqVlZ-py3.10/lib/python3.10/site-packages/ray/rllib/env/env_runner_group.py", line 191, in __init__
    raise e.args[0].args[2]
ValueError: Two agents in your environment map to the same policyID (as per your `policy_mapping_fn`), however, these agents also have different observation spaces!
I saw a PR that I hoped would solve this, but it doesn’t seem to have.
Thanks in advance; I’d be glad to answer any further questions.
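From the error message I understand that, with the default policy_mapping_fn, all agents end up mapped to a single policy even though their observation spaces differ. Is the intended fix to give each agent its own policy, along the lines of the untested sketch below? PolicySpec and the multi_agent() config call are what I found in the RLlib docs; the per-agent mapping itself is just my guess.

# Untested sketch: one policy per agent, each built from that agent's own spaces,
# and a policy_mapping_fn that routes every agent id to its own policy id.
from ray.rllib.policy.policy import PolicySpec

policies = {
    agent_id: PolicySpec(
        observation_space=env.observation_space(agent_id),
        action_space=env.action_space(agent_id),
    )
    for agent_id in env.possible_agents
}

config = config.multi_agent(
    policies=policies,
    policy_mapping_fn=lambda agent_id, *args, **kwargs: agent_id,
)

If that is the right direction I can add it to the config in my script above, but I’m not sure it is the recommended way to handle heterogeneous spaces with DQN.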