I am trying to write a multi-agent environment with action masking using the new stack following the examples in the API documentation.
I have written a program that trains a very minimal random environment using the PPO algorithm. (I realize that there are some examples in ray.rllib.examples.multi_agent, but these are all run through the extremely complicated run_rllib_example_script_experiment function, which I know will be a nightmare to debug if I make one little mistake.) I want an example that is easy for myself and others to follow.
Here is what I have so far.
import random
from typing import Any, Optional

from gymnasium import Env
from gymnasium.spaces import Discrete, Box, Dict, MultiBinary
from numpy import ndarray
from ray.rllib.algorithms import PPOConfig
from ray.rllib.core.rl_module import MultiRLModuleSpec
from ray.rllib.core.rl_module.rl_module import RLModuleSpec
from ray.rllib.examples.rl_modules.classes.action_masking_rlm import (
    ActionMaskingTorchRLModule,
)

class MaskedMultiAgentRandomEnvironment(Env):
    """A custom OpenAI Gym multi-agent environment with action masking.

    This environment consists of 3 simultaneously-interacting agents with 5 continuous
    observations and 2 discrete actions each which can be masked. Each agent returns random
    observations, action masks, and rewards and has a 10% chance of termination after each step.
    """

    AGENTS = 3
    OBSERVATIONS = 5
    ACTIONS = 2

    def __init__(self, _: dict[str, Any] = ()):
        super().__init__()
        self.agents = self.possible_agents
        self.observation_spaces = {}
        self.action_spaces = {}
        for agent in self.agents:
            self.observation_spaces[agent] = Dict(
                {
                    "observations": Box(0, 1, (self.OBSERVATIONS,)),
                    "action_mask": MultiBinary(self.ACTIONS),
                }
            )
            self.action_spaces[agent] = Discrete(self.ACTIONS)

    @property
    def possible_agents(self) -> set[str]:
        return {f"Agent {i}" for i in range(self.AGENTS)}

    def reset(
        self, *, seed: Optional[int] = None, options: Optional[dict[str, Any]] = None
    ) -> tuple[dict, dict[str, Any]]:
        super().reset(seed=seed, options=options)
        self.agents = self.possible_agents
        observations = self.random_agent_observations()
        return observations, {}

    def step(self, actions: dict[str, int]) -> tuple[
        dict[str, dict[str, ndarray]],  # Observations
        dict[str, float],  # Rewards
        dict[str, bool],  # Terminations
        dict[str, bool],  # Truncations
        dict[str, Any],  # Info
    ]:
        observations = self.random_agent_observations()
        rewards = self.random_agent_rewards()
        terminations = self.random_agent_terminations()
        self.agents -= set(agent for agent, done in terminations.items() if done)
        return observations, rewards, terminations, {}, {}

    def random_agent_observations(self) -> dict[str, dict[str, ndarray]]:
        return {agent: self.observation_spaces[agent].sample() for agent in self.agents}

    def random_agent_rewards(self) -> dict[str, float]:
        return {agent: random.random() for agent in self.agents}

    def random_agent_terminations(self) -> dict[str, bool]:
        return {
            agent: random.choices([True, False], weights=[0.1, 0.9])[0]
            for agent in self.agents
        }

POLICY_ID = "Shared Policy"


def train_multi_agent_masked_algorithm():
    """
    Train a model for the multi-agent random environment using the PPO algorithm with default
    parameters and a single policy shared between all the agents.
    """
    module_spec = RLModuleSpec(module_class=ActionMaskingTorchRLModule)
    base_config = (
        PPOConfig()
        .environment(
            env=MaskedMultiAgentRandomEnvironment,
            disable_env_checking=True,
        )
        .multi_agent(policy_mapping_fn=lambda _, __, **___: POLICY_ID)
        .rl_module(
            rl_module_spec=MultiRLModuleSpec(rl_module_specs={POLICY_ID: module_spec})
        )
    )
    algo = base_config.build_algo()
    print(algo)
    result = algo.train()
    print(result)


if __name__ == "__main__":
    train_multi_agent_masked_algorithm()
It fails in gymnasium.wrappers.common.PassiveEnvChecker because the environment does not specify an action_space member.
2025-03-02 12:25:26,525 INFO worker.py:1841 -- Started a local Ray instance.
2025-03-02 12:25:28,369 ERROR actor_manager.py:833 -- Ray error (The actor died because of an error raised in its creation task, ray::SingleAgentEnvRunner.__init__() (pid=31569, ip=127.0.0.1, actor_id=4222258f312567add75fc51e01000000, repr=<ray.rllib.env.single_agent_env_runner.SingleAgentEnvRunner object at 0x12fe72570>)
  File "/Users/mcneill/miniforge3/envs/ray/lib/python3.12/site-packages/ray/rllib/env/single_agent_env_runner.py", line 100, in __init__
    self.make_env()
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/mcneill/miniforge3/envs/ray/lib/python3.12/site-packages/ray/rllib/env/single_agent_env_runner.py", line 654, in make_env
    gym.make_vec(
  File "/Users/mcneill/miniforge3/envs/ray/lib/python3.12/site-packages/gymnasium/envs/registration.py", line 920, in make_vec
    env = gym.vector.SyncVectorEnv(
          ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/mcneill/miniforge3/envs/ray/lib/python3.12/site-packages/gymnasium/vector/sync_vector_env.py", line 96, in __init__
    self.envs = [env_fn() for env_fn in env_fns]
                 ^^^^^^^^
  File "/Users/mcneill/miniforge3/envs/ray/lib/python3.12/site-packages/gymnasium/envs/registration.py", line 905, in create_single_env
    single_env = make(env_spec, **env_spec_kwargs.copy())
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/mcneill/miniforge3/envs/ray/lib/python3.12/site-packages/gymnasium/envs/registration.py", line 805, in make
    env = gym.wrappers.PassiveEnvChecker(env)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/mcneill/miniforge3/envs/ray/lib/python3.12/site-packages/gymnasium/wrappers/common.py", line 261, in __init__
    raise AttributeError(
AttributeError: The environment must specify an action space. https://gymnasium.farama.org/introduction/create_custom_env/), taking actor 1 out of service.
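If I understand gymnasium's checker correctly, it is looking for the singular attributes that a plain single-agent Env defines, something like this (a minimal sketch of my understanding, not my actual environment):

from gymnasium import Env
from gymnasium.spaces import Box, Discrete


class SingleAgentSketch(Env):
    """Plain single-agent skeleton: the checker looks for these singular attributes."""

    def __init__(self):
        super().__init__()
        # Singular attribute names, as in the gymnasium custom-environment tutorial.
        self.observation_space = Box(0, 1, (5,))
        self.action_space = Discrete(2)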
According to the RLlib multi-agent documentation, however, multi-agent environments are supposed to specify an action_spaces member, not action_space.
Why is this failing?
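For comparison, this is my reading of the pattern the multi-agent documentation describes for the new API stack (a sketch of what I think is expected, not something I have verified):

from typing import Any, Optional

from gymnasium.spaces import Box, Dict, Discrete, MultiBinary
from ray.rllib.env.multi_agent_env import MultiAgentEnv


class MultiAgentSketch(MultiAgentEnv):
    """Skeleton of my reading of the docs: per-agent space dicts under plural names."""

    def __init__(self, config: Optional[dict[str, Any]] = None):
        super().__init__()
        self.possible_agents = [f"Agent {i}" for i in range(3)]
        self.agents = list(self.possible_agents)
        # Plural, per-agent dicts keyed by agent id, as (I believe) the docs describe.
        self.observation_spaces = {
            agent: Dict(
                {
                    "observations": Box(0, 1, (5,)),
                    "action_mask": MultiBinary(2),
                }
            )
            for agent in self.possible_agents
        }
        self.action_spaces = {agent: Discrete(2) for agent in self.possible_agents}

    # reset() and step() would return per-agent dicts, as in my environment above.

If that reading is right, I do not see how one class can satisfy both the gymnasium checker (singular action_space) and the multi-agent API (plural action_spaces), which is the confusion behind my question.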
From Reddit and various RLlib support groups it appears that I’m not the only one blocked on multi-agent setups and action masking. The examples aren’t well-documented, and the tiniest modification launches you into an extremely difficult debugging task. I’m trying to help by writing simple, well-documented, and up-to-date examples that anyone can follow.
I have a masked single-agent example in the Ray Masked project that I think will be helpful to members of the Ray community. It took me a few weeks of debugging to produce it.
I am confident that with a few more weeks of debugging I could figure the multi-agent case out on my own as well, but it would be really helpful if I could get some guidance from a Ray developer. I think this would be an easy way to unblock multiple people.