How severely does this issue affect your experience of using Ray?
- High: It blocks me from completing my task.
I have written a custom MultiAgentEnv called “Hexapawn” that I want to train with RLlib. Here is the environment:
import logging
from copy import copy
from typing import Optional, Any, Callable
from gymnasium.spaces import MultiDiscrete, Discrete
from numpy import ndarray, zeros, ones, int8
from pettingzoo.utils.env import AgentID
from ray.rllib.env.multi_agent_env import MultiAgentEnv
from .game import Player, Board, Direction, Move, Square, FILES
logger = logging.getLogger(__name__)
class Hexapawn(MultiAgentEnv):
"""
A Hexapawn game environment for multi-agent reinforcement learning.
This class represents a Hexapawn game environment, inheriting from MultiAgentEnv. It supports rendering modes and
provides methods for managing game state, actions, and observations for agents.
"""
metadata = {"render_modes": ["ansi"], "frames_per_second": 1}
def __init__(self, configuration: dict[str, Any] = ()):
super().__init__()
self.board = Board()
configuration = dict(configuration)
self.render_mode = configuration.get("render_mode", "human")
self.frames_per_second = configuration.get(
"frames_per_second", self.metadata["frames_per_second"]
)
self.possible_agents = [player for player in [Player.WHITE, Player.BLACK]]
# 3x3 grid with 3 values (empty, self, and opponent).
self.observation_spaces = self.agent_dictionary(
MultiDiscrete((len(Player) + 1) * ones((self.rows, self.cols), dtype=int))
)
# 3x3 grid with three moves at each square (capture left, and capture right, and forward).
self.action_spaces = self.agent_dictionary(Discrete(self.actions))
self.gui = None
    @property
    def rows(self) -> int:
        return self.board.ranks

    @property
    def cols(self) -> int:
        return self.board.files

    @property
    def moves(self) -> int:
        return len(Direction)

    @property
    def actions(self) -> int:
        return self.rows * self.cols * self.moves

    def agent_dictionary(self, value) -> dict[AgentID, Any]:
        def constant_value(_):
            return value

        if isinstance(value, Callable):
            f = value
        else:
            f = constant_value
        return {agent: f(agent) for agent in self.possible_agents}
    def reset(
        self, *, seed: Optional[int] = None, options: Optional[dict] = None
    ) -> tuple[dict[Player, ndarray], dict[Player, dict]]:  # Observation, Info
        self.board = Board()
        self.agents = self.possible_agents[:]
        # White moves first, so only White receives an initial observation.
        observations = {Player.WHITE: self.observation(Player.WHITE)}
        info = self.agent_dictionary(self.info)
        return observations, info
    def step(self, actions: dict[Player, int]) -> tuple[
        dict[Player, ndarray],  # Observations
        dict[Player, float],  # Rewards
        dict[Player, bool],  # Terminated
        dict[Player, bool],  # Truncated
        dict[Player, dict],  # Info
    ]:
        observations, rewards, terminated = {}, {}, {}
        for player, action in actions.items():
            self.board += self.move_from_action(action)
            # Only the player to move next receives an observation.
            opponent = self.board.player
            observations |= {opponent: self.observation(opponent)}
        if losing_player := self.board.has_lost():
            observations = {}
            rewards[-losing_player] = 1
            rewards[losing_player] = 0
            terminated = self.agent_dictionary(True)
        info = self.agent_dictionary(self.info)
        return observations, rewards, terminated, {}, info
    def observation(self, player: Player) -> ndarray:
        return self.players_board(player).board

    def info(self, player: Player) -> dict[str, Any]:
        return {"action_mask": self.mask(player)}

    def mask(self, player: Player) -> ndarray:
        mask = zeros(self.actions, dtype=int8)
        board = self.players_board(player)
        for move in board.legal_moves():
            action = self.action_from_move(move)
            mask[action] = 1
        return mask

    def action_from_move(self, move: Move) -> int:
        rank = move.origin.rank
        file = move.origin.file
        direction = int(move.direction)
        return sum(self.rows**i * x for i, x in enumerate([rank, file, direction]))

    def move_from_action(self, action: int) -> Move:
        rank, file, direction = [(action // self.rows**i) % self.rows for i in range(3)]
        return Move(origin=Square(rank=rank, file=file), direction=Direction(direction))

    def render(self) -> Optional[str]:
        return str(self)

    def players_board(self, player: Player) -> Board:
        board = copy(self.board)
        if board.player is not player:
            board = -board
        return board
    @staticmethod
    def players_move(player: Player, move: Move) -> Move:
        if player is Player.BLACK:
            move = copy(move)
            move.direction = -move.direction
            move.origin = Square(
                rank=move.origin.rank, file=FILES - move.origin.file - 1
            )
        return move

    def __str__(self):
        board = self.players_board(Player.WHITE)
        if player := self.board.has_lost():
            s = f"{repr(-player)} wins."
        else:
            s = f"{repr(self.board.player)}'s move"
        return f"{board}\n{s}"
Here is my training code so far.
import ray
from ray.rllib.algorithms import DQNConfig
from ray.tune.registry import register_env

from hexapawn.environment import Hexapawn


def train_hexapawn():
    def hexapawn_factory(configuration: dict) -> Hexapawn:
        return Hexapawn(configuration)

    ray.init()
    register_env("hexapawn", hexapawn_factory)
    config = DQNConfig().environment("hexapawn").multi_agent().rl_module()
    algo = config.build_algo()
    algo.train()


if __name__ == "__main__":
    train_hexapawn()
It is very minimal. I am trying to get training to complete without errors before I add more configuration parameters. You'll need a few other files if you want to run this yourself; the easiest way is to check the project out from GitLab.
Whenever I try to run it I get cryptic error messages, and I've spent a lot of time stepping through the RLlib code. I've tried various configurations and nothing works.
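For example, one variant I've tried looks roughly like this (the multi-agent settings are guesses on my part, so the policy names and mapping may well be wrong):
from ray.rllib.algorithms.dqn import DQNConfig

from hexapawn.game import Player

config = (
    DQNConfig()
    .environment("hexapawn")
    .multi_agent(
        # One policy per player, mapped by agent ID.
        policies={"white", "black"},
        policy_mapping_fn=lambda agent_id, *args, **kwargs: (
            "white" if agent_id == Player.WHITE else "black"
        ),
    )
)
That variant doesn't work either, which is why I'm asking for a known-good starting point.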
Can someone provide me with a minimal training configuration so that I have something to start from?
When I run the minimal version above I get the output below. I'm pretty sure I'm not providing the correct configuration parameters, but I don't know how to work out what they should be. The traceback itself doesn't tell me much, although I notice that RLlib ends up in SingleAgentEnvRunner and the Gymnasium checker complains that the environment's action_space is None.
/Users/mcneill/miniforge3/envs/Hexapawn/bin/python /Users/mcneill/src/Hexapawn/src/hexapawn/train.py
2025-02-20 09:59:57,166 INFO worker.py:1832 -- Started a local Ray instance. View the dashboard at http://127.0.0.1:8265
2025-02-20 09:59:58,371 WARNING algorithm_config.py:4726 -- You are running DQN on the new API stack! This is the new default behavior for this algorithm. If you don't want to use the new API stack, set `config.api_stack(enable_rl_module_and_learner=False,enable_env_runner_and_connector_v2=False)`. For a detailed migration guide, see here: https://docs.ray.io/en/master/rllib/new-api-stack-migration-guide.html
/Users/mcneill/miniforge3/envs/Hexapawn/lib/python3.12/site-packages/ray/rllib/algorithms/algorithm.py:574: RayDeprecationWarning: This API is deprecated and may be removed in future Ray releases. You could suppress this warning by setting env variable PYTHONWARNINGS="ignore::DeprecationWarning"
`UnifiedLogger` will be removed in Ray 2.7.
return UnifiedLogger(config, logdir, loggers=None)
/Users/mcneill/miniforge3/envs/Hexapawn/lib/python3.12/site-packages/ray/tune/logger/unified.py:53: RayDeprecationWarning: This API is deprecated and may be removed in future Ray releases. You could suppress this warning by setting env variable PYTHONWARNINGS="ignore::DeprecationWarning"
The `JsonLogger interface is deprecated in favor of the `ray.tune.json.JsonLoggerCallback` interface and will be removed in Ray 2.7.
self._loggers.append(cls(self.config, self.logdir, self.trial))
/Users/mcneill/miniforge3/envs/Hexapawn/lib/python3.12/site-packages/ray/tune/logger/unified.py:53: RayDeprecationWarning: This API is deprecated and may be removed in future Ray releases. You could suppress this warning by setting env variable PYTHONWARNINGS="ignore::DeprecationWarning"
The `CSVLogger interface is deprecated in favor of the `ray.tune.csv.CSVLoggerCallback` interface and will be removed in Ray 2.7.
self._loggers.append(cls(self.config, self.logdir, self.trial))
/Users/mcneill/miniforge3/envs/Hexapawn/lib/python3.12/site-packages/ray/tune/logger/unified.py:53: RayDeprecationWarning: This API is deprecated and may be removed in future Ray releases. You could suppress this warning by setting env variable PYTHONWARNINGS="ignore::DeprecationWarning"
The `TBXLogger interface is deprecated in favor of the `ray.tune.tensorboardx.TBXLoggerCallback` interface and will be removed in Ray 2.7.
self._loggers.append(cls(self.config, self.logdir, self.trial))
Traceback (most recent call last):
File "/Users/mcneill/src/Hexapawn/src/hexapawn/train.py", line 20, in <module>
train_hexapawn()
File "/Users/mcneill/src/Hexapawn/src/hexapawn/train.py", line 15, in train_hexapawn
algo = config.build_algo()
^^^^^^^^^^^^^^^^^^^
File "/Users/mcneill/miniforge3/envs/Hexapawn/lib/python3.12/site-packages/ray/rllib/algorithms/algorithm_config.py", line 957, in build_algo
return algo_class(
^^^^^^^^^^^
File "/Users/mcneill/miniforge3/envs/Hexapawn/lib/python3.12/site-packages/ray/rllib/algorithms/algorithm.py", line 590, in __init__
super().__init__(
File "/Users/mcneill/miniforge3/envs/Hexapawn/lib/python3.12/site-packages/ray/tune/trainable/trainable.py", line 158, in __init__
self.setup(copy.deepcopy(self.config))
File "/Users/mcneill/miniforge3/envs/Hexapawn/lib/python3.12/site-packages/ray/rllib/algorithms/algorithm.py", line 693, in setup
self.env_runner_group = EnvRunnerGroup(
^^^^^^^^^^^^^^^
File "/Users/mcneill/miniforge3/envs/Hexapawn/lib/python3.12/site-packages/ray/rllib/env/env_runner_group.py", line 196, in __init__
self._setup(
File "/Users/mcneill/miniforge3/envs/Hexapawn/lib/python3.12/site-packages/ray/rllib/env/env_runner_group.py", line 291, in _setup
self._local_env_runner = self._make_worker(
^^^^^^^^^^^^^^^^^^
File "/Users/mcneill/miniforge3/envs/Hexapawn/lib/python3.12/site-packages/ray/rllib/env/env_runner_group.py", line 1187, in _make_worker
worker = cls(
^^^^
File "/Users/mcneill/miniforge3/envs/Hexapawn/lib/python3.12/site-packages/ray/rllib/env/single_agent_env_runner.py", line 98, in __init__
self.make_env()
File "/Users/mcneill/miniforge3/envs/Hexapawn/lib/python3.12/site-packages/ray/rllib/env/single_agent_env_runner.py", line 658, in make_env
gym.make_vec(
File "/Users/mcneill/miniforge3/envs/Hexapawn/lib/python3.12/site-packages/gymnasium/envs/registration.py", line 918, in make_vec
env = gym.vector.SyncVectorEnv(
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/mcneill/miniforge3/envs/Hexapawn/lib/python3.12/site-packages/gymnasium/vector/sync_vector_env.py", line 86, in __init__
self.envs = [env_fn() for env_fn in env_fns]
^^^^^^^^
File "/Users/mcneill/miniforge3/envs/Hexapawn/lib/python3.12/site-packages/gymnasium/envs/registration.py", line 903, in create_single_env
single_env = make(env_spec, **env_spec_kwargs.copy())
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/mcneill/miniforge3/envs/Hexapawn/lib/python3.12/site-packages/gymnasium/envs/registration.py", line 803, in make
env = gym.wrappers.PassiveEnvChecker(env)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/mcneill/miniforge3/envs/Hexapawn/lib/python3.12/site-packages/gymnasium/wrappers/common.py", line 264, in __init__
check_action_space(env.action_space)
File "/Users/mcneill/miniforge3/envs/Hexapawn/lib/python3.12/site-packages/gymnasium/utils/passive_env_checker.py", line 67, in check_space
raise TypeError(
TypeError: action space does not inherit from `gymnasium.spaces.Space`, actual type: <class 'NoneType'>
Process finished with exit code 1