Unsupported Action Space Exception (Dict with a DQN)

I am probably doing something wrong, but I’m trying to train a DQN on a vector of actions (a centralized decision for a multi-agent setting). The action space I have chosen is Dict.

In the build step in RLlib, I get this error:

Action space Dict(1: Discrete(5), 2: Discrete(5), 3: Discrete(5)) is not supported for DQN.
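
For context, a minimal sketch of the kind of space I am using (the integer agent keys are just illustrative):

from gymnasium.spaces import Dict, Discrete

# One Discrete(5) decision per agent, chosen centrally for three agents.
action_space = Dict({1: Discrete(5), 2: Discrete(5), 3: Discrete(5)})
# Building a DQN algorithm on an environment with this action space is what
# raises the error above.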

Should I modify my action space, or is it something I am doing incorrectly?
Thank you in advance.

Hi @Douae_Ahmadoun and welcome to the forum,

maybe you could share a reproducible example, or at least some code showing how you set up the algorithm?

Simon

Hi @Lars_Simon_Zehnder @Douae_Ahmadoun,
I’m also facing a similar issue. I am trying to train Pong in a PettingZoo environment using DQN.

Below is my implementation:

import os

from pettingzoo.atari import pong_v3

import ray
from gymnasium.spaces import Box, Discrete
from ray import tune
from ray.rllib.algorithms.dqn import DQNConfig
from ray.rllib.algorithms.dqn.dqn_torch_model import DQNTorchModel
from ray.rllib.env import PettingZooEnv
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC
from ray.rllib.utils.framework import try_import_torch
from ray.rllib.utils.torch_utils import FLOAT_MAX
from ray.tune.registry import register_env
from supersuit import color_reduction_v0, frame_stack_v1, frame_skip_v0, normalize_obs_v0, dtype_v0, resize_v1
import numpy as np
torch, nn = try_import_torch()

if __name__ == "__main__":
    ray.init()

    alg_name = "DQN"

    # Map agent IDs to policy IDs.
    def policy_mapping_fn(agent_id, episode, worker, **kwargs):
        return "first_0" if episode.episode_id % 2 == agent_id else "second_0"

    # Function that outputs the environment you wish to register.
    def env_creator():
        env = pong_v3.env()
        env = frame_stack_v1(color_reduction_v0(frame_skip_v0(env, 4), "full"), 4)
        env = dtype_v0(env, np.dtype(np.float32))
        env = normalize_obs_v0(env, env_min=0, env_max=1)
        env = resize_v1(env, x_size=84, y_size=84)
        return env

    env_name = "pong_v3"
    register_env(env_name, lambda config: PettingZooEnv(env_creator()))

    test_env = PettingZooEnv(env_creator())
    obs_space = test_env.observation_space
    act_space = test_env.action_space
    config = (
        DQNConfig()
        .environment(
            env=env_name,
            env_config={
                "max_episode_steps": 108000,
                "obs_type": "grayscale",
                # The authors actually use an action repetition of 4.
                "repeat_action_probability": 0.25,
            },
            clip_rewards=True,
        )
        .training(
            # Note, the paper also uses an Adam epsilon of 0.00015.
            lr=0.0000625,
            n_step=3,
            gamma=0.99,
            tau=1.0,
            train_batch_size=32,
            target_network_update_freq=32000,
            replay_buffer_config={
                "type": "PrioritizedEpisodeReplayBuffer",
                "capacity": 1000000,
                "alpha": 0.5,
                # Note, the paper used a linear schedule for beta.
                "beta": 0.4,
            },
            # Note, these are frames.
            num_steps_sampled_before_learning_starts=80000,
            noisy=True,
            num_atoms=51,
            v_min=-10.0,
            v_max=10.0,
            double_q=True,
            dueling=True,
            # learner_connector=_make_learner_connector,
        )
        .reporting(
            metrics_num_episodes_for_smoothing=10,
            min_sample_timesteps_per_iteration=1000,
        )
        .evaluation(
            evaluation_duration="auto",
            evaluation_interval=1,
            evaluation_num_env_runners=1,
            evaluation_parallel_to_training=True,
            evaluation_config={
                "explore": False,
            },
        )
        .multi_agent(
            policies={
                "first_0": (None, obs_space, act_space, {}),
                "second_0": (None, obs_space, act_space, {}),
            },
            policy_mapping_fn=policy_mapping_fn,
        )
        .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0")))
        .debugging(
            log_level="DEBUG"
        )  # TODO: change to ERROR to match pistonball example
        .framework(framework="torch")
    )
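
The snippet above stops at the config. A training call in the style of the second reference below would look roughly like this (the stop criterion and checkpoint frequency are just placeholders):

    # Placeholder run call; stop criterion and checkpoint_freq are arbitrary.
    tune.run(
        alg_name,
        name="DQN",
        stop={"timesteps_total": 10_000_000},
        checkpoint_freq=10,
        config=config.to_dict(),
    )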

P.S. I am using these two examples as references for the code:

  1. ray/rllib/tuned_examples/dqn/benchmark_dqn_atari.py at master · ray-project/ray · GitHub
  2. RLlib: DQN for Simple Poker - PettingZoo Documentation

Hey @tor_baba,

do you have a similar action space to the one mentioned in the original post? If yes, I would try to wrap your environment so that Ray thinks it is just working with a simple action space like Discrete(). You can then handle the complex action space inside the wrapper.
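
Something along these lines, as a rough, untested sketch (assuming three Discrete(5) sub-actions as in the error message; the class name is just illustrative):

import numpy as np
import gymnasium as gym
from gymnasium.spaces import Discrete

class FlattenDictActions(gym.ActionWrapper):
    """Expose a Dict of Discrete sub-actions as one flat Discrete space."""

    def __init__(self, env):
        super().__init__(env)
        # Fix a key order so the integer encoding stays stable across steps.
        self._keys = sorted(env.action_space.spaces.keys())
        self._sizes = [env.action_space.spaces[k].n for k in self._keys]
        # E.g. three Discrete(5) sub-actions -> Discrete(125).
        self.action_space = Discrete(int(np.prod(self._sizes)))

    def action(self, action):
        # Decode the single integer back into one sub-action per key.
        action = int(action)
        decoded = {}
        for key, size in zip(self._keys, self._sizes):
            decoded[key] = action % size
            action //= size
        return decoded

With a wrapper like this, RLlib only ever sees a plain Discrete space, which DQN supports, and the Dict action is reconstructed inside the wrapper before it reaches the underlying environment.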