VectorEnv with MultiAgentEnv instances Fails

Has anyone been able to successfully use a VectorEnv whose sub-environments are MultiAgentEnv instances?

I am currently trying to write a class like the following:

class MultiVectorizedSimSimEnv(VectorEnv):
    """Vectorized version of the SimSimEnv."""

    def __init__(self, env_config: dict, episode_length=10):
        # Template env, used to expose the observation/action spaces.
        self.env = SimSimEnv(env_config, episode_length=episode_length)

        super().__init__(
            observation_space=self.env.observation_space,
            action_space=self.env.action_space,
            num_envs=env_config.get("num_sims", 1),  # default to 1
        )
        # ...
        # One sub-environment per vector slot.
        self.sims = [
            SimSimEnv(env_config=env_config, episode_length=episode_length)
            for _ in range(self.num_envs)
        ]
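
The elided part implements the VectorEnv polling API by delegating to self.sims, roughly like this (the exact method signatures vary across Ray versions; this assumes the older gym-style reset()/step() API used throughout this post):

    def vector_reset(self):
        # One multi-agent observation dict per sub-environment.
        return [sim.reset() for sim in self.sims]

    def reset_at(self, index):
        return self.sims[index].reset()

    def vector_step(self, actions):
        # `actions` holds one per-agent action dict per sub-environment.
        obs, rewards, dones, infos = [], [], [], []
        for sim, action in zip(self.sims, actions):
            o, r, d, i = sim.step(action)
            obs.append(o)
            rewards.append(r)
            dones.append(d)
            infos.append(i)
        return obs, rewards, dones, infos

    def get_sub_environments(self):
        return self.sims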

The sub-environments are instances of the following environment class:

class SimSimEnv(MultiAgentEnv):
    def __init__(self, env_config, episode_length, config=None):
        ...
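
For context, SimSimEnv follows the usual MultiAgentEnv contract: reset() and step() take and return dicts keyed by agent ID ("red" and "blue" here, matching the policy names below). A stripped-down sketch of its shape (the spaces and reward logic are placeholders, not the real ones):

import gym
from ray.rllib.env.multi_agent_env import MultiAgentEnv

class SimSimEnv(MultiAgentEnv):
    def __init__(self, env_config, episode_length, config=None):
        super().__init__()
        self.episode_length = episode_length
        self.agents = ["red", "blue"]
        # Placeholder spaces; the real env defines its own.
        self.observation_space = gym.spaces.Box(-1.0, 1.0, shape=(4,))
        self.action_space = gym.spaces.Discrete(2)
        self.t = 0

    def reset(self):
        self.t = 0
        # One observation per agent ID.
        return {a: self.observation_space.sample() for a in self.agents}

    def step(self, action_dict):
        self.t += 1
        obs = {a: self.observation_space.sample() for a in action_dict}
        rewards = {a: 0.0 for a in action_dict}  # placeholder reward
        dones = {"__all__": self.t >= self.episode_length}  # "__all__" ends the episode
        return obs, rewards, dones, {}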

However, Ray doesn't appear to recognize that the sub-environments are multi-agent, and it throws an error saying that I can't use multi-agent training on the vector env:

Error:

ValueError: Have multiple policies <PolicyMap lru-caching-capacity=100 policy-IDs=['policy_red', 'policy_blue']>, but the env <harness.envs.multi_simsim_env.MultiVectorizedSimSimEnv object at 0x0000019BD0AC2170> is not a subclass of BaseEnv, MultiAgentEnv, ActorHandle, or ExternalMultiAgentEnv!
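
As the message says, the failing check is a subclass test against MultiAgentEnv (among others), which VectorEnv alone doesn't satisfy. It can be reproduced outside of training:

from ray.rllib.env.multi_agent_env import MultiAgentEnv

env = MultiVectorizedSimSimEnv(env_config)
print(isinstance(env, MultiAgentEnv))  # False: VectorEnv is not a MultiAgentEnv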

This is the configuration I have been using for training:

    def env_creator(_):
        return MultiVectorizedSimSimEnv(env_config)  # return an env instance

    tune.register_env("multi_simsim", env_creator)

    def select_policy(agent_id, episode, **kwargs):
        # Agent IDs "red"/"blue" map to "policy_red"/"policy_blue".
        return f"policy_{agent_id}"

    # Define the configuration
    from ray.rllib.algorithms.algorithm_config import AlgorithmConfig
    from ray.rllib.policy.policy import PolicySpec

    from harness.policies.example_policy import EntropyLossPG

    config = (
        AlgorithmConfig(algo_class=EntropyLossPG)
        .environment(env="multi_simsim", disable_env_checking=True)
        .framework("torch")
        .rollouts(
            num_rollout_workers=0,
            num_envs_per_worker=4,
            rollout_fragment_length=8,  # was 10
        )
        .multi_agent(
            policies={
                "policy_red": PolicySpec(
                    config=AlgorithmConfig.overrides(
                        model={"use_lstm": True}, framework_str="torch"
                    )
                ),
                "policy_blue": PolicySpec(
                    config=AlgorithmConfig.overrides(
                        model={"use_lstm": True}, framework_str="torch"
                    )
                ),
            },
            policy_mapping_fn=select_policy,
        )
    )

    # Start the training (pass a config dict, not a built Algorithm, to Tune).
    tune.run(EntropyLossPG, config=config.to_dict())

How severely does this issue affect your experience of using Ray?

  • High: It blocks me from completing my task.

Fixed! Just do the following:

class MultiAgentSupport(MultiAgentEnv):
    """Mixin that marks the env as a MultiAgentEnv, so RLlib's subclass check passes."""

    def __init__(self, **kwargs):
        self.num_envs = kwargs.get("num_envs")
        super().__init__()
        # self.env must already be set by the inheriting class.
        self.observation_space = self.env.observation_space
        self.action_space = self.env.action_space

and have the vectorized env class inherit from it:

class MultiVectorizedSimSimEnv(VectorEnv, MultiAgentSupport):
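
With both bases, RLlib's subclass check passes. Putting it together, one way to wire the constructors explicitly (self.env has to exist before MultiAgentSupport reads its spaces):

class MultiVectorizedSimSimEnv(VectorEnv, MultiAgentSupport):
    def __init__(self, env_config: dict, episode_length=10):
        self.env = SimSimEnv(env_config, episode_length=episode_length)
        VectorEnv.__init__(
            self,
            observation_space=self.env.observation_space,
            action_space=self.env.action_space,
            num_envs=env_config.get("num_sims", 1),
        )
        MultiAgentSupport.__init__(self, num_envs=self.num_envs)
        self.sims = [
            SimSimEnv(env_config=env_config, episode_length=episode_length)
            for _ in range(self.num_envs)
        ]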