PettingZoo Asymmetric Environments

Using an asymmetric environment such as mpe/simple_push with the PettingZooEnv wrapper fails due to its assert statements unless additional padding is applied. Is there a reason the wrapper is limited to symmetric environments? I created a copy that simply removes the validation, and it seems to work fine with PPO (using a separate PPO policy for each agent).
AFAICT RLlib supports environments with asymmetric observation/action spaces; however, I couldn't find any asymmetric ones among the default environments.
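
To make the asymmetry concrete, here is a quick sketch (the exact Box shapes depend on the PettingZoo version, so treat the printed values as illustrative):

from pettingzoo.mpe import simple_push_v2

env = simple_push_v2.env()
env.reset()

# In simple_push the adversary and the agent observe vectors of different
# lengths, so their observation spaces are not identical.
for agent in env.agents:
    print(agent, env.observation_space(agent), env.action_space(agent))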

How severely does this issue affect your experience of using Ray?

  • Low: It annoys or frustrates me for a moment.

Can you provide an example script so that I can better understand the problem? I don't have experience with asymmetric environments.

I’m talking about this wrapper: ray/pettingzoo_env.py at master · ray-project/ray · GitHub
I simply took out the asserts and it seems to work fine; I am wondering why these asserts are there in the first place.
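For context, this is roughly the kind of check I removed (paraphrased, not the exact wrapper source; there is an analogous one for action spaces):

from pettingzoo.mpe import simple_push_v2

env = simple_push_v2.env()
env.reset()

first = env.agents[0]
# Paraphrased sketch of the wrapper's validation, not the exact code.
# With simple_push's differing observation spaces, this raises an AssertionError.
assert all(
    env.observation_space(agent) == env.observation_space(first)
    for agent in env.agents
), "Observation spaces for all agents must be identical."
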
Here is a small script to reproduce what I mean:

import ray
from pettingzoo.mpe import simple_push_v2
from ray import tune
from ray.rllib import MultiAgentEnv
from ray.rllib.agents.ppo import PPOTFPolicy
from ray.tune import register_env


class AsymmetricPettingZooEnv(MultiAgentEnv):
    def __init__(self, env):
        super().__init__()
        self.env = env
        env.reset()

        # Use the first agent's observation space as a representative space.
        # Agents may have different spaces; per-agent spaces are passed to the
        # policies explicitly in the __main__ section below.
        self.observation_space = self.env.observation_space(self.env.agents[0])

        # Likewise, use the first agent's action space as a representative space.
        self.action_space = self.env.action_space(self.env.agents[0])

    def reset(self):
        self.env.reset()
        # Return only the observation of the agent selected to act first.
        return {
            self.env.agent_selection: self.env.observe(
                self.env.agent_selection)
        }

    def step(self, action):
        # Step the currently selected agent with its action.
        self.env.step(action[self.env.agent_selection])
        obs_d = {}
        rew_d = {}
        done_d = {}
        info_d = {}
        # Record results for the next agent(s); if the selected agent is done,
        # step it with None (as the AEC API requires) and continue to the
        # following agent, otherwise stop and return.
        while self.env.agents:
            obs, rew, done, info = self.env.last()
            a = self.env.agent_selection
            obs_d[a] = obs
            rew_d[a] = rew
            done_d[a] = done
            info_d[a] = info
            if self.env.dones[self.env.agent_selection]:
                self.env.step(None)
            else:
                break

        # The episode ends once no agents remain in the environment.
        all_done = not self.env.agents
        done_d["__all__"] = all_done

        return obs_d, rew_d, done_d, info_d

    def close(self):
        self.env.close()

    def seed(self, seed=None):
        self.env.seed(seed)

    def render(self, mode="human"):
        return self.env.render(mode)

    @property
    def get_sub_environments(self):
        return self.env.unwrapped


if __name__ == "__main__":
    ray.init(local_mode=False)
    num_agents = 2


    def env_creator(args):
        return AsymmetricPettingZooEnv(simple_push_v2.env())


    register_env("petting-zoo", env_creator)
    env = env_creator({})

    # Per-agent spaces from the underlying PettingZoo env; in simple_push
    # the observation spaces differ between the adversary and the agent.
    action_spaces = env.env.action_spaces
    observation_spaces = env.env.observation_spaces
    agents = env.env.agents

    # One PPO policy per agent, built with that agent's own spaces.
    policies = {
        f"policy_{id}": (
            PPOTFPolicy,
            observation_spaces[id],
            action_spaces[id],
            {},
        ) for id in agents
    }

    # e.g. adversary_0 -> policy_adversary_0, agent_0 -> policy_agent_0
    def policy_mapping_fn(agent_id, episode, worker, **kwargs):
        return f"policy_{agent_id}"

    config = {
        "multiagent": {
            "policies": policies,
            "policy_mapping_fn": policy_mapping_fn,
            "policies_to_train": None,  # Train all
        },
        "framework": 'tf'
    }

    config.update({'env': 'petting-zoo'})
    results = tune.run('PPO', config=config, stop={'timesteps_total': 100_000})