Using an asymmetric environment such as mpe/simple_push with the PettingZooEnv wrapper fails on the assert statements unless additional padding is applied. Is there a reason the wrapper is limited to symmetric environments? I created a copy that simply removes the validation, and it seems to work fine with PPO (using a separate PPO policy for each agent).
AFAICT, RLlib supports environments with asymmetric observation/action spaces; however, I couldn't find any asymmetric ones among the default environments.
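For context, the "additional padding" I mean is padding the spaces with SuperSuit so that they become symmetric, roughly like this (not tested, just to illustrate the workaround):

```python
import supersuit as ss
from pettingzoo.mpe import simple_push_v2

# Pad every agent's observation/action space up to the largest one,
# so the stock PettingZooEnv wrapper's symmetry checks pass.
env = simple_push_v2.env()
env = ss.pad_observations_v0(env)
env = ss.pad_action_space_v0(env)
```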
I’m talking about this wrapper: ray/pettingzoo_env.py at master · ray-project/ray · GitHub
I simply took out the asserts and it seems to work fine, so I am wondering why they are there in the first place.
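For reference, the checks I removed are essentially these (paraphrased from the wrapper, not an exact copy):

```python
# Inside PettingZooEnv.__init__ (paraphrased): every agent's spaces must
# match the first agent's, otherwise the wrapper raises.
assert all(
    self.env.observation_space(agent) == self.observation_space
    for agent in self.env.agents
), "Observation spaces for all agents must be identical."
assert all(
    self.env.action_space(agent) == self.action_space
    for agent in self.env.agents
), "Action spaces for all agents must be identical."
```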
Here is a small script to reproduce what I mean:
```python
import ray
from pettingzoo.mpe import simple_push_v2
from ray import tune
from ray.rllib import MultiAgentEnv
from ray.rllib.agents.ppo import PPOTFPolicy
from ray.tune import register_env


class AsymmetricPettingZooEnv(MultiAgentEnv):
    def __init__(self, env):
        super().__init__()
        self.env = env
        env.reset()
        # Take the first agent's spaces as the env-level defaults;
        # the per-agent spaces are passed to the policies below.
        self.observation_space = self.env.observation_space(self.env.agents[0])
        self.action_space = self.env.action_space(self.env.agents[0])

    def reset(self):
        self.env.reset()
        return {
            self.env.agent_selection: self.env.observe(
                self.env.agent_selection)
        }

    def step(self, action):
        self.env.step(action[self.env.agent_selection])
        obs_d = {}
        rew_d = {}
        done_d = {}
        info_d = {}
        while self.env.agents:
            obs, rew, done, info = self.env.last()
            a = self.env.agent_selection
            obs_d[a] = obs
            rew_d[a] = rew
            done_d[a] = done
            info_d[a] = info
            if self.env.dones[self.env.agent_selection]:
                self.env.step(None)
            else:
                break

        all_done = not self.env.agents
        done_d["__all__"] = all_done

        return obs_d, rew_d, done_d, info_d

    def close(self):
        self.env.close()

    def seed(self, seed=None):
        self.env.seed(seed)

    def render(self, mode="human"):
        return self.env.render(mode)

    @property
    def get_sub_environments(self):
        return self.env.unwrapped


if __name__ == "__main__":
    ray.init(local_mode=False)

    num_agents = 2

    def env_creator(args):
        return AsymmetricPettingZooEnv(simple_push_v2.env())

    register_env("petting-zoo", env_creator)

    env = env_creator({})
    action_spaces = env.env.action_spaces
    observation_spaces = env.env.observation_spaces
    agents = env.env.agents

    # One policy per agent, each with that agent's own (asymmetric) spaces.
    policies = {
        f"policy_{id}": (
            PPOTFPolicy,
            observation_spaces[id],
            action_spaces[id],
            {},
        ) for id in agents
    }

    # Each agent id maps to its own policy, e.g. agent_0 -> policy_agent_0.
    def policy_mapping_fn(agent_id, episode, worker, **kwargs):
        return f"policy_{agent_id}"

    config = {
        "multiagent": {
            "policies": policies,
            "policy_mapping_fn": policy_mapping_fn,
            "policies_to_train": None,  # Train all policies.
        },
        "framework": "tf",
    }
    config.update({"env": "petting-zoo"})

    results = tune.run("PPO", config=config, stop={"timesteps_total": 100_000})
```
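And just to confirm the asymmetry I am talking about, this quick check prints each agent's spaces (the adversary and the agent report different observation sizes):

```python
from pettingzoo.mpe import simple_push_v2

env = simple_push_v2.env()
env.reset()
for agent in env.agents:
    # Each agent's own observation/action space, as seen by the env.
    print(agent, env.observation_space(agent), env.action_space(agent))
```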