Hey everyone,
I was wondering why observation and action space specs are provided in some of the multi-agent environment examples. I looked into the FlexAgentsMultiAgent class from the multi_agent_env.py RLlib example, and it looks to me like the dimensionality of obs, rew, done and info comes from the environment that initializes the agents (in this case MockEnv()). So what do I need `self.observation_space` and `self.action_space` for?
class FlexAgentsMultiAgent(MultiAgentEnv):
    """Env of independent agents, each of which exits after n steps.

    Agents are spawned into and removed from a running episode at random,
    so the set of agent ids present in the returned dicts varies from step
    to step.
    """

    def __init__(self):
        # Live sub-environments, keyed by integer agent id.
        self.agents = {}
        # Id given to the next spawned agent. reset() does not rewind it,
        # so ids are never reused across episodes.
        self.agentID = 0
        # Ids of agents whose sub-environment has reported done.
        self.dones = set()
        # NOTE(review): presumably these describe the per-agent spaces of
        # the MockEnv sub-environments -- confirm against MockEnv.
        self.observation_space = gym.spaces.Discrete(2)
        self.action_space = gym.spaces.Discrete(2)
        self.resetted = False

    def spawn(self):
        """Spawn a new agent into the current episode and return its id."""
        agentID = self.agentID
        # NOTE(review): 25 is presumably the sub-env episode length (the
        # "n steps" of the class docstring) -- confirm against MockEnv.
        self.agents[agentID] = MockEnv(25)
        self.agentID += 1
        return agentID

    def reset(self):
        """Start a new episode with one freshly spawned agent.

        Returns:
            Dict mapping each agent id to its initial observation.
        """
        self.agents = {}
        self.spawn()
        self.resetted = True
        self.dones = set()
        return {i: a.reset() for i, a in self.agents.items()}

    def step(self, action_dict):
        """Advance every agent that has an action in ``action_dict``.

        After the supplied actions are applied, a new agent may be spawned
        and an existing agent may be removed, both at random.

        Args:
            action_dict: Dict mapping agent id to that agent's action.

        Returns:
            Tuple ``(obs, rew, done, info)`` of dicts keyed by agent id;
            ``done`` additionally carries the ``"__all__"`` key.
        """
        obs, rew, done, info = {}, {}, {}, {}
        last_action = None

        # Apply the actions.
        for i, action in action_dict.items():
            obs[i], rew[i], done[i], info[i] = self.agents[i].step(action)
            if done[i]:
                self.dones.add(i)
            last_action = action

        # Sometimes, add a new agent to the episode. The new agent is
        # stepped with the last action that was supplied, so spawning is
        # skipped when no action was supplied at all (previously that case
        # raised NameError on the unbound loop variable). random.random()
        # is evaluated first so the RNG stream is consumed as before.
        if random.random() > 0.75 and action_dict:
            i = self.spawn()
            obs[i], rew[i], done[i], info[i] = self.agents[i].step(last_action)
            if done[i]:
                self.dones.add(i)

        # Sometimes, kill an existing agent. The killed agent is reported
        # as done to the caller but is not recorded in self.dones.
        if len(self.agents) > 1 and random.random() > 0.25:
            keys = list(self.agents.keys())
            key = random.choice(keys)
            done[key] = True
            del self.agents[key]

        done["__all__"] = len(self.dones) == len(self.agents)
        return obs, rew, done, info