Nested structure difference

How severe does this issue affect your experience of using Ray?

  • High: It blocks me to complete my task.

I have implemented a multi-agent environment inheriting from the MultiAgentEnv class. The code for the class is as follows:

import numpy as np
from numpy.random import default_rng
from gymnasium import spaces
from ray.rllib.env.multi_agent_env import MultiAgentEnv


class NPIComplianceEnv(MultiAgentEnv):
    # metadata = {'render.modes': ['human']}

    def __init__(self, num_agents=5):
        super(NPIComplianceEnv, self).__init__()

        self.num_agents = num_agents
        self.action_space = spaces.Discrete(3)  # Actions: 0: Stay home, 1: Go to public, 2: Go to work

        # Observation for each agent includes its own state and a view of the states of other agents
        self.observation_space = spaces.Dict({
            "Health_status": spaces.Discrete(5),  # SEIR+D model states
            "LSI_level": spaces.Discrete(4),
            "Nearby_infected": spaces.Box(low=0, high=num_agents, shape=(1,), dtype=np.int32)
        })

        self.states = {
            "Health_status": np.zeros(self.num_agents, dtype=int),
            "LSI_level": np.zeros(self.num_agents, dtype=int),
            "Nearby_infected": np.zeros((self.num_agents, 1), dtype=int)
        }

        self.rng = default_rng()
        print('============Initialization Done!==================')

        print("========== Observations:")
        print(type(self.observation_space))
        print(self.observation_space)
        print("========== States:")
        print(type(self.states))
        print(self.states)

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        # Initialize the states for all agents
        self.states = {
            "Health_status": self.rng.integers(0, 5, self.num_agents),
            "LSI_level": self.rng.integers(0, 4, self.num_agents),
            "Nearby_infected": self.rng.integers(0, 100, (self.num_agents, 1))  # Example data
        }
        print('===============The Environment was reset!===============')
        return self.states, {}

    def step(self, actions):
        print('==============Starting The Training================')
        rewards = np.zeros(self.num_agents)
        done = False
        info = {}
        print('==============================')
        print(type(self.states))
        print('==============================')
        # Updating health statuses based on interactions and actions
        for i in range(self.num_agents):
            action = actions[i]
            if action == 1:  # Assuming going to public places
                infection_risk = 0.01  # Simplified infection risk
                infected = self.rng.random() < infection_risk
                rewards[i] -= 10 if infected else 0  # Penalize getting infected
                self.states['Health_status'][i] = 2 if infected else self.states['Health_status'][i]

        # Updating global state based on actions
        print('==============Updating global state================')
        global_infection_update = np.sum(actions == 1) * 0.01  # Simplified global effect
        print(global_infection_update)
        self.states['LSI_level'] = (self.states['LSI_level'] + global_infection_update) % 4
        print('==============================')

        return self.states, rewards, done, False, info

    def render(self, mode='human'):
        print(f"Current states: {self.states}")

    def close(self):
        pass

    def seed(self, seed=None):
        pass
The problem is that when trying to train the agents, the following error occurs:

============Initialization Done!==================
========== Observations:
<class 'gymnasium.spaces.dict.Dict'>
Dict('Health_status': Discrete(5), 'LSI_level': Discrete(4), 'Nearby_infected': Box(0, 5, (1,), int32))
========== States:
<class 'dict'>
{'Health_status': array([0, 0, 0, 0, 0]), 'LSI_level': array([0, 0, 0, 0, 0]), 'Nearby_infected': array([[0],
[0],
[0],
[0],
[0]])}
2024-06-03 16:45:54,494 WARNING util.py:61 -- Install gputil for GPU system monitoring.
2024-06-03 16:45:54,500 ERROR actor_manager.py:523 -- Ray error, taking actor 1 out of service. ray::RolloutWorker.apply() (pid=2843903, ip=128.40.86.32, actor_id=8e23bcd3a9157fb746fa7e9c01000000, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7f87a5ca5e10>)
ValueError: The two structures don't have the same nested structure.

First structure: type=ndarray str=[2 3 0 4 0]

Second structure: type=OrderedDict str=OrderedDict([('Health_status', 4), ('LSI_level', 2), ('Nearby_infected', array([0], dtype=int32))])

More specifically: Substructure "type=OrderedDict str=OrderedDict([('Health_status', 4), ('LSI_level', 2), ('Nearby_infected', array([0], dtype=int32))])" is a sequence, while substructure "type=ndarray str=[2 3 0 4 0]" is not

The main function contains:

agent = (ppo.PPOConfig()
    .environment(env="npi_compliance_env", env_config={"num_agents": 5})
    .multi_agent(
        policies={"policy_0": (None,
                               NPIComplianceEnv().observation_space,
                               NPIComplianceEnv().action_space,
                               {})},
        policy_mapping_fn=lambda agent_id, episode, worker, **kwargs: "policy_0")
    .resources(num_gpus=1)
    .framework("torch")
    .training(train_batch_size=4000)  # Example of additional config
    .build()
)

for i in range(10):
    result = agent.train()
    print(f"Iteration {i}: reward = {result['episode_reward_mean']}")

Could you please let me know why the states and the observation space don't have the same nested structure, and how this can be fixed?

Hi @dejkam,

You are returning an observation that is incompatible with the observation_space you specified. Each agent's observation should be a dictionary with three values matching that space: two Discrete integers and one length-1 array for Nearby_infected.

To make it multi-agent, you need to provide the observation of each agent separately, with an outer dictionary where each key names one agent in the environment:

obs = {
    "agent_1": {"Health_status": 1, "LSI_level": 4, "Nearby_infected": np.array([0], dtype=np.int32)},
    "some_other_agent_name": {"Health_status": 0, "LSI_level": 2, "Nearby_infected": np.array([1], dtype=np.int32)},
}
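To make that concrete, here is a minimal sketch (not from your original code, and assuming the same __init__ as in your class) of how reset() could repackage the flat state arrays into per-agent observations. The agent IDs "agent_0", "agent_1", … are illustrative; they just need to match the keys you use for actions, rewards, and dones:

def reset(self, seed=None, options=None):
    super().reset(seed=seed)
    # Same per-key arrays as before, but Nearby_infected kept within the declared Box bounds.
    self.states = {
        "Health_status": self.rng.integers(0, 5, self.num_agents),
        "LSI_level": self.rng.integers(0, 4, self.num_agents),
        "Nearby_infected": self.rng.integers(0, self.num_agents, (self.num_agents, 1)),
    }
    # One inner dict per agent, keyed by agent ID, each matching the single-agent observation_space.
    obs = {
        f"agent_{i}": {
            "Health_status": int(self.states["Health_status"][i]),
            "LSI_level": int(self.states["LSI_level"][i]),
            "Nearby_infected": self.states["Nearby_infected"][i].astype(np.int32),
        }
        for i in range(self.num_agents)
    }
    return obs, {}

step() needs the same treatment: per-agent dicts for observations, rewards, terminateds, and truncateds (the terminated/truncated dicts also take an "__all__" key), with the same agent IDs as the keys that arrive in the actions dict.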

Thanks for your reply, it resolved my issue!