Return obs_space in gym.Box format

Hello everyone,
I am trying to run a PPO algorithm on a multi-agent environment. The action and observation spaces of the agents are continuous and in the gym.Box format.
The environment definition is as follows:


import gym
#from gym.spaces import Discrete, MultiDiscrete
import numpy as np
import random

from ray.rllib.env.multi_agent_env import MultiAgentEnv


class MultiUnitAuction(MultiAgentEnv):
    def __init__(self , config = None):
        config = config or {}
        self.marginal_valuation_vector = [1, 3, 4, 4, 4, 71, 7, 56, 88, 16, 25, 32, 39, 12, 48, 10, 40, 14, 12, 30]
        #self.marginal_valuation_vector = [5 , 10 , 12 , 15 , 20]
        #define a counter to end the episode
        #self.counter = 0
        #number of agents
        self.num_agents = 20
        #block_size
        self.block_size = 7
        # define a list for the market clearing price
        self.p_c = 0
        # average accepted bids
        #define agents
        #self.agents = [User(i , max(marginal_valuation_vector)) for i in marginal_valuation_vector]
        #self.dones = set()
        self.observation_space = gym.spaces.Dict({
            #"agent_" + str(i) : gym.spaces.Box(low=np.float32(0), high=np.float32(max(self.marginal_valuation_vector)), shape=(1,), dtype=np.float32) for i in range(0,self.num_agents)
            "agent_" + str(i) : gym.spaces.Box(low=np.array([0]), high=np.array([max(self.marginal_valuation_vector)]), dtype=np.float32)  for i in range(0,self.num_agents)
        })
        
        
        
        self.action_space = gym.spaces.Dict({
            #"agent_" + str(i) : gym.spaces.Box(low=np.float32(0), high=np.float32(self.marginal_valuation_vector[i]), shape=(1,), dtype=np.float32) for i in range(0,self.num_agents)
            "agent_" + str(i) : gym.spaces.Box(low=np.array([0]), high=np.array([self.marginal_valuation_vector[i]]), dtype=np.float32)  for i in range(0,self.num_agents)
      
        })
        
        self.reset()
        
    def reset(self):
        """Returns initial observation of next(!) episode."""
        # Return the initial observation in the new episode.
        return self._get_obs()

    def step(self, action: dict):
        """
        Returns (next observation, rewards, dones, infos) after having taken the given actions.
        
        e.g.
        `action={"agent1": action_for_agent1, "agent2": action_for_agent2}`
        """
        
        bids = []
        for i in range(0 , self.num_agents):
            bids.append(action["agent_" + str(i)])
        
        # The market clearing price is the block_size-th highest bid
        # (bids sorted ascending, so index len(bids) - block_size).
        self.p_c = sorted(bids)[len(bids) - self.block_size]
    
        
        # Get observations (based on new agent positions).
        obs = self._get_obs()

        # calculate rewards
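        # losing bids (strictly below the clearing price) earn 0;
        # winning bids earn valuation minus bid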
        r = []
        for i in range(0 , self.num_agents):
            if bids[i] < self.p_c:
                r.append(0)
            else:
                r.append(self.marginal_valuation_vector[i] - bids[i])
        
        
        rewards = {
             "agent_" + str(i) : float(r[i]) for i in range(0,self.num_agents)
        }

        
        is_done = True
        # Generate a `done` dict (per-agent and total).
        dones = {"agent_" + str(i): is_done for i in range(self.num_agents)}
        # special `__all__` key indicates that the episode is done for all agents.
        dones["__all__"] = is_done
        
        return obs, rewards, dones, {}  # <- info dict (not needed here).

    def _get_obs(self):
        """
        Returns the obs dict, mapping each agent name to its
        current observation (the market clearing price).
        """
        return {
            "agent_" + str(i) : [self.p_c] for i in range(0,self.num_agents)
        }

    def render(self, mode=None):
        pass

The main problem comes from the _get_obs() function. When I define it as:

def _get_obs(self):
        """
        Returns the obs dict, mapping each agent name to its
        current observation (the market clearing price).
        """
        return {
            "agent_" + str(i) : [self.p_c] for i in range(0,self.num_agents)
        }

I am getting the following error:

ValueError: ('Observation ({} dtype={}) outside given space ({})!', array([[6.075556]], dtype=float32), dtype('float32'), Box([0.], [88.], (1,), float32))

So somehow the observation does not match the defined space, but I have checked the environment many times. I used the .contains() function and it always reports that the observation is part of the observation space.
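To be concrete, the check I did was roughly along these lines (just a sketch; game is simply an instance of the environment defined above):

import numpy as np

game = MultiUnitAuction()
obs = game.reset()
for agent_id, agent_obs in obs.items():
    space = game.observation_space[agent_id]
    print(agent_id, np.shape(agent_obs), space.contains(np.asarray(agent_obs, dtype=np.float32)))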

Am I using this right?
How should I return the state variables?

Also the config is as follows:

from ray.rllib.agents.ppo import PPOTrainer


game = MultiUnitAuction()  # instance of the env, used to look up the per-agent spaces

policies = {
    "policy_" + str(i): (None, game.observation_space["agent_" + str(i)], game.action_space["agent_" + str(i)], {"gamma": 0.8}) for i in range(0, game.num_agents)
}

def policy_mapping_fn(agent_id: str):
    # map e.g. "agent_13" -> "policy_13"
    return "policy_" + agent_id.split("_")[-1]
        
config={
    "env": MultiUnitAuction,  # "my_env" <- if we previously have registered the env with `tune.register_env("[name]", lambda config: [returns env object])`.
    "framework": "torch",
    "create_env_on_driver": True,
    "multiagent": {
        "policies": policies,
        "policy_mapping_fn": policy_mapping_fn,
    },
}
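The trainer is then built from this config, roughly like this (a minimal sketch; the number of training iterations is arbitrary):

trainer = PPOTrainer(config=config)
for _ in range(5):
    result = trainer.train()
    print(result["episode_reward_mean"])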

Thanks

Hi @amohazab ,

from what I see, I would guess that the dimensions of your observation arrays are not as defined in the Box space: you defined the space with shape (1,), but the observation that is actually returned has shape (1, 1).
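If that is the case, one possible fix is to flatten and cast the clearing price before building the obs dict, for example along these lines (a sketch, assuming self.p_c can itself be a length-1 array because it is taken from the action dict):

    def _get_obs(self):
        # Cast/flatten the clearing price so each agent's observation is a
        # float32 array of shape (1,), matching the Box space from __init__.
        obs_value = np.asarray(self.p_c, dtype=np.float32).reshape(1,)
        return {"agent_" + str(i): obs_value for i in range(self.num_agents)}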