# Return obs_space in gym.Box format

Hello everyone
I am trying to run a PPO algorithm on a multi-agent environment. The action space and observation space of the agents are continuous and in the gym.Box format.
environment definition is as follows:

``````
import gym
#from gym.spaces import Discrete, MultiDiscrete
import numpy as np
import random

from ray.rllib.env.multi_agent_env import MultiAgentEnv

class MultiUnitAuction(MultiAgentEnv):
def __init__(self , config = None):
config = config or {}
self.marginal_valuation_vector = [1 , 3 , 4 , 4 , 4 , 71,7, 56, 88,16, 25, 32, 39, 12, 48, 10,40,14,12,30]
#self.marginal_valuation_vector = [5 , 10 , 12 , 15 , 20]
#define a counter to end the episode
#self.counter = 0
#number of agents
self.num_agents = 20
#block_size
self.block_size = 7
# define a list for the market clearing price
self.p_c = 0
# average accepted bids
#define agents
#self.agents = [User(i , max(marginal_valuation_vector)) for i in marginal_valuation_vector]
#self.dones = set()
self.observation_space = gym.spaces.Dict({
#"agent_" + str(i) : gym.spaces.Box(low=np.float32(0), high=np.float32(max(self.marginal_valuation_vector)), shape=(1,), dtype=np.float32) for i in range(0,self.num_agents)
"agent_" + str(i) : gym.spaces.Box(low=np.array(), high=np.array([max(self.marginal_valuation_vector)]), dtype=np.float32)  for i in range(0,self.num_agents)
})

self.action_space = gym.spaces.Dict({
#"agent_" + str(i) : gym.spaces.Box(low=np.float32(0), high=np.float32(self.marginal_valuation_vector[i]), shape=(1,), dtype=np.float32) for i in range(0,self.num_agents)
"agent_" + str(i) : gym.spaces.Box(low=np.array(), high=np.array([self.marginal_valuation_vector[i]]), dtype=np.float32)  for i in range(0,self.num_agents)

})

self.reset()

def reset(self):
"""Returns initial observation of next(!) episode."""
# Return the initial observation in the new episode.
return self._get_obs()

def step(self, action: dict):
"""
Returns (next observation, rewards, dones, infos) after having taken the given actions.

e.g.
`action={"agent1": action_for_agent1, "agent2": action_for_agent2}`
"""

bids = []
for i in range(0 , self.num_agents):
bids.append(action["agent_" + str(i)])

self.p_c = sorted(bids)[len(bids) - self.block_size]

# Get observations (based on new agent positions).
obs = self._get_obs()

# calculate rewards
r = []
for i in range(0 , self.num_agents):
if bids[i] < self.p_c:
r.append(0)
else:
r.append(self.marginal_valuation_vector[i] - bids[i])

rewards = {
"agent_" + str(i) : float(r[i]) for i in range(0,self.num_agents)
}

is_done = True
# Generate a `done` dict (per-agent and total).

dones = {
"agent_0" : is_done,
"agent_1" : is_done,
"agent_2" : is_done,
"agent_3" : is_done,
"agent_4" : is_done,
"agent_5" : is_done,
"agent_6" : is_done,
"agent_7" : is_done,
"agent_8" : is_done,
"agent_9" : is_done,
"agent_10" : is_done,
"agent_11" : is_done,
"agent_12" : is_done,
"agent_13" : is_done,
"agent_14" : is_done,
"agent_15" : is_done,
"agent_16" : is_done,
"agent_17" : is_done,
"agent_18" : is_done,
"agent_19" : is_done,
# special `__all__` key indicates that the episode is done for all agents.
"__all__": is_done,
}

return obs, rewards, dones, {}  # <- info dict (not needed here).

def _get_obs(self):
"""
Returns obs dict (agent name to discrete-pos tuple) using each
agent's current x/y-positions.
"""
return {
"agent_" + str(i) : [self.p_c] for i in range(0,self.num_agents)
}

def render(self, mode=None):
pass

``````

The main problem comes from the _get_obs() function. When I define it as:

``````def _get_obs(self):
"""
Returns obs dict (agent name to discrete-pos tuple) using each
agent's current x/y-positions.
"""
return {
"agent_" + str(i) : [self.p_c] for i in range(0,self.num_agents)
}
``````

I am getting the following error:

``````ValueError: ('Observation ({} dtype={}) outside given space ({})!', array([[6.075556]], dtype=float32), dtype('float32'), Box([0.], [88.], (1,), float32))
``````

so, somehow the observation space is not as defined, but I have checked the environment many times. I used .contains() function and the result is always part of the observation space.

Am I using this right?
How should I return the state variables??

Also the config is as follows:

``````from ray.rllib.agents.ppo import PPOTrainer

policies= {
"policy_" + str(i): (None , game.observation_space["agent_" + str(i)] , game.action_space["agent_" + str(i)] , {"gamma" : 0.8}) for i in range(0,game.num_agents)
}

def policy_mapping_fn (agent_id: str):
#assert agent_id in [str(i) for i in range(0,5)], f"ERROR: invalid agent id {agent_id}!!!"
return "policy_" + str(agent_id[len(agent_id)-1])

config={
"env": MultiUnitAuction,  # "my_env" <- if we previously have registered the env with `tune.register_env("[name]", lambda config: [returns env object])`.
"framework": "torch",
"create_env_on_driver": True,
"multiagent": {
"policies": policies,
"policy_mapping_fn": policy_mapping_fn,
},
}

``````

Thanks

Hi @amohazab ,

from what I see I would make the guess that the dimensions of your observation array are not as defined in the `Box` space: you defined an array of shape `(1,)`, but the array that is set has dimension `(1,1)`.