Can anyone please guide me how to resolve this error
algo = PPOConfig()
.environment(env=env_wrapper)
.multi_agent(
policies={
“policy_0”: (
None, EconomicsWrapper.get_action_space(0, env), EconomicsWrapper.get_observation_space(‘0’, obs), {“gamma”: 0.80}
),
“policy_1”: (
None, EconomicsWrapper.get_action_space(1, env), EconomicsWrapper.get_observation_space(‘1’, obs), {“gamma”: 0.80}
),
“policy_2”: (
None, EconomicsWrapper.get_action_space(2, env), EconomicsWrapper.get_observation_space(‘2’, obs), {“gamma”: 0.80}
),
“policy_3”: (
None, EconomicsWrapper.get_action_space(3, env), EconomicsWrapper.get_observation_space(‘3’, obs), {“gamma”: 0.80}
),
“policy_p”: (
None, EconomicsWrapper.get_action_space(‘p’, env), EconomicsWrapper.get_observation_space(‘p’, obs), {“gamma”: 0.80}
)
},
policy_mapping_fn=lambda agent_id: f"policy_{agent_id}"
).build()
-----x-------
from ray.rllib.env.multi_agent_env import MultiAgentEnv
from gym import spaces
import numpy as np
import warnings
class EconomicsWrapper(MultiAgentEnv):
def __init__(self, env_config):
super().__init__()
print("lokesh")
print(env_config["env"])
self.env = env_config["env"]
self._agent_ids = self.get_agent_ids()
self._infos = {}
self.terminateds = set()
self.truncateds = set()
def get_agent_ids(self):
agents = []
for agent in self.env.all_agents:
agents.append(str(agent.idx))
return set(agents)
def reset(self, *, seed=None, options=None):
self.terminateds = set()
self.truncateds = set()
self._observations = {}
self._infos = {}
obs = self.env.reset()
obs_dict = {}
for agent in self._agent_ids:
obs_dict[agent] = obs[agent]
return obs
def step(self, action):
obs, rewards, done, info = self.env.step(action_dict)
assert isinstance(obs[0]["action_mask"], np.ndarray)
observation = {agent_id: obs[agent_id] for agent_id in _agent_ids}
done_ = {agent_id: done['__all__'] for agent_id in _agent_ids}
return observation, rewards, done_, info
@property
def n_agents(self):
return self.env.n_agents
@property
def summary(self):
last_completion_metrics = self.env.previous_episode_metrics
if last_completion_metrics is None:
return {}
last_completion_metrics["completions"] = int(self.env._completions)
return last_completion_metrics
@staticmethod
def get_action_space(agent_id, env):
if agent_id == 'p':
if env.world.planner.multi_action_mode:
return spaces.MultiDiscrete(env.get_agent(agent_id).action_spaces)
else:
return spaces.Discrete(env.get_agent(agent_id).action_spaces)
else:
if env.world.agents[agent_id].multi_action_mode:
return spaces.MultiDiscrete(env.get_agent(agent_id).action_spaces)
else:
return spaces.Discrete(env.get_agent(agent_id).action_spaces)
@staticmethod
def get_observation_space(agent_id, obs):
return EconomicsWrapper.dict_to_spaces_dict(obs[agent_id])
@staticmethod
def dict_to_spaces_dict(obs):
BIG_NUMBER = 1e20
dict_of_spaces = {}
for k, v in obs.items():
# list of lists are listified np arrays
_v = v
if isinstance(v, list):
_v = np.array(v)
elif isinstance(v, (int, float, np.floating, np.integer)):
_v = np.array([v])
# assign Space
if isinstance(_v, np.ndarray):
x = float(BIG_NUMBER)
# Warnings for extreme values
if np.max(_v) > x:
warnings.warn("Input is too large!")
if np.min(_v) < -x:
warnings.warn("Input is too small!")
box = spaces.Box(low=-x, high=x, shape=_v.shape, dtype=_v.dtype)
low_high_valid = (box.low < 0).all() and (box.high > 0).all()
# This loop avoids issues with overflow to make sure low/high are good.
while not low_high_valid:
x = x // 2
box = spaces.Box(low=-x, high=x, shape=_v.shape, dtype=_v.dtype)
low_high_valid = (box.low < 0).all() and (box.high > 0).all()
dict_of_spaces[k] = box
elif isinstance(_v, dict):
dict_of_spaces[k] = EconomicsWrapper.dict_to_spaces_dict(_v)
else:
raise TypeError
return spaces.Dict(dict_of_spaces)
—x------
File “/Users/lokeshwarana/opt/anaconda3/envs/env11/lib/python3.11/site-packages/ray/rllib/env/wrappers/multi_agent_env_compatibility.py”, line 46, in reset
self.env.seed(seed)
^^^^^^^^^^^^^
AttributeError: ‘EconomicsWrapper’ object has no attribute ‘seed’
The above exception was the direct cause of the following exception:
ray::RolloutWorker.init() (pid=759, ip=127.0.0.1, actor_id=17f9fd9a045eed4e19fe51bc01000000, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x1138cbf90>)
File “/Users/lokeshwarana/opt/anaconda3/envs/env11/lib/python3.11/site-packages/ray/rllib/utils/pre_checks/env.py”, line 77, in check_env
check_multiagent_environments(env)
File “/Users/lokeshwarana/opt/anaconda3/envs/env11/lib/python3.11/site-packages/ray/rllib/utils/pre_checks/env.py”, line 299, in check_multiagent_environments
raise ValueError(
ValueError: Your environment () does not abide to the new gymnasium-style API!
From Ray 2.3 on, RLlib only supports the new (gym>=0.26 or gymnasium) Env APIs.
In particular, the reset()
method seems to be faulty.
Learn more about the most important changes here:
GitHub - openai/gym: A toolkit for developing and comparing reinforcement learning algorithms. and here: GitHub - Farama-Foundation/Gymnasium: An API standard for single-agent reinforcement learning environments, with popular reference environments and related utilities (formerly Gym)
In order to fix this problem, do the following:
- Run
pip install gymnasium
on your command line. - Change all your import statements in your code from
import gym
→import gymnasium as gym
OR
from gym.space import Discrete
→from gymnasium.spaces import Discrete
For your custom (single agent) gym.Env classes:
3.1) Either wrap your old Env class via the provided from gymnasium.wrappers import EnvCompatibility
wrapper class.
3.2) Alternatively to 3.1:
- Change your
reset()
method to have the call signature ‘def reset(self, *,
seed=None, options=None)’ - Return an additional info dict (empty dict should be fine) from your
reset()
method. - Return an additional
truncated
flag from yourstep()
method (betweendone
and
info
). This flag should indicate, whether the episode was terminated prematurely
due to some time constraint or other kind of horizon setting.
For your custom RLlib MultiAgentEnv
classes:
4.1) Either wrap your old MultiAgentEnv via the provided
from ray.rllib.env.wrappers.multi_agent_env_compatibility import MultiAgentEnvCompatibility
wrapper class.
4.2) Alternatively to 4.1:
- Change your
reset()
method to have the call signature
‘def reset(self, *, seed=None, options=None)’ - Return an additional per-agent info dict (empty dict should be fine) from your
reset()
method. - Rename
dones
intoterminateds
and only set this to True, if the episode is really
done (as opposed to has been terminated prematurely due to some horizon/time-limit
setting). - Return an additional
truncateds
per-agent dictionary flag from yourstep()
method, including the__all__
key (100% analogous to yourdones/terminateds
per-agent dict).
Return this newtruncateds
dict betweendones/terminateds
andinfos
. This
flag should indicate, whether the episode (for some agent or all agents) was
terminated prematurely due to some time constraint or other kind of horizon setting.