AttributeError: 'EconomicsWrapper' object has no attribute 'seed'

Can anyone please guide me on how to resolve this error?

from ray.rllib.algorithms.ppo import PPOConfig

algo = (
    PPOConfig()
    .environment(env=env_wrapper)
    .multi_agent(
        policies={
            "policy_0": (
                None, EconomicsWrapper.get_action_space(0, env), EconomicsWrapper.get_observation_space('0', obs), {"gamma": 0.80}
            ),
            "policy_1": (
                None, EconomicsWrapper.get_action_space(1, env), EconomicsWrapper.get_observation_space('1', obs), {"gamma": 0.80}
            ),
            "policy_2": (
                None, EconomicsWrapper.get_action_space(2, env), EconomicsWrapper.get_observation_space('2', obs), {"gamma": 0.80}
            ),
            "policy_3": (
                None, EconomicsWrapper.get_action_space(3, env), EconomicsWrapper.get_observation_space('3', obs), {"gamma": 0.80}
            ),
            "policy_p": (
                None, EconomicsWrapper.get_action_space('p', env), EconomicsWrapper.get_observation_space('p', obs), {"gamma": 0.80}
            ),
        },
        policy_mapping_fn=lambda agent_id, *args, **kwargs: f"policy_{agent_id}",
    )
    .build()
)

-----x-------

from ray.rllib.env.multi_agent_env import MultiAgentEnv
from gym import spaces
import numpy as np
import warnings

class EconomicsWrapper(MultiAgentEnv):

    def __init__(self, env_config):
        super().__init__()
        print("lokesh")
        print(env_config["env"])
        self.env = env_config["env"]
        self._agent_ids = self.get_agent_ids()
        self._infos = {}
        self.terminateds = set()
        self.truncateds = set()

    def get_agent_ids(self):
        agents = []
        for agent in self.env.all_agents:
            agents.append(str(agent.idx))
        return set(agents)

    def reset(self, *, seed=None, options=None):
        self.terminateds = set()
        self.truncateds = set()
        self._observations = {}
        self._infos = {}
        obs = self.env.reset()
        obs_dict = {}

        # Re-key the observation dict by the string agent ids.
        for agent in self._agent_ids:
            obs_dict[agent] = obs[agent]

        return obs_dict

    def step(self, action):
        # `action` is the per-agent action dict coming from RLlib.
        obs, rewards, done, info = self.env.step(action)
        assert isinstance(obs[0]["action_mask"], np.ndarray)

        observation = {agent_id: obs[agent_id] for agent_id in self._agent_ids}
        done_ = {agent_id: done['__all__'] for agent_id in self._agent_ids}

        return observation, rewards, done_, info

    @property
    def n_agents(self):
        return self.env.n_agents

    @property
    def summary(self):
        last_completion_metrics = self.env.previous_episode_metrics
        if last_completion_metrics is None:
            return {}
        last_completion_metrics["completions"] = int(self.env._completions)
        return last_completion_metrics

    @staticmethod
    def get_action_space(agent_id, env):
        if agent_id == 'p':
            if env.world.planner.multi_action_mode:
                return spaces.MultiDiscrete(env.get_agent(agent_id).action_spaces)
            else:
                return spaces.Discrete(env.get_agent(agent_id).action_spaces)
        else:
            if env.world.agents[agent_id].multi_action_mode:
                return spaces.MultiDiscrete(env.get_agent(agent_id).action_spaces)
            else:
                return spaces.Discrete(env.get_agent(agent_id).action_spaces)

    @staticmethod
    def get_observation_space(agent_id, obs):
        return EconomicsWrapper.dict_to_spaces_dict(obs[agent_id])

    @staticmethod
    def dict_to_spaces_dict(obs):
        BIG_NUMBER = 1e20
        dict_of_spaces = {}
        for k, v in obs.items():

            # list of lists are listified np arrays
            _v = v
            if isinstance(v, list):
                _v = np.array(v)
            elif isinstance(v, (int, float, np.floating, np.integer)):
                _v = np.array([v])

            # assign Space
            if isinstance(_v, np.ndarray):
                x = float(BIG_NUMBER)
                # Warnings for extreme values
                if np.max(_v) > x:
                    warnings.warn("Input is too large!")
                if np.min(_v) < -x:
                    warnings.warn("Input is too small!")
                box = spaces.Box(low=-x, high=x, shape=_v.shape, dtype=_v.dtype)
                low_high_valid = (box.low < 0).all() and (box.high > 0).all()

                # This loop avoids issues with overflow to make sure low/high are good.
                while not low_high_valid:
                    x = x // 2
                    box = spaces.Box(low=-x, high=x, shape=_v.shape, dtype=_v.dtype)
                    low_high_valid = (box.low < 0).all() and (box.high > 0).all()

                dict_of_spaces[k] = box

            elif isinstance(_v, dict):
                dict_of_spaces[k] = EconomicsWrapper.dict_to_spaces_dict(_v)
            else:
                raise TypeError
        return spaces.Dict(dict_of_spaces)

-----x-------

File "/Users/lokeshwarana/opt/anaconda3/envs/env11/lib/python3.11/site-packages/ray/rllib/env/wrappers/multi_agent_env_compatibility.py", line 46, in reset
self.env.seed(seed)
^^^^^^^^^^^^^
AttributeError: 'EconomicsWrapper' object has no attribute 'seed'

The above exception was the direct cause of the following exception:

ray::RolloutWorker.__init__() (pid=759, ip=127.0.0.1, actor_id=17f9fd9a045eed4e19fe51bc01000000, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x1138cbf90>)
File "/Users/lokeshwarana/opt/anaconda3/envs/env11/lib/python3.11/site-packages/ray/rllib/utils/pre_checks/env.py", line 77, in check_env
check_multiagent_environments(env)
File "/Users/lokeshwarana/opt/anaconda3/envs/env11/lib/python3.11/site-packages/ray/rllib/utils/pre_checks/env.py", line 299, in check_multiagent_environments
raise ValueError(
ValueError: Your environment () does not abide to the new gymnasium-style API!
From Ray 2.3 on, RLlib only supports the new (gym>=0.26 or gymnasium) Env APIs.
In particular, the reset() method seems to be faulty.
Learn more about the most important changes here:
https://github.com/openai/gym and here: https://github.com/Farama-Foundation/Gymnasium

In order to fix this problem, do the following:

  1. Run pip install gymnasium on your command line.
  2. Change all your import statements in your code from
    import gym -> import gymnasium as gym OR
    from gym.space import Discrete -> from gymnasium.spaces import Discrete

For your custom (single agent) gym.Env classes:
3.1) Either wrap your old Env class via the provided from gymnasium.wrappers import EnvCompatibility wrapper class.
3.2) Alternatively to 3.1:

  • Change your reset() method to have the call signature ‘def reset(self, *,
    seed=None, options=None)’
  • Return an additional info dict (empty dict should be fine) from your reset()
    method.
  • Return an additional truncated flag from your step() method (between done and
    info). This flag should indicate, whether the episode was terminated prematurely
    due to some time constraint or other kind of horizon setting.

For your custom RLlib MultiAgentEnv classes:
4.1) Either wrap your old MultiAgentEnv via the provided
from ray.rllib.env.wrappers.multi_agent_env_compatibility import MultiAgentEnvCompatibility wrapper class.
4.2) Alternatively to 4.1:

  • Change your reset() method to have the call signature
    ‘def reset(self, *, seed=None, options=None)’
  • Return an additional per-agent info dict (empty dict should be fine) from your
    reset() method.
  • Rename dones into terminateds and only set this to True, if the episode is really
    done (as opposed to has been terminated prematurely due to some horizon/time-limit
    setting).
  • Return an additional truncateds per-agent dictionary flag from your step()
    method, including the __all__ key (100% analogous to your dones/terminateds
    per-agent dict).
    Return this new truncateds dict between dones/terminateds and infos. This
    flag should indicate, whether the episode (for some agent or all agents) was
    terminated prematurely due to some time constraint or other kind of horizon setting.
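
Following step 4.2 above, this is a rough sketch of what I think the new-API reset() and step() of my EconomicsWrapper would need to look like. I am assuming here that the underlying ai-economist env keeps returning a plain obs dict from its reset() and an (obs, rewards, done, info) tuple from its step(), and that the wrapper itself never truncates episodes (the `action_dict` name is just my own choice). Please correct me if this is wrong:

    def reset(self, *, seed=None, options=None):
        self.terminateds = set()
        self.truncateds = set()
        obs = self.env.reset()
        obs_dict = {agent: obs[agent] for agent in self._agent_ids}
        # New API: reset() returns (obs, infos); empty per-agent info dicts should be fine.
        infos = {agent: {} for agent in self._agent_ids}
        return obs_dict, infos

    def step(self, action_dict):
        obs, rewards, done, info = self.env.step(action_dict)
        observation = {agent_id: obs[agent_id] for agent_id in self._agent_ids}
        # New API: "dones" become "terminateds", plus a separate "truncateds" dict,
        # both including the "__all__" key.
        terminateds = {agent_id: done["__all__"] for agent_id in self._agent_ids}
        terminateds["__all__"] = done["__all__"]
        # The wrapper applies no time limit of its own, so truncateds stays False everywhere.
        truncateds = {agent_id: False for agent_id in self._agent_ids}
        truncateds["__all__"] = False
        return observation, rewards, terminateds, truncateds, info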