How severe does this issue affect your experience of using Ray?
- High: It blocks me to complete my task.
Hi,
I have been trying to use QMIX to train on a PettingZoo environment; however, after using multiple wrappers to fix compatibility issues (see this), I get this error:
File “/ray/rllib/utils/deprecation.py”, line 106, in patched_init
return obj_init(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^
File “/ray/rllib/utils/deprecation.py”, line 106, in patched_init
return obj_init(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^
File “/ray/rllib/algorithms/algorithm.py”, line 517, in init
super().init(
File “/ray/tune/trainable/trainable.py”, line 185, in init
self.setup(copy.deepcopy(self.config))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “/ray/rllib/algorithms/algorithm.py”, line 639, in setup
self.workers = WorkerSet(
^^^^^^^^^^
File “/ray/rllib/evaluation/worker_set.py”, line 179, in init
raise e.args[0].args[2]
ValueError: Traceback (most recent call last):
File “ray/rllib/utils/pre_checks/env.py”, line 307, in check_multiagent_environments
obs_and_infos = env.reset(seed=42, options={})
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File “ray/rllib/env/wrappers/group_agents_wrapper.py”, line 84, in reset
self._group_items(obs),
^^^^^^^^^^^^^^^^^^^^^^
File “ray/rllib/env/wrappers/group_agents_wrapper.py”, line 149, in _group_items
raise ValueError(
ValueError: Missing member of group group_1: drone_0: {‘observer_0’: array([-1000., 0., 0., 0., 169., 286., 0., 0.,
0., 0., 0.], dtype=float32)}
See my configuration below — is there anything unusual?
import os
import ray
import supersuit as ss
from ray import tune
from ray.rllib.algorithms.qmix import QMixConfig
from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv
from ray.rllib.env.wrappers.group_agents_wrapper import GroupAgentsWrapper
from ray.tune.registry import register_env
from gymnasium.spaces import Box, Tuple
from hemarl.pettingzooenv import customenv
import numpy as np
# Joint observation/action spaces handed to GroupAgentsWrapper: one Tuple
# entry per group member, in group order (drone_0, observer_0).
# NOTE(review): the sizes are hardcoded for exactly one drone
# (obs dim = 9 + 2 * n_drones) and max_speed == 10 — regenerate these if
# the env config changes.
_GROUPED_OBSERVATION_SPACE = Tuple(
    [Box(low=-10000, high=10000, shape=(9 + 1 * 2,)) for _ in range(2)]
)
_GROUPED_ACTION_SPACE = Tuple(
    [Box(low=-10, high=10, shape=(3,)) for _ in range(2)]
)
def env_creator(args):
    """Build the (optionally grouped) PettingZoo env for RLlib.

    Args:
        args: dict providing "max_cycles", "n_observers", "n_drones", and
            "grouped_agents" (truthy -> wrap all agents into one QMIX group).

    Returns:
        A ``PettingZooEnv``, or a ``GroupAgentsWrapper`` around it when
        grouping is requested.
    """
    env = customenv.env(
        # FIX: was render_mode="None" (the *string* "None"), which is not a
        # valid PettingZoo render mode; None disables rendering. Confirm
        # customenv follows the standard render_mode convention.
        render_mode=None,
        max_cycles=args["max_cycles"],
        n_observers=args["n_observers"],
        n_drones=args["n_drones"],
    )
    # Normalize dtypes and pad obs/action spaces so all agents share one
    # homogeneous shape (required for grouping).
    env = ss.dtype_v0(env, "float32")
    env = ss.pad_observations_v0(env)
    env = ss.pad_action_space_v0(env)

    # Wrap env with RLlib's PettingZoo adapter.
    env = PettingZooEnv(env)

    agents = env.env.agents
    drones = [agent for agent in agents if "drone" in agent]
    observers = [agent for agent in agents if "observer" in agent]

    if args["grouped_agents"]:
        # QMIX needs all cooperating agents in one group; the Tuple spaces
        # above must list one entry per member, in this exact order.
        groups = {"group_1": drones + observers}
        env = GroupAgentsWrapper(
            env,
            groups,
            obs_space=_GROUPED_OBSERVATION_SPACE,
            act_space=_GROUPED_ACTION_SPACE,
        )
    return env
# FIX: was `if name == “main”:` — markdown rendering stripped the dunder
# underscores and smart-quoted the string; as written it is a NameError /
# syntax error.
if __name__ == "__main__":
    ray.init(num_gpus=1)

    env_name = "customenv"

    # Env construction kwargs.
    # NOTE(review): register_env below closes over this dict and ignores the
    # `config` Tune passes in, so per-trial env overrides have no effect —
    # confirm that is intentional.
    env_config = dict(
        time_factor=1,
        area_size=(1000, 1000),
        max_cycles=600,
        n_observers=1,
        n_drones=1,
        grouped_agents=1,
    )
    register_env(env_name, lambda config: env_creator(env_config))

    config = (
        QMixConfig()
        .environment(env=env_name, clip_actions=True)
        .rollouts(
            num_rollout_workers=12,
            rollout_fragment_length=50,
        )
        .multi_agent(
            # One policy per agent id; the mapping fn routes each agent to
            # the policy that shares its id.
            policies={"drone_0", "observer_0"},
            policy_mapping_fn=(lambda aid, episode, **kw: aid),
            policies_to_train=["drone_0", "observer_0"],
        )
        .training(
            train_batch_size=600,
            lr=3e-4,
            gamma=0.99,
            tau=0.005,
        )
        .framework(framework="torch")
        .resources(num_gpus=1)
    )

    print(ray.available_resources())

    analysis = tune.run(
        "QMIX",
        name="QMIX",
        # NOTE(review): resume=True errors out when no prior experiment named
        # "QMIX" exists under local_dir — consider resume="AUTO".
        resume=True,
        # Short run under CI, full 1M timesteps otherwise.
        stop={"timesteps_total": 1000000 if not os.environ.get("CI") else 500},
        checkpoint_freq=10,
        config=config.to_dict(),
        local_dir=os.path.expanduser("~/ray_results"),
    )

    ray.shutdown()