Hello,
I’ve been trying to get the QMIX algorithm to work with environments from the PettingZoo library, following the provided example script for the two-step game. This is what I have tried:
def env_creator(args):
    """Create the simple-adversary environment wrapped in ``PettingZooEnv``.

    ``args`` is accepted so this function can be used as an environment
    creator callback; it is not read by the body.
    """
    # Alternative wrapping left disabled by the author:
    # env_adv_grouped = ss.agent_indicator_v0(ss.pad_observations_v0(simple_adversary_v2.env(N=5)), type_only=True)
    raw_env = simple_adversary_v2.env(N=5)
    padded_env = ss.pad_observations_v0(raw_env)
    return PettingZooEnv(padded_env)
# Eagerly built instance kept from the original script. It is an environment
# object, not a factory, so it must never be called like a function.
env_pz = env_creator({})

# register_env is given a creator function that receives the env config and
# returns an environment. The previous lambda evaluated `env_pz()`, i.e. it
# tried to call the already-built PettingZooEnv instance, which is what raised
# the "... is not callable" error. Build the environment via env_creator instead.
# NOTE(review): `grouping` is not defined in the visible code; it must be
# defined before an environment is created. The RLlib two-step example also
# passes obs_space/act_space to with_agent_groups -- confirm whether QMIX
# needs them for this environment.
register_env(
    "ungrouped_adv",
    lambda config: env_creator(config).with_agent_groups(grouping),
)
if __name__ == "__main__":
    ray.init(num_cpus=12, ignore_reinit_error=True)

    # Criteria handed to tune.run via `stop=`.
    stop = {
        "episode_reward_mean": 8.0,
        "timesteps_total": 70000,
        "training_iteration": 200,
    }

    # Trainer configuration, written as one literal; the registered
    # environment name is the last key, as in the original merged dict.
    config = {
        "rollout_fragment_length": 4,
        "train_batch_size": 32,
        "exploration_config": {
            "final_epsilon": 0.0,
        },
        "num_workers": 4,
        "mixer": "qmix",
        # NOTE(review): these keys look specific to the two-step game
        # example; confirm the PettingZoo environment actually reads them.
        "env_config": {
            "separate_state_space": True,
            "one_hot_state_encoding": True,
        },
        # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
        "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
        "env": "ungrouped_adv",
    }

    results = tune.run("QMIX", stop=stop, config=config, verbose=2)

    # NOTE(review): `args` is not defined in the visible code; presumably it
    # comes from an argparse parser as in the example script -- confirm it
    # exists before this point is reached.
    if args.as_test:
        check_learning_achieved(results, args.stop_reward)

    ray.shutdown()
But I get an error saying "PettingZoo is not callable". In general, I do not understand how to get this working, so any help would be appreciated.