QMIX with PettingZoo

Hello,

I’ve been trying to get the QMIX algorithm to work with the PettingZoo environments library, following the provided example script with the two-step game. I have tried the following:

def env_creator(args):
    """Create the simple-adversary environment and wrap it in PettingZooEnv.

    The raw ``simple_adversary_v2`` environment is built with ``N=5``, passed
    through ``ss.pad_observations_v0`` and then wrapped in ``PettingZooEnv``.

    ``args`` is accepted but not read by the body.
    """
    # Disabled alternative kept for reference (grouped variant):
    # ss.agent_indicator_v0(ss.pad_observations_v0(simple_adversary_v2.env(N=5)), type_only=True)
    raw_env = simple_adversary_v2.env(N=5)
    padded_env = ss.pad_observations_v0(raw_env)
    return PettingZooEnv(padded_env)

# Builds ONE environment object when the module is executed: env_pz holds the
# value returned by env_creator (a PettingZooEnv instance), not a factory.
env_pz = env_creator({})

# Register an environment factory under the name "ungrouped_adv".
# NOTE(review): the lambda calls `env_pz()`, i.e. it calls the instance
# created above rather than a constructor; this matches the reported
# "not callable" error. It presumably should build a fresh environment
# (e.g. via env_creator(config)) and call with_agent_groups on that — confirm.
# NOTE(review): `grouping` is not defined in the visible snippet — verify it
# is defined elsewhere before this line runs.
register_env(
        "ungrouped_adv",
        lambda config: env_pz().with_agent_groups(
            grouping ))




if __name__ == "__main__":
    # Start Ray with 12 CPUs; an already-initialised runtime is tolerated.
    ray.init(num_cpus=12, ignore_reinit_error=True)

    # The GPU count is read from the RLLIB_NUM_GPUS environment variable and
    # falls back to 0 when the variable is unset.
    gpu_count = int(os.environ.get("RLLIB_NUM_GPUS", "0"))

    exploration_settings = {"final_epsilon": 0.0}
    env_settings = {
        "separate_state_space": True,
        "one_hot_state_encoding": True,
    }

    qmix_config = {
        "rollout_fragment_length": 4,
        "train_batch_size": 32,
        "exploration_config": exploration_settings,
        "num_workers": 4,
        "mixer": "qmix",
        "env_config": env_settings,
        "num_gpus": gpu_count,
    }
    # Select the environment by its registered name.
    qmix_config["env"] = "ungrouped_adv"

    # Stop criteria handed to tune.run.
    stop_criteria = {
        "episode_reward_mean": 8.0,
        "timesteps_total": 70000,
        "training_iteration": 200,
    }

    results = tune.run(
        "QMIX",
        stop=stop_criteria,
        config=qmix_config,
        verbose=2,
    )

    # NOTE(review): `args` is not defined in the visible snippet — presumably
    # parsed command-line arguments; confirm it exists before this check.
    if args.as_test:
        check_learning_achieved(results, args.stop_reward)

    ray.shutdown()

But I get the error that "PettingZoo is not callable". I don’t really understand how to get this working, so any help would be appreciated.

I think your issue is here:
env_pz = env_creator({})

It looks like you’re not actually passing a constructor for the environment, but instead you’re passing an already instantiated environment. Please correct me if I’m wrong, of course.

But if that is the case, what you’d need to do is rewrite your environment-creation lambda so that it creates the environment itself (i.e. calls env_creator instead of using env_pz) and then calls with_agent_groups on the result.

TL;DR: You’re trying to construct an already constructed environment.