Multiple algorithms in hierarchical training

Is it possible to use different algorithms for hierarchical training (hierarchical_training.py)?
For example, PPO for the low-level steps and DQN for the high-level steps.

@sven1977 please take a look at this question

I tried this, but it didn't work. I couldn't split the training in two by agent_id.
How do I get the agent_id inside my_train_fn?

def policy_mapping_fn(agent_id):
    if agent_id.startswith("low_level_"):
        return "low_level_policy"
    else:
        return "high_level_policy"

def my_train_fn(config):
    # This is where I get stuck: agent_id is not available in this scope.
    if agent_id.startswith("low_level_"):
        return PPOTrainer(config=config)
    else:
        return DQNTrainer(config=config)

stop = {
    "training_iteration": args.stop_iters,
    "timesteps_total": args.stop_timesteps,
}

if args.flat:
    results = tune.run(
        my_train_fn,
        stop=stop,
        config={
            "env": WindyMazeEnv,
            "num_workers": 0,
            "framework": "torch" if args.torch else "tf",
        },
    )
else:
    maze = WindyMazeEnv(None)
    config = {
        "env": HierarchicalWindyMazeEnv,
        "num_workers": 0,
        "entropy_coeff": 0.01,
        "multiagent": {
            "policies": {
                "high_level_policy": (None, maze.observation_space,
                                      Discrete(4), {
                                          "gamma": 0.9
                                      }),
                "low_level_policy": (None,
                                     Tuple([
                                         maze.observation_space,
                                         Discrete(4)
                                     ]), maze.action_space, {
                                         "gamma": 0.0
                                     }),
            },
            "policy_mapping_fn": policy_mapping_fn,
        },
        "framework": "torch" if args.torch else "tf",
        # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
        "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
    }
    results = tune.run(my_train_fn, stop=stop, config=config, verbose=1)

@hosokawa-taiji,

"multiagent": {
            "policies": {
                "high_level_policy": (None, maze.observation_space,
                                      Discrete(4), {
                                          "gamma": 0.9
                                      }),
                "low_level_policy": (None,
                                     

See those Nones as the first element of the policy tuples? A None means that policy uses the running trainer's default policy class, but you can specify a different policy class for each entry if you want.
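For example, something like this (an untested sketch; it assumes a Ray 1.x-style RLlib API where PPOTorchPolicy and DQNTorchPolicy can be imported from ray.rllib.agents.ppo and ray.rllib.agents.dqn, and reuses maze, HierarchicalWindyMazeEnv, and policy_mapping_fn from your code above):

from gym.spaces import Discrete, Tuple
from ray.rllib.agents.dqn import DQNTorchPolicy
from ray.rllib.agents.ppo import PPOTorchPolicy

config = {
    "env": HierarchicalWindyMazeEnv,
    "multiagent": {
        "policies": {
            # High-level decisions: use DQN's policy class instead of the default.
            "high_level_policy": (DQNTorchPolicy, maze.observation_space,
                                  Discrete(4), {"gamma": 0.9}),
            # Low-level steps: use PPO's policy class.
            "low_level_policy": (PPOTorchPolicy,
                                 Tuple([maze.observation_space, Discrete(4)]),
                                 maze.action_space, {"gamma": 0.0}),
        },
        "policy_mapping_fn": policy_mapping_fn,
    },
    "framework": "torch",
}

Keep in mind that whichever single trainer you run still drives the overall sampling and optimization loop; as far as I know, the per-policy class mainly swaps in that policy's own model, loss, and postprocessing.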

Thank you for your advice!
It works fine!