Multi agent checkpoints - KeyError: 'default_policy'

I am training a multi-agent system with nine agents and three policies. I have trained using the following configuration dictionary:

{
    "training": {
        "env": "market-v0",
        "run": "A3C",
        "stop": {
            "training_iteration": 5,
        },
        "checkpoint_at_end": True,
        "checkpoint_freq": 1,
        # "export_formats": ["h5"],
        "config": {
            "env_config": {
                "file_name": "/opt/ml/output/intermediate",
            },
            "lr": 0.001,
            "train_batch_size": 1,
            "collect_metrics_timeout": 10080,
            "model": {
                # https://docs.ray.io/en/master/rllib-models.html#default-model-config-settings
            },
            "multiagent": {
                "policies": {
                    "agent_DA": (None, spaces.Box(low=-np.inf, high=np.inf, shape=(344,), dtype=np.float32), spaces.Box(20, 200, shape=(1,)), {}),
                    "agent_BM_Ask": (None, spaces.Box(low=-np.inf, high=np.inf, shape=(156,), dtype=np.float32), spaces.Box(-200, 200, shape=(1,)), {}),
                    "agent_BM_Bid": (None, spaces.Box(low=-np.inf, high=np.inf, shape=(156,), dtype=np.float32), spaces.Box(-200, 200, shape=(1,)), {}),
                },
                "policy_mapping_fn": lambda x: policy_mapping(x),
            },
            # "batch_mode": "complete_episodes",
            "num_gpus": self.num_gpus,
            "seed": 1,
        },
    },
}

The checkpoints are saved in a subdirectory with the following structure (for training iteration 5):

checkpoint_5/
------------- .is_checkpoint
------------- checkpoint-5
------------- checkpoint-5.tune_metadata

I am restoring from the checkpoint file “checkpoint-5”. This is the script I am using for evaluation:

import argparse
import json
import os

import gym
import numpy as np
import ray
from gym import wrappers
from ray.rllib.models import ModelCatalog
from ray.tune.registry import register_env
from ray.rllib.agents.registry import get_agent_class
from env_9_agents import MarketSimulatorEnv

OUTPUT_DIR = "/opt/ml/output/intermediate"


def create_parser(parser_creator=None):
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--checkpoint",
        default="/opt/ml/input/data/model/checkpoint",
        type=str,
        help="Checkpoint from which to roll out.",
    )
    parser.add_argument(
        "--algorithm",
        type=str,
        required=True,
        help="The algorithm or model to train. This may refer to the name "
        "of a built-in algorithm (e.g. RLlib's DQN or PPO), or a "
        "user-defined trainable function or class registered in the "
        "tune registry.",
    )
    parser.add_argument("--env", type=str, help="The gym environment to use.")
    parser.add_argument(
        "--evaluate_episodes",
        default=1,
        type=int,
        help="Number of episodes to roll out.",
    )
    parser.add_argument(
        "--config",
        default="{}",
        help="Algorithm-specific configuration (e.g. env, hyperparams). "
        "Suppresses loading of configuration from checkpoint.",
    )
    return parser


def policy_mapping(x):
    # Guard against receiving a list of agent IDs, which would otherwise
    # silently fall through to "agent_DA".
    if isinstance(x, list):
        raise TypeError("policy_mapping expects a single agent ID, got a list of length %d" % len(x))
    if x in ['agent_BM_Ask_1', 'agent_BM_Ask_2', 'agent_BM_Ask_3', 'agent_BM_Ask_4']:
        return "agent_BM_Ask"
    elif x in ['agent_BM_Bid_1', 'agent_BM_Bid_2', 'agent_BM_Bid_3', 'agent_BM_Bid_4']:
        return "agent_BM_Bid"
    else:
        return "agent_DA"


def run(args, parser):
    def create_environment(env_config):
        # This import must happen inside the method so that worker
        # processes import this code.
        from env_9_agents import MarketSimulatorEnv

        return MarketSimulatorEnv()
        # return gym.make(args.env)

    import gym.spaces as spaces

    config = {
        "env_config": {
            "file_name": "/opt/ml/output/intermediate",
        },
        "lr": 0.0001,
        "train_batch_size": 1,
        "collect_metrics_timeout": 10080,
        "model": {
            # https://docs.ray.io/en/master/rllib-models.html#default-model-config-settings
        },
        "multiagent": {
            "policies": {
                "agent_DA": (None, spaces.Box(low=-np.inf, high=np.inf, shape=(344,), dtype=np.float32), spaces.Box(20, 200, shape=(1,)), {}),
                "agent_BM_Ask": (None, spaces.Box(low=-np.inf, high=np.inf, shape=(156,), dtype=np.float32), spaces.Box(-200, 200, shape=(1,)), {}),
                "agent_BM_Bid": (None, spaces.Box(low=-np.inf, high=np.inf, shape=(156,), dtype=np.float32), spaces.Box(-200, 200, shape=(1,)), {}),
            },
            "policy_mapping_fn": lambda x: policy_mapping(x),
        },
        "batch_mode": "complete_episodes",
        "seed": 1,
    }

    print(type(config), config)
    # args.config is a JSON string (default "{}"), so parse it before
    # treating it as a dict.
    args.config = json.loads(args.config)
    if not args.env:
        if not args.config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = args.config.get("env")

    ray.init()

    register_env(args.env, create_environment)

    env = MarketSimulatorEnv()
    env = wrappers.Monitor(env, OUTPUT_DIR, force=True, video_callable=lambda episode_id: True)
    state = env.reset()
    cls = get_agent_class(args.algorithm)
    args.checkpoint = "/opt/ml/input/data/model/checkpoint-5"
    agent = cls(env=args.env, config=config)
    agent.restore(args.checkpoint)
    num_episodes = args.evaluate_episodes

    all_rewards = []
    for episode in range(num_episodes):
        steps = 0
        state = env.reset()
        done = False
        reward_total = 0.0
        while not done:
            action = agent.compute_action(state)
            next_state, reward, done, _ = env.step(action)
            reward_total += reward
            steps += 1
            state = next_state
            env.render()
        all_rewards.append(reward_total)
        print("Episode reward: %s. Episode steps: %s" % (reward_total, steps))
    print("Mean Reward:", np.mean(all_rewards))
    print("Max Reward:", np.max(all_rewards))
    print("Min Reward:", np.min(all_rewards))


if __name__ == "__main__":
    parser = create_parser()
    args = parser.parse_args()
    run(args, parser)


I am getting the following error:

Traceback (most recent call last):
  File "evaluate.py", line 151, in <module>
    run(args, parser)
  File "evaluate.py", line 113, in run
    print('state1: ', agent.compute_action(np.ones(len(state))))
  File "/usr/local/lib/python3.6/dist-packages/ray/rllib/agents/trainer.py", line 819, in compute_action
    policy_id].transform(observation)
KeyError: 'default_policy'

What is the cause of the error? Is this the correct way to evaluate a multi-agent model?

Edit: I have figured it out; it seems I had to specify the policy ID for each agent when computing actions.
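
For reference, this is roughly what the fixed call looks like (a sketch, assuming the environment returns a per-agent observation dict and reusing the policy_mapping function from above):

action = {
    agent_id: agent.compute_action(obs, policy_id=policy_mapping(agent_id))
    for agent_id, obs in state.items()
}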

Hi @carlorop,

Internally RLlib treats everything as a multi-agent problem. When you call compute_action on a single observation without passing a policy_id, it assumes you are in single-agent mode and looks the policy up under the key "default_policy". Your trainer only has the three named policies, so the lookup fails with the KeyError you are seeing.

Since your environment is multi-agent, its reset and step methods return per-agent dictionaries; iterate over those and pass the matching policy_id to compute_action, as you already figured out.
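
Putting it together, the rollout loop would look roughly like this (an untested sketch, assuming your env follows RLlib's MultiAgentEnv convention of per-agent dicts with an "__all__" key in the dones dict):

state = env.reset()  # {agent_id: obs, ...}
done = {"__all__": False}
episode_reward = 0.0
while not done["__all__"]:
    # Route each agent's observation to the policy that the mapping
    # function assigns to that agent ID.
    action = {
        agent_id: agent.compute_action(obs, policy_id=policy_mapping(agent_id))
        for agent_id, obs in state.items()
    }
    state, reward, done, _ = env.step(action)
    episode_reward += sum(reward.values())  # rewards come back per-agent too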