I am training a multi-agent system with nine agents sharing 3 policies. I have trained it using the following configuration dictionary:
{
    "training": {
        "env": "market-v0",
        "run": "A3C",
        "stop": {
            "training_iteration": 5,
        },
        "checkpoint_at_end": True,
        "checkpoint_freq": 1,
        # "export_formats": ["h5"],
        "config": {
            "env_config": {
                "file_name": "/opt/ml/output/intermediate",
            },
            "lr": 0.001,
            "train_batch_size": 1,
            "collect_metrics_timeout": 10080,
            "model": {
                # https://docs.ray.io/en/master/rllib-models.html#default-model-config-settings
            },
            "multiagent": {
                "policies": {
                    "agent_DA": (None, spaces.Box(low=-np.inf, high=np.inf, shape=(344,), dtype=np.float32), spaces.Box(20, 200, shape=(1,)), {}),
                    "agent_BM_Ask": (None, spaces.Box(low=-np.inf, high=np.inf, shape=(156,), dtype=np.float32), spaces.Box(-200, 200, shape=(1,)), {}),
                    "agent_BM_Bid": (None, spaces.Box(low=-np.inf, high=np.inf, shape=(156,), dtype=np.float32), spaces.Box(-200, 200, shape=(1,)), {}),
                },
                "policy_mapping_fn": lambda x: policy_mapping(x),
            },
            # "batch_mode": "complete_episodes",
            "num_gpus": self.num_gpus,
            "seed": 1,
        },
    },
}
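For context, a minimal sketch of how this dictionary is consumed, assuming Ray's ray.tune.run API (training_config here stands for the "config" sub-dictionary above):

import ray
from ray import tune

ray.init()
tune.run(
    "A3C",                           # the "run" entry above
    stop={"training_iteration": 5},  # the "stop" entry above
    checkpoint_freq=1,
    checkpoint_at_end=True,
    # "env" is merged into the config so RLlib can resolve it
    config=dict(training_config, env="market-v0"),
)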
The checkpoints are saved in a subdirectory with the following structure (shown here for training iteration 5):
checkpoint_5/
├── .is_checkpoint
├── checkpoint-5
└── checkpoint-5.tune_metadata
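(Ray's Trainer.restore() expects the path to the "checkpoint-5" file itself, not the "checkpoint_5/" directory, which is what the script below passes.)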
I am using the checkpoint “checkpoint-5”. This is the script I am using for evaluation:
import argparse
import json
import os
import gym
import numpy as np
import ray
from gym import wrappers
from ray.rllib.models import ModelCatalog
from ray.tune.registry import register_env
from ray.rllib.agents.registry import get_agent_class
from env_9_agents import MarketSimulatorEnv
OUTPUT_DIR = "/opt/ml/output/intermediate"
def create_parser(parser_creator=None):
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--checkpoint",
        default="/opt/ml/input/data/model/checkpoint",
        type=str,
        help="Checkpoint from which to roll out.",
    )
    parser.add_argument(
        "--algorithm",
        type=str,
        required=True,
        help="The algorithm or model to train. This may refer to the name "
        "of a built-in algorithm (e.g. RLlib's DQN or PPO), or a "
        "user-defined trainable function or class registered in the "
        "tune registry.",
    )
    parser.add_argument("--env", type=str, help="The gym environment to use.")
    parser.add_argument(
        "--evaluate_episodes",
        default=1,
        type=int,
        help="Number of episodes to roll out.",
    )
    parser.add_argument(
        "--config",
        default="{}",
        help="Algorithm-specific configuration (e.g. env, hyperparams). "
        "Suppresses loading of configuration from checkpoint.",
    )
    return parser
def policy_mapping(x):
    # If the agent ids were passed in as a list, this function would always
    # fall through to "agent_DA", so guard against that case.
    if isinstance(x, list):
        raise TypeError("policy_mapping received a list of %d agent ids" % len(x))
    if x in ['agent_BM_Ask_1', 'agent_BM_Ask_2', 'agent_BM_Ask_3', 'agent_BM_Ask_4']:
        return "agent_BM_Ask"
    elif x in ['agent_BM_Bid_1', 'agent_BM_Bid_2', 'agent_BM_Bid_3', 'agent_BM_Bid_4']:
        return "agent_BM_Bid"
    else:
        return "agent_DA"
def run(args, parser):
    def create_environment(env_config):
        # This import must happen inside the method so that worker
        # processes also import this code.
        from env_9_agents import MarketSimulatorEnv
        return MarketSimulatorEnv()
        # return gym.make(args.env)

    import gym.spaces as spaces
    config = {
        "env_config": {
            "file_name": "/opt/ml/output/intermediate",
        },
        "lr": 0.0001,
        "train_batch_size": 1,
        "collect_metrics_timeout": 10080,
        "model": {
            # https://docs.ray.io/en/master/rllib-models.html#default-model-config-settings
        },
        "multiagent": {
            "policies": {
                "agent_DA": (None, spaces.Box(low=-np.inf, high=np.inf, shape=(344,), dtype=np.float32), spaces.Box(20, 200, shape=(1,)), {}),
                "agent_BM_Ask": (None, spaces.Box(low=-np.inf, high=np.inf, shape=(156,), dtype=np.float32), spaces.Box(-200, 200, shape=(1,)), {}),
                "agent_BM_Bid": (None, spaces.Box(low=-np.inf, high=np.inf, shape=(156,), dtype=np.float32), spaces.Box(-200, 200, shape=(1,)), {}),
            },
            "policy_mapping_fn": lambda x: policy_mapping(x),
        },
        "batch_mode": "complete_episodes",
        "seed": 1,
    }
    print(type(config), config)

    args.config = json.loads(args.config)  # --config is passed as a JSON string
    if not args.env:
        if not args.config.get("env"):
            parser.error("the following arguments are required: --env")
        args.env = args.config.get("env")

    ray.init()
    register_env(args.env, create_environment)

    env = MarketSimulatorEnv()
    env = wrappers.Monitor(env, OUTPUT_DIR, force=True, video_callable=lambda episode_id: True)
    state = env.reset()

    cls = get_agent_class(args.algorithm)
    args.checkpoint = "/opt/ml/input/data/model/checkpoint-5"
    agent = cls(env=args.env, config=config)
    agent.restore(args.checkpoint)

    num_episodes = args.evaluate_episodes
    all_rewards = []
    for episode in range(num_episodes):
        steps = 0
        state = env.reset()
        done = False
        reward_total = 0.0
        while not done:
            action = agent.compute_action(state)
            next_state, reward, done, _ = env.step(action)
            reward_total += reward
            steps += 1
            state = next_state
            env.render()
        all_rewards.append(reward_total)
        print("Episode reward: %s. Episode steps: %s" % (reward_total, steps))
    print("Mean Reward:", np.mean(all_rewards))
    print("Max Reward:", np.max(all_rewards))
    print("Min Reward:", np.min(all_rewards))


if __name__ == "__main__":
    parser = create_parser()
    args = parser.parse_args()
    run(args, parser)
I am getting the following error:
Traceback (most recent call last):
  File "evaluate.py", line 151, in <module>
    run(args, parser)
  File "evaluate.py", line 113, in run
    print('state1: ',agent.compute_action(np.ones(len(state))))
  File "/usr/local/lib/python3.6/dist-packages/ray/rllib/agents/trainer.py", line 819, in compute_action
    policy_id].transform(observation)
KeyError: 'default_policy'
What is the cause of the error? Is this the correct way to evaluate a multi-agent model?
Edit: I have figured it out; it seems I had to specify the policy id when predicting. compute_action defaults to policy_id="default_policy", and no policy with that name exists in a multi-agent configuration, hence the KeyError.
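A minimal sketch of the corrected rollout loop, assuming the environment returns observation/reward/done dicts keyed by agent id (the standard RLlib multi-agent convention) and reusing the policy_mapping function from the script:

state = env.reset()
done = False
reward_total = 0.0
while not done:
    # Route each agent's observation to its own policy; without an
    # explicit policy_id, compute_action falls back to "default_policy".
    actions = {
        agent_id: agent.compute_action(obs, policy_id=policy_mapping(agent_id))
        for agent_id, obs in state.items()
    }
    state, rewards, dones, _ = env.step(actions)
    reward_total += sum(rewards.values())
    done = dones["__all__"]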