Hello,
I am trying to train a Multi-Agent DQN model, save the checkpoint and then use the model to perform a single action.
# Custom Environment
meta_environment = MultiMetaEnv(env,action_sample_n,bonus_reward=True)
ray.init()
register_env("meta_environment", lambda config: MultiMetaEnv(config['env'], config['action_sample_n'], config["bonus_reward"]))
# Define your policies
def policy_mapping_fn(agent_id, episode, worker, **kwargs):
if agent_id == "0":
return "policy0"
if agent_id == "1":
return "policy1"
if agent_id == "2":
return "policy2"
# Defining Config
multi_agent_config = DQNConfig()
multi_agent_config = multi_agent_config.environment("meta_environment", env_config={'env':env, 'action_sample_n':action_sample_n, 'bonus_reward':False})
multi_agent_config = multi_agent_config.multi_agent(
policies={
"policy0": (None, meta_environment.observation_space, meta_environment.action_space, {}),
"policy1": (None, meta_environment.observation_space, meta_environment.action_space, {}),
"policy2": (None, meta_environment.observation_space, meta_environment.action_space, {}),},
policy_mapping_fn=policy_mapping_fn)
multi_agent_config = multi_agent_config.framework("tf2")
# Trainable
algo = multi_agent_config.build()
for _ in range(1):
algo.train()
checkpoint_dir = algo.save()
print("Checkpoint Directory, ", checkpoint_dir)
obs = meta_environment.reset()
print(algo.compute_single_action(obs))
The error is as follows:
Traceback (most recent call last):
File "C:\Users\pnamala\Desktop\Project_3_6_2\MADDPG\experiments\meta_learn_with_rllib.py", line 710, in <module>
learn(args)
File "C:\Users\pnamala\Desktop\Project_3_6_2\MADDPG\experiments\meta_learn_with_rllib.py", line 695, in learn
print(algo.compute_single_action(obs))
File "C:\Users\pnamala\Anaconda3\envs\rllib\lib\site-packages\ray\rllib\algorithms\algorithm.py", line 1472, in compute_single_action
raise KeyError(
KeyError: "PolicyID 'default_policy' not found in PolicyMap of the Trainer's local worker!"
Finished...
How severe does this issue affect your experience of using Ray?
- High: It blocks me from completing my task.