While solving a hierarchical multi-agent problem in RLlib, I started by adapting this hierarchical example (ray/hierarchical_training.py at 7d029f8e710a4a16eac5ec6e370b816027ec86bb · ray-project/ray · GitHub) and the ray\rllib\examples\self_play_with_open_spiel_connect_4.py example. In the open_spiel case, a fully connected network is used, and in the hierarchical_training example, the networks are never specified. What if we wanted to specify a different custom network for each agent, since the agents in the game might have different responsibilities?
e.g.
from ray import tune
from ray.rllib.models import ModelCatalog
from ray.tune.registry import register_env

def policy_mapping_fn(agent_id):
    # Route each agent to its policy based on the agent-id prefix.
    if agent_id.startswith('antagonist'):
        return 'antagonist'
    elif agent_id.startswith('protagonist'):
        return 'protagonist'
    else:
        return 'builder'

# Register the two custom model classes under string keys.
ModelCatalog.register_custom_model('AIIDE', AIIDEActor)
ModelCatalog.register_custom_model('PCGRL', PCGRLAdversarial)

register_env(env_name, make_env)
h_env = make_env(config)
_ = h_env.reset()

config = {
    'env': env_name,
    'num_workers': 0,
    'env_config': env_config,
    'multiagent': {
        'policies': {
            'builder': (None, h_env.builder_env.observation_space,
                        h_env.builder_env.action_space, {}),
            'antagonist': (None, h_env.player_env.observation_space,
                           h_env.player_env.action_space, {}),
            'protagonist': (None, h_env.player_env.observation_space,
                            h_env.player_env.action_space, {}),
        },
        'policy_mapping_fn': policy_mapping_fn,
    },
    'framework': 'torch',
    'num_gpus': 1,
}

result = tune.run('PPO', stop=stop, config=config, verbose=1)
How do I assign a different network to each agent?
I would expect something like:
config = {
    'env': env_name,
    'num_workers': 0,
    'env_config': env_config,
    'model': {
        'custom_model': {
            'antagonist': 'AIIDE',
            'protagonist': 'AIIDE',
            'builder': 'PCGRL',
        },
    },
    'multiagent': {
        'policies': {
            'builder': (None, h_env.builder_env.observation_space,
                        h_env.builder_env.action_space, {}),
            'antagonist': (None, h_env.player_env.observation_space,
                           h_env.player_env.action_space, {}),
            'protagonist': (None, h_env.player_env.observation_space,
                            h_env.player_env.action_space, {}),
        },
        'policy_mapping_fn': policy_mapping_fn,
    },
    'framework': 'torch',
    'num_gpus': 1,
}
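Or is the right place the fourth element of each policy tuple, which looks like a per-policy config override? A minimal sketch of what I mean, reusing the 'AIIDE' and 'PCGRL' models registered above:

config = {
    'env': env_name,
    'num_workers': 0,
    'env_config': env_config,
    'multiagent': {
        'policies': {
            # Guess: a per-policy 'model' override in the policy tuple's
            # config dict, instead of a top-level 'model' key.
            'builder': (None, h_env.builder_env.observation_space,
                        h_env.builder_env.action_space,
                        {'model': {'custom_model': 'PCGRL'}}),
            'antagonist': (None, h_env.player_env.observation_space,
                           h_env.player_env.action_space,
                           {'model': {'custom_model': 'AIIDE'}}),
            'protagonist': (None, h_env.player_env.observation_space,
                            h_env.player_env.action_space,
                            {'model': {'custom_model': 'AIIDE'}}),
        },
        'policy_mapping_fn': policy_mapping_fn,
    },
    'framework': 'torch',
    'num_gpus': 1,
}

Would that be the supported way to give each policy its own network, or is something else intended?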