I have managed to get it working. Below is the extract of my code that caused the problem.
The following works:
# Candidate hidden stacks: every layer width repeated 1, 2 or 4 times.
layer_sizes = [8, 16, 32]
num_layers = [1, 2, 4]
hidden_architectures = [
    [width] * depth
    for depth in num_layers
    for width in layer_sizes
]

# One module spec per agent, keyed "p0", "p1", ...; no per-policy model
# config is set on these specs.
module_specs = {
    f"p{i}": RLModuleSpec()
    for i in range(env_config["num_agents"])
}

# Model settings shared by all policies. Entries wrapped in tune.choice list
# the candidate values for that setting.
shared_model_config = {
    "use_lstm": True,
    "fcnet_hiddens": tune.choice(hidden_architectures),
    # Cell size of the encoder LSTM.
    "lstm_cell_size": tune.choice([8, 16, 32]),
    "fcnet_activation": tune.choice(["relu", "tanh", "elu"]),
}

# IMPORTANT: the model config dict has to be passed here, on the top-level
# rl_module() call. If it is defined on the individual policy specs instead,
# it gets ignored (noticed when resetting workers).
config.rl_module(
    rl_module_spec=MultiRLModuleSpec(module_specs=module_specs),
    model_config_dict=shared_model_config,
)
But this one does not:
# Candidate hidden stacks: every layer width repeated 1, 2 or 4 times.
layer_sizes = [8, 16, 32]
num_layers = [1, 2, 4]
hidden_architectures = [
    [width] * depth
    for depth in num_layers
    for width in layer_sizes
]

# Non-working variant: the model config is attached to each per-agent spec
# (keys "p0", "p1", ...) rather than to the rl_module() call itself.
# Observed result: a model_config_dict defined per policy like this is ignored.
module_specs = {
    f"p{i}": RLModuleSpec(
        model_config_dict={
            "use_lstm": True,
            "fcnet_hiddens": tune.choice(hidden_architectures),
            # Cell size of the encoder LSTM.
            "lstm_cell_size": tune.choice([8, 16, 32]),
            "fcnet_activation": tune.choice(["relu", "tanh", "elu"]),
        },
    )
    for i in range(env_config["num_agents"])
}

# No model_config_dict is passed at this level in this variant.
multi_spec = MultiRLModuleSpec(module_specs=module_specs)
config.rl_module(rl_module_spec=multi_spec)
The model_config_dict gets ignored if it is defined for individual policies. This feels like a bug, but it is not a blocker for me.