Hello, I am trying to specify a custom torch model when I define my PPOConfig
object. When I proceed to train my model, however, it seems that my algorithm is using the default fully connected network and not my custom one. Note that I am on the following versions:
ray=2.33.0
torch=2.33.0
python=3.11.5
Can anyone advise on what I am doing wrong in the below code that sets up my algorithm?
First I define and register my custom model:
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
class CustomTorchModel(TorchModelV2, nn.Module):
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
TorchModelV2.__init__(self, obs_space, action_space, num_outputs, model_config, name)
nn.Module.__init__(self)
hidden_dim_size = model_config["custom_model_config"].get("hidden_dim_size", 128)
self._hidden_layers = nn.Sequential(
nn.Linear(obs_space.shape[0], hidden_dim_size),
nn.ReLU(),
nn.Linear(hidden_dim_size, hidden_dim_size),
nn.ReLU()
)
self._logits = nn.Linear(hidden_dim_size, num_outputs)
def forward(self, input_dict, state, seq_lens):
x = self._hidden_layers(input_dict["obs"].float())
logits = self._logits(x)
return logits, state
def value_function(self):
return self._value_branch(x)
ModelCatalog.register_custom_model("custom_torch_model", CustomTorchModel)
I then define my config:
config = (
PPOConfig()
.api_stack(enable_rl_module_and_learner=True, enable_env_runner_and_connector_v2=True)
.framework("torch")
.rl_module(
model_config_dict={
"uses_new_env_runners": True,
"vf_share_layers": False,
"custom_model": "custom_torch_model", # Use the registered custom model.
"custom_model_config": { # Parameters for the custom model.
"hidden_dim_size": 256,
},
},
)
.environment("CartPole-v1")
.resources(
num_gpus=0,
)
.learners(
num_learners=0,
num_gpus_per_learner=0,
)
.env_runners(
num_env_runners=4,
num_envs_per_env_runner=8,
rollout_fragment_length="auto",
)
.training(
gamma=0.9,
lr=0.001,
train_batch_size_per_learner=1024,
)
)
algo = config.build()
I then perform training, as usual. It doesn’t throw any errors but instead proceeds with the default fully-connected model. Has the interface for utilizing custom models changed?