Custom action-masking model with Ray Tune, and trials not stopping

I have created a custom action-masking model, and I am currently searching for a config variable that will take in the name of the model for Ray Tune. In this way, when Ray Tune is carrying out rollouts in my custom OpenAI Gym environment, only the legal actions are taken.

Also , I did not understand the importance of

config = {
    "env": "fss-v1",
    "num_workers": 1,  # parallelism
    "horizon": 66,
    "rollout_fragment_length": 10,  # <-- what does this mean?
    "train_batch_size": 1000,
}

Since I did not set a stopping condition, my Ray Tune run is still running without stopping.

Hey @Archana_R , thanks for your question. I’m not exactly sure what you mean by the config parameter allowing you to set a name.

You can use RLlib in combination with Ray Tune according to the following simple example. Also, make sure you read the Ray documentation on Tune and RLlib, which contains lots of starter examples and explains how things work in more detail.

import ray
from ray import air, tune
from ray.rllib.models import ModelCatalog
from ray.rllib.models.tf.tf_modelv2 import TFModelV2
from ray.rllib.models.tf.fcnet import FullyConnectedNetwork
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC
from ray.rllib.utils.framework import try_import_tf, try_import_torch
from ray.tune.registry import get_trainable_cls

# Backend selection for the whole example.
# Or "torch" or "tf2":
framework = "tf"

# Soft-import both frameworks: each call returns placeholders instead of
# raising when the library is missing, so the script loads under either backend.
tf1, tf, tfv = try_import_tf()
torch, nn = try_import_torch()


class CustomModel(TFModelV2):
    """Example Keras custom model that just delegates to a fc-net.

    The wrapper adds no logic of its own; it exists to show where a custom
    model plugs into RLlib's ModelV2 API. All computation is delegated to a
    stock ``FullyConnectedNetwork``.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        # Python 3 zero-argument super() replaces the redundant
        # super(CustomModel, self) spelling.
        super().__init__(obs_space, action_space, num_outputs, model_config, name)
        # The wrapped fc-net does all the work; we only forward calls to it.
        self.model = FullyConnectedNetwork(
            obs_space, action_space, num_outputs, model_config, name
        )

    def forward(self, input_dict, state, seq_lens):
        """Forward pass: hand the batch straight to the wrapped fc-net."""
        return self.model.forward(input_dict, state, seq_lens)

    def value_function(self):
        """Return the value-branch output of the wrapped fc-net."""
        return self.model.value_function()


class TorchCustomModel(TorchModelV2, nn.Module):
    """Example PyTorch custom model that simply wraps a stock fc-net."""

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        # Both bases need explicit initialization: TorchModelV2 is not an
        # nn.Module, so cooperative super() is not used here.
        TorchModelV2.__init__(
            self, obs_space, action_space, num_outputs, model_config, name
        )
        nn.Module.__init__(self)

        # All real computation happens inside this fully connected sub-model.
        self.torch_sub_model = TorchFC(
            obs_space, action_space, num_outputs, model_config, name
        )

    def forward(self, input_dict, state, seq_lens):
        """Cast observations to float, then delegate to the sub-model."""
        obs = input_dict["obs"].float()
        input_dict["obs"] = obs
        logits, _ = self.torch_sub_model(input_dict, state, seq_lens)
        return logits, []

    def value_function(self):
        """Flatten the sub-model's value output to shape (batch,)."""
        return self.torch_sub_model.value_function().reshape(-1)


# Can also register the env creator function explicitly with:
# register_env("corridor", lambda config: SimpleCorridor(config))
# Register the custom model under the key "my_model" so that RLlib configs
# can refer to it by name via model={"custom_model": "my_model"}.
ModelCatalog.register_custom_model(
    "my_model", TorchCustomModel if framework == "torch" else CustomModel
)

# Build the PPO configuration step by step. get_default_config() returns an
# AlgorithmConfig whose setter methods return the same object, so the fluent
# chain can equivalently be written as sequential assignments.
config = get_trainable_cls("PPO").get_default_config()
config = config.environment("CartPole-v1")
config = config.framework(framework)  # "tf", "torch", or "tf2"
config = config.training(
    # Link your custom model into the RLlib config here:
    model={"custom_model": "my_model"},
    # Tell Tune to try 2 different learning rates (one trial each).
    lr=tune.grid_search([0.0001, 0.0002]),
)

# Stop a trial as soon as ANY one of these thresholds is reached.
stop = dict(
    training_iteration=100,
    timesteps_total=100_000,
    episode_reward_mean=150.0,
)

# automated run with Tune and grid search and TensorBoard
print("Training automatically with Ray Tune")
# Tuner launches one trial per grid-search value in `config`; each trial
# runs until one of the `stop` criteria is met.
tuner = tune.Tuner(
    "PPO",
    param_space=config,
    run_config=air.RunConfig(stop=stop),
)
results = tuner.fit()

# Tear down the Ray runtime once all trials have finished.
ray.shutdown()
1 Like