How to integrate a custom FIFO policy?

I guess I found the answer with the help of this RLlib example 👍:

from typing import Type

from ray.rllib.agents.trainer import Trainer
from ray.rllib.policy.policy import Policy
from ray.rllib.utils.typing import ModelWeights, TrainerConfigDict


class FIFO(Policy):
    """Skeleton for a first-in-first-out policy.

    The action selection is still a placeholder (see the TODO in
    ``compute_actions``); learning is a no-op and no weights are exposed.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``Policy`` and set up exploration."""
        super().__init__(*args, **kwargs)
        # This policy carries no model object.
        self.model = None
        self.exploration = self._create_exploration()

    def compute_actions(
        self,
        obs_batch,
        state_batches=None,
        prev_action_batch=None,
        prev_reward_batch=None,
        info_batch=None,
        episodes=None,
        **kwargs,
    ):
        """Placeholder: hands back ``Ellipsis`` until FIFO logic exists."""
        # TODO: Should return action for transport order according to
        # fifo logic
        return Ellipsis

    def learn_on_batch(self, samples):
        """Hook for learning code; currently only returns empty stats."""
        # Put your learning code here and fill in the stats it produces.
        stats = {}
        return stats

    def get_weights(self) -> ModelWeights:
        """Return an empty mapping, as there are no weights to save."""
        return dict()


class FIFOTrainer(Trainer):
    """Trainer whose default policy class is ``FIFO``."""

    def get_default_policy_class(
        self,
        config: TrainerConfigDict,
    ) -> Type[Policy]:
        """Return ``FIFO``; the passed ``config`` is not consulted."""
        policy_cls = FIFO
        return policy_cls
1 Like