Multi-agent PPO with custom model gives actions outside the action space

I am running a multi-agent PPO model in RLlib where the action space is defined as follows:

self.action_space = gym.spaces.Discrete(5)
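For reference, a Discrete(5) space only ever contains the integers 0 through 4, which is easy to verify directly:

import gym

space = gym.spaces.Discrete(5)
assert all(space.sample() in range(5) for _ in range(1000))
assert space.contains(4) and not space.contains(4000)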

The code is very similar to the custom Keras model implementation found here:

When I call the self.client.get_action or self.client.compute_single_action function, I get actions that are not within the action space. I expect actions between 0 and 4, but instead I receive values such as 4000 or 1335.
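For context, a typical query looks roughly like the sketch below (this assumes self.client is RLlib's PolicyClient from the external-application API; the server address and the dummy observation are placeholders):

import numpy as np
from ray.rllib.env.policy_client import PolicyClient

client = PolicyClient("http://localhost:9900")  # placeholder server address
episode_id = client.start_episode()
obs = np.zeros((16, 256, 10), dtype=np.float32)  # dummy observation
action = client.get_action(episode_id, obs)
assert action in range(5)  # for Discrete(5), actions should be the ints 0-4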

Do you have a reproduction script?

Maybe using ray/random_env.py from the ray-project/ray repo on GitHub?
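A minimal repro along those lines might look like the following sketch (module paths and the PPOTrainer/compute_single_action names vary across Ray versions, so adjust to yours):

import gym
import numpy as np
import ray
from ray.rllib.agents.ppo import PPOTrainer
from ray.rllib.examples.env.random_env import RandomEnv

ray.init()
config = {
    "env": RandomEnv,
    "env_config": {
        "action_space": gym.spaces.Discrete(5),
        "observation_space": gym.spaces.Box(
            -1000.0, 1000.0, (16, 256, 10), np.float32),
    },
    "framework": "tf",
}
trainer = PPOTrainer(config=config)
obs = RandomEnv(config["env_config"]).reset()
print(trainer.compute_single_action(obs))  # expect an int in [0, 4]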

import gym
import numpy as np
from ray.rllib.env.multi_agent_env import MultiAgentEnv


class RLAgent_v0(MultiAgentEnv):
    def __init__(self, return_agent_actions=False, part=False):
        super().__init__()
        low = np.full((16, 256, 10), -1000.0, dtype=np.float32)
        high = np.full((16, 256, 10), 1000.0, dtype=np.float32)
        self.obs = []
        # Five discrete actions per agent: valid samples are the ints 0-4.
        self.action_space = gym.spaces.Discrete(5)
        self.observation_space = gym.spaces.Box(
            low=low, high=high, shape=(16, 256, 10), dtype=np.float32
        )
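The snippet omits reset() and step(); in a MultiAgentEnv both must return dicts keyed by agent id. A minimal sketch with a placeholder agent id and trivial dynamics:

    def reset(self):
        # One observation per agent id.
        return {"agent_0": self.observation_space.sample()}

    def step(self, action_dict):
        obs = {aid: self.observation_space.sample() for aid in action_dict}
        rewards = {aid: 0.0 for aid in action_dict}
        dones = {aid: False for aid in action_dict}
        dones["__all__"] = False  # episode-level done flag required by RLlib
        infos = {aid: {} for aid in action_dict}
        return obs, rewards, dones, infos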


import tensorflow as tf

from ray.rllib.models.tf.tf_modelv2 import TFModelV2
from ray.rllib.models.tf.misc import normc_initializer


class KerasModel(TFModelV2):
    """Custom model for policy gradient algorithms."""

    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        super(KerasModel, self).__init__(obs_space, action_space,
                                         num_outputs, model_config, name)

        self.inputs = tf.keras.layers.Input(
            shape=obs_space.shape, name="observations")

        layer_1 = tf.keras.layers.Conv2D(
            2, 3,
            name="layer1",
            padding="same",
            activation=tf.nn.relu,
            kernel_initializer=normc_initializer(1.0))(self.inputs)
        # Flattened conv features; with a (16, 256, 10) input and 2 filters
        # this is 16 * 256 * 2 = 8192 units wide.
        layer_2 = tf.keras.layers.Flatten()(layer_1)
        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=tf.nn.relu,
            kernel_initializer=normc_initializer(0.01))(layer_2)

        # The flattened features are used directly as the policy output.
        self.base_model = tf.keras.Model(self.inputs, [layer_2, value_out])
        self.register_variables(self.base_model.variables)
        self.base_model.summary()

    def forward(self, input_dict, state, seq_lens):
        model_out, self._value_out = self.base_model(input_dict["obs"])
        return model_out, state

    def value_function(self):
        return tf.reshape(self._value_out, [-1])

    def metrics(self):
        return {"foo": tf.constant(42.0)}

This is the environment and model I have defined.
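One thing worth double-checking here: TFModelV2.forward() is expected to return a tensor of shape [BATCH, num_outputs], where num_outputs is 5 for Discrete(5) under PPO (the logits of the Categorical action distribution). The model above instead returns layer_2, which is 16 * 256 * 2 = 8192 units wide, so sampled "actions" could land anywhere in 0-8191, which would be consistent with values like 4000 or 1335. A minimal sketch of a logits head that matches num_outputs:

        # Inside KerasModel.__init__: project the features down to
        # num_outputs logits instead of returning layer_2 directly.
        logits_out = tf.keras.layers.Dense(
            num_outputs,
            name="logits_out",
            activation=None,  # raw logits; RLlib's action dist samples from these
            kernel_initializer=normc_initializer(0.01))(layer_2)
        self.base_model = tf.keras.Model(self.inputs, [logits_out, value_out])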