Issue creating custom action mask environment

Hi all,
I’m trying to set up an action masking environment by following the examples on GitHub.

from gym.spaces import Dict
from gym import spaces

from ray.rllib.models.tf.fcnet import FullyConnectedNetwork
from ray.rllib.models.tf.tf_modelv2 import TFModelV2
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC
from ray.rllib.utils.framework import try_import_tf, try_import_torch
from ray.rllib.utils.torch_utils import FLOAT_MIN

tf1, tf, tfv = try_import_tf()
import tensorflow as tf_real


def find_shape(obs):
    n = 0
    for i in obs:
        if isinstance(i, spaces.Box):
            n += i.shape[0]
        elif isinstance(i, spaces.Discrete):
            n += i.n
        elif isinstance(i, spaces.MultiDiscrete):
            n += i.nvec.sum()
        else:
            raise TypeError
    return (n,)
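# Quick sanity check of what find_shape computes (hypothetical example spaces):
#   find_shape([spaces.Discrete(4), spaces.Box(low=0, high=1, shape=(3,))])
#   returns (7,)  # 4 one-hot slots for the Discrete plus 3 Box values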

class ActionMaskModel(TFModelV2):
    """Model that handles simple discrete action masking.
    This assumes the outputs are logits for a single Categorical action dist.
    Getting this to work with a more complex output (e.g., if the action space
    is a tuple of several distributions) is also possible but left as an
    exercise to the reader.
    """

    def __init__(
        self, obs_space, action_space, num_outputs, model_config, name, **kwargs
    ):

        orig_space = getattr(obs_space, "original_space", obs_space)
        assert (
            isinstance(orig_space, Dict)
            and "action_mask" in orig_space.spaces
            and "observations" in orig_space.spaces
        )

        super().__init__(obs_space, action_space, num_outputs, model_config, name)

        # The Tuple space's shape is None by default; calculate the flattened shape of its contents and update it accordingly
        tmp_shape = orig_space["observations"]
        tmp_shape.shape = find_shape(tmp_shape)
        self.internal_model = FullyConnectedNetwork(
            tmp_shape,
            action_space,
            num_outputs,
            model_config,
            name + "_internal",
        )

        # disable action masking --> will likely lead to invalid actions
        self.no_masking = model_config["custom_model_config"].get("no_masking", False)

    def forward(self, input_dict, state, seq_lens):
        # Extract the available actions tensor from the observation.
        action_mask = input_dict["obs"]["action_mask"]

        # Compute the unmasked logits.

        # tmp_stack = tf_real.concat(input_dict["obs"]["observations"], axis = 1)
        logits, _ = self.internal_model({"obs": input_dict["obs"]["observations"]})

        # If action masking is disabled, directly return unmasked logits
        if self.no_masking:
            return logits, state

        # Convert action_mask into a [0.0 || -inf]-type mask.
        # inf_mask = tf.maximum(tf.math.log(action_mask), tf.float32.min)
        # masked_logits = logits + inf_mask
        masked_logits = logits
        # Return masked logits.
        return masked_logits, state

    def value_function(self):
        return self.internal_model.value_function()

This is a modified version of this file from GitHub: https://github.com/ray-project/ray/blob/master/rllib/examples/models/action_mask_model.py

The code crashes on the following line:
logits, _ = self.internal_model({"obs": input_dict["obs"]["observations"]})
and prints the following error:

Traceback (most recent call last):
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\agents\trainer.py", line 892, in setup
    self._init(self.config, self.env_creator)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\agents\trainer.py", line 1021, in _init
    raise NotImplementedError
NotImplementedError

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:/Users/destr/PycharmProjects/Underlords/code/policy_server.py", line 310, in <module>
    trainer = PPOTrainer(config=DEFAULT_CONFIG, env= ActionMaskEnv)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\agents\trainer.py", line 826, in __init__
    super().__init__(
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\tune\trainable.py", line 142, in __init__
    self.setup(copy.deepcopy(self.config))
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\agents\trainer.py", line 907, in setup
    self.workers = self._make_workers(
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\agents\trainer.py", line 2161, in _make_workers
    return WorkerSet(
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\evaluation\worker_set.py", line 161, in __init__
    self._local_worker = self._make_worker(
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\evaluation\worker_set.py", line 553, in _make_worker
    worker = cls(
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 613, in __init__
    self._build_policy_map(
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 1698, in _build_policy_map
    self.policy_map.create_policy(
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\policy\policy_map.py", line 140, in create_policy
    self[policy_id] = class_(
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\policy\tf_policy_template.py", line 256, in __init__
    DynamicTFPolicy.__init__(
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\policy\dynamic_tf_policy.py", line 360, in __init__
    dist_inputs, self._state_out = self.model(self._input_dict)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\models\modelv2.py", line 251, in __call__
    res = self.forward(restored, state or [], seq_lens)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\models\tf\recurrent_net.py", line 221, in forward
    wrapped_out, _ = self._wrapped_forward(input_dict, [], None)
  File "C:\Users\destr\PycharmProjects\Underlords\code\mask_model.py", line 71, in forward
    logits, _ = self.internal_model({"obs": input_dict["obs"]["observations"]})
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\models\modelv2.py", line 251, in __call__
    res = self.forward(restored, state or [], seq_lens)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\models\tf\fcnet.py", line 146, in forward
    model_out, self._value_out = self.base_model(input_dict["obs_flat"])
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\engine\base_layer_v1.py", line 760, in __call__
    input_spec.assert_input_compatibility(self.input_spec, inputs,
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\engine\input_spec.py", line 204, in assert_input_compatibility
    raise ValueError('Layer ' + layer_name + ' expects ' +
ValueError: Layer model expects 1 input(s), but it received 56 input tensors. Inputs received: [<tf.Tensor 'default_policy/Reshape_2:0' shape=(?, 9) dtype=float32>, <tf.Tensor 'default_policy/Reshape_3:0' shape=(?, 1) dtype=float32>, <tf.Tensor 'default_policy/Reshape_4:0' shape=(?, 1) dtype=float32>, <tf.Tensor 'default_policy/Reshape_5:0' shape=(?, 1) dtype=float32>, <tf.Tensor 'default_policy/Reshape_6:0' shape=(?, 1) dtype=float32>, <tf.Tensor 'default_policy/Reshape_7:0' shape=(?, 1) dtype=float32>, <tf.Tensor 'default_policy/Reshape_8:0' shape=(?, 2) dtype=float32>, <tf.Tensor 'default_policy/Reshape_9:0' shape=(?, 2) dtype=float32>, <tf.Tensor 'default_policy/Reshape_10:0' shape=(?, 6) dtype=float32>, <tf.Tensor 'default_policy/Reshape_11:0' shape=(?, 13) dtype=float32>, <tf.Tensor 'default_policy/Reshape_12:0' shape=(?, 13) dtype=float32>, <tf.Tensor 'default_policy/Reshape_13:0' shape=(?, 3) dtype=float32>, <tf.Tensor 'default_policy/Reshape_14:0' shape=(?, 2) dtype=float32>, <tf.Tensor 'default_policy/Reshape_15:0' shape=(?, 1) dtype=float32>, <tf.Tensor 'default_policy/Reshape_16:0' shape=(?, 435) dtype=float32>, <tf.Tensor 'default_policy/Reshape_17:0' shape=(?, 19) dtype=float32>, <tf.Tensor 'default_policy/Reshape_18:0' shape=(?, 19) dtype=float32>, <tf.Tensor 'default_policy/Reshape_19:0' shape=(?, 27) dtype=float32>, <tf.Tensor 'default_policy/Reshape_20:0' shape=(?, 115) dtype=float32>, <tf.Tensor 'default_policy/Reshape_21:0' shape=(?, 115) dtype=float32>, <tf.Tensor 'default_policy/Reshape_22:0' shape=(?, 115) dtype=float32>, <tf.Tensor 'default_policy/Reshape_23:0' shape=(?, 115) dtype=float32>, <tf.Tensor 'default_policy/Reshape_24:0' shape=(?, 115) dtype=float32>, <tf.Tensor 'default_policy/Reshape_25:0' shape=(?, 115) dtype=float32>, <tf.Tensor 'default_policy/Reshape_26:0' shape=(?, 115) dtype=float32>, <tf.Tensor 'default_policy/Reshape_27:0' shape=(?, 115) dtype=float32>, <tf.Tensor 'default_policy/Reshape_28:0' shape=(?, 115) dtype=float32>, <tf.Tensor 'default_policy/Reshape_29:0' shape=(?, 115) dtype=float32>, <tf.Tensor 'default_policy/Reshape_30:0' shape=(?, 115) dtype=float32>, <tf.Tensor 'default_policy/Reshape_31:0' shape=(?, 115) dtype=float32>, <tf.Tensor 'default_policy/Reshape_32:0' shape=(?, 115) dtype=float32>, <tf.Tensor 'default_policy/Reshape_33:0' shape=(?, 115) dtype=float32>, <tf.Tensor 'default_policy/Reshape_34:0' shape=(?, 115) dtype=float32>, <tf.Tensor 'default_policy/Reshape_35:0' shape=(?, 115) dtype=float32>, <tf.Tensor 'default_policy/Reshape_36:0' shape=(?, 115) dtype=float32>, <tf.Tensor 'default_policy/Reshape_37:0' shape=(?, 115) dtype=float32>, <tf.Tensor 'default_policy/Reshape_38:0' shape=(?, 115) dtype=float32>, <tf.Tensor 'default_policy/Reshape_39:0' shape=(?, 32) dtype=float32>, <tf.Tensor 'default_policy/Reshape_40:0' shape=(?, 92) dtype=float32>, <tf.Tensor 'default_policy/Reshape_41:0' shape=(?, 92) dtype=float32>, <tf.Tensor 'default_policy/Reshape_42:0' shape=(?, 92) dtype=float32>, <tf.Tensor 'default_policy/Reshape_43:0' shape=(?, 92) dtype=float32>, <tf.Tensor 'default_policy/Reshape_44:0' shape=(?, 92) dtype=float32>, <tf.Tensor 'default_policy/Reshape_45:0' shape=(?, 92) dtype=float32>, <tf.Tensor 'default_policy/Reshape_46:0' shape=(?, 92) dtype=float32>, <tf.Tensor 'default_policy/Reshape_47:0' shape=(?, 92) dtype=float32>, <tf.Tensor 'default_policy/Reshape_48:0' shape=(?, 92) dtype=float32>, <tf.Tensor 'default_policy/Reshape_49:0' shape=(?, 92) dtype=float32>, <tf.Tensor 'default_policy/Reshape_50:0' shape=(?, 
210) dtype=float32>, <tf.Tensor 'default_policy/Reshape_51:0' shape=(?, 9) dtype=float32>, <tf.Tensor 'default_policy/Reshape_52:0' shape=(?, 1) dtype=float32>, <tf.Tensor 'default_policy/Reshape_53:0' shape=(?, 1) dtype=float32>, <tf.Tensor 'default_policy/Reshape_54:0' shape=(?, 1) dtype=float32>, <tf.Tensor 'default_policy/Reshape_55:0' shape=(?, 10) dtype=float32>, <tf.Tensor 'default_policy/Reshape_56:0' shape=(?, 810) dtype=float32>, <tf.Tensor 'default_policy/Reshape_57:0' shape=(?, 120) dtype=float32>]

Process finished with exit code 1

I’ve tried to fix that line with the following:

        tmp_stack = tf_real.concat(input_dict["obs"]["observations"], axis = 1)
        logits, _ = self.internal_model(tmp_stack)

but get the following error:

Traceback (most recent call last):
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\agents\trainer.py", line 892, in setup
    self._init(self.config, self.env_creator)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\agents\trainer.py", line 1021, in _init
    raise NotImplementedError
NotImplementedError

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:/Users/destr/PycharmProjects/Underlords/code/policy_server.py", line 310, in <module>
    trainer = PPOTrainer(config=DEFAULT_CONFIG, env= ActionMaskEnv)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\agents\trainer.py", line 826, in __init__
    super().__init__(
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\tune\trainable.py", line 142, in __init__
    self.setup(copy.deepcopy(self.config))
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\agents\trainer.py", line 907, in setup
    self.workers = self._make_workers(
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\agents\trainer.py", line 2161, in _make_workers
    return WorkerSet(
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\evaluation\worker_set.py", line 161, in __init__
    self._local_worker = self._make_worker(
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\evaluation\worker_set.py", line 553, in _make_worker
    worker = cls(
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 613, in __init__
    self._build_policy_map(
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 1698, in _build_policy_map
    self.policy_map.create_policy(
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\policy\policy_map.py", line 140, in create_policy
    self[policy_id] = class_(
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\policy\tf_policy_template.py", line 256, in __init__
    DynamicTFPolicy.__init__(
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\policy\dynamic_tf_policy.py", line 360, in __init__
    dist_inputs, self._state_out = self.model(self._input_dict)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\models\modelv2.py", line 251, in __call__
    res = self.forward(restored, state or [], seq_lens)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\models\tf\recurrent_net.py", line 221, in forward
    wrapped_out, _ = self._wrapped_forward(input_dict, [], None)
  File "C:\Users\destr\PycharmProjects\Underlords\code\mask_model.py", line 70, in forward
    logits, _ = self.internal_model(tmp_stack)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\models\modelv2.py", line 216, in __call__
    restored = input_dict.copy()
AttributeError: 'Tensor' object has no attribute 'copy'

Any advice is appreciated, thank you.


Hi @Ramie_Yahya,

The forward method of FullyConnectedNetwork expects the input to be a dictionary with the inputs under the key obs_flat. You are putting them under obs.
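
For reference, this is roughly what FullyConnectedNetwork.forward does (paraphrased from the fcnet.py frame in the traceback above); it only ever reads the obs_flat key:

def forward(self, input_dict, state, seq_lens):
    # RLlib's default fully connected net feeds the flattened observation
    # tensor into its Keras base model and ignores the structured "obs" entry.
    model_out, self._value_out = self.base_model(input_dict["obs_flat"])
    return model_out, state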


Hi Mannyv,
Thanks for the quick response. Following your advice I’ve tried this modification:

        tmp_stack = tf_real.concat(input_dict["obs"]["observations"], axis = 1)
        tmp_dict = input_dict
        tmp_dict["obs_flat"] = tmp_stack
        logits, _ = self.internal_model(tmp_dict)

However I received the following error on execution:

Traceback (most recent call last):
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\agents\trainer.py", line 892, in setup
    self._init(self.config, self.env_creator)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\agents\trainer.py", line 1021, in _init
    raise NotImplementedError
NotImplementedError

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\contextlib.py", line 131, in __exit__
    self.gen.throw(type, value, traceback)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\framework\ops.py", line 5588, in get_controller
    yield g
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\policy\policy_map.py", line 140, in create_policy
    self[policy_id] = class_(
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\policy\tf_policy_template.py", line 256, in __init__
    DynamicTFPolicy.__init__(
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\policy\dynamic_tf_policy.py", line 360, in __init__
    dist_inputs, self._state_out = self.model(self._input_dict)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\models\modelv2.py", line 251, in __call__
    res = self.forward(restored, state or [], seq_lens)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\models\tf\recurrent_net.py", line 221, in forward
    wrapped_out, _ = self._wrapped_forward(input_dict, [], None)
  File "C:\Users\destr\PycharmProjects\Underlords\code\mask_model.py", line 71, in forward
    logits, _ = self.internal_model(tmp_dict)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\models\modelv2.py", line 239, in __call__
    restored["obs"] = restore_original_dimensions(
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\models\modelv2.py", line 402, in restore_original_dimensions
    return _unpack_obs(obs, original_space, tensorlib=tensorlib)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\models\modelv2.py", line 434, in _unpack_obs
    elif len(obs.shape) < 2 or obs.shape[-1] != prep.shape[0]:
AttributeError: 'collections.OrderedDict' object has no attribute 'shape'

Process finished with exit code -1

I’m not exactly sure how to format the dictionary. Your help is appreciated.
Thank you

@Ramie_Yahya,

I think I led you astray. Looking back at your original post, I think you were on the right track. If you keep the same code you just posted but put it under "obs" instead of "obs_flat", does it not work?

I just tried that by doing the following:

        tmp_stack = tf_real.concat(input_dict["obs"]["observations"], axis = 1)
        tmp_dict = input_dict
        tmp_dict["obs"] = tmp_stack
        logits, _ = self.internal_model(tmp_dict)

I get the following error message:

Traceback (most recent call last):
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\agents\trainer.py", line 892, in setup
    self._init(self.config, self.env_creator)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\agents\trainer.py", line 1021, in _init
    raise NotImplementedError
NotImplementedError

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\contextlib.py", line 131, in __exit__
    self.gen.throw(type, value, traceback)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\framework\ops.py", line 5588, in get_controller
    yield g
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\policy\policy_map.py", line 140, in create_policy
    self[policy_id] = class_(
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\policy\tf_policy_template.py", line 256, in __init__
    DynamicTFPolicy.__init__(
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\policy\dynamic_tf_policy.py", line 438, in __init__
    self._initialize_loss_from_dummy_batch(auto_remove_unneeded_view_reqs=True)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\policy\dynamic_tf_policy.py", line 714, in _initialize_loss_from_dummy_batch
    _ = self.postprocess_trajectory(dummy_batch)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\policy\tf_policy_template.py", line 286, in postprocess_trajectory
    return postprocess_fn(self, sample_batch, other_agent_batches, episode)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\evaluation\postprocessing.py", line 178, in compute_gae_for_sample_batch
    last_r = policy._value(**input_dict)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\utils\tf_utils.py", line 384, in call
    symbolic_out[0] = fn(*args_placeholders, **kwargs_placeholders)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\agents\ppo\ppo_tf_policy.py", line 322, in value
    model_out, _ = self.model(input_dict)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\models\modelv2.py", line 251, in __call__
    res = self.forward(restored, state or [], seq_lens)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\models\tf\recurrent_net.py", line 262, in forward
    return super().forward(input_dict, state, seq_lens)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\models\tf\recurrent_net.py", line 73, in forward
    output, new_state = self.forward_rnn(
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\models\tf\recurrent_net.py", line 268, in forward_rnn
    model_out, self._value_out, h, c = self._rnn_model([inputs, seq_lens] + state)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\engine\base_layer_v1.py", line 760, in __call__
    input_spec.assert_input_compatibility(self.input_spec, inputs,
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\engine\input_spec.py", line 271, in assert_input_compatibility
    raise ValueError('Input ' + str(input_index) +
ValueError: Input 0 is incompatible with layer model_1: expected shape=(None, None, 4856), found shape=(1, 1, 1280)

Process finished with exit code -1

Again, thanks for your help.

@Ramie_Yahya

No problem, that is some progress. It made it through your model and into the LSTM wrapper. I'm not sure why the shapes are wrong there, but I do have some other bad news.

Action masking won't work the way you are planning to do it. When you use an LSTM wrapper, it overrides the outputs: the outputs of the custom model you are writing become the inputs to the LSTM, and the LSTM then returns the logits directly.

If you want to do action masking and use an LSTM, you are going to have to write a custom RNN model instead, and then run with use_lstm=False.
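
Roughly, the wiring would look like this (just a sketch; "masked_rnn" and MaskedRNNModel are placeholder names for your own model registration and class):

from ray.rllib.models import ModelCatalog

# Register the custom RNN model (which applies the mask itself) ...
ModelCatalog.register_custom_model("masked_rnn", MaskedRNNModel)

config = {
    "model": {
        "custom_model": "masked_rnn",
        # ... and do NOT also enable the built-in wrapper, since it would
        # override the masked logits:
        "use_lstm": False,
    },
}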

Hi mannyv,

I’m working with Ramie here. Can you link a GitHub example of setting up the LSTM with a custom RNN?

@Denys_Ashikhin,

You would apply the masking to the output of the final layer after the LSTM ("logits" in the example).

I think I kind of got it.

Using your above example, I see it is extending RecurrentNetwork, which in its forward pass calls forward_rnn:

 def forward(
        self,
        input_dict: Dict[str, TensorType],
        state: List[TensorType],
        seq_lens: TensorType,
    ) -> (TensorType, List[TensorType]):
        """Adds time dimension to batch before sending inputs to forward_rnn().
        You should implement forward_rnn() in your subclass."""
        assert seq_lens is not None
        padded_inputs = input_dict["obs_flat"]
        max_seq_len = tf.shape(padded_inputs)[0] // tf.shape(seq_lens)[0]
        output, new_state = self.forward_rnn(
            add_time_dimension(padded_inputs, max_seq_len=max_seq_len, framework="tf"),
            state,
            seq_lens,
        )
        return tf.reshape(output, [-1, self.num_outputs]), new_state

Correct me if I’m wrong, but we would need to override the forward pass (keeping the RNN as in your example).

And the place where we would apply the action mask logic is right before the return tf.reshape(output, [-1, self.num_outputs]), new_state?

Something like this?:

 def forward(
        self,
        input_dict: Dict[str, TensorType],
        state: List[TensorType],
        seq_lens: TensorType,
    ) -> (TensorType, List[TensorType]):
        """Adds time dimension to batch before sending inputs to forward_rnn().
        You should implement forward_rnn() in your subclass."""
        assert seq_lens is not None
        padded_inputs = input_dict["obs_flat"]
        max_seq_len = tf.shape(padded_inputs)[0] // tf.shape(seq_lens)[0]
        output, new_state = self.forward_rnn(
            add_time_dimension(padded_inputs, max_seq_len=max_seq_len, framework="tf"),
            state,
            seq_lens,
        )
        # action_mask would have to be pulled from the observation dict, e.g.:
        action_mask = input_dict["obs"]["action_mask"]
        inf_mask = tf.maximum(tf.math.log(action_mask), tf.float32.min)
        masked_logits = output + inf_mask
        return tf.reshape(masked_logits, [-1, self.num_outputs]), new_state

?

Thanks once more!


Hey everyone,
great catch @mannyv on the LSTM-wrapping not working with action masking. We are indeed thinking of discontinuing the use_lstm=True flag as it can cause lots of confusion on the user end, not knowing what exactly is happening under the hood. It’s probably always better to let the user write the LSTM/RNN model themselves.

@Denys_Ashikhin and @Ramie_Yahya, yes, you can mask the action logits that should not be selected with tf.float32.min values. This way, they will never be sampled by your policy’s exploration component (e.g. StochasticSampling for PPO/APPO/IMPALA/etc., or EpsilonGreedy for DQN).
The code you wrote looks good. Can you share whether this is working now?
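
For intuition, here is a toy illustration of the [0.0 || -inf] masking (made-up logits and mask values):

logits = tf.constant([[1.0, 2.0, 0.5]])
action_mask = tf.constant([[1.0, 0.0, 1.0]])  # second action is invalid
inf_mask = tf.maximum(tf.math.log(action_mask), tf.float32.min)
probs = tf.nn.softmax(logits + inf_mask)
# probs is roughly [[0.62, 0.0, 0.38]], so the invalid action is never sampled.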


Thanks for your response.
We are currently trying to set up the custom RNN model without action masking and are running into the following error:

Traceback (most recent call last):
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\agents\trainer.py", line 892, in setup
    self._init(self.config, self.env_creator)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\agents\trainer.py", line 1021, in _init
    raise NotImplementedError
NotImplementedError

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\contextlib.py", line 131, in __exit__
    self.gen.throw(type, value, traceback)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\framework\ops.py", line 5588, in get_controller
    yield g
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\policy\policy_map.py", line 140, in create_policy
    self[policy_id] = class_(
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\policy\tf_policy_template.py", line 256, in __init__
    DynamicTFPolicy.__init__(
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\policy\dynamic_tf_policy.py", line 438, in __init__
    self._initialize_loss_from_dummy_batch(auto_remove_unneeded_view_reqs=True)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\policy\dynamic_tf_policy.py", line 714, in _initialize_loss_from_dummy_batch
    _ = self.postprocess_trajectory(dummy_batch)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\policy\tf_policy_template.py", line 286, in postprocess_trajectory
    return postprocess_fn(self, sample_batch, other_agent_batches, episode)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\evaluation\postprocessing.py", line 178, in compute_gae_for_sample_batch
    last_r = policy._value(**input_dict)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\utils\tf_utils.py", line 384, in call
    symbolic_out[0] = fn(*args_placeholders, **kwargs_placeholders)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\agents\ppo\ppo_tf_policy.py", line 322, in value
    model_out, _ = self.model(input_dict)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\models\modelv2.py", line 251, in __call__
    res = self.forward(restored, state or [], seq_lens)
  File "C:\Users\destr\PycharmProjects\Underlords\code\mask_rnn_model.py", line 119, in forward
    output, new_state = self.forward_rnn(
  File "C:\Users\destr\PycharmProjects\Underlords\code\mask_rnn_model.py", line 90, in forward_rnn
    model_out, self._value_out, h, c = self.rnn_model([inputs, seq_lens] + state)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\engine\base_layer_v1.py", line 760, in __call__
    input_spec.assert_input_compatibility(self.input_spec, inputs,
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\keras\engine\input_spec.py", line 271, in assert_input_compatibility
    raise ValueError('Input ' + str(input_index) +
ValueError: Input 0 is incompatible with layer model: expected shape=(None, None, 4856), found shape=(1, 1, 4855)

Here is our custom model:


# Imports assumed for this model (following RLlib's recurrent-net example and
# the mask_model.py file posted earlier in this thread):
import numpy as np
from typing import Dict, List

from ray.rllib.models.modelv2 import ModelV2
from ray.rllib.models.tf.recurrent_net import RecurrentNetwork
from ray.rllib.policy.rnn_sequencing import add_time_dimension
from ray.rllib.utils.annotations import override
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.typing import TensorType

tf1, tf, tfv = try_import_tf()
import tensorflow as tf_real


class RNNModel(RecurrentNetwork):
    """Example of using the Keras functional API to define a RNN model."""

    def __init__(
        self,
        obs_space,
        action_space,
        num_outputs,
        model_config,
        name,
        hiddens_size=1280,
        cell_size=512,
    ):
        model_config["max_seq_len"] = 32
        model_config["vf_share_layers"] = False
        model_config["fcnet_activation"] = 'relu'

        super(RNNModel, self).__init__(
            obs_space, action_space, num_outputs, model_config, name
        )
        self.cell_size = cell_size


        # Define input layers
        input_layer = tf.keras.layers.Input(
            shape=(None, obs_space.shape[0]), name="inputs"
        )
        state_in_h = tf.keras.layers.Input(shape=(cell_size,), name="h")
        state_in_c = tf.keras.layers.Input(shape=(cell_size,), name="c")
        seq_in = tf.keras.layers.Input(shape=(), name="seq_in", dtype=tf.int32)

        # Preprocess observation with a hidden layer and send to LSTM cell
        dense0 = tf.keras.layers.Dense(hiddens_size, activation=tf.nn.relu, name = "dense0")(input_layer)

        dense1 = tf.keras.layers.Dense(
            hiddens_size, activation=tf.nn.relu, name="dense1"
        )(dense0)
        lstm_out, state_h, state_c = tf.keras.layers.LSTM(
            cell_size, return_sequences=True, return_state=True, name="lstm"
        )(
            inputs=dense1,
            mask=tf.sequence_mask(seq_in),
            initial_state=[state_in_h, state_in_c],
        )

        # Postprocess LSTM output with another hidden layer and compute values
        logits = tf.keras.layers.Dense(
            self.num_outputs, activation=tf.keras.activations.linear, name="logits"
        )(lstm_out)
        values = tf.keras.layers.Dense(1, activation=None, name="values")(lstm_out)

        # Create the RNN model
        self.rnn_model = tf.keras.Model(
            inputs=[input_layer, seq_in, state_in_h, state_in_c],
            outputs=[logits, values, state_h, state_c],
        )
        self.rnn_model.summary()

    @override(RecurrentNetwork)
    def forward_rnn(self, inputs, state, seq_lens):
        model_out, self._value_out, h, c = self.rnn_model([inputs, seq_lens] + state)
        return model_out, [h, c]

    @override(ModelV2)
    def get_initial_state(self):
        return [
            np.zeros(self.cell_size, np.float32),
            np.zeros(self.cell_size, np.float32),
        ]

    @override(ModelV2)
    def value_function(self):
        return tf.reshape(self._value_out, [-1])

    @override(ModelV2)
    def forward(
            self,
            input_dict: Dict[str, TensorType],
            state: List[TensorType],
            seq_lens: TensorType,
    ) -> (TensorType, List[TensorType]):
        """Adds time dimension to batch before sending inputs to forward_rnn().
        You should implement forward_rnn() in your subclass."""
        assert seq_lens is not None
        # padded_inputs = input_dict["obs_flat"]
        tmp_stack = tf_real.concat(input_dict["obs"]["observations"], axis=1)

        padded_inputs = tmp_stack
        max_seq_len = tf.shape(padded_inputs)[0] // tf.shape(seq_lens)[0]
        output, new_state = self.forward_rnn(
            add_time_dimension(padded_inputs, max_seq_len=max_seq_len, framework="tf"),
            state,
            seq_lens,
        )
        return tf.reshape(output, [-1, self.num_outputs]), new_state

Also the environment we are using:

from gym.spaces import Box, Dict, Discrete, MultiDiscrete
import numpy as np

from ray.rllib.examples.env.random_env import RandomEnv


class ActionMaskEnv(RandomEnv):
    """A randomly acting environment that publishes an action-mask each step."""

    def __init__(self, config):
        super().__init__(config)
        # Masking only works for discrete action components; this env uses a
        # MultiDiscrete action space.
        assert isinstance(self.action_space, MultiDiscrete)
        # Add action_mask to observations.
        self.observation_space = Dict(
            {
                "action_mask": Box(low=np.array([0]), high=np.array([1]), dtype=np.float32),
                "observations": self.observation_space,
            }
        )
        self.valid_actions = None

    def reset(self):
        obs = super().reset()
        self._fix_action_mask(obs)
        return obs

    def step(self, action):
        # Check whether action is valid.
        if not self.valid_actions[action]:
            raise ValueError(
                f"Invalid action sent to env! " f"valid_actions={self.valid_actions}"
            )

        obs, rew, done, info = super().step(action)

        self._fix_action_mask(obs)
        return obs, rew, done, info

    def _fix_action_mask(self, obs):
        # Fix action-mask: everything larger than 0.5 becomes 1.0, everything else 0.0.
        self.valid_actions = np.round(obs["action_mask"])
        obs["action_mask"] = self.valid_actions

Again, any help is much appreciated.
Thank you

Hi @Ramie_Yahya,

The size of obs_space.shape[0] is the sum of your features plus the action mask. You are only passing the features through the model, so you need to subtract the size of your action mask from that shape.
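
Concretely, something along these lines in RNNModel.__init__ (just a sketch; it assumes the Dict observation space with "action_mask" and "observations" keys from the env above):

orig_space = getattr(obs_space, "original_space", obs_space)
mask_size = int(np.prod(orig_space["action_mask"].shape))

# Only the flattened "observations" are fed through the network, so the
# input layer should exclude the action-mask dimensions:
input_layer = tf.keras.layers.Input(
    shape=(None, obs_space.shape[0] - mask_size), name="inputs"
)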

Hi Manny,
Your suggestion worked, but now we’re running into another issue. I believe it stems from this line of code:

        tmp_stack = tf_real.concat(input_dict["obs"]["observations"], axis=1)

        padded_inputs = tmp_stack

        max_seq_len = tf.shape(padded_inputs)[0] // tf.shape(seq_lens)[0]
        output, new_state = self.forward_rnn(
            add_time_dimension(padded_inputs, max_seq_len=max_seq_len, framework="tf"),
            state,
            seq_lens,
        )
        return tf.reshape(output, [-1, self.num_outputs]), new_state

In particular, this line: tmp_stack = tf_real.concat(input_dict["obs"]["observations"], axis=1).

I’m not sure this is the correct way to combine the observation space. The original observation is as follows:

The exact issue we’re getting:

WARNING:urllib3.connectionpool:Failed to parse headers (url=http://127.0.0.1:55556/): [MissingHeaderBodySeparatorDefect(), FirstHeaderLineIsContinuationDefect('  File "C:\\Users\\destr\\AppData\\Local\\Programs\\Python\\Python38\\lib\\site-packages\\ray\\rllib\\env\\policy_server_input.py", line 184, in do_POST\n'), FirstHeaderLineIsContinuationDefect('    response = self.execute_command(parsed_input)\n'), FirstHeaderLineIsContinuationDefect('  File "C:\\Users\\destr\\AppData\\Local\\Programs\\Python\\Python38\\lib\\site-packages\\ray\\rllib\\env\\policy_server_input.py", line 220, in execute_command\n'), FirstHeaderLineIsContinuationDefect('    response["action"] = child_rollout_worker.env.get_action(\n'), FirstHeaderLineIsContinuationDefect('  File "C:\\Users\\destr\\AppData\\Local\\Programs\\Python\\Python38\\lib\\site-packages\\ray\\rllib\\env\\external_env.py", line 132, in get_action\n'), FirstHeaderLineIsContinuationDefect('    return episode.wait_for_action(observation)\n'), FirstHeaderLineIsContinuationDefect('  File "C:\\Users\\destr\\AppData\\Local\\Programs\\Python\\Python38\\lib\\site-packages\\ray\\rllib\\env\\external_env.py", line 289, in wait_for_action\n'), FirstHeaderLineIsContinuationDefect('    return self.action_queue.get(True, timeout=300.0)\n'), FirstHeaderLineIsContinuationDefect('  File "C:\\Users\\destr\\AppData\\Local\\Programs\\Python\\Python38\\lib\\queue.py", line 178, in get\n'), FirstHeaderLineIsContinuationDefect('    raise Empty\n')], unparsed data: '_queue.Empty\n\r\n'
Traceback (most recent call last):
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\connectionpool.py", line 446, in _make_request
    assert_header_parsing(httplib_response.msg)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\urllib3\util\response.py", line 71, in assert_header_parsing
    raise HeaderParsingError(defects=defects, unparsed_data=unparsed_data)
urllib3.exceptions.HeaderParsingError: [MissingHeaderBodySeparatorDefect(), FirstHeaderLineIsContinuationDefect('  File "C:\\Users\\destr\\AppData\\Local\\Programs\\Python\\Python38\\lib\\site-packages\\ray\\rllib\\env\\policy_server_input.py", line 184, in do_POST\n'), FirstHeaderLineIsContinuationDefect('    response = self.execute_command(parsed_input)\n'), FirstHeaderLineIsContinuationDefect('  File "C:\\Users\\destr\\AppData\\Local\\Programs\\Python\\Python38\\lib\\site-packages\\ray\\rllib\\env\\policy_server_input.py", line 220, in execute_command\n'), FirstHeaderLineIsContinuationDefect('    response["action"] = child_rollout_worker.env.get_action(\n'), FirstHeaderLineIsContinuationDefect('  File "C:\\Users\\destr\\AppData\\Local\\Programs\\Python\\Python38\\lib\\site-packages\\ray\\rllib\\env\\external_env.py", line 132, in get_action\n'), FirstHeaderLineIsContinuationDefect('    return episode.wait_for_action(observation)\n'), FirstHeaderLineIsContinuationDefect('  File "C:\\Users\\destr\\AppData\\Local\\Programs\\Python\\Python38\\lib\\site-packages\\ray\\rllib\\env\\external_env.py", line 289, in wait_for_action\n'), FirstHeaderLineIsContinuationDefect('    return self.action_queue.get(True, timeout=300.0)\n'), FirstHeaderLineIsContinuationDefect('  File "C:\\Users\\destr\\AppData\\Local\\Programs\\Python\\Python38\\lib\\queue.py", line 178, in get\n'), FirstHeaderLineIsContinuationDefect('    raise Empty\n')], unparsed data: '_queue.Empty\n\r\n'
2022-02-10 20:48:23,703 ERROR policy_client.py:243 -- Request failed Server: SimpleHTTP/0.6 Python/3.8.5
Date: Fri, 11 Feb 2022 01:48:23 GMT
Connection: close
Content-Type: text/html;charset=utf-8
Content-Length: 1374

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
        "http://www.w3.org/TR/html4/strict.dtd">
<html>
    <head>
        <meta http-equiv="Content-Type" content="text/html;charset=utf-8">
        <title>Error response</title>
    </head>
    <body>
        <h1>Error response</h1>
        <p>Error code: 500</p>
        <p>Message: Traceback (most recent call last):
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\env\policy_server_input.py", line 184, in do_POST
    response = self.execute_command(parsed_input)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\env\policy_server_input.py", line 220, in execute_command
    response["action"] = child_rollout_worker.env.get_action(
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\env\external_env.py", line 132, in get_action
    return episode.wait_for_action(observation)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\env\external_env.py", line 289, in wait_for_action
    return self.action_queue.get(True, timeout=300.0)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\queue.py", line 178, in get
    raise Empty
_queue.Empty
.</p>
        <p>Error code explanation: 500 - Server got itself in trouble.</p>
    </body>
</html>
: {'command': 'GET_ACTION', 'observation': (0, [1.0], [0.05], [0.3], [0.02], [0.0], 0, 0, 0, [0, 0], 0, 2, 0, [0.9476190476190476], [8, 19, 0, 1, 1, 16, 0, 1, 7, 9, 0, 1, 17, 26, 0, 1, 17, 26, 0, 1], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0], 2, [1.0], [0.0], [0.1], [1, 1, 1, 0, 0, 0, 0, 0, 0, 0], [20, 26, 0, 9, 26, 0, 7, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [4, 0, 3, 0, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), 'episode_id': 'b2122549517c4c6fa7b2b0ff64d7ba6a'}
Traceback (most recent call last):
  File ".\policy_client.py", line 236, in <module>
    action = client.get_action(episode_id=episode_id, observation=gameObservation)
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\env\policy_client.py", line 135, in get_action
    return self._send(
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\ray\rllib\env\policy_client.py", line 244, in _send
    response.raise_for_status()
  File "C:\Users\destr\AppData\Local\Programs\Python\Python38\lib\site-packages\requests\models.py", line 941, in raise_for_status
    raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 500 Server Error: Traceback (most recent call last): for url: http://127.0.0.1:55556/

In the past, we’ve seen this issue all the time due to a mismatch between the sent observation and the defined observation space. However, we tested this using:

test = (0, [1.0], [0.05], [0.3], [0.02], [0.0], 0, 0, 0, [0, 0], 0, 2, 0, [0.9476190476190476], [8, 19, 0, 1, 1, 16, 0, 1, 7, 9, 0, 1, 17, 26, 0, 1, 17, 26, 0, 1], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0], 2, [1.0], [0.0], [0.1], [1, 1, 1, 0, 0, 0, 0, 0, 0, 0], [20, 26, 0, 9, 26, 0, 7, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [4, 0, 3, 0, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
print(obs_space.contains(test))

Where test is the observation and obs_space is the defined space. This prints True, so it is not a mismatch in that sense.

Could you advise if this is the correct way to condense the observation into a single tensor?

@mannyv @sven1977
Any suggestions?

@Denys_Ashikhin @Ramie_Yahya
Did you ever get your combined LSTM+action mask model working? If so, would you mind posting the code?