tf_modelv3.py CartPole example and use of previous actions

I have just prepared a CartPole test for tf_modelv3.py:

I would like to ask if it is possible to use previous actions in the call method, and if so, how:

def call(self, sample_batch):
    inputs = sample_batch["obs"]
    dense_out = self.dense(inputs)
    # Fold the flat [B*T, features] tensor into [B, T, features]
    # sequence chunks for the LSTM, based on the sequence lengths.
    B = tf.shape(sample_batch[SampleBatch.SEQ_LENS])[0]
    lstm_in = tf.reshape(dense_out, [B, -1, dense_out.shape.as_list()[1]])
    lstm_out, h, c = self.lstm(
        inputs=lstm_in,
        mask=tf.sequence_mask(sample_batch[SampleBatch.SEQ_LENS]),
        initial_state=[
            sample_batch["state_in_0"], sample_batch["state_in_1"]
        ],
    )
    # Flatten back to [B*T, cell_size] before the output heads.
    lstm_out = tf.reshape(lstm_out, [-1, lstm_out.shape.as_list()[2]])
    logits = self.logits(lstm_out)
    values = tf.reshape(self.values(lstm_out), [-1])
    return logits, [h, c], {SampleBatch.VF_PREDS: values}
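
Concretely, what I would like to do is something like the sketch below: read the previous actions out of the input dict and feed them into the dense layer alongside the observations. This is just my intent, not working code; the "prev_n_actions" key is the one I register in the view requirements further down, and the one-hot depth of 2 is my assumption for CartPole's Discrete(2) action space:

def call(self, sample_batch):
    obs = sample_batch["obs"]
    # Assumption: "prev_n_actions" is filled in by the ViewRequirement I
    # register in __init__ below; shape would be [batch, num_frames - 1].
    prev_a = tf.one_hot(
        tf.cast(sample_batch["prev_n_actions"], tf.int32),
        depth=2)  # depth=2 assumes CartPole's Discrete(2) action space
    prev_a = tf.reshape(prev_a, [tf.shape(obs)[0], -1])
    inputs = tf.concat([obs, prev_a], axis=-1)
    dense_out = self.dense(inputs)
    # ... LSTM, logits and values exactly as in the call() above ...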

My second question is: what is the meaning of

{SampleBatch.VF_PREDS: values}

and why is it returned by the call method?
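
For context, my current understanding (please correct me if this is wrong) is that SampleBatch.VF_PREDS holds the per-timestep value-function outputs, and that RLlib's postprocessing (compute_gae_for_sample_batch, which also shows up in the traceback below) reads them to compute advantages, roughly along these lines (a simplified sketch of GAE, not RLlib's actual code):

import numpy as np

def gae_sketch(rewards, vf_preds, last_value, gamma=0.99, lam=0.95):
    # vf_preds: the values my model returned under SampleBatch.VF_PREDS.
    values = np.append(vf_preds, last_value)
    # TD residuals: r_t + gamma * V(s_{t+1}) - V(s_t)
    deltas = rewards + gamma * values[1:] - values[:-1]
    advantages = np.zeros_like(rewards, dtype=np.float32)
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = deltas[t] + gamma * lam * running
        advantages[t] = running
    return advantages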

I have tried to change the __init__ to:

class RNNModel(tf.keras.models.Model if tf else object):
    """Example of using the Keras functional API to define an RNN model."""

    def __init__(self,
                 input_space,
                 action_space,
                 num_outputs,
                 *,
                 name="",
                 num_frames=20,
                 hiddens_size=256,
                 cell_size=64):
        super().__init__(name=name)

        self.cell_size = cell_size
        self.num_frames = num_frames

        # Preprocess observation with a hidden layer and send to LSTM cell.
        self.dense = tf.keras.layers.Dense(
            hiddens_size, activation=tf.nn.relu, name="dense1")
        self.lstm = tf.keras.layers.LSTM(
            cell_size, return_sequences=True, return_state=True, name="lstm")

        # Postprocess LSTM output with another hidden layer and compute
        # values.
        self.logits = tf.keras.layers.Dense(
            num_outputs, activation=tf.keras.activations.linear, name="logits")
        self.values = tf.keras.layers.Dense(1, activation=None, name="values")

        self.view_requirements = {
            "prev_n_actions": ViewRequirement(
                data_col="actions",
                shift="-{}:-1".format(self.num_frames - 1),
                space=input_space)
        }
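
As a side note, I was not sure whether the space argument should be the observation space or the action space here. Since data_col="actions", a variant I was also considering is the following (space=action_space is my guess, not something I have verified):

from ray.rllib.policy.view_requirement import ViewRequirement

self.view_requirements = {
    "prev_n_actions": ViewRequirement(
        data_col="actions",
        shift="-{}:-1".format(self.num_frames - 1),
        # Guess: for an "actions" column the space should presumably be
        # the action space rather than the observation space.
        space=action_space)
}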

With the __init__ change above, however, it fails with the following error:

[2021-12-28 10:19:15,831 E 22717 22717] core_worker.cc:1286: Pushed Error with JobID: 01000000 of type: task with message: ray::RolloutWorker.__init__() (pid=22717, ip=192.168.1.38, repr=<ray.rllib.evaluation.rollout_worker.modify_class.<locals>.Class object at 0x7fa8d8344850>)
  File "/home/marco/repos/RL/ray/python/ray/rllib/agents/trainer.py", line 878, in _init
    raise NotImplementedError
NotImplementedError

During handling of the above exception, another exception occurred:

ray::RolloutWorker.__init__() (pid=22717, ip=192.168.1.38, repr=<ray.rllib.evaluation.rollout_worker.modify_class.<locals>.Class object at 0x7fa8d8344850>)
  File "/home/marco/repos/RL/ray/python/ray/rllib/evaluation/rollout_worker.py", line 588, in __init__
    self._build_policy_map(
  File "/home/marco/repos/RL/ray/python/ray/rllib/evaluation/rollout_worker.py", line 1555, in _build_policy_map
    self.policy_map.create_policy(name, orig_cls, obs_space, act_space,
  File "/home/marco/repos/RL/ray/python/ray/rllib/policy/policy_map.py", line 133, in create_policy
    self[policy_id] = class_(
  File "/home/marco/repos/RL/ray/python/ray/rllib/policy/tf_policy_template.py", line 238, in __init__
    DynamicTFPolicy.__init__(
  File "/home/marco/repos/RL/ray/python/ray/rllib/policy/dynamic_tf_policy.py", line 402, in __init__
    self._initialize_loss_from_dummy_batch(
  File "/home/marco/repos/RL/ray/python/ray/rllib/policy/dynamic_tf_policy.py", line 662, in _initialize_loss_from_dummy_batch
    _ = self.postprocess_trajectory(dummy_batch)
  File "/home/marco/repos/RL/ray/python/ray/rllib/policy/tf_policy_template.py", line 269, in postprocess_trajectory
    return postprocess_fn(self, sample_batch, other_agent_batches,
  File "/home/marco/repos/RL/ray/python/ray/rllib/evaluation/postprocessing.py", line 169, in compute_gae_for_sample_batch
    input_dict = sample_batch.get_single_step_input_dict(
  File "/home/marco/repos/RL/ray/python/ray/rllib/policy/sample_batch.py", line 1022, in get_single_step_input_dict
    np.concatenate(
  File "<__array_function__ internals>", line 5, in concatenate
ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s) at time: 1.64068e+09
2021-12-28 10:19:15,832	ERROR actor.py:852 -- Exception raised in creation task: The actor died because of an error raised in its creation task, ray::RolloutWorker.__init__() (pid=22717, ip=192.168.1.38)
  File "/home/marco/repos/RL/ray/python/ray/rllib/agents/trainer.py", line 878, in _init
    raise NotImplementedError
NotImplementedError