DQNConfig LSTM assert seq_lens is not None error

I have the following algorithm config with IMPALA, which works just fine:

from functools import partial

from ray.rllib.algorithms import ImpalaConfig

config = (
    ImpalaConfig()
    .environment(env=env_name, disable_env_checking=True)
    .rollouts(
        num_rollout_workers=args.num_rollout_workers,
        num_envs_per_worker=args.num_envs_per_worker,
        rollout_fragment_length=200 * args.num_envs_per_worker,
        batch_mode="truncate_episodes"
    )
    .evaluation(
        evaluation_parallel_to_training=False,
        evaluation_interval=100,
        evaluation_duration=10,
        evaluation_num_workers=0,
        evaluation_sample_timeout_s=60
    )
    .callbacks(partial(callback, path=output_folder_path))
    .training(
        gamma=0.99,  # Discount factor
        lr=1e-5,  # Learning rate
        train_batch_size=200 * args.num_envs_per_worker * args.num_rollout_workers,  # Batch size
        entropy_coeff=0.001,  # Entropy cost
        vf_loss_coeff=40,  # Baseline cost
        grad_clip=42,  # Max norm gradient
        optimizer={"type": "RMSProp"},
        model={
            "dim": 88,
            "conv_filters": [
                [32, [3, 3], 5],  # Layer 1
                [64, [3, 3], 5],  # Layer 2
                [128, [3, 3], 2],  # Layer 3
            ],
            "conv_activation": "relu",
            "fcnet_hiddens": [1024, 1024],
            "post_fcnet_activation": "tanh",
            "use_lstm": True,
            "lstm_cell_size": 1024,
            "max_seq_len": 16,  # LSTM unroll length
            "vf_share_layers": False,
            "lstm_use_prev_action": True,
            "lstm_use_prev_reward": True,
            "post_fcnet_hiddens": [1024],
        }
    )
    .resources(
        num_gpus=args.num_gpus,
        num_cpus_per_worker=1,
        num_gpus_per_worker=(args.num_gpus / args.num_rollout_workers if args.num_rollout_workers > 0 else 0),
    )
    .framework("torch")
    .fault_tolerance(
        recreate_failed_workers=True,
        restart_failed_sub_environments=True
    )
)

Then, using a similar configuration, I would like to implement DQNConfig:

from functools import partial

from ray.rllib.algorithms.dqn import DQNConfig

config = (
    DQNConfig()
    .environment(env=env_name, disable_env_checking=True)
    .rollouts(
        num_rollout_workers=args.num_rollout_workers,
        num_envs_per_worker=args.num_envs_per_worker,
        rollout_fragment_length=200 * args.num_envs_per_worker,
        batch_mode="truncate_episodes",  # Necessary for RNNs
    )
    .exploration(
        explore=True,
        exploration_config={
            "type": "EpsilonGreedy",
            "initial_epsilon": 1.0,
            "final_epsilon": 0.1,
            "epsilon_timesteps": 10000,
        }
    )
    .evaluation(
        evaluation_parallel_to_training=False,
        evaluation_interval=100,
        evaluation_duration=10,
        evaluation_num_workers=0,
        evaluation_sample_timeout_s=60
    )
    .callbacks(partial(callback, path=output_folder_path))
    .training(
        gamma=0.99,  # Discount factor
        lr=1e-5,  # Learning rate
        train_batch_size=200 * args.num_envs_per_worker * args.num_rollout_workers,  # Batch size
        grad_clip=42,  # Max norm gradient
        optimizer={"type": "RMSProp"},
        model={
            "dim": 88,
            "conv_filters": [
                [32, [3, 3], 5],  # Layer 1
                [64, [3, 3], 5],  # Layer 2
                [128, [3, 3], 2],  # Layer 3
            ],
            "conv_activation": "relu",
            "fcnet_hiddens": [1024, 1024],
            "post_fcnet_activation": "tanh",
            "use_lstm": True,
            "lstm_cell_size": 1024,
            "max_seq_len": 16,  # LSTM unroll length
            "vf_share_layers": False,
            "lstm_use_prev_action": True,
            "lstm_use_prev_reward": True,
            "post_fcnet_hiddens": [1024],
        },
        dueling=True,
        double_q=True,
        n_step=3,
        target_network_update_freq=500,
        replay_buffer_config={
            "type": "ReplayBuffer",
            "capacity": 50000,  # Replay buffer capacity
            "replay_sequence_length": 16,
            "seq_lens": 16,
        }
    )
    .resources(
        num_gpus=args.num_gpus,
        num_cpus_per_worker=1,
        num_gpus_per_worker=args.num_gpus / args.num_rollout_workers,
    )
    .framework("torch")
    .fault_tolerance(
        recreate_failed_workers=True,
        restart_failed_sub_environments=True
    )
)

But then it gives me the following error:

File "/Users/berkayeren/PycharmProjects/rl-learning/.venv/lib/python3.9/site-packages/ray/rllib/models/torch/recurrent_net.py", line 217, in forward [repeated 7x across cluster]
(RolloutWorker pid=8413)     assert seq_lens is not None [repeated 7x across cluster]

It seems to originate in ModelV2, at line 237:

237        if seq_lens is None:
238            seq_lens = input_dict.get(SampleBatch.SEQ_LENS)

seq_lens is None, and at line 238 it gets set to None again, which triggers the assertion a few lines later.
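
To illustrate, here is a minimal sketch of what I think is happening (the batch contents below are made up for illustration, not what RLlib builds internally): a batch that carries no SampleBatch.SEQ_LENS key makes the lookup return None, which is exactly what the recurrent wrapper asserts against.

from ray.rllib.policy.sample_batch import SampleBatch

# Hypothetical per-timestep batch with no sequence information attached.
batch = SampleBatch({"obs": [[0.0]], "actions": [0]})
print(batch.get(SampleBatch.SEQ_LENS))  # -> None, so the "assert seq_lens is not None" fires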

I cannot figure out how to solve this issue.

I’m using:

python 3.9
ray 2.9.3
torch 2.5.1
minigrid 3.0.0
gymnasium 1.0.0

Hi @Berkay_Eren,

Welcome to the forum.

Recurrent layers are only supported for policy gradient algorithms. They do not work with off-policy algorithms like DQN.

https://docs.ray.io/en/latest/rllib/rllib-models.html#built-in-auto-lstm-and-auto-attention-wrappers
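
If it helps, here is a minimal sketch of what that page describes, assuming a policy-gradient algorithm such as PPO (env_name and the layer sizes are placeholders). The auto-LSTM wrapper is enabled purely through the model dict, the same way as in your working IMPALA config:

from ray.rllib.algorithms.ppo import PPOConfig

config = (
    PPOConfig()
    .environment(env=env_name)
    .framework("torch")
    .training(
        model={
            "use_lstm": True,             # wrap the default net in an LSTM
            "lstm_cell_size": 256,
            "max_seq_len": 16,            # RNN unroll length
            "lstm_use_prev_action": True,
            "lstm_use_prev_reward": True,
        }
    )
)
algo = config.build()

Because PPO samples and trains on whole (truncated) episodes, RLlib attaches the seq_lens information to the train batch automatically, so the assertion you are seeing does not come up there.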