Error: IndexError: list index out of range

I am trying to train an agent with a custom environment and a custom model.
However, I have encountered the following error. I have searched the forum, but I could not find a fix.

Trial Progress
Trial name
PPO_jrs_tsn_env_29bf9_00000
(PPO pid=4760) 2024-03-21 12:00:36,300	ERROR actor_manager.py:506 -- Ray error, taking actor 1 out of service. ray::RolloutWorker.apply() (pid=6236, ip=127.0.0.1, actor_id=4a3bb5373139f5bd3d98b84401000000, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x000001EDFE5DA0D0>)
(PPO pid=4760)   File "python\ray\_raylet.pyx", line 1813, in ray._raylet.execute_task
(PPO pid=4760)   File "python\ray\_raylet.pyx", line 1754, in ray._raylet.execute_task.function_executor
(PPO pid=4760)   File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\_private\function_manager.py", line 726, in actor_method_executor
(PPO pid=4760)     return method(__ray_actor, *args, **kwargs)
(PPO pid=4760)   File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span
(PPO pid=4760)     return method(self, *_args, **_kwargs)
(PPO pid=4760)   File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\utils\actor_manager.py", line 189, in apply
(PPO pid=4760)     raise e
(PPO pid=4760)   File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\utils\actor_manager.py", line 178, in apply
(PPO pid=4760)     return func(self, *args, **kwargs)
(PPO pid=4760)   File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\execution\rollout_ops.py", line 84, in <lambda>
(PPO pid=4760)     lambda w: w.sample(), local_worker=False, healthy_only=True
(PPO pid=4760)   File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span
(PPO pid=4760)     return method(self, *_args, **_kwargs)
(PPO pid=4760)   File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 694, in sample
(PPO pid=4760)     batches = [self.input_reader.next()]
(PPO pid=4760)   File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\evaluation\sampler.py", line 91, in next
(PPO pid=4760)     batches = [self.get_data()]
(PPO pid=4760)   File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\evaluation\sampler.py", line 276, in get_data
(PPO pid=4760)     item = next(self._env_runner)
(PPO pid=4760)   File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 344, in run
(PPO pid=4760)     outputs = self.step()
(PPO pid=4760)   File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 389, in step
(PPO pid=4760)     ] = self._process_policy_eval_results(
(PPO pid=4760)   File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 1141, in _process_policy_eval_results
(PPO pid=4760)     env_id: int = eval_data[i].env_id
(PPO pid=4760) IndexError: list index out of range
(PPO pid=4760) 2024-03-21 12:00:36,300	ERROR actor_manager.py:506 -- Ray error, taking actor 2 out of service. ray::RolloutWorker.apply() (pid=12536, ip=127.0.0.1, actor_id=563016c557ae0bb384d30c2801000000, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x0000020D7589A100>)
(PPO pid=4760)   File "python\ray\_raylet.pyx", line 1813, in ray._raylet.execute_task
(PPO pid=4760)   File "python\ray\_raylet.pyx", line 1754, in ray._raylet.execute_task.function_executor
(PPO pid=4760)   File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\_private\function_manager.py", line 726, in actor_method_executor
(PPO pid=4760)     return method(__ray_actor, *args, **kwargs)
(PPO pid=4760)   File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span
(PPO pid=4760)     return method(self, *_args, **_kwargs)
(PPO pid=4760)   File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\utils\actor_manager.py", line 189, in apply
(PPO pid=4760)     raise e
(PPO pid=4760)   File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\utils\actor_manager.py", line 178, in apply
(PPO pid=4760)     return func(self, *args, **kwargs)
(PPO pid=4760)   File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\execution\rollout_ops.py", line 84, in <lambda>
(PPO pid=4760)     lambda w: w.sample(), local_worker=False, healthy_only=True
(PPO pid=4760)   File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span
(PPO pid=4760)     return method(self, *_args, **_kwargs)
(PPO pid=4760)   File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 694, in sample
(PPO pid=4760)     batches = [self.input_reader.next()]
(PPO pid=4760)   File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\evaluation\sampler.py", line 91, in next
(PPO pid=4760)     batches = [self.get_data()]
(PPO pid=4760)   File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\evaluation\sampler.py", line 276, in get_data
(PPO pid=4760)     item = next(self._env_runner)
(PPO pid=4760)   File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 344, in run
(PPO pid=4760)     outputs = self.step()
(PPO pid=4760)   File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 389, in step
(PPO pid=4760)     ] = self._process_policy_eval_results(
(PPO pid=4760)   File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 1141, in _process_policy_eval_results
(PPO pid=4760)     env_id: int = eval_data[i].env_id
(PPO pid=4760) IndexError: list index out of range

I would be grateful if anyone could help me with this.
Here is the config:

# Settings handed to the custom environment (referenced as "env_config" in the
# trainer config).
env_config = {
    # 4x4 adjacency matrix: nodes 0-2 each link only to node 3, and node 3
    # links back to all of them (a star around node 3).
    "network_topology": np.array(
        [
            [0, 0, 0, 1],
            [0, 0, 0, 1],
            [0, 0, 0, 1],
            [1, 1, 1, 0],
        ]
    ),
    # Role of each node; keys are presumably the row/column indices of the
    # adjacency matrix -- confirm against the env implementation.
    "node_tags": {
        0: "EndSystem",
        1: "EndSystem",
        2: "EndSystem",
        3: "Switch",
    },
    # Flows present at start-up; both terminate at node 2.
    "initial_flows": [
        {
            "size": 30,
            "deadline": 1500,
            "type": "TT",
            "start": 0,
            "end": 2,
            "period": 150,
        },
        {
            "size": 10,
            "deadline": 4500,
            "type": "TT",
            "start": 1,
            "end": 2,
            "period": 100,
        },
    ],
    "link_capacity": 1000,
    "forwarding_delay": 1,
    "max_flow_size": 1500,
    "min_transmission_time": 1,
    "extra_queue_penalty": 10,
    "improper_queue_penalty": 50,
    "hyperperiod": 300,
}

# PPO Trainer
# Options forwarded to the custom model; it receives the same adjacency matrix
# object that the environment uses.
gps_model_options = {
    "network_topology": env_config["network_topology"],
    "num_node_features": 17,
    "num_edge_features": 2,
}

# "jrs_tsn_env" and "custom_gps_model" are looked up by name, so they are
# presumably registered elsewhere before this runs -- confirm.
config = {
    "env": "jrs_tsn_env",
    "env_config": env_config,
    "model": {
        "custom_model": "custom_gps_model",
        "custom_model_config": gps_model_options,
    },
    "framework": "torch",
    "log_level": "DEBUG",
    "num_gpus": 0,
}

# NOTE(review): the reported IndexError is raised while indexing eval_data once
# per returned action. One thing worth checking is whether the custom model's
# output keeps exactly one row per observation in the input batch -- TODO confirm.
result = tune.run(
    "PPO",
    config=config,
    # Stop after a single training iteration.
    stop={"training_iteration": 1},
    checkpoint_at_end=True,
)

It seems that a similar bug in sampler.py has already been fixed, but the same problem still appears to exist in
env_runner_v2.py. The earlier report was titled:
"Error when setting done=true: eval_data[i].env_id yields IndexError: list index out of range"