I am trying to train an agent with a custom environment and a custom model.
However, I have encountered the following error. I have searched the forum but could not find a fix.
Trial Progress
Trial name
PPO_jrs_tsn_env_29bf9_00000
(PPO pid=4760) 2024-03-21 12:00:36,300 ERROR actor_manager.py:506 -- Ray error, taking actor 1 out of service. ray::RolloutWorker.apply() (pid=6236, ip=127.0.0.1, actor_id=4a3bb5373139f5bd3d98b84401000000, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x000001EDFE5DA0D0>)
(PPO pid=4760) File "python\ray\_raylet.pyx", line 1813, in ray._raylet.execute_task
(PPO pid=4760) File "python\ray\_raylet.pyx", line 1754, in ray._raylet.execute_task.function_executor
(PPO pid=4760) File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\_private\function_manager.py", line 726, in actor_method_executor
(PPO pid=4760) return method(__ray_actor, *args, **kwargs)
(PPO pid=4760) File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span
(PPO pid=4760) return method(self, *_args, **_kwargs)
(PPO pid=4760) File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\utils\actor_manager.py", line 189, in apply
(PPO pid=4760) raise e
(PPO pid=4760) File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\utils\actor_manager.py", line 178, in apply
(PPO pid=4760) return func(self, *args, **kwargs)
(PPO pid=4760) File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\execution\rollout_ops.py", line 84, in <lambda>
(PPO pid=4760) lambda w: w.sample(), local_worker=False, healthy_only=True
(PPO pid=4760) File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span
(PPO pid=4760) return method(self, *_args, **_kwargs)
(PPO pid=4760) File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 694, in sample
(PPO pid=4760) batches = [self.input_reader.next()]
(PPO pid=4760) File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\evaluation\sampler.py", line 91, in next
(PPO pid=4760) batches = [self.get_data()]
(PPO pid=4760) File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\evaluation\sampler.py", line 276, in get_data
(PPO pid=4760) item = next(self._env_runner)
(PPO pid=4760) File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 344, in run
(PPO pid=4760) outputs = self.step()
(PPO pid=4760) File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 389, in step
(PPO pid=4760) ] = self._process_policy_eval_results(
(PPO pid=4760) File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 1141, in _process_policy_eval_results
(PPO pid=4760) env_id: int = eval_data[i].env_id
(PPO pid=4760) IndexError: list index out of range
(PPO pid=4760) 2024-03-21 12:00:36,300 ERROR actor_manager.py:506 -- Ray error, taking actor 2 out of service. ray::RolloutWorker.apply() (pid=12536, ip=127.0.0.1, actor_id=563016c557ae0bb384d30c2801000000, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x0000020D7589A100>)
(PPO pid=4760) File "python\ray\_raylet.pyx", line 1813, in ray._raylet.execute_task
(PPO pid=4760) File "python\ray\_raylet.pyx", line 1754, in ray._raylet.execute_task.function_executor
(PPO pid=4760) File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\_private\function_manager.py", line 726, in actor_method_executor
(PPO pid=4760) return method(__ray_actor, *args, **kwargs)
(PPO pid=4760) File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span
(PPO pid=4760) return method(self, *_args, **_kwargs)
(PPO pid=4760) File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\utils\actor_manager.py", line 189, in apply
(PPO pid=4760) raise e
(PPO pid=4760) File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\utils\actor_manager.py", line 178, in apply
(PPO pid=4760) return func(self, *args, **kwargs)
(PPO pid=4760) File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\execution\rollout_ops.py", line 84, in <lambda>
(PPO pid=4760) lambda w: w.sample(), local_worker=False, healthy_only=True
(PPO pid=4760) File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\util\tracing\tracing_helper.py", line 467, in _resume_span
(PPO pid=4760) return method(self, *_args, **_kwargs)
(PPO pid=4760) File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 694, in sample
(PPO pid=4760) batches = [self.input_reader.next()]
(PPO pid=4760) File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\evaluation\sampler.py", line 91, in next
(PPO pid=4760) batches = [self.get_data()]
(PPO pid=4760) File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\evaluation\sampler.py", line 276, in get_data
(PPO pid=4760) item = next(self._env_runner)
(PPO pid=4760) File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 344, in run
(PPO pid=4760) outputs = self.step()
(PPO pid=4760) File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 389, in step
(PPO pid=4760) ] = self._process_policy_eval_results(
(PPO pid=4760) File "E:\Users\Arash\anaconda3\envs\newenv\lib\site-packages\ray\rllib\evaluation\env_runner_v2.py", line 1141, in _process_policy_eval_results
(PPO pid=4760) env_id: int = eval_data[i].env_id
(PPO pid=4760) IndexError: list index out of range
I would be grateful if anyone could help me with this.
Here is the config:
# Settings passed to the "jrs_tsn_env" environment via the "env_config" entry
# of the trainer config below.
env_config = dict(
    # 4x4 symmetric 0/1 matrix; presumably an adjacency matrix in which
    # nodes 0-2 each link only to node 3 -- confirm against the env.
    network_topology=np.array(
        [
            [0, 0, 0, 1],
            [0, 0, 0, 1],
            [0, 0, 0, 1],
            [1, 1, 1, 0],
        ]
    ),
    # One tag per node index.
    node_tags={0: 'EndSystem', 1: 'EndSystem', 2: 'EndSystem', 3: 'Switch'},
    # Flows present at start-up; both end at node 2.
    initial_flows=[
        dict(size=30, deadline=1500, type='TT', start=0, end=2, period=150),
        dict(size=10, deadline=4500, type='TT', start=1, end=2, period=100),
    ],
    link_capacity=1000,
    forwarding_delay=1,
    max_flow_size=1500,
    min_transmission_time=1,
    extra_queue_penalty=10,
    improper_queue_penalty=50,
    # 300 is the least common multiple of the two flow periods (150, 100).
    hyperperiod=300,
)

# PPO Trainer
# NOTE(review): the traceback above fails while indexing eval_data[i] in
# _process_policy_eval_results. One possible cause is that the custom model
# returns a different number of output rows than observations it was given
# -- confirm against the "custom_gps_model" forward pass.
config = dict(
    env="jrs_tsn_env",
    env_config=env_config,
    model=dict(
        custom_model="custom_gps_model",
        custom_model_config=dict(
            # Same array object as the one given to the environment.
            network_topology=env_config["network_topology"],
            num_node_features=17,
            num_edge_features=2,
        ),
    ),
    framework="torch",
    log_level="DEBUG",
    num_gpus=0,
)
# Launch the "PPO" trainable through tune.run with the config built above.
# The stop criterion is training_iteration == 1, and a checkpoint is
# requested at the end of the run.
result = tune.run(
    "PPO",
    config=config,
    stop=dict(training_iteration=1),
    checkpoint_at_end=True,
)