Hi,
I’m having trouble using PPO when an episode is not finished. The following minimal example fails when using PPO but works with DQN.
import ray
from ray import tune
from ray.rllib.algorithms.ppo import PPOConfig, PPO
from ray.rllib.algorithms.dqn import DQNConfig, DQN
from ray.rllib.examples.env.simple_rpg import SimpleRPG
from ray.rllib.examples.models.simple_rpg_model import CustomTorchRPGModel
from ray.rllib.examples.env.simple_corridor import SimpleCorridor
class MySimpleRPG(SimpleRPG):
    """SimpleRPG variant whose episodes never terminate on their own."""

    def step(self, action):
        # The stock SimpleRPG always returns done=True; here done is kept
        # False so the episode keeps running and PPO must build a
        # truncated batch.
        next_obs = self.observation_space.sample()
        return next_obs, 1, False, {}
# Select which algorithm to reproduce the issue with.
ALGO = "PPO"

# Map the algorithm name to its (config class, trainer class) pair.
_ALGO_CHOICES = {
    "PPO": (PPOConfig, PPO),
    "DQN": (DQNConfig, DQN),
}
if ALGO in _ALGO_CHOICES:
    _config_cls, trainer = _ALGO_CHOICES[ALGO]
    config = _config_cls()
if __name__ == "__main__":
    # Keep everything on the local worker so the failure is easy to trace.
    config.framework_str = "torch"
    config.num_rollout_workers = 0
    # Repeated (complex) observation-space env:
    config.env = MySimpleRPG
    # Non-repeated alternative that also reproduces the error:
    # config.env = SimpleCorridor
    ray.init(local_mode=True)
    tune.run(trainer, config=config)
The code gets as far as PPO’s ‘compute_gae_for_sample_batch’, but when the ‘new_obs’ column of the view should be created, the following error happens:
input_dict = sample_batch.get_single_step_input_dict(
File "/home/*/lib/python3.8/site-packages/ray/rllib/policy/sample_batch.py", line 1152, in get_single_step_input_dict
self[data_col],
File "/home/*/lib/python3.8/site-packages/ray/rllib/policy/sample_batch.py", line 818, in __getitem__
value = dict.__getitem__(self, key)
The following method, called with index="last", is the culprit.
def get_single_step_input_dict(
self,
view_requirements: ViewRequirementsDict,
index: Union[str, int] = "last",
) -> "SampleBatch":
Have there been any changes so that the examples no longer work? I know that RLlib changed a lot a few versions ago (especially regarding views and how batches are structured), but did this also break the examples? I also tried with a ‘SimpleRPG’ environment, which is closer to what I’m actually working with and where I originally encountered this error. To trigger the error I needed to change the ‘SimpleRPG’ environment to not always finish the episode. However, it also fails for much simpler environments such as ‘SimpleCorridor’.
The error trace and call trace below constitute the complete error.
2023-01-12 10:04:20,110 ERROR trial_runner.py:1088 -- Trial PPO_MySimpleRPG_236b7_00000: Error processing event.
ray.exceptions.RayTaskError(KeyError): ray::PPO.train() (pid=316653, ip=192.36.221.166, repr=PPO)
File "/home/*/lib/python3.8/site-packages/ray/tune/trainable/trainable.py", line 367, in train
raise skipped from exception_cause(skipped)
File "/home/*/lib/python3.8/site-packages/ray/tune/trainable/trainable.py", line 364, in train
result = self.step()
File "/home/*/lib/python3.8/site-packages/ray/rllib/algorithms/algorithm.py", line 749, in step
results, train_iter_ctx = self._run_one_training_iteration()
File "/home/*/lib/python3.8/site-packages/ray/rllib/algorithms/algorithm.py", line 2623, in _run_one_training_iteration
results = self.training_step()
File "/home/*/lib/python3.8/site-packages/ray/rllib/algorithms/ppo/ppo.py", line 318, in training_step
train_batch = synchronous_parallel_sample(
File "/home/*/lib/python3.8/site-packages/ray/rllib/execution/rollout_ops.py", line 82, in synchronous_parallel_sample
sample_batches = [worker_set.local_worker().sample()]
File "/home/*/lib/python3.8/site-packages/ray/rllib/evaluation/rollout_worker.py", line 900, in sample
batches = [self.input_reader.next()]
File "/home/*/lib/python3.8/site-packages/ray/rllib/evaluation/sampler.py", line 92, in next
batches = [self.get_data()]
File "/home/*/lib/python3.8/site-packages/ray/rllib/evaluation/sampler.py", line 285, in get_data
item = next(self._env_runner)
File "/home/*/lib/python3.8/site-packages/ray/rllib/evaluation/sampler.py", line 671, in _env_runner
active_envs, to_eval, outputs = _process_observations(
File "/home/*/lib/python3.8/site-packages/ray/rllib/evaluation/sampler.py", line 1155, in _process_observations
sample_collector.try_build_truncated_episode_multi_agent_batch()
File "/home/*/lib/python3.8/site-packages/ray/rllib/evaluation/collectors/simple_list_collector.py", line 644, in try_build_truncated_episode_multi_agent_batch
self.postprocess_episode(episode, is_done=False)
File "/home/*/lib/python3.8/site-packages/ray/rllib/evaluation/collectors/simple_list_collector.py", line 512, in postprocess_episode
post_batches[agent_id] = policy.postprocess_trajectory(
File "/home/*/lib/python3.8/site-packages/ray/rllib/algorithms/ppo/ppo_torch_policy.py", line 218, in postprocess_trajectory
return compute_gae_for_sample_batch(
File "/home/*/lib/python3.8/site-packages/ray/rllib/evaluation/postprocessing.py", line 178, in compute_gae_for_sample_batch
input_dict = sample_batch.get_single_step_input_dict(
File "/home/*/lib/python3.8/site-packages/ray/rllib/policy/sample_batch.py", line 1152, in get_single_step_input_dict
self[data_col],
File "/home/*/lib/python3.8/site-packages/ray/rllib/policy/sample_batch.py", line 818, in __getitem__
value = dict.__getitem__(self, key)
get_single_step_input_dict (/home/*/lib/python3.8/site-packages/ray/rllib/policy/sample_batch.py:1150)
compute_gae_for_sample_batch (/home/*/lib/python3.8/site-packages/ray/rllib/evaluation/postprocessing.py:178)
postprocess_trajectory (/home/*/lib/python3.8/site-packages/ray/rllib/algorithms/ppo/ppo_torch_policy.py:218)
postprocess_episode (/home/*/lib/python3.8/site-packages/ray/rllib/evaluation/collectors/simple_list_collector.py:512)
try_build_truncated_episode_multi_agent_batch (/home/*/lib/python3.8/site-packages/ray/rllib/evaluation/collectors/simple_list_collector.py:644)
_process_observations (/home/*/lib/python3.8/site-packages/ray/rllib/evaluation/sampler.py:1155)
_env_runner (/home/*/lib/python3.8/site-packages/ray/rllib/evaluation/sampler.py:671)
get_data (/home/*/lib/python3.8/site-packages/ray/rllib/evaluation/sampler.py:285)
next (/home/*/lib/python3.8/site-packages/ray/rllib/evaluation/sampler.py:92)
sample (/home/*/lib/python3.8/site-packages/ray/rllib/evaluation/rollout_worker.py:900)
synchronous_parallel_sample (/home/*/lib/python3.8/site-packages/ray/rllib/execution/rollout_ops.py:82)
training_step (/home/*/lib/python3.8/site-packages/ray/rllib/algorithms/ppo/ppo.py:318)
_resume_span (/home/*/lib/python3.8/site-packages/ray/util/tracing/tracing_helper.py:466)
_run_one_training_iteration (/home/*/lib/python3.8/site-packages/ray/rllib/algorithms/algorithm.py:2623)
_resume_span (/home/*/lib/python3.8/site-packages/ray/util/tracing/tracing_helper.py:466)
step (/home/*/lib/python3.8/site-packages/ray/rllib/algorithms/algorithm.py:749)
_resume_span (/home/*/lib/python3.8/site-packages/ray/util/tracing/tracing_helper.py:466)
train (/home/*/lib/python3.8/site-packages/ray/tune/trainable/trainable.py:364)
_resume_span (/home/*/lib/python3.8/site-packages/ray/util/tracing/tracing_helper.py:466)
actor_method_executor (/home/*/lib/python3.8/site-packages/ray/_private/function_manager.py:674)
Thanks in advance,
Johan