I have a pretty minimalistic setup of training PPO+PBT.
from gym import Env
from gym.spaces import Discrete
import ray
from ray.tune.schedulers import PopulationBasedTraining
from ray.tune import run
from ray.tune.registry import register_env
class CrashingEnv(Env):
action_space = Discrete(2)
observation_space = Discrete(3)
def reset(self):
return self.observation_space.sample()
def step(self, action):
raise RuntimeError()
def make_crashing_env(env_config) -> CrashingEnv:
del env_config
return CrashingEnv()
def test_train_crashing_env_ray_pbt_ppo():
ray.init(local_mode=True)
pbt = PopulationBasedTraining(metric="episode_reward_mean", mode="max")
register_env("crashing_env", make_crashing_env)
config = {
"env": "crashing_env",
"num_workers": 0,
}
run(
run_or_experiment="PPO",
name="crashing_env_ray_pbt_ppo",
scheduler=pbt,
config=config,
)
if __name__ == "__main__":
test_train_crashing_env_ray_pbt_ppo()
The environment deliberately raises an error on step(...)
. This would cause the rollout collection to fail and then the trial to fail.
How can I configure the training to drop/ignore the faulty rollout, remake the crashed environment in the rollout worker and continue training?