Hi folks, I’m trying to render my RL environment on every evaluation and log the result as a video. I have a somewhat working solution; however, when I actually run my code, I end up with two videos instead of one. Any insight would be much appreciated.
1. Severity of the issue: (select one)
- Low: Annoying but doesn’t hinder my work.
- Medium: Significantly affects my productivity but can find a workaround.
2. Environment:
- Ray version: 2.50.1
- Python version: 3.11
- OS: MacOS
- Cloud/Infrastructure: local
- Other libs/tools (if relevant): gymnasium, MetaDrive
3. What happened vs. what you expected:
- Expected: 1 gif video uploaded to wandb
- Actual: 2 gif videos per evaluation; sometimes just a still frame.
This is my custom RLlibCallback. It closely follows the env-rendering/recording callback example from the RLlib docs.
from collections.abc import Sequence

import gymnasium as gym
import numpy as np

from ray.rllib.callbacks.callbacks import RLlibCallback
from ray.rllib.utils.images import resize
# NOTE: adjust this import path if VectorMultiAgentEnv lives elsewhere in your Ray version.
from ray.rllib.env.vector.vector_multi_agent_env import VectorMultiAgentEnv


class EpisodeReturn(RLlibCallback):
    """A custom callback that records episode videos and logs evaluation episodes."""

    def __init__(self, env_runner_indices: Sequence[int] | None = None):
        super().__init__()
        # Keep some global state in between individual callback events.
        self.overall_sum_of_rewards = 0.0
        self._env_runner_indices = env_runner_indices
        self.episode_and_return = (None, float("-inf"))
    def on_episode_step(
        self,
        *,
        episode,  # noqa: ANN001
        env_runner,  # noqa: ANN001
        metrics_logger,  # noqa: ANN001, ARG002
        env,  # noqa: ANN001
        env_index,  # noqa: ARG002, ANN001
        rl_module,  # noqa: ARG002, ANN001
        **kwargs,  # noqa: ANN003, ARG002
    ) -> None:
        """Capture render images at each step of the episode."""
        # Only record on evaluation EnvRunners (optionally restricted to certain worker indices).
        if not env_runner.config.in_evaluation:
            return
        if (
            self._env_runner_indices is not None
            and env_runner.worker_index not in self._env_runner_indices
        ):
            return
        # For vectorized envs, render only the first sub-environment.
        if isinstance(env.unwrapped, gym.vector.VectorEnv | VectorMultiAgentEnv):
            image = env.unwrapped.envs[0].render()
        else:
            image = env.unwrapped.render()
        # Resize and convert HWC -> CHW before storing the frame on the episode.
        image = resize(image, 64, 96)
        image = np.transpose(image, axes=[2, 0, 1])
        if "render_images" not in episode.custom_data:
            episode.custom_data["render_images"] = []
        episode.custom_data["render_images"].append(image)
    def on_episode_end(
        self,
        *,
        episode,  # noqa: ANN001
        env_runner,  # noqa: ANN001
        env,  # noqa: ARG002, ANN001
        env_index,  # noqa: ARG002, ANN001
        rl_module,  # noqa: ANN001, ARG002
        **kwargs,  # noqa: ANN003, ARG002
    ) -> None:
        """Log the episode return and save episode videos during evaluation."""
        episode_return = episode.get_return()
        # Save episode videos during evaluation.
        if not env_runner.config.in_evaluation:
            return
        images = episode.custom_data["render_images"]
        # Stack the frames into a single (1, T, C, H, W) array (leading batch dim of 1).
        video = np.expand_dims(np.stack(images, axis=0), axis=0)
        self.episode_and_return = (video, episode_return)
    def on_sample_end(
        self,
        *,
        env_runner,  # noqa: ANN001
        metrics_logger,  # noqa: ANN001
        **kwargs,  # noqa: ANN003, ARG002
    ) -> None:
        """Log the recorded episode video to this EnvRunner's MetricsLogger."""
        if not env_runner.config.in_evaluation:
            return
        if self.episode_and_return[0] is not None:
            metrics_logger.log_value(
                "episode_videos_best",
                self.episode_and_return[0],
                reduce=None,
                clear_on_reduce=True,
            )
            self.episode_and_return = (None, float("-inf"))
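For reference, this is the shape pipeline the callback builds per evaluation episode, as a tiny standalone sanity check (plain NumPy only; the 64x96 frame size matches the resize above, and the episode length is just a placeholder). Each recorded episode should end up as exactly one (1, T, C, H, W) array, i.e. one video per on_sample_end log:

import numpy as np

# Pretend the env rendered T RGB frames of shape (H, W, C) = (64, 96, 3).
T = 20  # placeholder episode length
frames = [np.random.randint(0, 256, size=(64, 96, 3), dtype=np.uint8) for _ in range(T)]

# Same transforms as in on_episode_step / on_episode_end above:
frames_chw = [np.transpose(f, axes=[2, 0, 1]) for f in frames]  # HWC -> CHW
video = np.expand_dims(np.stack(frames_chw, axis=0), axis=0)    # -> (1, T, C, H, W)

print(video.shape)  # (1, 20, 3, 64, 96) -- one such array per recorded episode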
My PPO config is as follows:
config = (
    PPOConfig()
    .environment(
        MyScenarioEnv,
        env_config=env_config,
    )
    .env_runners(
        num_env_runners=8,
        num_envs_per_env_runner=1,
    )
    .learners(
        num_learners=1,
    )
    .training(
        lr=0.0001,
    )
    .framework(
        "torch",
        torch_compile_learner=True,
        torch_compile_learner_dynamo_backend="inductor",
        torch_compile_learner_dynamo_mode="reduce-overhead",
    )
    .callbacks(EpisodeReturn)
    .debugging(
        logger_config={
            "type": MyPrintLogger,
            "prefix": "LOG:",
        },
    )
    .evaluation(
        evaluation_interval=args.eval_interval,
        evaluation_num_env_runners=1,
        evaluation_duration_unit="episodes",
        evaluation_duration=1,
    )
)
ppo = config.build_algo()
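For context, the algo is then iterated roughly like this (a minimal sketch, assuming the new API stack's result-dict layout; the iteration count is a placeholder, and the actual wandb upload happens through the experiment's logging setup, which isn't shown here):

from ray.rllib.utils.metrics import ENV_RUNNER_RESULTS, EVALUATION_RESULTS

for _ in range(100):  # placeholder number of training iterations
    results = ppo.train()
    # The array(s) logged via metrics_logger in on_sample_end should surface under
    # the evaluation env-runner results (with reduce=None they come back as a list).
    eval_runner_results = results.get(EVALUATION_RESULTS, {}).get(ENV_RUNNER_RESULTS, {})
    videos = eval_runner_results.get("episode_videos_best")
    if videos is not None:
        print(len(videos) if isinstance(videos, list) else videos.shape)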