[RLlib] Writing to tensorboard during custom evaluation

I have a self-made Gym environment that generates image data during evaluation, e.g. an image at the end of an episode. How can I log this image to TensorBoard so that it shows up, e.g., in the Images tab?

Using

I created the following proof-of-concept program, which uses a custom evaluation function that returns a scalar metric, `counter`. This metric does show up in TensorBoard. How do I handle image data?

import argparse
import gym
from gym.spaces import Discrete, Box
import numpy as np
import os

import ray
from ray import tune
from ray.tune import grid_search
from ray.rllib.models import ModelCatalog
from ray.rllib.models.tf.tf_modelv2 import TFModelV2
from ray.rllib.models.tf.fcnet import FullyConnectedNetwork
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC
from ray.rllib.utils.framework import try_import_tf, try_import_torch
from ray.rllib.utils.test_utils import check_learning_achieved

# Framework handles obtained through RLlib's import helpers.
# NOTE(review): presumably these helpers return placeholders instead of
# raising when a framework is not installed -- confirm in the RLlib docs.
# None of the names bound here is used further down in this script.
tf1, tf, tfv = try_import_tf()
torch, nn = try_import_torch()

# Command-line interface of the example. Options are registered in the
# same order as before so that the generated help text is unchanged.
parser = argparse.ArgumentParser()
parser.add_argument("--run", default="PPO", type=str)

# Boolean switches (absent -> False, present -> True).
for _switch in ("--torch", "--as-test"):
    parser.add_argument(_switch, action="store_true")

# Stopping criteria: (option, value type, default).
for _limit, _kind, _default in (
        ("--stop-iters", int, 50),
        ("--stop-timesteps", int, 10000),
        ("--stop-reward", float, 0.1),
):
    parser.add_argument(_limit, type=_kind, default=_default)


def eval_fn(trainer, eval_workers):
    """Custom evaluation function that reports how often it has been called.

    The call count is stored between invocations as the ``counter``
    attribute on the function object itself.

    Args:
        trainer: Only its type is printed; otherwise unused.
        eval_workers: Unused.

    Returns:
        A dict with the single key ``"counter"`` holding the number of
        calls made so far (1 on the first call).
    """
    # A missing attribute means this is the first call, i.e. a count of 0.
    calls_so_far = getattr(eval_fn, "counter", 0)
    eval_fn.counter = calls_so_far + 1
    print("type(trainer) {}".format(type(trainer)))
    return {"counter": eval_fn.counter}


class SimpleCorridor(gym.Env):
    """Corridor environment: walk from position 0 to the end position.

    The end position is read from ``config["corridor_length"]``. Action 0
    steps back (never below position 0), action 1 steps forward. The
    episode is done once the current position reaches the end position;
    that step is rewarded with 1.0, every other step with -0.1.
    """

    def __init__(self, config):
        corridor_length = config["corridor_length"]
        self.end_pos = corridor_length
        self.cur_pos = 0
        self.action_space = Discrete(2)
        self.observation_space = Box(
            0.0, corridor_length, shape=(1, ), dtype=np.float32)

    def reset(self):
        """Move back to position 0 and return the initial observation."""
        self.cur_pos = 0
        return [0]

    def step(self, action):
        """Apply ``action`` and return ``(obs, reward, done, info)``."""
        assert action in [0, 1], action
        if action == 1:
            self.cur_pos += 1
        elif action == 0 and self.cur_pos > 0:
            # Stepping back is ignored while standing at the start.
            self.cur_pos -= 1
        reached_end = self.cur_pos >= self.end_pos
        reward = 1.0 if reached_end else -0.1
        return [self.cur_pos], reward, reached_end, {}



if __name__ == "__main__":
    args = parser.parse_args()
    ray.init()

    # The env class is passed directly. It could instead be registered with
    # register_env("corridor", lambda config: SimpleCorridor(config))
    # and referenced by the name "corridor".
    env_config = {"corridor_length": 5}
    framework = "torch" if args.torch else "tf"

    config = {
        "env": SimpleCorridor,
        "env_config": env_config,
        # GPU selection through the RLLIB_NUM_GPUS env var is disabled:
        # "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
        "lr": 1e-4,  # try different lrs
        "num_workers": 1,  # parallelism
        "framework": framework,
        # Evaluation is enabled once per training iteration and is routed
        # through the custom evaluation function.
        "custom_eval_function": eval_fn,
        "evaluation_interval": 1,
        "evaluation_num_episodes": 2,
    }

    # Stopping criteria taken from the command line.
    stop = {
        "training_iteration": args.stop_iters,
        "timesteps_total": args.stop_timesteps,
        "episode_reward_mean": args.stop_reward,
    }

    results = tune.run(args.run, name="corridor", config=config, stop=stop)

    if args.as_test:
        check_learning_achieved(results, args.stop_reward)

    ray.shutdown()

It seems that TFRunBuilder is used to log to tensorboard, e.g.

How can I use this in the custom eval function?

If it is not possible to save the image data inside tensorboard, how can I save it as a file in the same folder as events.out.tfevents?

RLlib 1.1.0

Great question @RickLan. Actually, there is currently no image support in our TBXLogger :confused: — only video and single scalar support.
You could simply add this by creating a PR changing this block of code here inside ray/python/ray/tune/logger.py:

class TBXLogger(Logger):
    """TensorBoardX Logger.

    Note that hparams will be written only after a trial has terminated.
    This logger automatically flattens nested dicts to show on TensorBoard:

        {"a": {"b": 1, "c": 2}} -> {"a/b": 1, "a/c": 2}
    """

    ...
    
    def on_result(self, result: Dict):
        ...
        for attr, value in flat_result.items():
            full_attr = "/".join(path + [attr])
            if (isinstance(value, tuple(VALID_SUMMARY_TYPES))
                    and not np.isnan(value)):
                valid_result[full_attr] = value
                self._file_writer.add_scalar(
                    full_attr, value, global_step=step)
            elif ((isinstance(value, list) and len(value) > 0)
                  or (isinstance(value, np.ndarray) and value.size > 0)):
                valid_result[full_attr] = value


                # Must be video
                if isinstance(value, np.ndarray) and value.ndim == 5:
                    self._file_writer.add_video(
                        full_attr, value, global_step=step, fps=20)
                    continue

                # -------------------------------------------------------
                # ---->>> Add image support HERE <<<----
                # -------------------------------------------------------

                try:
                    self._file_writer.add_histogram(
                        full_attr, value, global_step=step)
                # In case TensorboardX still doesn't think it's a valid value
                # (e.g. `[[]]`), warn and move on.
                except (ValueError, TypeError):
                ...

Thank you @sven1977 for pointing out where to make the mod. I added the following code and have a small test. I read the contribution guidelines and it will take some time for me to get there, so I am posting my code here for those who are interested.

Add the following to the TBXLogger class (for location see @sven1977 message above):

                # Image Support (3, H, W)
                if isinstance(value, np.ndarray) \
                        and value.ndim == 3 \
                        and value.shape[0] == 3:
                    self._file_writer.add_image(
                        full_attr, value, global_step=step)
                    continue

Add the following to the TBXLoggerCallback class (for location see @sven1977 message above):

                # Image (3, H, W)
                if isinstance(value, np.ndarray) \
                        and value.ndim == 3 \
                        and value.shape[0] == 3:
                    self._trial_writer[trial].add_image(
                        full_attr, value, global_step=step)
                    continue

Test code.

import argparse
import gym
from gym.spaces import Discrete, Box
import numpy as np
import os

import ray
from ray import tune

# Command-line interface of the test script. Only three options are
# active; the --as-test, --stop-timesteps and --stop-reward options are
# deliberately not registered here.
parser = argparse.ArgumentParser()
for _flag, _options in (
        ("--run", {"type": str, "default": "PPO"}),
        ("--torch", {"action": "store_true"}),
        ("--stop-iters", {"type": int, "default": 4}),
):
    parser.add_argument(_flag, **_options)

# Edge length, in pixels, of the square test image produced by eval_fn.
image_size = 16


def eval_fn(trainer, eval_workers):
    """Custom evaluation function that reports a counter and a test image.

    State is kept between calls as attributes on the function object:
    ``counter`` (index of the next diagonal pixel), ``pic`` (a uint8 canvas
    of shape ``(3, image_size, image_size)``) and ``color`` (the three
    channel values painted next). Each call paints one more pixel on the
    diagonal, rotates ``color`` by one position and advances ``counter``
    modulo ``image_size``.

    Args:
        trainer: Unused.
        eval_workers: Unused.

    Returns:
        A dict with ``"counter"`` (the already advanced counter) and
        ``"pic"`` (a snapshot of the canvas after this call's pixel has
        been painted).
    """
    if not hasattr(eval_fn, "counter"):
        # First call: create the persistent state.
        eval_fn.counter = 0
        eval_fn.pic = np.zeros((3, image_size, image_size), dtype=np.uint8)
        eval_fn.color = np.array([255, 0, 0])

    diag = eval_fn.counter
    eval_fn.pic[:, diag, diag] = eval_fn.color
    eval_fn.color = np.roll(eval_fn.color, 1)
    eval_fn.counter = (diag + 1) % image_size

    # Hand out a copy: the canvas keeps being modified on later calls, and
    # results that were already returned must not change retroactively.
    return {"counter": eval_fn.counter, "pic": eval_fn.pic.copy()}


class SimpleCorridor(gym.Env):
    """Corridor environment: walk from position 0 to the end position.

    The end position is read from ``config["corridor_length"]``. Action 0
    steps back (never below position 0), action 1 steps forward. The
    episode is done once the current position reaches the end position;
    that step is rewarded with 1.0, every other step with -0.1.
    """

    def __init__(self, config):
        corridor_length = config["corridor_length"]
        self.end_pos = corridor_length
        self.cur_pos = 0
        self.action_space = Discrete(2)
        self.observation_space = Box(
            0.0, corridor_length, shape=(1, ), dtype=np.float32)

    def reset(self):
        """Move back to position 0 and return the initial observation."""
        self.cur_pos = 0
        return [0]

    def step(self, action):
        """Apply ``action`` and return ``(obs, reward, done, info)``."""
        assert action in [0, 1], action
        if action == 1:
            self.cur_pos += 1
        elif action == 0 and self.cur_pos > 0:
            # Stepping back is ignored while standing at the start.
            self.cur_pos -= 1
        reached_end = self.cur_pos >= self.end_pos
        reward = 1.0 if reached_end else -0.1
        return [self.cur_pos], reward, reached_end, {}



if __name__ == "__main__":
    args = parser.parse_args()
    ray.init()

    # The env class is passed directly. It could instead be registered with
    # register_env("corridor", lambda config: SimpleCorridor(config))
    # and referenced by the name "corridor".
    env_config = {"corridor_length": 5}
    framework = "torch" if args.torch else "tf"

    config = {
        "env": SimpleCorridor,
        "env_config": env_config,
        # GPU selection through the RLLIB_NUM_GPUS env var is disabled:
        # "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
        "lr": 1e-4,  # try different lrs
        "num_workers": 1,  # parallelism
        "framework": framework,
        # Evaluation is enabled once per training iteration and is routed
        # through the custom evaluation function.
        "custom_eval_function": eval_fn,
        "evaluation_interval": 1,
        "evaluation_num_episodes": 1,
    }

    # The iteration count is the only active stopping criterion; the
    # timestep and reward criteria are not used in this test.
    stop = {"training_iteration": args.stop_iters}

    results = tune.run(
        args.run, name="test_image_logger", config=config, stop=stop)

    ray.shutdown()

2 Likes

Prior code was tested in v1.1. Still works in v1.2.0.

1 Like