I am trying to plot some of the variables returned in my environment's info dictionary. I implemented a custom callback by subclassing the DefaultCallbacks
class and appended those variables to lists stored in the episode.user_data dictionary. However, none of them show up in TensorBoard.
My code snippet looks like this:
import custom_env as myenv
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.algorithms.callbacks import DefaultCallbacks
class customCB(DefaultCallbacks):
    def on_episode_start(self, *, worker, base_env, policies, episode, env_index, **kwargs):
        episode.user_data['action'] = []
        episode.user_data['battery SOC'] = []

    def on_episode_step(self, *, worker, base_env, policies, episode, env_index, **kwargs):
        env1 = base_env.get_sub_environments()[0]  # this step works
        episode.user_data['action'].append(env1.info['action'])
        episode.user_data['battery SOC'].append(env1.info['battery SOC'])
        episode.custom_metrics['test_data'] = 10  # does not show up in TensorBoard either!
config = PPOConfig()
config = config.training(gamma=0.9, lr=0.001, kl_coeff=0.3, use_gae=True, clip_param=0.1)
config = config.rollouts(num_rollout_workers=0)
config = config.framework('torch')
config = config.callbacks(customCB)
env_config = {'max_bat_cap': 50, 'charging_rate': 0.5}
config = config.environment(env=myenv.env_v3, env_config=env_config)
rllib_algo = config.build()
for i in range(1000):
    result = rllib_algo.train()
    # print(pretty_print(result))
    if i % 5 == 0:
        checkpoint_dir = rllib_algo.save()
        print(f"Checkpoint saved in directory {checkpoint_dir}")
print("End program")
TensorBoard doesn't show any of the metrics I added to episode.user_data, and the test_data value I set in episode.custom_metrics is missing as well.
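One check I can think of is printing part of the result dict inside the training loop to see whether the metrics reach the trainer at all. This is just a sketch, and it assumes aggregated callback metrics surface under result['custom_metrics'] (that key name is my reading of the docs, not something I've confirmed here):

# Sketch: inspect the result dict for aggregated callback metrics.
# Assumes they land under result["custom_metrics"]; .get() guards against the key being absent.
result = rllib_algo.train()
print(result.get("custom_metrics", {}))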
As a starting point, I also tried running rllib/examples/custom_metrics_and_callbacks.py,
but that fails with a separate error: AttributeError: 'EpisodeV2' object has no attribute 'last_observation_for'.
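For reference, my reading of that example is that the per-step values collected in episode.user_data are meant to be reduced into episode.custom_metrics inside on_episode_end, roughly like the sketch below. The aggregation and the metric names (mean_action, mean_battery_SOC) are my own adaptation of the example, not something I have been able to verify, since the example itself errors out for me:

import numpy as np

class customCB(DefaultCallbacks):
    # ... on_episode_start / on_episode_step as in my snippet above ...

    def on_episode_end(self, *, worker, base_env, policies, episode, env_index, **kwargs):
        # Reduce the per-step lists into per-episode scalars; as far as I understand,
        # values placed in custom_metrics here are what gets reported in the results.
        episode.custom_metrics['mean_action'] = float(np.mean(episode.user_data['action']))
        episode.custom_metrics['mean_battery_SOC'] = float(np.mean(episode.user_data['battery SOC']))

Is an on_episode_end hook like this the missing piece, or should the user_data values show up on their own?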