Hi, I am trying to log the distribution of action means and stds of a PPO algorithm during `ray.tune` training.
I was able to save these values in a custom training loop, but since I cannot use eager mode in tune, is there an easy way to log the means and stds to TensorBoard / W&B during training?
Below is my code in eager mode (without `ray.tune`):
```python
import numpy as np
import ray
import wandb
from ray.rllib.agents import ppo

def get_dists(_trainer):
    # Build the action distribution for a fixed observation and
    # return its mean and std (only works in eager mode).
    policy = _trainer.get_policy()
    logits, _ = policy.model.from_batch({"obs": np.array([[0]])})
    dist = policy.dist_class(logits, policy.model)
    return dist.mean.numpy(), dist.std.numpy()

trainer = ppo.PPOTrainer(config=ppo_config, env=FlightEnv)
for n in range(stop_iters):
    result = trainer.train()
    means, stds = get_dists(trainer)
    logs = {"mean_actions": means, "std_actions": stds}
    wandb.log(logs)
ray.shutdown()
```
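For reference, the snippet above assumes the trainer itself runs eagerly. This is just a minimal sketch of the config I mean; the `"framework": "tf2"` key and `num_workers` value are my assumptions for illustration (older RLlib versions used `"eager": True` instead):

```python
# Minimal config sketch assumed above (the env is passed to PPOTrainer
# directly); "framework": "tf2" enables TF eager execution so that
# .numpy() works on the distribution tensors.
ppo_config = {
    "framework": "tf2",
    "num_workers": 0,
}
```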
I am trying to achieve something like this in “tune” mode using custom callbacks, but it obviously does not work because tune does not run in eager mode:
```python
from ray.rllib.agents.callbacks import DefaultCallbacks

class MyCallbacks(DefaultCallbacks):
    def on_train_result(self, *, trainer, result: dict, **kwargs):
        print("trainer.train() result: {} -> {} episodes".format(
            trainer, result["episodes_this_iter"]))
        # You can mutate the result dict to add new fields to return.
        result["callback_ok"] = True
        result["FCC_net"] = trainer.get_policy().get_weights()

        def get_dists(_trainer):
            policy = _trainer.get_policy()
            logits, _ = policy.model.from_batch({"obs": np.array([[0]])})
            dist = policy.dist_class(logits, policy.model)
            # Fails here: in graph mode these are symbolic tensors
            # without a .numpy() method.
            return dist.mean.numpy(), dist.std.numpy()

        means, stds = get_dists(trainer)
        result["mean"] = means
        result["stds"] = stds
```