Render_env after each iteration

Hello,

I am using render_env and it works, but the rendering happens once per batch rather than once per training iteration.
Consider the following code:

N_ITER = 100
s = "{:3d} reward {:6.2f}/{:6.2f}/{:6.2f} len {:6.2f} saved {}"

for n in range(N_ITER):
    result = agent.train()
    file_name = agent.save(CHECKPOINT_ROOT)

    print(s.format(
        n + 1,
        result["episode_reward_min"],
        result["episode_reward_mean"],
        result["episode_reward_max"],
        result["episode_len_mean"],
        file_name,
    ))

Is it possible to call env.render() after each iteration?
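
For reference, this is roughly how rendering is currently switched on (a minimal sketch assuming the standard render_env flag in the trainer config; the real script has more settings):

import ray.rllib.agents.ppo as ppo

# Sketch only: enable per-worker rendering via the render_env config flag.
config = ppo.DEFAULT_CONFIG.copy()
config["render_env"] = True  # workers call env.render() while sampling
agent = ppo.PPOTrainer(config, env="CartPole-v1")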

cc @sven1977 can you help with this?

Hey @mg64ve, I think you can do this after the .train() calls (the example only renders the 1st remote worker's env):

agent.workers.foreach_env_with_context(
    lambda base_env, ctx: base_env.try_render() if ctx.worker_index == 1 else None
)
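
Note that worker_index 0 is the local worker, so the check above makes only the first remote rollout worker render its env.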

Thank you @sven1977 for your hint!
I have just used it in the following code:

import argparse
import os
import shutil
import gym

from ray.rllib.examples.env.stateless_cartpole import StatelessCartPole
from ray.rllib.utils.test_utils import check_learning_achieved
import ray.rllib.agents.ppo as ppo

parser = argparse.ArgumentParser()
parser.add_argument("--run", type=str, default="PPO")
parser.add_argument("--num-cpus", type=int, default=0)
parser.add_argument("--framework", choices=["tf2", "tf", "tfe", "torch"], default="tf")
parser.add_argument("--as-test", action="store_true")
parser.add_argument("--use-prev-action", action="store_true")
parser.add_argument("--use-prev-reward", action="store_true")
parser.add_argument("--stop-iters", type=int, default=200)
parser.add_argument("--stop-timesteps", type=int, default=100000)
parser.add_argument("--stop-reward", type=float, default=150.0)

if __name__ == "__main__":
    import ray
    from ray import tune

    args = parser.parse_args()

    ray.init(num_cpus=args.num_cpus or None)

    configs = {
        "PPO": {
            "num_sgd_iter": 5,
            "model": {
                "vf_share_layers": True,
            },
            "vf_loss_coeff": 0.0001,
        },
        "IMPALA": {
            "num_workers": 2,
            "num_gpus": 0,
            "vf_loss_coeff": 0.01,
        },
    }

    config = dict(
        configs[args.run],
        **{
            "env": StatelessCartPole,
            # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
            "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
            "model": {
                "use_lstm": True,
                "lstm_cell_size": 256,
                "lstm_use_prev_action": args.use_prev_action,
                "lstm_use_prev_reward": args.use_prev_reward,
            },
            "framework": args.framework,
            # Run with tracing enabled for tfe/tf2.
            "eager_tracing": args.framework in ["tfe", "tf2"],
        })

    # Clean out old checkpoints and results.
    CHECKPOINT_ROOT = "tmp/ppo/cart"
    shutil.rmtree(CHECKPOINT_ROOT, ignore_errors=True, onerror=None)

    ray_results = os.getenv("HOME") + "/ray_results/"
    shutil.rmtree(ray_results, ignore_errors=True, onerror=None)

    SELECT_ENV = "CartPole-v1"

    # NOTE: this replaces the config assembled above with plain PPO defaults.
    config = ppo.DEFAULT_CONFIG.copy()
    config["log_level"] = "WARN"

    agent = ppo.PPOTrainer(config, env=SELECT_ENV)

    N_ITER = 4
    s = "{:3d} reward {:6.2f}/{:6.2f}/{:6.2f} len {:6.2f} saved {}"

    for n in range(N_ITER):
        result = agent.train()
        file_name = agent.save(CHECKPOINT_ROOT)

        print(s.format(
            n + 1,
            result["episode_reward_min"],
            result["episode_reward_mean"],
            result["episode_reward_max"],
            result["episode_len_mean"],
            file_name,
        ))
        # Render the 1st remote worker's env after each training iteration.
        agent.workers.foreach_env_with_context(
            lambda base_env, ctx: base_env.render() if ctx.worker_index == 1 else None
        )

    # instantiate env class
    env = gym.make(SELECT_ENV)

    # run until episode ends
    episode_reward = 0
    done = False
    obs = env.reset()
    while not done:
        action = agent.compute_action(obs)
        obs, reward, done, info = env.step(action)
        episode_reward += reward

    print("reward: {}".format(episode_reward))

    ray.shutdown()

However, I am getting the following error:

WARNING:tensorflow:From /home/condauser/.local/lib/python3.8/site-packages/tensorflow/python/compat/v2_compat.py:96: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.
Instructions for updating:
non-resource variables are not supported in the long term
2021-05-24 13:52:08,391	INFO services.py:1267 -- View the Ray dashboard at http://127.0.0.1:8265
2021-05-24 13:52:08,395	WARNING services.py:1716 -- WARNING: The object store is using /tmp instead of /dev/shm because /dev/shm has only 67108864 bytes available. This will harm performance! You may be able to free up space by deleting files in /dev/shm. If you are inside a Docker container, you can increase /dev/shm size by passing '--shm-size=10.24gb' to 'docker run' (or add it to the run_options list in a Ray cluster config). Make sure to set this to more than 30% of available RAM.
2021-05-24 13:52:10,194	INFO trainer.py:669 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
2021-05-24 13:52:10,194	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
(pid=27899) WARNING:tensorflow:From /home/condauser/.local/lib/python3.8/site-packages/tensorflow/python/compat/v2_compat.py:96: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.
(pid=27899) Instructions for updating:
(pid=27899) non-resource variables are not supported in the long term
(pid=27903) WARNING:tensorflow:From /home/condauser/.local/lib/python3.8/site-packages/tensorflow/python/compat/v2_compat.py:96: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.
(pid=27903) Instructions for updating:
(pid=27903) non-resource variables are not supported in the long term
2021-05-24 13:52:15,630	WARNING util.py:53 -- Install gputil for GPU system monitoring.
2021-05-24 13:52:19,939	WARNING deprecation.py:33 -- DeprecationWarning: `SampleBatch.data[..]` has been deprecated. Use `SampleBatch[..]` instead. This will raise an error in the future!
  1 reward   9.00/ 23.43/ 70.00 len  23.43 saved tmp/ppo/cart/checkpoint_000001/checkpoint-1
Traceback (most recent call last):
  File "cartpole_lstm2.py", line 90, in <module>
    agent.workers.foreach_env_with_context(
AttributeError: 'WorkerSet' object has no attribute 'foreach_env_with_context'
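
It looks like WorkerSet.foreach_env_with_context() does not exist yet in the Ray version installed here. As an untested workaround sketch (assuming WorkerSet.foreach_worker() and RolloutWorker.foreach_env(), which Ray 1.x releases of this era do provide), the same per-worker filtering can be expressed as:

# Workaround sketch for Ray versions without foreach_env_with_context():
# iterate over rollout workers directly and render only the 1st remote
# worker's envs (the local worker has worker_index 0).
agent.workers.foreach_worker(
    lambda worker: worker.foreach_env(lambda env: env.render())
    if worker.worker_index == 1
    else None
)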