Thank you @sven1977 for your hint!
I have just tried the following code:
import argparse
import os
import shutil
import gym
from ray.rllib.examples.env.stateless_cartpole import StatelessCartPole
from ray.rllib.utils.test_utils import check_learning_achieved
import ray.rllib.agents.ppo as ppo
parser = argparse.ArgumentParser()
parser.add_argument("--run", type=str, default="PPO")
parser.add_argument("--num-cpus", type=int, default=0)
parser.add_argument("--framework", choices=["tf2", "tf", "tfe", "torch"], default="tf")
parser.add_argument("--as-test", action="store_true")
parser.add_argument("--use-prev-action", action="store_true")
parser.add_argument("--use-prev-reward", action="store_true")
parser.add_argument("--stop-iters", type=int, default=200)
parser.add_argument("--stop-timesteps", type=int, default=100000)
parser.add_argument("--stop-reward", type=float, default=150.0)
if __name__ == "__main__":
    import ray
    from ray import tune

    args = parser.parse_args()
    ray.init(num_cpus=args.num_cpus or None)

    configs = {
        "PPO": {
            "num_sgd_iter": 5,
            "model": {
                "vf_share_layers": True,
            },
            "vf_loss_coeff": 0.0001,
        },
        "IMPALA": {
            "num_workers": 2,
            "num_gpus": 0,
            "vf_loss_coeff": 0.01,
        },
    }

    config = dict(
        configs[args.run],
        **{
            "env": StatelessCartPole,
            # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
            "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
            "model": {
                "use_lstm": True,
                "lstm_cell_size": 256,
                "lstm_use_prev_action": args.use_prev_action,
                "lstm_use_prev_reward": args.use_prev_reward,
            },
            "framework": args.framework,
            # Run with tracing enabled for tfe/tf2.
            "eager_tracing": args.framework in ["tfe", "tf2"],
        })

    CHECKPOINT_ROOT = "tmp/ppo/cart"
    shutil.rmtree(CHECKPOINT_ROOT, ignore_errors=True, onerror=None)
    ray_results = os.getenv("HOME") + "/ray_results/"
    shutil.rmtree(ray_results, ignore_errors=True, onerror=None)

    SELECT_ENV = "CartPole-v1"
    config = ppo.DEFAULT_CONFIG.copy()
    config["log_level"] = "WARN"
    agent = ppo.PPOTrainer(config, env=SELECT_ENV)

    N_ITER = 4
    s = "{:3d} reward {:6.2f}/{:6.2f}/{:6.2f} len {:6.2f} saved {}"

    for n in range(N_ITER):
        result = agent.train()
        file_name = agent.save(CHECKPOINT_ROOT)
        print(s.format(
            n + 1,
            result["episode_reward_min"],
            result["episode_reward_mean"],
            result["episode_reward_max"],
            result["episode_len_mean"],
            file_name
        ))
        agent.workers.foreach_env_with_context(
            lambda base_env, ctx: base_env.render() if ctx.worker_index == 1 else None
        )

    # instantiate env class
    env = gym.make(SELECT_ENV)

    # run until episode ends
    episode_reward = 0
    done = False
    obs = env.reset()
    while not done:
        action = agent.compute_action(obs)
        obs, reward, done, info = env.step(action)
        episode_reward += reward
    print("reward: {}".format(episode_reward))

    ray.shutdown()
but I am getting the following error:
WARNING:tensorflow:From /home/condauser/.local/lib/python3.8/site-packages/tensorflow/python/compat/v2_compat.py:96: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.
Instructions for updating:
non-resource variables are not supported in the long term
2021-05-24 13:52:08,391 INFO services.py:1267 -- View the Ray dashboard at http://127.0.0.1:8265
2021-05-24 13:52:08,395 WARNING services.py:1716 -- WARNING: The object store is using /tmp instead of /dev/shm because /dev/shm has only 67108864 bytes available. This will harm performance! You may be able to free up space by deleting files in /dev/shm. If you are inside a Docker container, you can increase /dev/shm size by passing '--shm-size=10.24gb' to 'docker run' (or add it to the run_options list in a Ray cluster config). Make sure to set this to more than 30% of available RAM.
2021-05-24 13:52:10,194 INFO trainer.py:669 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
2021-05-24 13:52:10,194 INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
(pid=27899) WARNING:tensorflow:From /home/condauser/.local/lib/python3.8/site-packages/tensorflow/python/compat/v2_compat.py:96: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.
(pid=27899) Instructions for updating:
(pid=27899) non-resource variables are not supported in the long term
(pid=27903) WARNING:tensorflow:From /home/condauser/.local/lib/python3.8/site-packages/tensorflow/python/compat/v2_compat.py:96: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.
(pid=27903) Instructions for updating:
(pid=27903) non-resource variables are not supported in the long term
2021-05-24 13:52:15,630 WARNING util.py:53 -- Install gputil for GPU system monitoring.
2021-05-24 13:52:19,939 WARNING deprecation.py:33 -- DeprecationWarning: `SampleBatch.data[..]` has been deprecated. Use `SampleBatch[..]` instead. This will raise an error in the future!
1 reward 9.00/ 23.43/ 70.00 len 23.43 saved tmp/ppo/cart/checkpoint_000001/checkpoint-1
Traceback (most recent call last):
  File "cartpole_lstm2.py", line 90, in <module>
    agent.workers.foreach_env_with_context(
AttributeError: 'WorkerSet' object has no attribute 'foreach_env_with_context'
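For reference, since the AttributeError suggests the installed RLlib version simply does not expose that method on the WorkerSet, a quick check of which foreach_* helpers are actually available in my installation would be plain Python introspection (this is just a diagnostic sketch using the agent object from the script above, nothing RLlib-specific assumed):

# List the foreach_* helpers that this RLlib version's WorkerSet actually provides.
# `agent` is the PPOTrainer built above; dir() is standard Python introspection.
available = [name for name in dir(agent.workers) if name.startswith("foreach")]
print("WorkerSet foreach_* methods:", available)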