Hello, I have just taken the custom_experiment.py example and made a few adjustments for torch and an LSTM:

```python
"""Example of a custom experiment wrapped around an RLlib trainer."""
import argparse

import numpy as np

import ray
from ray import tune
from ray.rllib.agents import ppo

parser = argparse.ArgumentParser()
parser.add_argument("--train-iterations", type=int, default=10)


def experiment(config):
    iterations = config.pop("train-iterations")
    train_agent = ppo.PPOTrainer(config=config, env="CartPole-v0")
    checkpoint = None
    train_results = {}

    # Train
    for i in range(iterations):
        train_results = train_agent.train()
        if i % 2 == 0 or i == iterations - 1:
            checkpoint = train_agent.save(tune.get_trial_dir())
        tune.report(**train_results)
    train_agent.stop()

    # Manual Eval
    config["num_workers"] = 0
    eval_agent = ppo.PPOTrainer(config=config, env="CartPole-v0")
    eval_agent.restore(checkpoint)
    env = eval_agent.workers.local_worker().env

    lstm_cell_size = config["model"]["lstm_cell_size"]
    init_state = state = [np.zeros([lstm_cell_size], np.float32) for _ in range(2)]
    prev_a = 0
    prev_r = 0.0

    obs = env.reset()
    done = False
    eval_results = {"eval_reward": 0, "eval_eps_length": 0}
    state = init_state
    while not done:
        action, state, _ = eval_agent.compute_single_action(obs, state, prev_a, prev_r)
        obs, reward, done, info = env.step(action)
        prev_a = action
        prev_r = reward
        eval_results["eval_reward"] += reward
        eval_results["eval_eps_length"] += 1
    results = {**train_results, **eval_results}
    tune.report(results)


if __name__ == "__main__":
    args = parser.parse_args()

    ray.init(num_cpus=3)
    config = ppo.DEFAULT_CONFIG.copy()
    config["train-iterations"] = args.train_iterations
    config["framework"] = "torch"
    config["model"]["use_lstm"] = True
    config["model"]["lstm_cell_size"] = 256
    config["model"]["lstm_use_prev_action"] = True
    config["model"]["lstm_use_prev_reward"] = True
    config["env"] = "CartPole-v0"

    tune.run(
        experiment,
        config=config,
        resources_per_trial=ppo.PPOTrainer.default_resource_request(config))
```
This works fine. Nice!
This uses “CartPole-v0”, so I tried to replace it with StatelessCartPole:

```python
"""Example of a custom experiment wrapped around an RLlib trainer."""
import argparse

import numpy as np

import ray
from ray import tune
from ray.rllib.agents import ppo
from ray.rllib.examples.env.stateless_cartpole import StatelessCartPole

parser = argparse.ArgumentParser()
parser.add_argument("--train-iterations", type=int, default=10)


def experiment(config):
    iterations = config.pop("train-iterations")
    train_agent = ppo.PPOTrainer(config=config, env=StatelessCartPole)
    checkpoint = None
    train_results = {}

    # Train
    for i in range(iterations):
        train_results = train_agent.train()
        if i % 2 == 0 or i == iterations - 1:
            checkpoint = train_agent.save(tune.get_trial_dir())
        tune.report(**train_results)
    train_agent.stop()

    # Manual Eval
    config["num_workers"] = 0
    eval_agent = ppo.PPOTrainer(config=config, env=StatelessCartPole)
    eval_agent.restore(checkpoint)
    env = eval_agent.workers.local_worker().env

    lstm_cell_size = config["model"]["lstm_cell_size"]
    init_state = state = [np.zeros([lstm_cell_size], np.float32) for _ in range(2)]
    prev_a = 0
    prev_r = 0.0

    obs = env.reset()
    done = False
    eval_results = {"eval_reward": 0, "eval_eps_length": 0}
    state = init_state
    while not done:
        action, state, _ = eval_agent.compute_single_action(obs, state, prev_a, prev_r)
        obs, reward, done, info = env.step(action)
        prev_a = action
        prev_r = reward
        eval_results["eval_reward"] += reward
        eval_results["eval_eps_length"] += 1
    results = {**train_results, **eval_results}
    tune.report(results)


if __name__ == "__main__":
    args = parser.parse_args()

    ray.init(num_cpus=3)
    config = ppo.DEFAULT_CONFIG.copy()
    config["train-iterations"] = args.train_iterations
    config["framework"] = "torch"
    config["model"]["use_lstm"] = True
    config["model"]["lstm_cell_size"] = 256
    config["model"]["lstm_use_prev_action"] = True
    config["model"]["lstm_use_prev_reward"] = True
    config["env"] = StatelessCartPole

    tune.run(
        experiment,
        config=config,
        resources_per_trial=ppo.PPOTrainer.default_resource_request(config))
```
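(For context: as far as I understand, StatelessCartPole is just CartPole with the two velocity entries removed from the observation, which is why the LSTM is needed at all. Roughly the idea is something like the sketch below; this is only my paraphrase for illustration, not the actual RLlib source, and MyStatelessCartPole is a made-up name.)

```python
import gym
import numpy as np


class MyStatelessCartPole(gym.ObservationWrapper):
    """Rough paraphrase of the StatelessCartPole idea (not the RLlib code)."""

    def __init__(self, config=None):
        super().__init__(gym.make("CartPole-v0"))
        # Keep only the bounds for cart position (index 0) and pole angle (index 2).
        high = np.array(
            [self.env.observation_space.high[0], self.env.observation_space.high[2]],
            dtype=np.float32)
        self.observation_space = gym.spaces.Box(-high, high, dtype=np.float32)

    def observation(self, obs):
        # Drop the two velocity components; the agent has to infer them over time.
        return np.array([obs[0], obs[2]], dtype=np.float32)
```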
This code fails with the following error:

```
  File "/srv/local_projects/docker/ray/examples/custom/custom_experiment2.py", line 46, in experiment
    action, state, _ = eval_agent.compute_single_action(obs, state, prev_a, prev_r)
TypeError: compute_single_action() takes from 1 to 3 positional arguments but 5 were given
```
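The only idea I have so far: maybe compute_single_action now expects the previous action/reward as keyword arguments (the error says it "takes from 1 to 3 positional arguments"), roughly like the sketch below. I have not verified this, and the exact keyword names prev_action / prev_reward are just my guess:

```python
# Unverified guess: replace the call inside the eval loop above with keyword arguments.
# The keyword names prev_action / prev_reward are my assumption, not confirmed.
action, state, _ = eval_agent.compute_single_action(
    obs,
    state,
    prev_action=prev_a,
    prev_reward=prev_r,
)
```

If that is the right direction, I still do not understand why the same positional call went through in the CartPole-v0 version above.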
Any good ideas?