Hi!
I am trying to use the Behavioural Cloning algorithm with a custom environment, as you can see here:
from gym_jsbsim.environments.environment import GuidanceEnvContinuos
import ray
from gym_jsbsim.wrappers.normalise_observation import NormalizeObservation
from ray.rllib.agents.ddpg import TD3Trainer, td3
from ray.tune import register_env
import datetime
import os
from ray import tune
from gym_jsbsim.aircraft import Aircraft, cessna172P
from ray.rllib.agents.marwil.bc import BCTrainer, BC_DEFAULT_CONFIG
from gym_jsbsim.tests.CustomCallbacks import CustomCallbacks
dir_path = os.path.dirname(os.path.realpath(__file__))
def env_creator(config=None):
    """Create the continuous guidance environment.

    Args:
        config: Environment configuration passed straight through to
            ``GuidanceEnvContinuos``; ``None`` when omitted.

    Returns:
        A new ``GuidanceEnvContinuos`` instance.
    """
    env = GuidanceEnvContinuos(config)
    return env
def my_train_fn(config, reporter):
    """Run 100 behavioural-cloning training iterations as a Tune trainable.

    Args:
        config: Trainer configuration dict handed over by ``tune.run``.
        reporter: Tune reporting callback; it is called with the result
            dict of every iteration so Tune can track progress.
    """
    # The script builds its config from BC_DEFAULT_CONFIG and requests
    # BCTrainer resources, so the matching trainer is BCTrainer (the
    # original instantiated TD3Trainer here).
    agent = BCTrainer(config=config, env="guidance-continuous-v0")
    try:
        for _ in range(100):
            result = agent.train()
            reporter(**result)
    finally:
        # Release the trainer's workers even when an iteration raises.
        agent.stop()
if __name__ == "__main__":
    ray.init()

    # Script-specific settings layered on top of the stock BC defaults.
    # NOTE(review): the reported KeyError: 'advantages' is raised inside the
    # MARWIL loss; check how the BC defaults of the installed Ray version
    # handle postprocessing of offline input.
    overrides = {
        "framework": "torch",
        "model": {
            "vf_share_layers": False,
            "fcnet_hiddens": [32, 16],
        },
        "input": f"{dir_path}/out.json",
        "replay_buffer_size": 100,
        "env_config": {
            # some env config
        },
    }

    # Shallow merge: keys in `overrides` replace the default entries.
    config = dict(BC_DEFAULT_CONFIG)
    config.update(overrides)

    register_env(
        "guidance-continuous-v0",
        lambda env_config: env_creator(env_config),
    )

    # Ask the BC trainer which resources one trial needs for this config.
    resource_request = BCTrainer.default_resource_request(config)
    trial_resources = resource_request.to_json()

    # start training
    tune.run(
        my_train_fn,
        name="bc",
        resources_per_trial=trial_resources,
        config=config,
    )
However, I do get the following error:
(pid=28265) 2021-05-19 22:08:49,574 ERROR function_runner.py:254 -- Runner Thread raised error.
(pid=28265) Traceback (most recent call last):
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/tune/function_runner.py", line 248, in run
(pid=28265) self._entrypoint()
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/tune/function_runner.py", line 316, in entrypoint
(pid=28265) self._status_reporter.get_checkpoint())
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/tune/function_runner.py", line 576, in _trainable_func
(pid=28265) output = fn()
(pid=28265) File "bc.py", line 29, in my_train_fn
(pid=28265) result = agent.train()
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/agents/trainer.py", line 529, in train
(pid=28265) raise e
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/agents/trainer.py", line 515, in train
(pid=28265) result = Trainable.train(self)
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/tune/trainable.py", line 226, in train
(pid=28265) result = self.step()
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/agents/trainer_template.py", line 148, in step
(pid=28265) res = next(self.train_exec_impl)
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/util/iter.py", line 756, in __next__
(pid=28265) return next(self.built_iterator)
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/util/iter.py", line 783, in apply_foreach
(pid=28265) for item in it:
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/util/iter.py", line 843, in apply_filter
(pid=28265) for item in it:
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/util/iter.py", line 843, in apply_filter
(pid=28265) for item in it:
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/util/iter.py", line 783, in apply_foreach
(pid=28265) for item in it:
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/util/iter.py", line 843, in apply_filter
(pid=28265) for item in it:
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/util/iter.py", line 1075, in build_union
(pid=28265) item = next(it)
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/util/iter.py", line 756, in __next__
(pid=28265) return next(self.built_iterator)
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/util/iter.py", line 783, in apply_foreach
(pid=28265) for item in it:
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/util/iter.py", line 791, in apply_foreach
(pid=28265) result = fn(item)
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/execution/train_ops.py", line 69, in __call__
(pid=28265) info = self.workers.local_worker().learn_on_batch(batch)
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/evaluation/rollout_worker.py", line 860, in learn_on_batch
(pid=28265) .learn_on_batch(samples)
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/utils/threading.py", line 21, in wrapper
(pid=28265) return func(self, *a, **k)
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/policy/torch_policy.py", line 359, in learn_on_batch
(pid=28265) grads, fetches = self.compute_gradients(postprocessed_batch)
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/utils/threading.py", line 21, in wrapper
(pid=28265) return func(self, *a, **k)
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/policy/torch_policy.py", line 391, in compute_gradients
(pid=28265) self._loss(self, self.model, self.dist_class, train_batch))
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/agents/marwil/marwil_torch_policy.py", line 48, in marwil_loss
(pid=28265) advantages = train_batch[Postprocessing.ADVANTAGES]
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/utils/tracking_dict.py", line 35, in __getitem__
(pid=28265) value = dict.__getitem__(self, key)
(pid=28265) KeyError: 'advantages'
(pid=28265) Exception in thread Thread-2:
(pid=28265) Traceback (most recent call last):
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/threading.py", line 917, in _bootstrap_inner
(pid=28265) self.run()
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/tune/function_runner.py", line 267, in run
(pid=28265) raise e
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/tune/function_runner.py", line 248, in run
(pid=28265) self._entrypoint()
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/tune/function_runner.py", line 316, in entrypoint
(pid=28265) self._status_reporter.get_checkpoint())
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/tune/function_runner.py", line 576, in _trainable_func
(pid=28265) output = fn()
(pid=28265) File "bc.py", line 29, in my_train_fn
(pid=28265) result = agent.train()
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/agents/trainer.py", line 529, in train
(pid=28265) raise e
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/agents/trainer.py", line 515, in train
(pid=28265) result = Trainable.train(self)
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/tune/trainable.py", line 226, in train
(pid=28265) result = self.step()
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/agents/trainer_template.py", line 148, in step
(pid=28265) res = next(self.train_exec_impl)
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/util/iter.py", line 756, in __next__
(pid=28265) return next(self.built_iterator)
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/util/iter.py", line 783, in apply_foreach
(pid=28265) for item in it:
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/util/iter.py", line 843, in apply_filter
(pid=28265) for item in it:
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/util/iter.py", line 843, in apply_filter
(pid=28265) for item in it:
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/util/iter.py", line 783, in apply_foreach
(pid=28265) for item in it:
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/util/iter.py", line 843, in apply_filter
(pid=28265) for item in it:
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/util/iter.py", line 1075, in build_union
(pid=28265) item = next(it)
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/util/iter.py", line 756, in __next__
(pid=28265) return next(self.built_iterator)
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/util/iter.py", line 783, in apply_foreach
(pid=28265) for item in it:
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/util/iter.py", line 791, in apply_foreach
(pid=28265) result = fn(item)
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/execution/train_ops.py", line 69, in __call__
(pid=28265) info = self.workers.local_worker().learn_on_batch(batch)
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/evaluation/rollout_worker.py", line 860, in learn_on_batch
(pid=28265) .learn_on_batch(samples)
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/utils/threading.py", line 21, in wrapper
(pid=28265) return func(self, *a, **k)
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/policy/torch_policy.py", line 359, in learn_on_batch
(pid=28265) grads, fetches = self.compute_gradients(postprocessed_batch)
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/utils/threading.py", line 21, in wrapper
(pid=28265) return func(self, *a, **k)
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/policy/torch_policy.py", line 391, in compute_gradients
(pid=28265) self._loss(self, self.model, self.dist_class, train_batch))
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/agents/marwil/marwil_torch_policy.py", line 48, in marwil_loss
(pid=28265) advantages = train_batch[Postprocessing.ADVANTAGES]
(pid=28265) File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/utils/tracking_dict.py", line 35, in __getitem__
(pid=28265) value = dict.__getitem__(self, key)
(pid=28265) KeyError: 'advantages'
(pid=28265)
2021-05-19 22:08:49,738 ERROR trial_runner.py:616 -- Trial my_train_fn_None_0341f_00000: Error processing event.
Traceback (most recent call last):
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/tune/trial_runner.py", line 586, in _process_trial
results = self.trial_executor.fetch_result(trial)
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/tune/ray_trial_executor.py", line 609, in fetch_result
result = ray.get(trial_future[0], timeout=DEFAULT_GET_TIMEOUT)
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/_private/client_mode_hook.py", line 47, in wrapper
return func(*args, **kwargs)
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/worker.py", line 1456, in get
raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(TuneError): ray::ImplicitFunc.train_buffered() (pid=28265, ip=192.168.1.238)
File "python/ray/_raylet.pyx", line 480, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 432, in ray._raylet.execute_task.function_executor
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/tune/trainable.py", line 167, in train_buffered
result = self.train()
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/tune/trainable.py", line 226, in train
result = self.step()
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/tune/function_runner.py", line 366, in step
self._report_thread_runner_error(block=True)
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/tune/function_runner.py", line 513, in _report_thread_runner_error
("Trial raised an exception. Traceback:\n{}".format(err_tb_str)
ray.tune.error.TuneError: Trial raised an exception. Traceback:
ray::ImplicitFunc.train_buffered() (pid=28265, ip=192.168.1.238)
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/tune/function_runner.py", line 248, in run
self._entrypoint()
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/tune/function_runner.py", line 316, in entrypoint
self._status_reporter.get_checkpoint())
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/tune/function_runner.py", line 576, in _trainable_func
output = fn()
File "bc.py", line 29, in my_train_fn
result = agent.train()
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/agents/trainer.py", line 529, in train
raise e
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/agents/trainer.py", line 515, in train
result = Trainable.train(self)
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/tune/trainable.py", line 226, in train
result = self.step()
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/agents/trainer_template.py", line 148, in step
res = next(self.train_exec_impl)
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/util/iter.py", line 756, in __next__
return next(self.built_iterator)
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/util/iter.py", line 783, in apply_foreach
for item in it:
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/util/iter.py", line 843, in apply_filter
for item in it:
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/util/iter.py", line 843, in apply_filter
for item in it:
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/util/iter.py", line 783, in apply_foreach
for item in it:
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/util/iter.py", line 843, in apply_filter
for item in it:
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/util/iter.py", line 1075, in build_union
item = next(it)
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/util/iter.py", line 756, in __next__
return next(self.built_iterator)
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/util/iter.py", line 783, in apply_foreach
for item in it:
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/util/iter.py", line 791, in apply_foreach
result = fn(item)
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/execution/train_ops.py", line 69, in __call__
info = self.workers.local_worker().learn_on_batch(batch)
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/evaluation/rollout_worker.py", line 860, in learn_on_batch
.learn_on_batch(samples)
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/utils/threading.py", line 21, in wrapper
return func(self, *a, **k)
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/policy/torch_policy.py", line 359, in learn_on_batch
grads, fetches = self.compute_gradients(postprocessed_batch)
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/utils/threading.py", line 21, in wrapper
return func(self, *a, **k)
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/policy/torch_policy.py", line 391, in compute_gradients
self._loss(self, self.model, self.dist_class, train_batch))
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/agents/marwil/marwil_torch_policy.py", line 48, in marwil_loss
advantages = train_batch[Postprocessing.ADVANTAGES]
File "/Users/walter/.pyenv/versions/3.7.0/lib/python3.7/site-packages/ray/rllib/utils/tracking_dict.py", line 35, in __getitem__
value = dict.__getitem__(self, key)
KeyError: 'advantages'
Result for my_train_fn_None_0341f_00000:
My JSON file, which I've generated manually, looks like this (shortened, of course):
{
"type": "SampleBatch",
"action_logp": [0.0, 0.0, 0.0, ...], # What is action_logp by the way? And is 0 for all okay?
"actions": [[-1.0, -1.0], [-1.0, -1.0], [-1.0, -1.0], ...],
"obs": "BCJNGGhAzZUAAAAAAADsH4kAAGGABZXClQABAPMZjBJudW1weS..., ...",
"rewards": [-2.5538009330660003, -2.555342497778234, -2.5568629691827502, ...],
"dones": [false, false, false, ...],
"infos": ['is_heading_correct': True, 'is_aircraft_altitude_to_low': False, "test": True, ...],
"eps_id": [0, 0, 0, ...]
}
To create the SampleBatch, I collected all actions, observations, etc. from my expert and did something like this:
...
# Bundle the expert's recorded transitions into one SampleBatch.
# The input lists (actions, observations, ...) and `writer` are created
# outside this excerpt.
samples = SampleBatch({
# All-zero log-probabilities: a log-probability of 0.0 corresponds to a
# probability of 1.0 for every recorded action.
"action_logp": np.zeros(len(actions)),
"actions": np.array(actions),
"obs": np.array(observations),
"new_obs": np.array(new_observations),
"rewards": np.array(rewards),
"dones": np.array(dones),
# The info entries are converted to strings before being stored.
"infos": np.array(infos).astype(str),
"eps_id": eps_ids,
"t": np.array(ts)
})
# Hand the assembled batch to the writer.
writer.write(samples)
Any idea what I'm missing?