Hey, thanks for your help.
Here is the full traceback from the run:
2021-07-22 09:29:43,535 ERROR trial_runner.py:748 -- Trial PPO_CartPole-v1_ed8d9_00000: Error processing event.
Traceback (most recent call last):
  File "/home/quessy/Dev/flyer/flyervenv/lib/python3.8/site-packages/ray/tune/trial_runner.py", line 718, in _process_trial
    results = self.trial_executor.fetch_result(trial)
  File "/home/quessy/Dev/flyer/flyervenv/lib/python3.8/site-packages/ray/tune/ray_trial_executor.py", line 688, in fetch_result
    result = ray.get(trial_future[0], timeout=DEFAULT_GET_TIMEOUT)
  File "/home/quessy/Dev/flyer/flyervenv/lib/python3.8/site-packages/ray/_private/client_mode_hook.py", line 62, in wrapper
    return func(*args, **kwargs)
  File "/home/quessy/Dev/flyer/flyervenv/lib/python3.8/site-packages/ray/worker.py", line 1495, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(ValueError): ray::PPO.train_buffered() (pid=223572, ip=192.168.50.7)
  File "python/ray/_raylet.pyx", line 501, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 451, in ray._raylet.execute_task.function_executor
  File "/home/quessy/Dev/flyer/flyervenv/lib/python3.8/site-packages/ray/_private/function_manager.py", line 563, in actor_method_executor
    return method(__ray_actor, *args, **kwargs)
  File "/home/quessy/Dev/flyer/flyervenv/lib/python3.8/site-packages/ray/tune/trainable.py", line 173, in train_buffered
    result = self.train()
  File "/home/quessy/Dev/flyer/flyervenv/lib/python3.8/site-packages/ray/rllib/agents/trainer.py", line 608, in train
    raise e
  File "/home/quessy/Dev/flyer/flyervenv/lib/python3.8/site-packages/ray/rllib/agents/trainer.py", line 594, in train
    result = Trainable.train(self)
  File "/home/quessy/Dev/flyer/flyervenv/lib/python3.8/site-packages/ray/tune/trainable.py", line 232, in train
    result = self.step()
  File "/home/quessy/Dev/flyer/flyervenv/lib/python3.8/site-packages/ray/rllib/agents/trainer_template.py", line 173, in step
    res = next(self.train_exec_impl)
  File "/home/quessy/Dev/flyer/flyervenv/lib/python3.8/site-packages/ray/util/iter.py", line 756, in __next__
    return next(self.built_iterator)
  File "/home/quessy/Dev/flyer/flyervenv/lib/python3.8/site-packages/ray/util/iter.py", line 783, in apply_foreach
    for item in it:
  File "/home/quessy/Dev/flyer/flyervenv/lib/python3.8/site-packages/ray/util/iter.py", line 791, in apply_foreach
    result = fn(item)
  File "/home/quessy/Dev/flyer/flyervenv/lib/python3.8/site-packages/ray/rllib/execution/metric_ops.py", line 88, in __call__
    res = summarize_episodes(episodes, orig_episodes)
  File "/home/quessy/Dev/flyer/flyervenv/lib/python3.8/site-packages/ray/rllib/evaluation/metrics.py", line 176, in summarize_episodes
    custom_metrics[k + "_mean"] = np.mean(filt)
  File "<__array_function__ internals>", line 5, in mean
  File "/home/quessy/Dev/flyer/flyervenv/lib/python3.8/site-packages/numpy/core/fromnumeric.py", line 3372, in mean
    return _methods._mean(a, axis=axis, dtype=dtype,
  File "/home/quessy/Dev/flyer/flyervenv/lib/python3.8/site-packages/numpy/core/_methods.py", line 144, in _mean
    arr = asanyarray(a)
  File "/home/quessy/Dev/flyer/flyervenv/lib/python3.8/site-packages/numpy/core/_asarray.py", line 136, in asanyarray
    return array(a, dtype, copy=False, order=order, subok=True)
ValueError: could not broadcast input array from shape (12,3,400,600) into shape (1)
Result for PPO_CartPole-v1_ed8d9_00000:
{}
== Status ==
Memory usage on this node: 25.6/125.7 GiB
Using FIFO scheduling algorithm.
Resources requested: 0/64 CPUs, 0/1 GPUs, 0.0/74.57 GiB heap, 0.0/35.95 GiB objects (0.0/1.0 accelerator_type:RTX)
Result logdir: /home/quessy/Dev/flyer/flyer/ray_checkpoints/PPO
Number of trials: 1/1 (1 ERROR)
+-----------------------------+----------+-------+
| Trial name                  | status   | loc   |
|-----------------------------+----------+-------|
| PPO_CartPole-v1_ed8d9_00000 | ERROR    |       |
+-----------------------------+----------+-------+
Number of errored trials: 1
+-----------------------------+--------------+--------------------------------------------------------------------------------------------------------------+
| Trial name                  |   # failures | error file                                                                                                   |
|-----------------------------+--------------+--------------------------------------------------------------------------------------------------------------|
| PPO_CartPole-v1_ed8d9_00000 |            1 | /home/quessy/Dev/flyer/flyer/ray_checkpoints/PPO/PPO_CartPole-v1_ed8d9_00000_0_2021-07-22_09-29-27/error.txt |
+-----------------------------+--------------+--------------------------------------------------------------------------------------------------------------+
Traceback (most recent call last):
  File "ray_rllib.py", line 180, in <module>
    drl.train()
  File "ray_rllib.py", line 70, in train
    analysis = tune.run(
  File "/home/quessy/Dev/flyer/flyervenv/lib/python3.8/site-packages/ray/tune/tune.py", line 543, in run
    raise TuneError("Trials did not complete", incomplete_trials)
ray.tune.error.TuneError: ('Trials did not complete', [PPO_CartPole-v1_ed8d9_00000])
I tried episode.custom_metrics.update({'video': video}) as suggested, but I still get the same broadcast error. I can get images out as you suggested; I'm just not sure exactly where the broadcast error comes from.
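My best guess from the traceback is that summarize_episodes in ray/rllib/evaluation/metrics.py gathers the per-episode values of every key in custom_metrics and reduces them with np.mean to build a "<key>_mean" entry, so the values are expected to be per-episode scalars, and the (12, 3, 400, 600) video array I'm storing can't be averaged like that. A minimal sketch of my reading of it (not the actual RLlib code, just the shape of the problem):

import numpy as np

# What I understand RLlib to be doing with each custom metric:
# collect one value per finished episode, then average them.
per_episode_values = [0.7, 0.4, 0.9]   # scalars: np.mean is happy
print(np.mean(per_episode_values))     # -> 0.6666...

# My callback instead stores a whole (steps, 3, 400, 600) array per episode;
# once episodes have different lengths the collected list is ragged, which I
# suspect is what trips up np.asanyarray / np.mean with the broadcast error.

Does that sound right?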
If it's useful, here is the simpler example I have been using to try to get the program running:
import os

import numpy as np
from PIL import Image
from ray import tune
from ray.rllib.agents.callbacks import DefaultCallbacks
from ray.tune.logger import TBXLogger


class RAYDRL:
    """
    Use ray to undertake DRL on a given environment configuration
    """

    def __init__(self,
                 env: str = 'CartPole-v0',
                 algo: str = "PPO",
                 env_config: dict = None,
                 model: dict = {},
                 stop: dict = {"episode_reward_mean": 0.35},
                 num_gpus: int = 1,
                 num_workers: int = 16,
                 lr_space: list = [0.01, 0.001, 0.0001]
                 ):
        """
        Constructor for ray Deep Reinforcement Learning (DRL) library

        :param env: single gym.Env subclass
        :param env_config: the configuration file for the flyer gym environment
        :param algo: algorithm to use, passed by name as a string
        :param model: model config dict passed through to RLlib
        :param stop: stopping condition for the algorithm
        :param num_gpus: number of GPUs to use
        :param num_workers: number of CPU workers to use
        :param lr_space: list of learning rates for grid_search
        """
        self.algo = algo
        self.stop = stop
        self.env = env
        self.config = {
            "framework": "torch",
            "log_level": "INFO",
            "env": env,
            "env_config": env_config,
            "model": model,
            "num_gpus": num_gpus,
            "num_workers": num_workers,
            "callbacks": VideoCallback,
            # "lr": tune.grid_search(lr_space)
        }
        self.checkpoint_path = None

    def train(self):
        """
        train algorithm with ray
        """
        path = os.path.dirname(__file__)
        path = os.path.join(path, 'ray_checkpoints')
        if not os.path.isdir(path):
            os.mkdir(path)
        checkpoint_path = path
        # self.config['record_env'] = path
        # self.config['render_env'] = True
        analysis = tune.run(
            self.algo,
            stop=self.stop,
            config=self.config,
            restore=self.checkpoint_path,
            local_dir=checkpoint_path,
            checkpoint_at_end=True,
            loggers=[TBXLogger]
        )
        self.checkpoint_path = analysis.get_last_checkpoint()


class VideoCallback(DefaultCallbacks):

    def on_episode_step(self, *, worker, base_env, episode, env_index, **kwargs):
        img = base_env.get_unwrapped()[0].render(mode='rgb_array')
        if episode.user_data.get('video', False):
            episode.user_data['video'].append(img)
        else:
            episode.user_data['video'] = [img]

    def on_episode_end(self, *, worker, base_env, policies, episode, env_index, **kwargs):
        video = np.stack(episode.user_data["video"])
        imgs = [Image.fromarray(img) for img in episode.user_data["video"]]
        path = os.path.dirname(__file__)
        path = os.path.join(path, 'ray_checkpoints')
        path = os.path.join(path, 'video')
        if not os.path.isdir(path):
            os.mkdir(path)
        # duration is PIL's per-frame display time in milliseconds
        imgs[0].save(os.path.join(path, 'render.gif'), save_all=True,
                     append_images=imgs[1:], duration=0.001, loop=0)
        # print(video.shape)
        # video = np.expand_dims(video, axis=0)
        video = np.moveaxis(video, 3, 1)  # (steps, H, W, 3) -> (steps, 3, H, W)
        # video = np.expand_dims(video, axis=0)
        # print(video.shape)
        # episode.custom_metrics.update({'video': video})
        episode.custom_metrics['video'] = video
        # episode.user_data['video'] = None


if __name__ == '__main__':
    drl = RAYDRL(env='CartPole-v1', stop={"episode_reward_mean": 200})
    drl.train()
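In case it clarifies what I'm aiming for, the variant I plan to try next keeps custom_metrics strictly scalar and only writes the rendered frames to disk, so summarize_episodes never sees the video array. This is a sketch only (the 'video_frames' metric name and the per-episode filename are just things I made up, and I haven't run it inside the full setup yet):

import os

from PIL import Image
from ray.rllib.agents.callbacks import DefaultCallbacks


class ScalarOnlyVideoCallback(DefaultCallbacks):
    """Variant: frames go to disk, custom_metrics stays scalar."""

    def on_episode_step(self, *, worker, base_env, episode, env_index, **kwargs):
        img = base_env.get_unwrapped()[0].render(mode='rgb_array')
        episode.user_data.setdefault('video', []).append(img)

    def on_episode_end(self, *, worker, base_env, policies, episode, env_index, **kwargs):
        frames = episode.user_data.pop('video', [])
        if not frames:
            return
        out_dir = os.path.join(os.path.dirname(__file__), 'ray_checkpoints', 'video')
        os.makedirs(out_dir, exist_ok=True)
        imgs = [Image.fromarray(f) for f in frames]
        # duration is per-frame display time in milliseconds (40 ms ~ 25 fps)
        imgs[0].save(os.path.join(out_dir, f'render_{episode.episode_id}.gif'),
                     save_all=True, append_images=imgs[1:], duration=40, loop=0)
        # Only a scalar ends up in custom_metrics, so np.mean in
        # summarize_episodes has something it can actually average.
        episode.custom_metrics['video_frames'] = float(len(frames))

Would that be a reasonable way to get videos out of a callback, or is there a better hook for media?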