I also posted this on github yesterday, but nobody answered.
RLlib crashes with this env. Lowering the number of workers and envs/worker lets it run a while longer, but it still crashes.
The same thing happens with PPO, APPO, and IMPALA.
My hardware config is: amd 16-core, 128gb ram, rtx3060 12gb
Ubuntu 20 and Windows 11, gym 0.21.0
I have tried ray 1.9.2 through 1.13.0 and all of them crash; 1.9.2 and 1.11.0 seem to run somewhat longer than the newer versions before crashing.
This is the error
[nan, nan],
[nan, nan],
[nan, nan],
[nan, nan],
[nan, nan]], device='cuda:0', grad_fn=)
In tower 0 on device cuda:0
Traceback (most recent call last):
File "/home/usr1/Proj/ray_test.py", line 57, in
result = agent.train()
File "/home/usr1/Proj/.env/lib/python3.9/site-packages/ray/tune/trainable.py", line 360, in train
result = self.step()
File "/home/usr1/Proj/.env/lib/python3.9/site-packages/ray/rllib/agents/trainer.py", line 1136, in step
raise e
File "/home/usr1/Proj/.env/lib/python3.9/site-packages/ray/rllib/agents/trainer.py", line 1112, in step
step_attempt_results = self.step_attempt()
File "/home/usr1/Proj/.env/lib/python3.9/site-packages/ray/rllib/agents/trainer.py", line 1214, in step_attempt
step_results = self._exec_plan_or_training_iteration_fn()
File "/home/usr1/Proj/.env/lib/python3.9/site-packages/ray/rllib/agents/trainer.py", line 2211, in _exec_plan_or_training_iteration_fn
results = next(self.train_exec_impl)
File "/home/usr1/Proj/.env/lib/python3.9/site-packages/ray/util/iter.py", line 779, in next
return next(self.built_iterator)
File "/home/usr1/Proj/.env/lib/python3.9/site-packages/ray/util/iter.py", line 807, in apply_foreach
for item in it:
File "/home/usr1/Proj/.env/lib/python3.9/site-packages/ray/util/iter.py", line 807, in apply_foreach
for item in it:
File "/home/usr1/Proj/.env/lib/python3.9/site-packages/ray/util/iter.py", line 869, in apply_filter
for item in it:
File "/home/usr1/Proj/.env/lib/python3.9/site-packages/ray/util/iter.py", line 869, in apply_filter
for item in it:
File "/home/usr1/Proj/.env/lib/python3.9/site-packages/ray/util/iter.py", line 807, in apply_foreach
for item in it:
File "/home/usr1/Proj/.env/lib/python3.9/site-packages/ray/util/iter.py", line 869, in apply_filter
for item in it:
File "/home/usr1/Proj/.env/lib/python3.9/site-packages/ray/util/iter.py", line 1108, in build_union
item = next(it)
File "/home/usr1/Proj/.env/lib/python3.9/site-packages/ray/util/iter.py", line 779, in next
return next(self.built_iterator)
File "/home/usr1/Proj/.env/lib/python3.9/site-packages/ray/util/iter.py", line 807, in apply_foreach
for item in it:
File "/home/usr1/Proj/.env/lib/python3.9/site-packages/ray/util/iter.py", line 807, in apply_foreach
for item in it:
File "/home/usr1/Proj/.env/lib/python3.9/site-packages/ray/rllib/execution/concurrency_ops.py", line 143, in base_iterator
raise RuntimeError(
RuntimeError: Dequeue check() returned False! Exiting with Exception from Dequeue iterator.
Exception ignored in: <function RolloutWorker.del at 0x7f5ba552c3a0>
Traceback (most recent call last):
File "/home/usr1/Proj/.env/lib/python3.9/site-packages/ray/util/tracing/tracing_helper.py", line 461, in _resume_span
TypeError: 'NoneType' object is not callable
And this is the reproduction script
import gym
import numpy as np
import ray
from ray import tune
from ray.tune.logger import pretty_print
from ray.rllib.agents import impala
import random
class MyEnv(gym.Env):
    """Dummy environment used to reproduce the RLlib NaN crash.

    Observations are large (40500-dim) random vectors, actions are a
    2-dim continuous box, and rewards are small random integers. The
    episode never terminates on its own; the trainer's ``horizon``
    config is relied upon to end episodes.
    """

    def __init__(self, config=None):
        super().__init__()
        self.action_space = gym.spaces.Box(
            low=-1, high=1, shape=(2,), dtype=np.float32)
        # NOTE(review): an unbounded (+/- inf) observation space prevents
        # RLlib from normalizing observations; combined with the large
        # random inputs this is a plausible contributor to the NaN policy
        # outputs shown in the traceback above — TODO confirm.
        self.observation_space = gym.spaces.Box(
            low=-np.inf, high=np.inf, shape=(40500,), dtype=np.float32)

    def _next_observation(self):
        # Cast to float32 so the sample actually matches the declared
        # observation_space dtype (np.random.rand returns float64).
        return np.random.rand(40500).astype(np.float32)

    def _take_action(self, action):
        # NOTE(review): random.randrange(-1, 1) yields only -1 or 0,
        # never +1; random.uniform(-1, 1) was presumably intended.
        # Kept as-is to faithfully reproduce the reported behavior.
        self._reward = random.randrange(-1, 1)

    def step(self, action):
        """Execute one time step.

        Returns (obs, reward, done, info); ``done`` is always False, so
        episode termination depends entirely on the trainer's horizon.
        """
        self._reward = 0
        self._take_action(action)
        done = False
        obs = self._next_observation()
        return obs, self._reward, done, {}

    def reset(self):
        """Reset internal bookkeeping and return an initial observation."""
        self._reward = 0
        self.total_reward = 0
        self.visualization = None
        return self._next_observation()
if __name__ == "__main__":
ray.init()
cfg = impala.DEFAULT_CONFIG.copy()
cfg["env"] = MyEnv
cfg["num_gpus"] = 1
cfg["num_workers"] = 4
cfg["num_envs_per_worker"] = 4
cfg["framework"] = "torch"
cfg["horizon"] = 500
cfg["model"] = {
"fcnet_hiddens": [512, 512],
}
cfg["rollout_fragment_length"]=500
agent = impala.ImpalaTrainer(config=cfg, env=MyEnv)
i = 0
while True:
result = agent.train()
#result = tune.run("IMPALA", config=cfg, verbose=1)
if i % 35 == 0: # save every 100th training iteration
checkpoint_path = agent.save()
#checkpoint_path = tuner.save()
print(pretty_print(result))
print(checkpoint_path)
i += 1
When using the default fcnet_hiddens it runs a little longer, then it crashes.
- High: It blocks me from completing my task.