How severe does this issue affect your experience of using Ray?
- Medium: It causes significant difficulty in completing my task, but I can work around it.
I am running a derivative of the SimpleCorridor environment to train with DreamerV3:
This is the environment:
class SimpleCorridorReward(gym.Env):
    """A 1-D corridor in which the agent must walk right to reach the goal.

    Observation: current position as a ``(1,)`` float32 ``Box``.
    Actions: ``0`` = step left (floored at 0), ``1`` = step right.
    Reward: ``+100`` on reaching the end of the corridor, ``-1`` per step
    otherwise.
    """

    def __init__(self, config=None):
        config = config or {}
        # Goal position; configurable via the env config dict.
        self.end_pos = config.get("corridor_length", 10)
        self.cur_pos = 0
        self.action_space = Discrete(2)
        # Generous upper bound so a longer corridor (set later via
        # set_corridor_length) still fits until the space is rebuilt.
        self.observation_space = Box(low=0.0, high=999.0, shape=(1,), dtype=np.float32)
        self.observations = None

    def set_corridor_length(self, length):
        """Change the corridor length and rebuild the observation space."""
        self.end_pos = length
        self.observation_space = Box(low=0.0, high=self.end_pos, shape=(1,), dtype=np.float32)
        print("Updated corridor length to {}".format(length))

    def reset(self, *, seed=None, options=None):
        # Gymnasium API: seed the spaces' RNGs via the parent class.
        super().reset(seed=seed)
        self.cur_pos = 0.0
        self.observations = np.array([self.cur_pos], dtype=np.float32)
        return self.observations, {}

    def step(self, action):
        # Raise instead of assert: asserts are stripped under `python -O`.
        if action not in (0, 1):
            raise ValueError(f"Invalid action {action!r}; expected 0 or 1.")
        if action == 0 and self.cur_pos > 0:
            self.cur_pos -= 1.0
        elif action == 1:
            self.cur_pos += 1.0
        # Reaching the goal *terminates* the episode; it is NOT a time-limit
        # truncation. The original set both flags True, which misleads
        # algorithms that bootstrap values differently for truncated episodes.
        terminated = self.cur_pos >= self.end_pos
        truncated = False
        reward = 100 if terminated else -1
        self.observations = np.array([self.cur_pos], dtype=np.float32)
        return self.observations, reward, terminated, truncated, {}
import gymnasium as gym
from gymnasium.envs.registration import register
from ray import tune
from test_envs import SimpleCorridorReward
from ray.rllib.utils import check_env
from ray.tune.logger import pretty_print
from ray.rllib.algorithms.dreamerv3 import DreamerV3Config
#tune.register_env("SimpleCorridorReward", lambda config: SimpleCorridorReward())
register(
    id='SimpleCorridorReward-v0',
    entry_point='test_envs:SimpleCorridorReward',
)

env = gym.make("SimpleCorridorReward-v0")

# Stop once this many env steps have been sampled (checked in the loop below;
# `algo.train()` itself has no built-in `stop` argument — that is a Tune feature).
stop = {"timesteps_total": 100000}

config = DreamerV3Config()
config = config.environment("SimpleCorridorReward-v0")
config = config.training(
    model_size="XS",
    training_ratio=1,
    model={
        'batch_size_B': 1,
        'batch_length_T': 1,
        'horizon_H': 1,
        'gamma': 0.997,
        'model_size': 'XS',
    },
)
config = config.resources(num_learner_workers=0)

# Build the algorithm ONCE. The original rebuilt it inside the loop, which
# discards all learned weights on every iteration, so no training progress
# could ever accumulate.
algo = config.build()
try:
    while True:
        result = algo.train()
        print(pretty_print(result))
        # Manual stopping criterion equivalent to Tune's `stop=` dict.
        if result.get("timesteps_total", 0) >= stop["timesteps_total"]:
            break
finally:
    # Release the algorithm's actors/resources even if training errors out.
    algo.stop()
I can run the training iterations in a loop, but I cannot use the `stop` value to end training, e.g. with `results = tune.run("DreamerV3", config=config, stop=stop, verbose=0)`,
which returns:
TuneError: ('Trials did not complete', [DreamerV3_SimpleCorridorReward-v0_bbf54_00000])
Long story short, what is the recommended way to set a stopping point for algo.train() that is part of the config?
The examples I see in the documentation are all related to tune.run, I just want to train the agent not do any hyperparameter tuning.