How severely does this issue affect your experience of using Ray?
- Medium: It contributes to significant difficulty in completing my task, but I can work around it.
Ray’s `tune.run()` is running endlessly. Can I force it to save a checkpoint and begin a new iteration after a certain number of steps? My current call is shown below, followed by a sketch of a step-based stopping rule and of how the saved checkpoints could be retrieved.
```python
import time

from ray import tune

# Assumes LR, GAMMA, LAMBDA, VF_LOSS_COEFF, ENTROPY_COEFF, env_config_training,
# env_config_evaluation, checkpoint_metric, search_alg, scheduler and
# Methods.trial_name_string are all defined earlier in the script.
analysis = tune.run(
    "PPO",
    stop={
        "episode_reward_mean": 2,
        "training_iteration": 35,
    },
    config={
        "env": "TradingEnv",
        "env_config": env_config_training,
        "log_level": "ERROR",
        # "log_level": "INFO",
        # "log_level": "DEBUG",
        "framework": "torch",
        "ignore_worker_failures": False,
        "clip_rewards": True,
        "lr": LR,
        "lr_schedule": [
            [0, 1e-1],
            [int(1e2), 1e-2],
            [int(1e3), 1e-3],
            [int(1e4), 1e-4],
            [int(1e5), 1e-5],
            [int(1e6), 1e-6],
            [int(1e7), 1e-7],
        ],
        "model": {
            "use_lstm": True,
            "lstm_cell_size": 512,
        },
        "gamma": GAMMA,
        "observation_filter": "MeanStdFilter",
        "lambda": LAMBDA,
        "vf_share_layers": True,
        "vf_loss_coeff": VF_LOSS_COEFF,
        "entropy_coeff": ENTROPY_COEFF,
        "evaluation_interval": 1,  # Run evaluation on every iteration.
        "evaluation_config": {
            "env_config": env_config_evaluation,  # Only the keys to override during evaluation.
            "explore": False,  # No exploration during evaluation; all actions must be repeatable.
        },
    },
    metric=checkpoint_metric,
    mode="max",
    search_alg=search_alg,
    scheduler=scheduler,
    num_samples=10,  # Samples per hyperparameter combination; more averages out randomness, fewer runs faster.
    keep_checkpoints_num=10,  # Keep only the last 10 checkpoints.
    checkpoint_freq=1,  # Checkpoint on every iteration (slower, but lets you pick a checkpoint more finely later).
    # resume="AUTO",
    local_dir="./results",
    name=f"testing_{int(time.time() - 1651400000)}",
    trial_name_creator=Methods.trial_name_string,
)
```
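One way to keep this from running endlessly is to stop each trial explicitly once it has collected a fixed number of environment steps. The sketch below is a minimal example, not part of the original script: it assumes Ray Tune's `Stopper` interface and a hypothetical `MAX_TIMESTEPS` budget, and relies on the `timesteps_total` metric that RLlib trainers report on every iteration.

```python
from ray.tune import Stopper

MAX_TIMESTEPS = 1_000_000  # Hypothetical per-trial step budget; pick your own.


class TimestepStopper(Stopper):
    """Stop a trial once it has collected MAX_TIMESTEPS environment steps."""

    def __call__(self, trial_id, result):
        # RLlib includes `timesteps_total` in every training result dict.
        return result.get("timesteps_total", 0) >= MAX_TIMESTEPS

    def stop_all(self):
        # Only stop individual trials, never the whole experiment.
        return False
```

Passing `stop=TimestepStopper()` to `tune.run()` in place of the stop dict above would bound every trial by environment steps rather than by reward or iteration count; since `checkpoint_freq=1` already writes a checkpoint on each iteration, nothing extra should be needed to "force" one before the trial stops.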
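Because `checkpoint_freq=1` and `keep_checkpoints_num=10` already persist checkpoints as training runs, the best one can be pulled from the returned `analysis` object once the run stops. A minimal sketch, assuming the Ray 1.x `ExperimentAnalysis` API and the same `analysis` and `checkpoint_metric` names used above:

```python
# Best trial according to the metric/mode already passed to tune.run().
best_trial = analysis.get_best_trial(metric=checkpoint_metric, mode="max")

# Path of the best checkpoint recorded for that trial; it can later be
# loaded into a freshly built PPO trainer via its .restore() method.
best_checkpoint = analysis.get_best_checkpoint(best_trial, metric=checkpoint_metric, mode="max")
print(best_checkpoint)
```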