How severely does this issue affect your experience of using Ray?
- High: It blocks me from completing my task.
Hi,
After implementing the reset_config() method for PPO and running PB2 with reuse_actors=True on Pendulum-v1, I get this error:
2024-04-05 18:04:17,154 ERROR tune_controller.py:1374 -- Trial task failed for trial PPO_Pendulum-v1_444a6_00014
Traceback (most recent call last):
File "/home/.conda/envs/.../lib/python3.10/site-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
result = ray.get(future)
File "/home/.conda/envs/.../lib/python3.10/site-packages/ray/_private/auto_init_hook.py", line 22, in auto_init_wrapper
return fn(*args, **kwargs)
File "/home/.conda/envs/.../lib/python3.10/site-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
return func(*args, **kwargs)
File "/home/.conda/envs/.../lib/python3.10/site-packages/ray/_private/worker.py", line 2624, in get
raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(AttributeError): ray::PPO.reset() (pid=353467, ip=172.21.1.20, actor_id=5317424354362453bd5550a601000000, repr=PPO)
File "/home/.conda/envs/.../lib/python3.10/site-packages/ray/tune/trainable/trainable.py", line 640, in reset
success = self.reset_config(new_config)
File "/home/.conda/envs/.../lib/python3.10/site-packages/ray/rllib/algorithms/ppo/ppo.py", line 606, in reset_config
self.config._enable_new_api_stack = new_config._enable_new_api_stack
AttributeError: 'dict' object has no attribute '_enable_new_api_stack'
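If I read the traceback right, Trainable.reset() hands reset_config() the perturbed trial config as a plain dict, and ppo.py line 606 then reads an AlgorithmConfig-only attribute from it. A two-line sketch of that failure:

new_config = {"gamma": 0.99}      # what reset() passes in (a plain dict)
new_config._enable_new_api_stack  # AttributeError: 'dict' object has no attribute ...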
If I set reuse_actors=False in the TuneConfig, this error doesn't show up, so I assume my implementation of reset_config() may be incorrect? I added it under class PPO(Algorithm): in the Ray library inside my conda env:
def reset_config(self, new_config):
    self.config = new_config
    return True
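Since the traceback shows new_config arriving as a plain dict, would converting it into an (unfrozen) AlgorithmConfig first be the right approach? A minimal sketch, assuming AlgorithmConfig.update_from_dict() covers the keys PB2 perturbs:

def reset_config(self, new_config):
    # Tune may pass the perturbed config as a plain dict; fold it into
    # an unfrozen copy of the current AlgorithmConfig instead of
    # assigning the dict directly.
    if isinstance(new_config, dict):
        new_config = self.config.copy(copy_frozen=False).update_from_dict(new_config)
    self.config = new_config
    return True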
Versions / Dependencies
Python 3.10.14
ray[rllib]==2.9.3
Reproduction script
The PPO class:
class PPO(Algorithm):
    ...
    def reset_config(self, new_config):
        self.config = new_config
        return True
The script:
import ray
from ray import train, tune
from ray.tune.tuner import Tuner
from ray.tune.registry import register_env
from ray.rllib.algorithms.ppo import PPO
from ray.tune.schedulers.pb2 import PB2
import argparse
import os
import numpy as np
import pandas as pd
def explore(config):
    # Keep train_batch_size at least twice the SGD minibatch size.
    if config["train_batch_size"] < config["sgd_minibatch_size"] * 2:
        config["train_batch_size"] = config["sgd_minibatch_size"] * 2
    config["train_batch_size"] = int(config["train_batch_size"])
    return config
pb2 = PB2(
    time_attr="timesteps_total",
    metric="episode_reward_mean",
    mode="max",
    perturbation_interval=50000,
    # Specifies the hyperparameter search space.
    hyperparam_bounds={
        'gamma': [0.9, 0.9999],
        'lambda_': [0.8, 1.0],
        'kl_coeff': [0.3, 2.0],
        'kl_target': [0.001, 0.1],
        'sgd_minibatch_size': [8, 2048],
        'num_sgd_iter': [1, 20],
        'vf_loss_coeff': [0, 1],
        'entropy_coeff': [1e-8, 0.1],
        'clip_param': [0.1, 0.5],
        'lr': [1e-5, 1],
        'train_batch_size': [2048, 65536],
        'vf_clip_param': [1, 10],
        'grad_clip': [0.3, 5],
    },
    quantile_fraction=0.25,  # bottom quantile is replaced by clones of the top quantile
    custom_explore_fn=explore,
)
tuner = Tuner(
    trainable=PPO,
    param_space={
        'env': 'Pendulum-v1',
        'model': {'free_log_std': True},
        'horizon': 100,  # length of the MDP (episode horizon)
        'gamma': tune.choice([0.9, 0.95, 0.98, 0.99, 0.995, 0.999, 0.9999]),
        'lambda_': tune.choice([0.8, 0.9, 0.92, 0.95, 0.98, 0.99, 1.0]),
        'kl_coeff': tune.choice([0.3, 0.5, 1, 1.5, 2.0]),
        'kl_target': tune.choice([0.1, 0.05, 0.03, 0.02, 0.01, 0.005, 0.001]),
        'sgd_minibatch_size': tune.choice([8, 16, 32, 64, 128, 256, 512, 1024, 2048]),
        'num_sgd_iter': tune.choice([1, 5, 10, 20]),
        'vf_loss_coeff': tune.uniform(0, 1),
        'entropy_coeff': tune.loguniform(1e-8, 0.1),
        'clip_param': tune.choice([0.1, 0.2, 0.3, 0.4, 0.5]),
        'lr': tune.loguniform(1e-5, 1),
        'train_batch_size': tune.choice([2048, 4096, 8192, 16384, 32768, 65536]),
        'vf_clip_param': tune.choice([1, 2, 3, 5, 10]),
        'grad_clip': tune.choice([0.3, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 5]),
    },
    tune_config=tune.TuneConfig(
        num_samples=10,  # number of trials
        scheduler=pb2,
        reuse_actors=True,
    ),
    run_config=train.RunConfig(
        name='test',
    ),
)
results = tuner.fit()
print("best hyperparameters: ", results.get_best_result().config)