Hello, has anyone implemented an example of how to set reset_config for RLlib? I can't find any example either on GitHub or here. The following is a simple test script that I haven't been able to get working with reuse_actors=True.
import ray
from ray import tune
from ray.rllib.algorithms.ppo import PPOConfig
from ray.tune.schedulers.pb2 import PB2

ray.init(logging_level="WARN")

config = (
    PPOConfig()
    .environment("CartPole-v1")
    .framework("torch")
    .debugging(log_level="WARN")
    .training(gamma=tune.uniform(0.9, 0.999), lr=tune.uniform(1e-4, 1e-3),
              _enable_learner_api=True)
    .rl_module(_enable_rl_module_api=True)
    .rollouts(num_rollout_workers=1, num_envs_per_worker=1,
              create_env_on_local_worker=False)
    .resources(num_cpus_for_local_worker=0, num_cpus_per_worker=2,
               num_gpus_per_learner_worker=0.25, num_cpus_per_learner_worker=0)
)
pb2_scheduler = PB2(
    time_attr="training_iteration",
    metric="episode_reward_mean",
    mode="max",
    perturbation_interval=2,
    hyperparam_bounds={
        "lr": [1e-4, 1e-3],
        "gamma": [0.9, 0.999],
    },
    quantile_fraction=0.5,
)
tune.run("PPO", scheduler=pb2_scheduler, num_samples=4, config=config, stop={"training_iteration":5}, verbose=1, reuse_actors=True)