Inspired by the PPO+PB2 example, I set the sample_from entries in the run config section, just as in the example.
I mean the following code section:
"lambda": sample_from(lambda spec: random.uniform(0.9, 1.0)),
"clip_param": sample_from(lambda spec: random.uniform(0.1, 0.5)),
"lr": sample_from(lambda spec: random.uniform(1e-3, 1e-5)),
"train_batch_size": sample_from(lambda spec: random.randint(1000,
But in Ray 2.2.0 I cannot construct an algorithm from a plain dictionary anymore; I have to use, for example, ppo.PPOConfig.from_dict(my_config_dict),
which throws an error like: ValueError: invalid literal for int() with base 10: 'grid_search'
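If it helps triage, this is my guess at a minimal trigger, with nothing from my environment involved (untested outside my setup):

# My guess at a minimal trigger (hypothetical repro, nothing from my env):
from ray import tune
from ray.rllib.algorithms import ppo

config_dict = ppo.PPOConfig().to_dict()
config_dict["train_batch_size"] = tune.grid_search([1000, 2000])
# On my machine this is where the ValueError about 'grid_search' shows up:
ppo.PPOConfig.from_dict(config_dict)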
I tested the code with a custom environment and with a Gym environment, using the DQN and PPO algorithms, creating the algorithm both with from_dict(my_config_dict)
and, as in previous versions, directly with the configuration as a dictionary.
The error appears only when I include the search algorithms and/or the sample_from method.
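To make the two construction paths concrete, this is what I toggle between (simplified from the full script below):

# Simplified toggle between the two construction paths (full version in the
# script below); both fail for me once grid_search / sample_from entries
# are present in default_config_dict.
from ray.rllib.algorithms import ppo

use_config_as_dict = True  # flip to exercise the from_dict() path instead
default_config_dict = ppo.PPOConfig().to_dict()
if use_config_as_dict:
    algorithm_obj = default_config_dict  # pre-2.x style: plain config dict
else:
    algorithm_obj = ppo.PPOConfig.from_dict(default_config_dict)  # 2.2.0 path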
Here is my script. I have to say that, for copyright reasons, I cannot share my environment implementation.
I'm really grateful to you all for your extraordinary work developing Ray; I can't wait to contribute if I can.
import random

import ray
from ray import tune
from ray.tune import sample_from
from ray.tune.schedulers import ASHAScheduler
from ray.tune.schedulers.pb2 import PB2
from ray.rllib.algorithms import dqn, ppo

# Provides verbose, sort_training, etc. used in env_config below.
from cerri_memory.settings import *
from cerri_memory.environment import Environment
##########################################
# SETTINGS SECTION
##########################################
env_name = "cerri_env"  # "Walker2d-v4" | "cerri_env"
algo_name = "PPO"  # "DQN" | "PPO"
scheduler = "pb2"  # "pb2" | "ASHAScheduler"
include_samplings = True
include_fcnet_hiddens_config = True
use_config_as_dict = True
##########################################
##########################################
# CREATING CONFIGURATION
##########################################
if env_name == "cerri_env":
    env_config = {
        "algo_name": "DQN",
        "abst_experiment_pkl_path": "/SERIALIZED_EXPERIMENT/",
        "stage": "Tuning",
        "verbose": verbose,
        "sort_training": sort_training,
        "day_repetitions_limit": day_repetitions_limit,
        "num_episodes_for_avg_reward": num_episodes_for_avg_reward,
        "validation_type": validation_type,
        "finishing_episode_mode": finishing_episode_mode,
        "validate_action_during_testing": validate_action_during_testing,
    }
else:
    env_config = {}
# Grid-search over three candidate hidden-layer layouts.
FC_SIZE = tune.grid_search([[256, 256], [1024], [128, 64, 32]])
CONFIG = {
    "env": env_name,
    "env_config": env_config,
    "framework": "tf2",
    "disable_env_checking": True,
}
if include_fcnet_hiddens_config:
    CONFIG["model"] = {
        "fcnet_hiddens": FC_SIZE,
    }
##########################################
# SET ALGORITHM'S CONFIGURATION
##########################################
if algo_name == "DQN":
    default_config_dict = dqn.DQNConfig().to_dict()
    hyp_bounds = {}
    if include_samplings:
        # Degenerate ranges on purpose: the values stay fixed, but the
        # sample_from code path still gets exercised.
        CONFIG["v_min"] = sample_from(lambda spec: random.randint(0, 0))
        CONFIG["v_max"] = sample_from(lambda spec: random.randint(1, 1))
        hyp_bounds = {
            "v_min": [0, 0],
            "v_max": [1, 1],
        }
else:
    default_config_dict = ppo.PPOConfig().to_dict()
    hyp_bounds = {}
    if include_samplings:
        # PB2 bounds are given as [min, max].
        CONFIG["lr"] = sample_from(lambda spec: random.uniform(1e-5, 1e-3))
        hyp_bounds["lr"] = [1e-5, 1e-3]
##########################################
# UPDATE DEFAULT CONFIG AND CREATE ALGORITHM
##########################################
default_config_dict.update(CONFIG)
if not use_config_as_dict:
    if algo_name == "DQN":
        algorithm_obj = dqn.DQNConfig.from_dict(default_config_dict)
    else:
        algorithm_obj = ppo.PPOConfig.from_dict(default_config_dict)
else:
    algorithm_obj = default_config_dict  # pre-2.x style: plain config dict
# IF CUSTOM ENVIRONMENT, THEN REGISTER IT
if env_name == "cerri_env":
    # Build a fresh env instance per worker rather than sharing one object.
    tune.register_env("cerri_env", lambda config: Environment(default_config_dict))
##########################################
# SET SCHEDULER SECTION
##########################################
pb2 = PB2(
    time_attr="timesteps_total",
    metric="episode_reward_mean",
    mode="max",
    perturbation_interval=500,
    quantile_fraction=0.25,  # copy bottom 25% with top 25%
    # Specifies the hyperparameter search space.
    hyperparam_bounds=hyp_bounds,
)
asha_scheduler = ASHAScheduler(
    time_attr="training_iteration",
    metric="episode_reward_mean",
    mode="max",
    max_t=100,
    grace_period=10,
    reduction_factor=3,
    brackets=1,
)
if scheduler == "pb2":
    scdlr = pb2
elif scheduler == "ASHAScheduler":
    scdlr = asha_scheduler
analysis = tune.run(
    algo_name,
    scheduler=scdlr,
    verbose=1,
    num_samples=4,
    stop={"timesteps_total": 500},
    config=algorithm_obj,  # dict or AlgorithmConfig, depending on the toggle above
)