After trying different things, I found a subtle "error" in the algorithm configuration. I was using the following syntax to define the algorithm:
algo = (
    QMixConfig()
    .training(
        train_batch_size=144,
        gamma=0.7 if not tune_runner else tune.grid_search([0.7, 0.9, 0.99]),
        lr=0.001 if not tune_runner else tune.grid_search([0.001, 0.01, 0.1]),
        # QMixConfig-specific setting
        mixer='qmix',
    )
    .environment(
        env="EPEnv",
        env_config={
            "csv": False,
            "epw": EPW_PATH,
            "output": TemporaryDirectory().name,
            "idf": IDF_PATH,
            'beta': 0.5,
            'E_max': 2.5/6,
            'climatic_stads': CLIMATIC_STADS,
            "separate_state_space": True,
            "one_hot_state_encoding": True,
            'test': test,
        },
    )
    .framework(
        framework='torch',
    )
    .fault_tolerance(
        recreate_failed_workers=True,
    )
    .rollouts(
        num_rollout_workers=0,
        create_env_on_local_worker=True,
        rollout_fragment_length=10,
        enable_connectors=True,
        batch_mode="truncate_episodes",
    )
    .exploration(
        explore=True,
        exploration_config={
            "type": 'EpsilonGreedy',
            "initial_epsilon": 1.0,
            "final_epsilon": 0.001,
            "epsilon_timesteps": 300000,
        },
    )
    .reporting(
        min_sample_timesteps_per_iteration=2400,
    )
    .checkpointing(
        export_native_model_files=True,
    )
    .debugging(
        log_level="INFO",
        seed=7,
    )
    .resources(
        num_gpus=0,
    )
    .evaluation(
        evaluation_interval=100,
        evaluation_duration=2,
        evaluation_duration_unit='episodes',
        evaluation_num_workers=0,
        evaluation_config={
            "explore": False,
        },
    )
)
This is the syntax suggested in several parts of the documentation (for example, in the ray.rllib.algorithms examples).
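(For reference, the general pattern the docs suggest looks roughly like the sketch below; this is a paraphrase using PPOConfig and CartPole-v1 the way the documentation does, not code from my project.)

from ray.rllib.algorithms.ppo import PPOConfig

config = (
    PPOConfig()
    .training(gamma=0.9, lr=0.01)
    .environment(env="CartPole-v1")
    .resources(num_gpus=0)
    .rollouts(num_rollout_workers=1)
)
ppo_algo = config.build()
ppo_algo.train()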
However, if you remove the outer parentheses, the error disappears. The following code shows the modification I implemented:
algo = QMixConfig().training(
    train_batch_size=144,
    gamma=0.7 if not tune_runner else tune.grid_search([0.7, 0.9, 0.99]),
    lr=0.001 if not tune_runner else tune.grid_search([0.001, 0.01, 0.1]),
    # QMixConfig-specific setting
    mixer='qmix',
).environment(
    env="EPEnv",
    env_config={
        "csv": False,
        "epw": EPW_PATH,
        "output": TemporaryDirectory().name,
        "idf": IDF_PATH,
        'beta': 0.5,
        'E_max': 2.5/6,
        'climatic_stads': CLIMATIC_STADS,
        "separate_state_space": True,
        "one_hot_state_encoding": True,
        'test': test,
    },
).framework(
    framework='torch',
).fault_tolerance(
    recreate_failed_workers=True,
).rollouts(
    num_rollout_workers=0,
    create_env_on_local_worker=True,
    rollout_fragment_length=10,
    enable_connectors=True,
    batch_mode="truncate_episodes",
).exploration(
    explore=True,
    exploration_config={
        "type": 'EpsilonGreedy',
        "initial_epsilon": 1.0,
        "final_epsilon": 0.001,
        "epsilon_timesteps": 300000,
    },
).reporting(
    min_sample_timesteps_per_iteration=2400,
).checkpointing(
    export_native_model_files=True,
).debugging(
    log_level="INFO",
    seed=7,
).resources(
    num_gpus=0,
).evaluation(
    evaluation_interval=100,
    evaluation_duration=2,
    evaluation_duration_unit='episodes',
    evaluation_num_workers=0,
    evaluation_config={
        "explore": False,
    },
)
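For completeness, here is a minimal sketch of how a config built this way might then be consumed, assuming the Ray 2.x APIs (AlgorithmConfig.build(), tune.Tuner) and that tune_runner switches between a plain training loop and a Tune run; the iteration count and stop criterion below are illustrative only, not part of my actual script:

from ray import air, tune

if not tune_runner:
    # Fixed hyperparameters: build the algorithm directly and iterate manually.
    algorithm = algo.build()
    for _ in range(10):  # illustrative number of training iterations
        result = algorithm.train()
        print(result["episode_reward_mean"])
else:
    # Grid-search hyperparameters: hand the config to Tune as a param space.
    tuner = tune.Tuner(
        "QMIX",
        param_space=algo.to_dict(),
        run_config=air.RunConfig(
            stop={"training_iteration": 10},  # illustrative stop criterion
        ),
    )
    results = tuner.fit()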