How severe does this issue affect your experience of using Ray?
- High: It completely blocks my task and I cannot work around it.
Hello, I have a problem that is blocking my work and that I cannot solve. I tuned the hyperparameters of the QMix algorithm with Tune, and now I want to use the trained policy for evaluation. However, I cannot load the policy from the checkpoint.
My tuning code is as follows:
```python
import logging
import gymnasium as gym
import ray
from ray import air, tune
from ray.tune import register_env
from ray.rllib.algorithms.qmix import QMixConfig, QMix
from ray.rllib.utils.replay_buffers.replay_buffer import StorageUnit
from ray.rllib.policy.policy import PolicySpec
from ray.rllib.env.multi_agent_env import ENV_STATE
from tempfile import TemporaryDirectory
from GENERAL_QMIX_ep_gym_env import EnergyPlusEnvWithGroupedAgents_v0

logger = logging.getLogger(__name__)

# path = "/home/german"
path = 'E:/Usuario/Cliope'
# path = 'C:/Users/grhen'

EPW_PATH = path + '/Documents/GitHub/EP_RLlib/EP_Wheater_Configuration/Mendoza_Obs_-hour-historico1.epw'
IDF_PATH = path + '/home/german/Documents/GitHub/EP_RLlib/EP_IDF_Configuration/model_1.epJSON'
CLIMATIC_STADS_PATH = path + '/Documents/GitHub/EP_RLlib/EP_Wheater_Configuration'

tune_runner = True

ray.init()
register_env(
    "EPEnv",
    lambda args: EnergyPlusEnvWithGroupedAgents_v0(args)
)

def policy_mapping_fn(agent_id, episode, worker, **kwargs):
    return "default_policy"
algo = QMixConfig().training(
    train_batch_size=30,  # if not tune_runner else tune.grid_search([30, 80, 100]),
    gamma=0.7,  # if not tune_runner else tune.grid_search([0.7, 0.9, 0.99]),
    lr=0.1,  # if not tune_runner else tune.grid_search([0.001, 0.01, 0.1]),
    # QMixConfig
    mixer='qmix',
    # Mixing network. Either "qmix", "vdn", or None.
    mixing_embed_dim=32,
    # Size of the mixing network embedding.
    double_q=True,
    # Whether to use double Q-learning.
    target_network_update_freq=100,  # if not tune_runner else tune.grid_search([100, 300, 1000]),
    # Update the target network every `target_network_update_freq` sample steps.
    replay_buffer_config={
        "type": "MultiAgentReplayBuffer",
        "capacity": 70000,
        "storage_unit": StorageUnit.FRAGMENTS,
    },
    # optim_alpha=0.99,
    # RMSProp alpha.
    # optim_eps=0.00001,
    # RMSProp epsilon.
    # grad_clip=None,
    # If not None, clip gradients during optimization at this value.
).environment(
    env="EPEnv",
    env_config={
        "csv": False,
        "epw": EPW_PATH,
        "output": TemporaryDirectory("output", "QMIX_", 'E:/Resultados_RLforEP').name,
        "idf": IDF_PATH,
        "idf_folderpath": path + "/Documents/GitHub/EP_RLlib/EP_IDF_Configuration",
        'idf_output_folder': path + "/Documents/models",
        'beta': 0.5,  # Parameter to weight the user's preferences (value between 0 and 1).
        'E_max': 2.5/6,  # In the epJSON file: maximum_total_cooling_capacity/1000 / number_of_timesteps_per_hour
        'climatic_stads': CLIMATIC_STADS_PATH,
        'latitud': 0,
        'longitud': 0,
        'altitud': 0,
        "separate_state_space": True,
        "one_hot_state_encoding": True,
        "episode": -1
    },
).framework(
    framework='torch',
).fault_tolerance(
    recreate_failed_workers=True,
    restart_failed_sub_environments=False,
).rollouts(
    num_rollout_workers=0,
    create_env_on_local_worker=True,
    rollout_fragment_length='auto',  # if not tune_runner else tune.grid_search([10, 20]),
    enable_connectors=True,
    batch_mode="truncate_episodes",
    num_envs_per_worker=1,
).exploration(
    explore=True,
    exploration_config={
        "type": 'EpsilonGreedy',
        "initial_epsilon": 1.0,
        "final_epsilon": 0,
        "epsilon_timesteps": 900000
    },
).reporting(
    min_sample_timesteps_per_iteration=1440,
).debugging(
    log_level="INFO",
    seed=7,
).resources(
    num_gpus=0,
).checkpointing(
    True
).rl_module(
    _enable_rl_module_api=False
)
if tune_runner:
    tune.Tuner(  # 3. train it,
        "QMIX",
        run_config=air.RunConfig(
            name='training_best_QMIX_4',
            local_dir='E:/ray_results',
            storage_path='E:/ray_results',
            stop={"episodes_total": 250},
            log_to_file=True,
            checkpoint_config=air.CheckpointConfig(
                checkpoint_at_end=True,
                checkpoint_frequency=2,
                num_to_keep=20
            ),
            failure_config=air.FailureConfig(
                # Tries to recover a run up to this many times.
                max_failures=10
            )
        ),
        param_space=algo.to_dict(),
    ).fit()
else:
    algo = algo.build()  # 2. build the algorithm,
    for _ in range(1500):
        print("Train N°" + str(_ + 1))
        print(algo.train())  # 3. train it,
        if _ % 10 == 0:
            print(algo.save(checkpoint_dir='E:/ray_results/save_algo'))
    algo.save_checkpoint(checkpoint_dir='E:/ray_results/save_policy')
    print(algo.get_policy())
    print(algo.get_policy().get_weights())
    print(algo.get_weights())
    for _ in range(40):
        print(algo.evaluate())  # 4. and evaluate it.

ray.shutdown()
```
With both configurations (`tune_runner = True` and `tune_runner = False`) the runs execute correctly, and I can print the policies and the NN weights in the terminal. However, when I try to load the policy from the checkpoint, the following error appears:
```python
import ray
from ray.rllib.policy.policy import Policy

checkpoint_path = "E:/ray_results/training_best_QMIX_4/QMIX_EPEnv_1fd3f_00000_0_2023-10-04_15-52-10/checkpoint_000001"
# checkpoint_path = "C:/Users/grhen/ray_results/ajuste_modelo_general_QMIX_5/QMIX_EPEnv_24cb3_00005_5_gamma=0.9900,lr=0.0100,mixing_embed_dim=32_2023-09-17_09-38-46/checkpoint_001400"

# Use the `from_checkpoint` utility of the Policy class:
my_restored_policy = Policy.from_checkpoint(checkpoint_path)
print(my_restored_policy)

# print(algo.evaluate())  # 4. and evaluate it.
ray.shutdown()
```
```
Traceback (most recent call last):
  File "e:\Usuario\Cliope\Documents\GitHub\EP_RLlib\GENERAL_QMIX_init_evaluation.py", line 7, in <module>
    my_restored_policy = Policy.from_checkpoint(checkpoint_path)
  File "C:\Users\Usuario\anaconda3\envs\qmix_env\lib\site-packages\ray\rllib\policy\policy.py", line 338, in from_checkpoint
    policies[policy_id] = Policy.from_state(policy_state)
  File "C:\Users\Usuario\anaconda3\envs\qmix_env\lib\site-packages\ray\rllib\policy\policy.py", line 365, in from_state
    raise ValueError(
ValueError: No `policy_spec` key was found in given `state`! Cannot create new Policy.
```
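In case it is useful, the workaround I am considering is restoring the whole algorithm from the checkpoint instead of a single policy. This is only a minimal, untested sketch: it assumes `Algorithm.from_checkpoint` also works for QMIX checkpoints and that the custom environment is registered before restoring.

```python
import ray
from ray.tune import register_env
from ray.rllib.algorithms.algorithm import Algorithm
from GENERAL_QMIX_ep_gym_env import EnergyPlusEnvWithGroupedAgents_v0

ray.init()

# The custom env must be creatable when the algorithm is rebuilt from the checkpoint.
register_env(
    "EPEnv",
    lambda args: EnergyPlusEnvWithGroupedAgents_v0(args)
)

checkpoint_path = "E:/ray_results/training_best_QMIX_4/QMIX_EPEnv_1fd3f_00000_0_2023-10-04_15-52-10/checkpoint_000001"

# Rebuild the full QMIX algorithm from the Tune checkpoint and query its policy.
restored_algo = Algorithm.from_checkpoint(checkpoint_path)
print(restored_algo.get_policy())

ray.shutdown()
```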
I also tried setting the `multi_agent()` options on the algorithm config (sketch below), but the error persists.
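Roughly what that attempt looked like, added to the config chain above before training (a sketch from memory; the policy id "default_policy" matches my `policy_mapping_fn`, but the exact `PolicySpec` arguments are an assumption):

```python
from ray.rllib.policy.policy import PolicySpec

# Sketch of the multi_agent attempt; PolicySpec() with no arguments lets RLlib
# infer the observation/action spaces from the environment.
algo = algo.multi_agent(
    policies={"default_policy": PolicySpec()},
    policy_mapping_fn=policy_mapping_fn,
)
```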
Could someone tell me whether I need to change something in my configuration, or how else I can fix this problem? Thank you so much.