Hello! I’m trying to set up a DQN algorithm, but after configuring it and running the program I get the following error:
PS C:\Users\grhen\Documents\GitHub\EP_RLlib> c:; cd 'c:\Users\grhen\Documents\GitHub\EP_RLlib'; & 'C:\Users\grhen\anaconda3\envs\rllib290\python.exe' 'c:\Users\grhen\.vscode\extensions\ms-python.python-2023.22.1\pythonFiles\lib\python\debugpy\adapter/../..\debugpy\launcher' '62617' '--' 'c:\Users\grhen\Documents\GitHub\EP_RLlib\VENT_init_training.py'
2024-01-09 11:15:49,983 INFO worker.py:1715 -- Started a local Ray instance. View the dashboard at 127.0.0.1:8265
Traceback (most recent call last):
File "c:\Users\grhen\Documents\GitHub\EP_RLlib\VENT_init_training.py", line 136, in <module>
algo.training(
File "C:\Users\grhen\anaconda3\envs\rllib290\lib\site-packages\ray\rllib\algorithms\dqn\dqn.py", line 244, in training
super().training(**kwargs)
File "C:\Users\grhen\anaconda3\envs\rllib290\lib\site-packages\ray\rllib\algorithms\simple_q\simple_q.py", line 243, in training
super().training(**kwargs)
File "C:\Users\grhen\anaconda3\envs\rllib290\lib\site-packages\ray\rllib\algorithms\algorithm_config.py", line 1816, in training
self.optimizer = merge_dicts(self.optimizer, optimizer)
File "C:\Users\grhen\anaconda3\envs\rllib290\lib\site-packages\ray\_private\dict.py", line 22, in merge_dicts
deep_update(merged, d2, True, [])
File "C:\Users\grhen\anaconda3\envs\rllib290\lib\site-packages\ray\_private\dict.py", line 58, in deep_update
for k, value in new_dict.items():
AttributeError: 'NoneType' object has no attribute 'items'
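If I read the traceback correctly, deep_update() iterates new_dict.items(), and new_dict here is the optimizer=None value that .training() passes into merge_dicts(). A minimal sketch of how I understand the failure (deep_update_sketch below is just my own simplification, not Ray's actual code):

def deep_update_sketch(original: dict, new_dict: dict) -> dict:
    # simplified stand-in for ray._private.dict.deep_update
    for k, value in new_dict.items():  # fails when new_dict is None
        original[k] = value
    return original

deep_update_sketch({}, None)  # AttributeError: 'NoneType' object has no attribute 'items'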
The relevant part of my configuration is the following (path, tune_runner and ep_terminal_output are defined earlier in the script):

import gymnasium as gym
from tempfile import TemporaryDirectory

from ray import tune
from ray.rllib.algorithms.dqn.dqn import DQNConfig
algo = DQNConfig().training(
gamma = 0.99 if not tune_runner else tune.uniform(0.7, 0.99),
lr = 0.1 if not tune_runner else tune.uniform(0.001, 0.1),
grad_clip = None,
grad_clip_by = 'global_norm',
train_batch_size = 128 if not tune_runner else tune.randint(128, 257),
optimizer = None,
max_requests_in_flight_per_sampler_worker = None,
learner_class = None,
_enable_learner_api = None,
num_atoms = 1 if not tune_runner else tune.randint(1, 11),
v_min = -1 if not tune_runner else tune.randint(-10, 0),
v_max = 1 if not tune_runner else tune.randint(1, 11),
noisy = True,
sigma0 = 1 if not tune_runner else tune.uniform(0.01, 0.99),
dueling = True,
hiddens = [256],
double_q = True,
n_step = 1 if not tune_runner else tune.randint(1, 11),
training_intensity = None,
replay_buffer_config = {
'_enable_replay_buffer_api': True,
'type': 'MultiAgentPrioritizedReplayBuffer',
'capacity': 50000,
'prioritized_replay_alpha': 0.6,
'prioritized_replay_beta': 0.4,
'prioritized_replay_eps': 1e-6,
'replay_sequence_length': 1,
},
td_error_loss_fn = None,
categorical_distribution_temperature = 1.0,
).environment(
env="EPEnv",
observation_space=gym.spaces.Box(float("-inf"), float("inf"), (49,)),
action_space=gym.spaces.Discrete(4),
env_config={
'sys_path': path,
'ep_terminal_output': ep_terminal_output,
'csv': False,
'output': TemporaryDirectory("output","DQN_",path+'/Resultados_RLforEP').name,
'epw': path+'/GitHub/EP_RLlib/EP_Wheater_Configuration/Mendoza_Obs_-hour-historico1.epw',
'idf': path+'/GitHub/EP_RLlib/EP_IDF_Configuration/model_1.epJSON',
'idf_folderpath': path+"/GitHub/EP_RLlib/EP_IDF_Configuration",
'idf_output_folder': path+"/models",
'climatic_stads': path+'/GitHub/EP_RLlib/EP_Wheater_Configuration',
'beta': 0,
'E_max': 2.5/6,
'latitud':0,
'longitud':0,
'altitud':0,
'separate_state_space': True,
'one_hot_state_encoding': True,
'episode': -1,
'is_test': False,
},
).framework(
framework = 'torch',
).fault_tolerance(
recreate_failed_workers = True,
restart_failed_sub_environments=False,
).rollouts(
num_rollout_workers = 1,
create_env_on_local_worker=True,
rollout_fragment_length = 'auto',
enable_connectors = True,
num_envs_per_worker=1,
).experimental(
_enable_new_api_stack = True,
).reporting( # multi_agent config goes here
min_sample_timesteps_per_iteration = 2000,
).checkpointing(
export_native_model_files = True,
).debugging(
log_level = "ERROR",
seed=7,
).resources(
num_gpus = 0,
)
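From the traceback it looks to me like the .training() call alone should already reproduce the error, independent of the environment and rollout settings, because optimizer=None gets merged into the default optimizer dict (this is just my assumption from reading dqn.py, simple_q.py and algorithm_config.py in the traceback):

from ray.rllib.algorithms.dqn.dqn import DQNConfig

# Presumably this minimal call hits the same merge_dicts error,
# since .training() merges the optimizer argument into self.optimizer.
DQNConfig().training(optimizer=None)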
I can’t tell whether this is a mistake on my part. Following the traceback, I see that line 1816 of ray/rllib/algorithms/algorithm_config.py tries to merge the optimizer dict. However, since I have enabled the new API stack with _enable_new_api_stack=True (which, as I understand it, also turns on the learner API), shouldn’t the old optimizer setting simply be ignored?
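If the optimizer argument really is the culprit, would the correct workaround simply be to not pass it at all, or to pass an empty dict instead of None? Roughly like this (just a sketch of what I mean, with the other settings left unchanged):

from ray.rllib.algorithms.dqn.dqn import DQNConfig

algo = DQNConfig().training(
    gamma=0.99,
    lr=0.1,
    train_batch_size=128,
    # ... all the other .training() settings from above ...
    optimizer={},  # instead of optimizer=None (or omit the argument entirely)
)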
Can anyone help me solve it? Thank you!