I am trying to train a multi-agent reinforcement learning model using RLlib. Unfortunately, I cannot attach the environment. I am using the following policy-mapping function and configuration dictionary:
```python
def policy_mapping(x):
    # If a list of agent IDs is passed in, the checks below would silently
    # fall through to "agent_DA", so fail loudly instead.
    if isinstance(x, list):
        raise TypeError("policy_mapping received a list of length {}".format(len(x)))
    if x in ['agent_BM_Ask_1', 'agent_BM_Ask_2', 'agent_BM_Ask_3', 'agent_BM_Ask_4']:
        return "agent_BM_Ask"
    elif x in ['agent_BM_Bid_1', 'agent_BM_Bid_2', 'agent_BM_Bid_3', 'agent_BM_Bid_4']:
        return "agent_BM_Bid"
    else:
        return "agent_DA"
```
```python
experiment_params = {
    "training": {
        "env": env_name,
        "run": "DDPG",
        "stop": {
            "training_iteration": 20,
        },
        "local_dir": env_vars.output_intermediate_dir,
        "checkpoint_at_end": True,
        "checkpoint_freq": 1,
        # "export_formats": ["h5"],
        "config": {
            "explore": True,
            "exploration_config": {
                "type": "OrnsteinUhlenbeckNoise",
                # ou_theta=0 disables the mean-reverting term, so the OU
                # process degenerates into a random walk.
                "ou_theta": 0,
            },
            "env_config": {
                "file_name": "/opt/ml/output/intermediate",
            },
            # "sgd_minibatch_size": 1,
            "lr": args.learning_rate,
            "train_batch_size": 32,
            "rollout_fragment_length": 4,
            "collect_metrics_timeout": 10080,
            "model": {
                # https://docs.ray.io/en/master/rllib-models.html#default-model-config-settings
            },
            "multiagent": {
                "policies": {
                    "agent_DA": (None, env.observation_space_DA, env.action_space_DA, {}),
                    "agent_BM_Ask": (None, env.observation_space_BM, env.action_space_BM, {}),
                    "agent_BM_Bid": (None, env.observation_space_BM, env.action_space_BM, {}),
                },
                "policy_mapping_fn": policy_mapping,  # the lambda wrapper was redundant
            },
            "num_workers": args.num_workers,
            "num_gpus": 0,  # args.num_gpus
            "seed": 1,
        },
    },
}
```
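For context, the dict follows the `tune.run_experiments` layout (the top-level key is the experiment name), and I launch it roughly like this (a sketch; `ray.init` arguments omitted):

```python
import ray
from ray import tune

ray.init()
tune.run_experiments(experiment_params)
```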
I am storing the actions of the RL agents at each timestep, and they turn out to be constant: each action sits at either the minimum or the maximum of its action range. I have checked that neither the rewards nor the next observations are constant. I have run the same test with other algorithms and always get the same result. Has anyone experienced this issue before?
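For what it's worth, this is roughly how I inspect the learned actions outside of training (a sketch; `checkpoint_path` and `obs` are placeholders for an actual checkpoint and a sample observation from my environment):

```python
from ray.rllib.agents.ddpg import DDPGTrainer

# Rebuild the trainer from the same config and restore a saved checkpoint.
trainer = DDPGTrainer(env=env_name, config=experiment_params["training"]["config"])
trainer.restore(checkpoint_path)

# Query a policy directly with exploration disabled, to see whether the
# deterministic action is also pinned at a bound of the action space.
action = trainer.compute_action(obs, policy_id="agent_DA", explore=False)
print(action)
```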