Hello all, I have a question. I am using the DQN algorithm with a custom multi-agent environment. After training, I want to test the performance of the model, so I call `.compute_actions(obs, policy_id=DQNTFPolicy)`. Because the observation is a dictionary, I need to pass it the policy. But when I run the code, I get this error:
'tuple' object has no attribute 'items'
I don't know where this error comes from. Here is my setup:
# Start Ray before any RLlib objects are created.
# NOTE(review): local_mode=True is presumably a debugging aid - confirm it is
# wanted for real training runs.
ray.init(local_mode=True)
def env_creator(_):
    """Build a fresh ``AAM_Dispatch`` environment.

    Args:
        _: Ignored. Whatever the caller passes is not forwarded; the
            environment is always constructed with no arguments.

    Returns:
        A new ``AAM_Dispatch`` instance.
    """
    return AAM_Dispatch()
# One environment instance is created here so its spaces and agent count can
# be read below.
single_env = AAM_Dispatch()

# Register the factory under the same name the algorithm config refers to.
env_name = "AAM_Dispatch"
register_env(env_name, env_creator)

# Pull the per-agent spaces and the agent count off the instance in one go.
obs_space, act_space, num_agents = (
    single_env.observation_space,
    single_env.action_space,
    single_env.num_agents,
)
def gen_policy():
    """Return the policy class to place in the multi-agent policy table.

    Returns:
        The ``DQNTFPolicy`` class itself (a bare name, not a tuple and not an
        instance).
    """
    return DQNTFPolicy
# Policy table keyed by policy id. The single entry is a 4-tuple of
# (policy class, observation space, action space, {}).
# NOTE(review): the trailing empty dict is presumably the per-policy config
# override - confirm against the RLlib version in use.
policy_graphs = {
    "dqn_policy": (gen_policy(), obs_space, act_space, {}),
}
def policy_mapping_fn(agent_id, episode=None, worker=None, **kwargs):
    """Map every agent to the single shared policy.

    Args:
        agent_id: Id of the agent being mapped. Not inspected; all agents
            receive the same answer.
        episode: Unused. Defaults to ``None`` so the function can also be
            called with only an agent id.
        worker: Unused. Defaults to ``None`` for the same reason.
        **kwargs: Any additional keyword arguments are accepted and ignored.

    Returns:
        The policy id ``"dqn_policy"``.
    """
    return "dqn_policy"
# Assemble the DQN configuration as one chained builder expression.
dqn_config = (
    DQNConfig()
    .environment("AAM_Dispatch")
    .framework("tf")
    .rollouts(observation_filter="MeanStdFilter")
    .training(
        # NOTE(review): "vf_share_layers" looks like an actor-critic model
        # option - confirm it has any effect for DQN.
        model={"vf_share_layers": True},
        n_step=3,
        gamma=0.99,
    )
    .multi_agent(
        policies=policy_graphs,
        policy_mapping_fn=policy_mapping_fn,
        policies_to_train=["dqn_policy"],
    )
    # GPU count is read from the RLLIB_NUM_GPUS environment variable and
    # falls back to 0 when the variable is not set.
    .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0")))
)
# Build the algorithm object from the configuration.
dqn = dqn_config.build()

# Run at most two training iterations, printing each result. As soon as the
# reported mean episode reward exceeds 48, announce it and end the process.
for _ in range(2):
    result_dqn = dqn.train()
    print(pretty_print(result_dqn))
    if result_dqn["episode_reward_mean"] > 48:
        print("Finish Training")
        # Exit with status 0 by raising SystemExit directly; quit() is an
        # interactive helper installed by the site module and is not meant
        # for use in programs.
        raise SystemExit(0)