Hi everyone, I am trying to retrieve the network I trained (i.e., the policy weights). I tried the following code for the single-agent scenario:
```python
from ray.rllib.algorithms.ppo import PPOConfig
from ray.tune.logger import pretty_print
from gym_pybullet_drones.envs.multi_agent_rl.CustomBaseMAA3 import CustomRl3
import os
import ray
from ray.rllib.policy.policy import PolicySpec

ray.init(num_cpus=5)

temp_env = CustomRl3()

pol = PolicySpec()
policies = {"policy_1": pol}
policy_ids = ["policy_1"]  # note: list("policy_1") would split the string into characters


def policy_mapping_fn(agent_id, episode, worker, **kwargs):
    return "policy_1"


algo = (
    PPOConfig()
    .rollouts(num_rollout_workers=1)
    .environment(env=CustomRl3)
    .framework("torch")
    .training(num_sgd_iter=5)
    .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0")))
    .build()
)

print(algo.get_policy().get_weights())

ray.shutdown()
```
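For context, in the single-agent case the policy is registered under RLlib's default policy ID ("default_policy"), so the no-argument get_policy() call finds it. A minimal sketch of what I do with the result (my understanding is that get_weights() returns a dict mapping parameter names to numpy arrays):

```python
# Single-agent case: the policy lives under RLlib's default ID
# ("default_policy"), so get_policy() without arguments finds it.
# (Run this before ray.shutdown().)
policy = algo.get_policy()
weights = policy.get_weights()  # dict: parameter name -> numpy array
for name, value in weights.items():
    print(name, value.shape)
```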
This works well, but when I try to extract the network in the multi-agent scenario it fails. The code I use in the multi-agent scenario is the following:
```python
from ray.rllib.algorithms.ppo import PPOConfig
from ray.tune.logger import pretty_print
from gym_pybullet_drones.envs.multi_agent_rl.CustomBaseMAA3 import CustomRl3
import os
import ray
from ray.rllib.policy.policy import PolicySpec

ray.init(num_cpus=5)

temp_env = CustomRl3()

pol = PolicySpec()
policies = {"policy_1": pol}
policy_ids = ["policy_1"]


def policy_mapping_fn(agent_id, episode, worker, **kwargs):
    return "policy_1"


algo = (
    PPOConfig()
    .rollouts(num_rollout_workers=1)
    .multi_agent(policies=policies, policy_mapping_fn=policy_mapping_fn)
    .environment(env=CustomRl3)
    .framework("torch")
    .training(num_sgd_iter=5)
    .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0")))
    .build()
)

print(algo.get_policy().get_weights())  # get_policy() returns None here

ray.shutdown()
```
In the multi-agent scenario, get_policy() returns None. In this blog post, it is suggested to use PPOTrainer, but I get an error when I try to use it. Is PPOTrainer still supported?
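For what it's worth, get_policy() accepts an optional policy ID argument, so I assume that in the multi-agent case the lookup should go through the ID from my multi_agent config. A minimal sketch of what I mean:

```python
# Assumption: in the multi-agent setup the policy is registered
# under the ID given in .multi_agent() ("policy_1" above), not
# under RLlib's default ID, so it must be looked up explicitly.
policy = algo.get_policy("policy_1")
if policy is not None:
    print(policy.get_weights())
```

Is passing the policy ID like this the intended way to retrieve a policy in a multi-agent setup, or is there a separate API for it?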
Thanks!