Has anyone been able to successfully use a VectorEnv whose sub-environments are MultiAgentEnvs?
I am currently trying to write a class like the following:
class MultiVectorizedSimSimEnv(VectorEnv):
    """Vectorized version of the SimSimEnv."""

    def __init__(self, env_config: dict, episode_length=10):
        self.env = SimSimEnv(env_config, episode_length=episode_length)
        super().__init__(
            observation_space=self.env.observation_space,
            action_space=self.env.action_space,
            num_envs=env_config.get("num_sims", 1),  # default to 1
        )
        ....
        self.sims = [
            SimSimEnv(env_config=env_config, episode_length=episode_length)
            for _ in range(self.num_envs)
        ]
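The .... hides the rest of the class, which is essentially the standard VectorEnv plumbing over self.sims. Roughly along these lines (simplified sketch; the exact reset/step signatures depend on the Ray version):

    # (methods on MultiVectorizedSimSimEnv, simplified)
    def vector_reset(self):
        # One obs per sub-env; each obs is itself a per-agent dict.
        return [sim.reset() for sim in self.sims]

    def reset_at(self, index):
        return self.sims[index].reset()

    def vector_step(self, actions):
        obs_batch, rew_batch, done_batch, info_batch = [], [], [], []
        for sim, action in zip(self.sims, actions):
            obs, rew, done, info = sim.step(action)
            obs_batch.append(obs)
            rew_batch.append(rew)
            done_batch.append(done)
            info_batch.append(info)
        return obs_batch, rew_batch, done_batch, info_batch

    def get_sub_environments(self):
        return self.sims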
Each of the sub-environments in self.sims is an instance of the following MultiAgentEnv subclass:
class SimSimEnv(MultiAgentEnv):
    def __init__(self, env_config, episode_length, config=None):
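Its reset() and step() follow the usual MultiAgentEnv contract, i.e. they return dicts keyed by agent ID (the agents here are "red" and "blue"). A stripped-down illustration, using the older 4-tuple step API and a made-up _get_obs() placeholder instead of the real logic:

    # (methods on SimSimEnv, heavily simplified; _get_obs() is a placeholder)
    def reset(self):
        return {"red": self._get_obs("red"), "blue": self._get_obs("blue")}

    def step(self, action_dict):
        obs = {aid: self._get_obs(aid) for aid in action_dict}
        rewards = {aid: 0.0 for aid in action_dict}
        dones = {aid: False for aid in action_dict}
        dones["__all__"] = False
        return obs, rewards, dones, {}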
However, Ray doesn't appear to recognize that the sub-environments are multi-agent and throws an error saying that I can't use multi-agent training on the vectorized env:
Error:
ValueError: Have multiple policies <PolicyMap lru-caching-capacity=100 policy-IDs=['policy_red', 'policy_blue']>, but the env <harness.envs.multi_simsim_env.MultiVectorizedSimSimEnv object at 0x0000019BD0AC2170> is not a subclass of BaseEnv, MultiAgentEnv, ActorHandle, or ExternalMultiAgentEnv!
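The sub-environments themselves are MultiAgentEnv subclasses by construction; it is only the vectorized wrapper that isn't, which seems to be what this check is objecting to. A quick (hypothetical) way to confirm that:

    from ray.rllib.env.multi_agent_env import MultiAgentEnv
    from ray.rllib.env.vector_env import VectorEnv

    env = MultiVectorizedSimSimEnv(env_config)  # same env_config as below
    print(isinstance(env, VectorEnv))        # True
    print(isinstance(env, MultiAgentEnv))    # False -> triggers the ValueError
    print(all(isinstance(s, MultiAgentEnv) for s in env.sims))  # True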
This is the configuration I have been using for training:
def env_creator(_):
    # env_config is defined at module level.
    return MultiVectorizedSimSimEnv(env_config)  # return an env instance

tune.register_env("multi_simsim", env_creator)

def select_policy(agent_id, episode, **kwargs):
    # Agent IDs are "red" and "blue", so this maps to "policy_red"/"policy_blue".
    return f"policy_{agent_id}"
# Define the configuration
from ray.rllib.algorithms.algorithm_config import AlgorithmConfig
from ray.rllib.policy.policy import PolicySpec
from harness.policies.example_policy import EntropyLossPG
config = (
    AlgorithmConfig(algo_class=EntropyLossPG)
    .environment(env="multi_simsim", disable_env_checking=True)
    .framework("torch")
    .rollouts(
        num_rollout_workers=0,
        num_envs_per_worker=4,
        rollout_fragment_length=8,  # was 10
    )
    .multi_agent(
        policies={
            "policy_red": PolicySpec(
                config=AlgorithmConfig.overrides(
                    model={"use_lstm": True}, framework_str="torch"
                )
            ),
            "policy_blue": PolicySpec(
                config=AlgorithmConfig.overrides(
                    model={"use_lstm": True}, framework_str="torch"
                )
            ),
        },
        policy_mapping_fn=select_policy,
    )
)
# Start the training
tune.run(EntropyLossPG, config=config.to_dict())
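If a custom VectorEnv over MultiAgentEnvs simply isn't supported, is the intended pattern to register the MultiAgentEnv directly and let RLlib handle vectorization through num_envs_per_worker, along the lines of the (untested) sketch below? Or is there a way to make the wrapper above pass the multi-agent check?

    # Hypothetical alternative (not what I'm running above): register the
    # MultiAgentEnv itself and rely on RLlib-side vectorization.
    tune.register_env(
        "simsim",
        lambda cfg: SimSimEnv(env_config=cfg, episode_length=10),
    )

    alt_config = (
        AlgorithmConfig(algo_class=EntropyLossPG)
        .environment(env="simsim", env_config=env_config)
        .framework("torch")
        .rollouts(num_rollout_workers=0, num_envs_per_worker=4)
        .multi_agent(
            policies={"policy_red": PolicySpec(), "policy_blue": PolicySpec()},
            policy_mapping_fn=select_policy,
        )
    )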
How severely does this issue affect your experience of using Ray?
- High: It blocks me from completing my task.