Failing to configure a multi-agent trainer

How severely does this issue affect your experience of using Ray?

  • High: It blocks me from completing my task.

I want to update this trainer, which was written against an old version of RLlib, to the current API.

The configuration looks like this:

    # Define all the policies here
    policy_config = exp_run_config["policy"]["regions"]

    # Map of type MultiAgentPolicyConfigDict from policy ids to tuples
    # of (policy_cls, obs_space, act_space, config). This defines the
    # observation and action spaces of the policies and any extra config.
    policies = {
        "regions": (
            None,  # uses default policy
            env_object.observation_space[0],
            env_object.action_space[0],
            policy_config,
        ),
    }

    # Function mapping agent ids to policy ids.
    def policy_mapping_fn(agent_id=None):
        assert agent_id is not None
        return "regions"

    # Optional list of policies to train, or None for all policies.
    policies_to_train = None

    # Settings for Multi-Agent Environments
    multiagent_config = {
        "policies": policies,
        "policies_to_train": policies_to_train,
        "policy_mapping_fn": policy_mapping_fn,
    }

    train_config = exp_run_config["trainer"]
    rllib_config = {
        # Arguments dict passed to the env creator as an EnvContext object (which
        # is a dict plus the properties: num_workers, worker_index, vector_index,
        # and remote).
        "env_config": exp_run_config["env"],
        "framework": train_config["framework"],
        "multiagent": multiagent_config,
        "num_workers": train_config["num_workers"],
        "num_gpus": train_config["num_gpus"],
        "num_envs_per_worker": train_config["num_envs"] // train_config["num_workers"],
        "train_batch_size": train_config["train_batch_size"],
    }
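For reference, exp_run_config comes from the experiment YAML. Based on the keys accessed above, it has roughly this shape (the values here are only illustrative):

exp_run_config = {
    "env": {...},  # kwargs forwarded to the underlying env ("source_dir" gets added later)
    "policy": {
        "regions": {...},  # extra per-policy config
    },
    "trainer": {
        "framework": "torch",
        "num_workers": 4,
        "num_envs": 8,
        "num_gpus": 0,
        "train_batch_size": 4000,
    },
}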

The environment wrapper looks like this:

class EnvWrapper(MultiAgentEnv):
    """
    The environment wrapper class.
    """

    def __init__(self, env_config=None):

        super().__init__()

        env_config_copy = env_config.copy() if env_config is not None else {}
        source_dir = env_config_copy.get("source_dir", None)
        # Remove source_dir key in env_config if it exists
        if "source_dir" in env_config_copy:
            del env_config_copy["source_dir"]
        if source_dir is None:
            source_dir = PUBLIC_REPO_DIR
        assert isinstance(env_config_copy, dict)
        self.env = import_class_from_path("Rice", os.path.join(source_dir, "rice.py"))(
            **env_config_copy
        )

        self.action_space = self.env.action_space

        self.observation_space = recursive_obs_dict_to_spaces_dict(self.env.reset())

    def reset(self):
        """Reset the env."""
        obs = self.env.reset()
        return recursive_list_to_np_array(obs)

    def step(self, actions=None):
        """Step through the env."""
        assert actions is not None
        assert isinstance(actions, dict)
        obs, rew, done, info = self.env.step(actions)
        return recursive_list_to_np_array(obs), rew, done, info
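(import_class_from_path and the recursive_* helpers come from the project's utility module and aren't shown here. Roughly, I understand them to behave like the sketch below; the real implementations may differ in details such as dtypes and bounds.)

import numpy as np
from gymnasium.spaces import Box, Dict  # the old wrapper uses gym; same idea

def recursive_list_to_np_array(obs):
    # Recursively turn nested lists inside an observation dict into numpy arrays.
    if isinstance(obs, dict):
        return {k: recursive_list_to_np_array(v) for k, v in obs.items()}
    if isinstance(obs, (list, tuple)):
        return np.asarray(obs, dtype=np.float32)
    return obs

def recursive_obs_dict_to_spaces_dict(obs):
    # Mirror an observation dict as a (nested) Dict space of Box spaces.
    if isinstance(obs, dict):
        return Dict({k: recursive_obs_dict_to_spaces_dict(v) for k, v in obs.items()})
    arr = np.asarray(obs, dtype=np.float32)
    return Box(low=-np.inf, high=np.inf, shape=arr.shape, dtype=np.float32)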

and the trainer is set up like this:

    # Create the A2C trainer.
    exp_run_config["env"]["source_dir"] = source_dir
    rllib_trainer = A2CTrainer(
        env=EnvWrapper,
        config=get_rllib_config(
            exp_run_config=exp_run_config, env_class=EnvWrapper, seed=seed
        ),
    )
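(get_rllib_config itself isn't shown; as far as I understand it, it instantiates the wrapper once so that env_object, used when defining the policies above, has spaces to read from, and then returns the rllib_config dict together with the seed:)

# Inside get_rllib_config -- a sketch of my assumption, not the real helper:
env_object = EnvWrapper(env_config=exp_run_config["env"])
# ... then build and return the rllib_config dict shown above, plus the seed.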

I’ve tried updating it like so:

  policy_config = exp_run_config["policy"]["regions"]

  # Map of type MultiAgentPolicyConfigDict from policy ids to tuples
  # of (policy_cls, obs_space, act_space, config). This defines the
  # observation and action spaces of the policies and any extra config.
  policies = {
      "regions": (
          None,  # uses default policy
          env_object.observation_space[0],
          env_object.action_space[0],
          policy_config,
      ),
  }

  # Function mapping agent ids to policy ids.
  def policy_mapping_fn(agent_id=None):
      assert agent_id is not None
      return "regions"

  # Optional list of policies to train, or None for all policies.
  policies_to_train = None

  # Settings for Multi-Agent Environments
  multiagent_config = {
      "policies": policies,
      "policies_to_train": policies_to_train,
      "policy_mapping_fn": policy_mapping_fn,
  }

  train_config = exp_run_config["trainer"]
  rllib_config = {
      # Arguments dict passed to the env creator as an EnvContext object
      "env_config": exp_run_config["env"],
      "framework": train_config["framework"],
      "multiagent": multiagent_config,
      "num_workers": train_config["num_workers"],
      "num_gpus": train_config["num_gpus"],
      "train_batch_size": train_config["train_batch_size"],
      "placement_strategy": "PACK",
  }
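One thing I'm unsure about: newer RLlib versions seem to call the mapping function with more than just the agent id (the episode, plus keyword args), so the signature probably needs to look more like this:

# Probable signature for newer RLlib -- the episode (and possibly more) is passed
# in addition to the agent id.
def policy_mapping_fn(agent_id, episode, **kwargs):
    return "regions"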

the new wrapper:

import os

import numpy as np
from gymnasium.spaces import Dict
from ray.rllib.env.multi_agent_env import MultiAgentEnv

# import_class_from_path, recursive_list_to_np_array,
# recursive_obs_dict_to_spaces_dict and BASE_PATH come from the project's utility module.

class EnvWrapper(MultiAgentEnv):

    def __init__(self, env_config=None):
        super().__init__()
        env_config_copy = env_config.copy() if env_config else {}
        source_dir = env_config_copy.pop("source_dir", BASE_PATH)
        self.env = import_class_from_path("Test", os.path.join(source_dir, "test.py"))(**env_config_copy)
        
        # Get initial observation to determine number of agents
        initial_obs = self.env.reset()[0]  # [0] to get just obs from (obs, info) tuple
        self._num_agents = len(initial_obs)        
        self.agents = list(range(self._num_agents))  # Current agents in the env
        self.possible_agents = self.agents.copy()   # All possible agents that could be in the env
        
        # Convert action and observation spaces to proper format
        if isinstance(self.env.action_space, dict):
            self.action_space = Dict(self.env.action_space)
        else:
            self.action_space = Dict({i: self.env.action_space for i in range(self._num_agents)})
        
        single_agent_obs_space = recursive_obs_dict_to_spaces_dict(initial_obs[0])  # use the first agent's obs as the template
        self.observation_space = Dict({
            i: single_agent_obs_space for i in range(self._num_agents)
        })
        
    @property
    def num_agents(self):
        """Get the number of agents in the environment."""
        return self._num_agents
    
    @property
    def get_sub_environments(self):
        return [self.env]
    
    def reset(self, *, seed=None, options=None):
        if seed is not None:
            np.random.seed(seed)
        
        obs = self.env.reset()[0]
        
        # Convert observations to per-agent format
        obs_dict = {}
        for i, agent_obs in enumerate(obs):
            # Convert lists to numpy arrays in the observation dict
            processed_obs = recursive_list_to_np_array(agent_obs)
            obs_dict[i] = processed_obs
            
        return obs_dict, {agent_id: {} for agent_id in self.agents}

    def step(self, actions):
        obs, rewards, dones, info = self.env.step(actions)
        
        # Process observations into per-agent format
        obs_dict = {}
        for i, agent_obs in enumerate(obs):
            processed_obs = recursive_list_to_np_array(agent_obs)
            obs_dict[i] = processed_obs
        
        # Create truncated dict (same structure as dones)
        truncated = {agent_id: False for agent_id in self.agents}
        if "__all__" in dones:
            truncated["__all__"] = False
            
        # Ensure rewards and dones have agent IDs as keys
        if not isinstance(rewards, dict):
            rewards = {agent_id: rewards[i] for i, agent_id in enumerate(self.agents)}
        if not isinstance(dones, dict):
            dones = {agent_id: dones[i] for i, agent_id in enumerate(self.agents)}
            dones["__all__"] = all(dones.values())
            
        # Convert info to per-agent format if needed
        if not isinstance(info, dict):
            info = {agent_id: {} for agent_id in self.agents}
            
        return obs_dict, rewards, dones, truncated, info
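Before handing the wrapper to RLlib, I can check what it exposes with a quick standalone run (debugging sketch only):

# Standalone sanity check of the wrapper, outside of RLlib.
env = EnvWrapper(env_config=exp_run_config["env"])
print(env.possible_agents)
print(env.observation_space)  # per-agent Dict("action_mask", "features") spaces
print(env.action_space)

obs, infos = env.reset()
actions = {agent_id: env.action_space[agent_id].sample() for agent_id in env.agents}
obs, rewards, dones, truncateds, infos = env.step(actions)
print(rewards)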

and the trainer:

rllib_trainer = (PPOConfig()
        .environment(
            env=EnvWrapper,
            env_config=rllib_config["env_config"]
        )
        .framework(rllib_config["framework"])
        .resources(
            num_gpus=rllib_config["num_gpus"],
            num_cpus_for_main_process=rllib_config["num_workers"],
            placement_strategy=rllib_config["placement_strategy"],
        )
        .training(
            train_batch_size=rllib_config["train_batch_size"]
        )
        # .env_runners(num_env_runners=1)
        .multi_agent(
            policies=rllib_config["multiagent"]["policies"],
            policy_mapping_fn=rllib_config["multiagent"]["policy_mapping_fn"],
            policies_to_train=rllib_config["multiagent"]["policies_to_train"],
        )
        .build())
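A related uncertainty: I wasn't sure where num_workers belongs in the new API. My current guess is that it should go to .env_runners() as the number of sampling workers (like the line I commented out), rather than being passed as num_cpus_for_main_process:

from ray.rllib.algorithms.ppo import PPOConfig

# Guess, not verified: num_workers as the number of EnvRunner workers.
config = (
    PPOConfig()
    .environment(env=EnvWrapper, env_config=rllib_config["env_config"])
    .framework(rllib_config["framework"])
    .env_runners(num_env_runners=rllib_config["num_workers"])
    .resources(num_gpus=rllib_config["num_gpus"])
    .training(train_batch_size=rllib_config["train_batch_size"])
    .multi_agent(
        policies=rllib_config["multiagent"]["policies"],
        policy_mapping_fn=rllib_config["multiagent"]["policy_mapping_fn"],
        policies_to_train=rllib_config["multiagent"]["policies_to_train"],
    )
)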

I’m not sure how to configure the trainer correctly for the new API; during build() I get:

/rllib/core/models/catalog.py", line 361, in _get_encoder_config
(MultiAgentEnvRunner pid=74256)     raise ValueError(
(MultiAgentEnvRunner pid=74256) ValueError: No default encoder config for obs space=Dict('action_mask': Box(-1.7014117e+38, 1.7014117e+38, (57,), float32), 'features': Box(-1.7014117e+38, 1.7014117e+38, (1043,), float32)), lstm=False found.
