How severely does this issue affect your experience of using Ray?
- High: It blocks me from completing my task.
I want to update this trainer, which uses an old version of RLlib. The configuration looks like this:
# Define all the policies here
policy_config = exp_run_config["policy"]["regions"]

# Map of type MultiAgentPolicyConfigDict from policy ids to tuples
# of (policy_cls, obs_space, act_space, config). This defines the
# observation and action spaces of the policies and any extra config.
policies = {
    "regions": (
        None,  # uses default policy
        env_object.observation_space[0],
        env_object.action_space[0],
        policy_config,
    ),
}

# Function mapping agent ids to policy ids.
def policy_mapping_fn(agent_id=None):
    assert agent_id is not None
    return "regions"

# Optional list of policies to train, or None for all policies.
policies_to_train = None

# Settings for Multi-Agent Environments
multiagent_config = {
    "policies": policies,
    "policies_to_train": policies_to_train,
    "policy_mapping_fn": policy_mapping_fn,
}

train_config = exp_run_config["trainer"]
rllib_config = {
    # Arguments dict passed to the env creator as an EnvContext object (which
    # is a dict plus the properties: num_workers, worker_index, vector_index,
    # and remote).
    "env_config": exp_run_config["env"],
    "framework": train_config["framework"],
    "multiagent": multiagent_config,
    "num_workers": train_config["num_workers"],
    "num_gpus": train_config["num_gpus"],
    "num_envs_per_worker": train_config["num_envs"] // train_config["num_workers"],
    "train_batch_size": train_config["train_batch_size"],
}
The environment wrapper looks like this:
class EnvWrapper(MultiAgentEnv):
    """
    The environment wrapper class.
    """

    def __init__(self, env_config=None):
        super().__init__()
        # Guard against a None config before copying it.
        if env_config is None:
            env_config = {}
        assert isinstance(env_config, dict)
        env_config_copy = env_config.copy()
        source_dir = env_config_copy.get("source_dir", None)
        # Remove the source_dir key from env_config if it exists.
        if "source_dir" in env_config_copy:
            del env_config_copy["source_dir"]
        if source_dir is None:
            source_dir = PUBLIC_REPO_DIR
        self.env = import_class_from_path("Rice", os.path.join(source_dir, "rice.py"))(
            **env_config_copy
        )
        self.action_space = self.env.action_space
        self.observation_space = recursive_obs_dict_to_spaces_dict(self.env.reset())

    def reset(self):
        """Reset the env."""
        obs = self.env.reset()
        return recursive_list_to_np_array(obs)

    def step(self, actions=None):
        """Step through the env."""
        assert actions is not None
        assert isinstance(actions, dict)
        obs, rew, done, info = self.env.step(actions)
        return recursive_list_to_np_array(obs), rew, done, info
and the trainer is set up like this:
# Create the A2C trainer.
exp_run_config["env"]["source_dir"] = source_dir
rllib_trainer = A2CTrainer(
    env=EnvWrapper,
    config=get_rllib_config(
        exp_run_config=exp_run_config, env_class=EnvWrapper, seed=seed
    ),
)
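For context, the old trainer is then driven by calling train() on it repeatedly; roughly like this (just a sketch, num_iterations is a placeholder and not a name from my config):

for _ in range(num_iterations):
    # Each call runs one training iteration and returns a result dict.
    result = rllib_trainer.train()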
I’ve tried updating it like so:
policy_config = exp_run_config["policy"]["regions"]

# Map of type MultiAgentPolicyConfigDict from policy ids to tuples
# of (policy_cls, obs_space, act_space, config). This defines the
# observation and action spaces of the policies and any extra config.
policies = {
    "regions": (
        None,  # uses default policy
        env_object.observation_space[0],
        env_object.action_space[0],
        policy_config,
    ),
}

# Function mapping agent ids to policy ids.
def policy_mapping_fn(agent_id=None):
    assert agent_id is not None
    return "regions"

# Optional list of policies to train, or None for all policies.
policies_to_train = None

# Settings for Multi-Agent Environments
multiagent_config = {
    "policies": policies,
    "policies_to_train": policies_to_train,
    "policy_mapping_fn": policy_mapping_fn,
}

train_config = exp_run_config["trainer"]
rllib_config = {
    # Arguments dict passed to the env creator as an EnvContext object
    "env_config": exp_run_config["env"],
    "framework": train_config["framework"],
    "multiagent": multiagent_config,
    "num_workers": train_config["num_workers"],
    "num_gpus": train_config["num_gpus"],
    "train_batch_size": train_config["train_batch_size"],
    "placement_strategy": "PACK",
}
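One thing I'm not sure about: the new-API docs show policy_mapping_fn being called with the agent id and the episode, so I assume the single-argument version above needs to become something like this (sketch):

def policy_mapping_fn(agent_id, episode, **kwargs):
    # Every agent maps to the single shared "regions" policy.
    return "regions"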
The new wrapper:
class EnvWrapper(MultiAgentEnv):
    def __init__(self, env_config=None):
        super().__init__()
        env_config_copy = env_config.copy() if env_config else {}
        source_dir = env_config_copy.pop("source_dir", BASE_PATH)
        self.env = import_class_from_path("Test", os.path.join(source_dir, "test.py"))(
            **env_config_copy
        )

        # Get the initial observation to determine the number of agents.
        initial_obs = self.env.reset()[0]  # [0] to get just obs from the (obs, info) tuple
        self._num_agents = len(initial_obs)
        self.agents = list(range(self._num_agents))  # Current agents in the env
        self.possible_agents = self.agents.copy()  # All possible agents that could be in the env

        # Convert action and observation spaces to the proper format.
        if isinstance(self.env.action_space, dict):
            self.action_space = Dict(self.env.action_space)
        else:
            self.action_space = Dict(
                {i: self.env.action_space for i in range(self._num_agents)}
            )
        single_agent_obs_space = recursive_obs_dict_to_spaces_dict(initial_obs)  # Use first agent's obs as template
        self.observation_space = Dict(
            {i: single_agent_obs_space for i in range(self._num_agents)}
        )

    @property
    def num_agents(self):
        """Get the number of agents in the environment."""
        return self._num_agents

    @property
    def get_sub_environments(self):
        return [self.env]

    def reset(self, *, seed=None, options=None):
        if seed is not None:
            np.random.seed(seed)
        obs = self.env.reset()[0]
        # Convert observations to per-agent format.
        obs_dict = {}
        for i, agent_obs in enumerate(obs):
            # Convert lists to numpy arrays in the observation dict.
            obs_dict[i] = recursive_list_to_np_array(agent_obs)
        return obs_dict, {agent_id: {} for agent_id in self.agents}

    def step(self, actions):
        obs, rewards, dones, info = self.env.step(actions)
        # Process observations into per-agent format.
        obs_dict = {}
        for i, agent_obs in enumerate(obs):
            obs_dict[i] = recursive_list_to_np_array(agent_obs)

        # Create the truncated dict (same structure as dones).
        truncated = {agent_id: False for agent_id in self.agents}
        if "__all__" in dones:
            truncated["__all__"] = False

        # Ensure rewards and dones have agent IDs as keys.
        if not isinstance(rewards, dict):
            rewards = {agent_id: rewards[i] for i, agent_id in enumerate(self.agents)}
        if not isinstance(dones, dict):
            dones = {agent_id: dones[i] for i, agent_id in enumerate(self.agents)}
        dones["__all__"] = all(dones.values())

        # Convert info to per-agent format if needed.
        if not isinstance(info, dict):
            info = {agent_id: {} for agent_id in self.agents}

        return obs_dict, rewards, dones, truncated, info
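Before building the algorithm, one way I can sanity-check the wrapper on its own is RLlib's env pre-check utility (a minimal sketch; it assumes check_env in this Ray version validates MultiAgentEnv reset/step return formats):

from ray.rllib.utils import check_env

# Instantiate the wrapper the same way RLlib would and run the built-in checks.
check_env(EnvWrapper(env_config=exp_run_config["env"]))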
And the trainer:
rllib_trainer = (
    PPOConfig()
    .environment(
        env=EnvWrapper,
        env_config=config_rllib["env_config"],
    )
    .framework(config_rllib["framework"])
    .resources(
        num_gpus=config_rllib["num_gpus"],
        num_cpus_for_main_process=config_rllib["num_workers"],
        placement_strategy=config_rllib["placement_strategy"],
    )
    .training(
        train_batch_size=config_rllib["train_batch_size"],
    )
    # .env_runners(num_env_runners=1)
    .multi_agent(
        policies=config_rllib["multiagent"]["policies"],
        policy_mapping_fn=config_rllib["multiagent"]["policy_mapping_fn"],
        policies_to_train=config_rllib["multiagent"]["policies_to_train"],
    )
    .build()
)
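For completeness, my understanding is that the per-policy tuples can also be written as PolicySpec objects with the new config API; I don't know whether that is related to the error below, but this is what I assume it would look like (sketch):

from ray.rllib.policy.policy import PolicySpec

policies = {
    "regions": PolicySpec(
        policy_class=None,  # use the algorithm's default policy
        observation_space=env_object.observation_space[0],
        action_space=env_object.action_space[0],
        config=policy_config,
    ),
}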
I'm not sure how to configure the trainer correctly; during build() I get the following error:
/rllib/core/models/catalog.py", line 361, in _get_encoder_config
(MultiAgentEnvRunner pid=74256) raise ValueError(
(MultiAgentEnvRunner pid=74256) ValueError: No default encoder config for obs space=Dict('action_mask': Box(-1.7014117e+38, 1.7014117e+38, (57,), float32), 'features': Box(-1.7014117e+38, 1.7014117e+38, (1043,), float32)), lstm=False found.
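For reference, the per-agent observation space named in the error can be inspected without building the algorithm, just by instantiating the wrapper (sketch, using the same exp_run_config as above):

env = EnvWrapper(env_config=exp_run_config["env"])
# Expecting this to match the Dict('action_mask', 'features') space
# that _get_encoder_config complains about.
print(env.observation_space[0])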