How severely does this issue affect your experience of using Ray?
- High: It blocks me from completing my task.
I am transitioning to the new API stack to train a PPO agent in my custom environment. The environment works fine with the old API, where configuring n env_runners launches n environment instances. The new API stack, however, launches n+1: n for the MultiAgentEnvRunner actors plus an extra one inside the process named “PPO”. Why is this extra environment being launched? Is there a configuration option I am missing? My config is below, followed by how I run it and how I verified where each environment copy is created:
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.core.rl_module.rl_module import RLModuleSpec
from ray.tune.registry import register_env

# env_creator and ActionMaskingTorchRLModule are defined elsewhere in my
# code (the module follows RLlib's action-masking example).
env_name = "ActionMaskEnv_V2"
register_env(env_name, lambda config: env_creator(config))
config = (
    PPOConfig()
    .api_stack(
        enable_rl_module_and_learner=True,
        enable_env_runner_and_connector_v2=True,
    )
    .environment(env=env_name)
    .learners(num_learners=1)
    .env_runners(
        num_env_runners=1,
        num_envs_per_env_runner=1,
        rollout_fragment_length="auto",
        batch_mode="truncate_episodes",
        sample_timeout_s=6000.0,
    )
    .rl_module(
        # Explicitly specify the RLModule class to use and the model config
        # needed to build it.
        rl_module_spec=RLModuleSpec(
            module_class=ActionMaskingTorchRLModule,
            model_config={
                "fcnet_hiddens": [256, 256, 256, 256],
                "fcnet_activation": "tanh",
                "vf_share_layers": True,
            },
        ),
    )
    .training(
        train_batch_size_per_learner=4096,
        lr=1e-3,
        gamma=0.99,
        lambda_=0.95,
        clip_param=0.2,
        grad_clip=None,
        entropy_coeff=0.001,
        vf_loss_coeff=0.25,
        minibatch_size=128,
        num_epochs=10,
    )
    .multi_agent(
        policies={"global"},
        policy_mapping_fn=lambda agent_id, episode, **kwargs: "global",
        count_steps_by="env_steps",
    )
    .debugging(log_level="DEBUG")
    .framework(framework="torch")
)
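
For completeness, this is roughly how I build and run the algorithm (simplified; build() is the standard AlgorithmConfig call, nothing custom):

import ray

ray.init()
algo = config.build()  # config is the PPOConfig above
for _ in range(10):
    results = algo.train()
algo.stop()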
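
And this is how I verified where each environment copy is created: a print inside my env factory. A simplified sketch (ActionMaskEnvV2 stands in for my actual env class); with num_env_runners=1 it logs two creations, one from the MultiAgentEnvRunner actor and one from the process named “PPO”:

import os

def env_creator(env_config):
    # Diagnostic print: the PID shows which process (the EnvRunner actor
    # or the "PPO" driver) instantiates this copy. ActionMaskEnvV2 is a
    # placeholder for my real environment class.
    print(f"ActionMaskEnv_V2 created in PID {os.getpid()}")
    return ActionMaskEnvV2(env_config)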