Hi @mannyv, I’ve created a dummy custom env just to show the behavior:
from gym.spaces import Dict
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC
from ray.rllib.utils.framework import try_import_tf, try_import_torch
from ray.rllib.utils.torch_utils import FLOAT_MIN
torch, nn = try_import_torch()
### Customized Policy NN model
class TorchActionMaskModel(TorchModelV2, nn.Module):
    """PyTorch version of above ActionMaskingModel."""

    def __init__(
        self,
        obs_space,
        action_space,
        num_outputs,
        model_config,
        name,
        **kwargs,
    ):
        print('original_space: ', obs_space)
        orig_space = getattr(obs_space, "original_space", obs_space)
        # assert (
        #     isinstance(orig_space, Dict)
        #     and "action_mask" in orig_space.spaces
        #     and "observations" in orig_space.spaces
        # )
        TorchModelV2.__init__(
            self, obs_space, action_space, num_outputs, model_config, name, **kwargs
        )
        nn.Module.__init__(self)
        # print(orig_space)
        self.internal_model = TorchFC(
            obs_space,
            # orig_space["observations"],
            action_space,
            num_outputs,
            model_config,
            name + "_internal",
        )

        # disable action masking --> will likely lead to invalid actions
        self.no_masking = False
        if "no_masking" in model_config["custom_model_config"]:
            self.no_masking = model_config["custom_model_config"]["no_masking"]
    def forward(self, input_dict, state, seq_lens):
        # Extract the available actions tensor from the observation.
        # print('input_dict', input_dict)
        # import pdb; pdb.set_trace()
        action_mask = input_dict["obs"]["action_mask"]

        # Compute the unmasked logits.
        logits, _ = self.internal_model({"obs": input_dict["obs"]})

        # If action masking is disabled, directly return unmasked logits
        if self.no_masking:
            return logits, state

        # Convert action_mask into a [0.0 || -inf]-type mask.
        inf_mask = torch.clamp(torch.log(action_mask), min=FLOAT_MIN)
        masked_logits = logits + inf_mask

        # Return masked logits.
        return masked_logits, state

    def value_function(self):
        return self.internal_model.value_function()
from ray.rllib.models import ModelCatalog
ModelCatalog.register_custom_model("my_torch_model", TorchActionMaskModel)
from ray.rllib.algorithms.ppo import PPO
from ray import tune
import gym
import numpy as np
from gym import Env, spaces
class DummyEnv(Env):
    def __init__(self, *arg, **kwargs):
        super().__init__()
        self.obs1 = None
        self.reset()

    def reset(self):
        obs = self._get_obs()
        self.observation_space = spaces.Dict({
            'obs1': spaces.Box(low=-1, high=100, dtype=np.float16, shape=(10,)),
            'obs2': spaces.Box(low=-1, high=100, dtype=np.float16, shape=(1,)),
            'obs3': spaces.Box(low=-1, high=100, dtype=np.float16, shape=(1,)),
        })
        return obs

    @property
    def action_space(self):
        # Action space size is random
        self.obs1 = self.obs1 if self.obs1 is not None else self._get_obs()['obs1']
        return spaces.Discrete(len(self.obs1))

    def _get_obs(self):
        import random
        self.obs1 = np.random.rand(random.randint(1, 10))
        return {
            'obs1': self.obs1,
            'obs2': [np.random.rand()],
            'obs3': [np.random.rand()]
        }

    def step(self, action):
        # Generate new random values of random length
        return self._get_obs(), 1, np.random.rand() < .1, {}
class PaddedEnv(gym.ObservationWrapper):
    def __init__(self, env):
        super().__init__(env)

    # Override `observation` to custom process the original observation
    # coming from the env.
    def observation(self, observation):
        # E.g. padding to fixed len
        pad_len = 15 - len(observation['obs1'])
        observation['obs1'] = np.pad(observation['obs1'], (0, pad_len))
        observation = {
            'observations': np.concatenate([i for i in observation.values()]),
            'action_mask': observation['obs1'] > 0
        }
        return observation
env = PaddedEnv(DummyEnv())
tune.register_env('dummyEnv', lambda config: env)
# Configure the algorithm.
config = {
    # Environment (RLlib understands openAI gym registered strings).
    "env": "dummyEnv",
    # Use 1 environment worker (aka "rollout worker") to collect
    # samples from its own environment clone(s).
    "num_workers": 1,
    # Use "framework": "torch" for PyTorch ("tf2" for tf2.x eager execution).
    "framework": "torch",
    # Tweak the default model provided automatically by RLlib,
    # given the environment's observation- and action spaces.
    # "model": {
    #     "fcnet_hiddens": [64, 64],
    #     "fcnet_activation": "relu",
    #     "framestack": True
    # },
    "model": {
        "custom_model": "my_torch_model",
        # Extra kwargs to be passed to your model's c'tor.
        # "custom_model_config": {'no_masking': False},
    },
    # Set up a separate evaluation worker set for the
    # `algo.evaluate()` call after training (see below).
    # "evaluation_num_workers": 1,
    # "evaluation_interval": 1,
    # Only for evaluation runs, render the env.
    "evaluation_config": {
        "render_env": False,
    },
    "disable_env_checking": True,
    # "recreate_failed_workers": True,
    "log_level": 'WARN',
}
tune.run(
    run_or_experiment="PPO",
    config=config,
    stop={"training_iteration": 200},
    num_samples=1,
    checkpoint_at_end=True,
    checkpoint_freq=1,
)
Note that I’ve made minor revisions to the TorchActionMaskModel initialization (commented out the assert on the original space and passed obs_space, rather than orig_space["observations"], to TorchFC), otherwise it fails on the assert. Running the script gives the following error:
2022-10-06 20:54:24,115 INFO worker.py:1518 -- Started a local Ray instance.
(PPO pid=51191) 2022-10-06 20:54:34,493 INFO ppo.py:378 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
(PPO pid=51191) 2022-10-06 20:54:34,495 INFO algorithm.py:351 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
(RolloutWorker pid=51255) original_space: Box([-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.], [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.], (12,), float32)
(RolloutWorker pid=51255) 2022-10-06 20:54:42,557 ERROR worker.py:756 -- Exception raised in creation task: The actor died because of an error raised in its creation task, ray::RolloutWorker.__init__() (pid=51255, ip=127.0.0.1, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7fcf388b17c0>)
(RolloutWorker pid=51255) File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/evaluation/rollout_worker.py", line 613, in __init__
(RolloutWorker pid=51255) self._build_policy_map(
(RolloutWorker pid=51255) File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/evaluation/rollout_worker.py", line 1784, in _build_policy_map
(RolloutWorker pid=51255) self.policy_map.create_policy(
(RolloutWorker pid=51255) File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/policy/policy_map.py", line 123, in create_policy
(RolloutWorker pid=51255) self[policy_id] = create_policy_for_framework(
(RolloutWorker pid=51255) File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/utils/policy.py", line 80, in create_policy_for_framework
(RolloutWorker pid=51255) return policy_class(observation_space, action_space, merged_config)
(RolloutWorker pid=51255) File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/algorithms/ppo/ppo_torch_policy.py", line 66, in __init__
(RolloutWorker pid=51255) self._initialize_loss_from_dummy_batch()
(RolloutWorker pid=51255) File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/policy/policy.py", line 1050, in _initialize_loss_from_dummy_batch
(RolloutWorker pid=51255) actions, state_outs, extra_outs = self.compute_actions_from_input_dict(
(RolloutWorker pid=51255) File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/policy/torch_policy_v2.py", line 483, in compute_actions_from_input_dict
(RolloutWorker pid=51255) return self._compute_action_helper(
(RolloutWorker pid=51255) File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/utils/threading.py", line 24, in wrapper
(RolloutWorker pid=51255) return func(self, *a, **k)
(RolloutWorker pid=51255) File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/policy/torch_policy_v2.py", line 1016, in _compute_action_helper
(RolloutWorker pid=51255) dist_inputs, state_out = self.model(input_dict, state_batches, seq_lens)
(RolloutWorker pid=51255) File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/models/modelv2.py", line 259, in __call__
(RolloutWorker pid=51255) res = self.forward(restored, state or [], seq_lens)
(RolloutWorker pid=51255) File "tmp.py", line 53, in forward
(RolloutWorker pid=51255) action_mask = input_dict["obs"]["action_mask"]
(RolloutWorker pid=51255) KeyError: 'action_mask'
== Status ==
Current time: 2022-10-06 20:54:42 (running for 00:00:16.66)
Memory usage on this node: 10.3/16.0 GiB
Using FIFO scheduling algorithm.
Resources requested: 2.0/12 CPUs, 0/0 GPUs, 0.0/4.7 GiB heap, 0.0/2.0 GiB objects
Result logdir: /Users/zhan/ray_results/PPO
Number of trials: 1/1 (1 RUNNING)
+--------------------------+----------+-------+
| Trial name | status | loc |
|--------------------------+----------+-------|
| PPO_dummyEnv_971fc_00000 | RUNNING | |
+--------------------------+----------+-------+
2022-10-06 20:54:42,593 ERROR trial_runner.py:980 -- Trial PPO_dummyEnv_971fc_00000: Error processing event.
ray.tune.error._TuneNoNextExecutorEventError: Traceback (most recent call last):
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/tune/execution/ray_trial_executor.py", line 989, in get_next_executor_event
future_result = ray.get(ready_future)
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/_private/client_mode_hook.py", line 105, in wrapper
return func(*args, **kwargs)
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/_private/worker.py", line 2277, in get
raise value
ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, ray::PPO.__init__() (pid=51191, ip=127.0.0.1, repr=PPO)
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/evaluation/worker_set.py", line 125, in __init__
self.add_workers(
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/evaluation/worker_set.py", line 269, in add_workers
self.foreach_worker(lambda w: w.assert_healthy())
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/evaluation/worker_set.py", line 391, in foreach_worker
remote_results = ray.get([w.apply.remote(func) for w in self.remote_workers()])
ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, ray::RolloutWorker.__init__() (pid=51255, ip=127.0.0.1, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7fcf388b17c0>)
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/evaluation/rollout_worker.py", line 613, in __init__
self._build_policy_map(
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/evaluation/rollout_worker.py", line 1784, in _build_policy_map
self.policy_map.create_policy(
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/policy/policy_map.py", line 123, in create_policy
self[policy_id] = create_policy_for_framework(
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/utils/policy.py", line 80, in create_policy_for_framework
return policy_class(observation_space, action_space, merged_config)
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/algorithms/ppo/ppo_torch_policy.py", line 66, in __init__
self._initialize_loss_from_dummy_batch()
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/policy/policy.py", line 1050, in _initialize_loss_from_dummy_batch
actions, state_outs, extra_outs = self.compute_actions_from_input_dict(
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/policy/torch_policy_v2.py", line 483, in compute_actions_from_input_dict
return self._compute_action_helper(
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/utils/threading.py", line 24, in wrapper
return func(self, *a, **k)
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/policy/torch_policy_v2.py", line 1016, in _compute_action_helper
dist_inputs, state_out = self.model(input_dict, state_batches, seq_lens)
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/models/modelv2.py", line 259, in __call__
res = self.forward(restored, state or [], seq_lens)
File "tmp.py", line 53, in forward
action_mask = input_dict["obs"]["action_mask"]
KeyError: 'action_mask'
During handling of the above exception, another exception occurred:
ray::PPO.__init__() (pid=51191, ip=127.0.0.1, repr=PPO)
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/algorithms/algorithm.py", line 308, in __init__
super().__init__(config=config, logger_creator=logger_creator, **kwargs)
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/tune/trainable/trainable.py", line 157, in __init__
self.setup(copy.deepcopy(self.config))
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/algorithms/algorithm.py", line 443, in setup
raise e.args[0].args[2]
KeyError: 'action_mask'
Result for PPO_dummyEnv_971fc_00000:
trial_id: 971fc_00000
== Status ==
Current time: 2022-10-06 20:54:42 (running for 00:00:16.69)
Memory usage on this node: 10.3/16.0 GiB
Using FIFO scheduling algorithm.
Resources requested: 0/12 CPUs, 0/0 GPUs, 0.0/4.7 GiB heap, 0.0/2.0 GiB objects
Result logdir: /Users/zhan/ray_results/PPO
Number of trials: 1/1 (1 ERROR)
+--------------------------+----------+-------+
| Trial name | status | loc |
|--------------------------+----------+-------|
| PPO_dummyEnv_971fc_00000 | ERROR | |
+--------------------------+----------+-------+
Number of errored trials: 1
+--------------------------+--------------+--------------------------------------------------------------------------------------+
| Trial name | # failures | error file |
|--------------------------+--------------+--------------------------------------------------------------------------------------|
| PPO_dummyEnv_971fc_00000 | 1 | /Users/zhan/ray_results/PPO/PPO_dummyEnv_971fc_00000_0_2022-10-06_20-54-26/error.txt |
+--------------------------+--------------+--------------------------------------------------------------------------------------+
2022-10-06 20:54:42,617 ERROR ray_trial_executor.py:103 -- An exception occurred when trying to stop the Ray actor:Traceback (most recent call last):
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/tune/execution/ray_trial_executor.py", line 94, in _post_stop_cleanup
ray.get(future, timeout=0)
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/_private/client_mode_hook.py", line 105, in wrapper
return func(*args, **kwargs)
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/_private/worker.py", line 2277, in get
raise value
ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, ray::PPO.__init__() (pid=51191, ip=127.0.0.1, repr=PPO)
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/evaluation/worker_set.py", line 125, in __init__
self.add_workers(
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/evaluation/worker_set.py", line 269, in add_workers
self.foreach_worker(lambda w: w.assert_healthy())
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/evaluation/worker_set.py", line 391, in foreach_worker
remote_results = ray.get([w.apply.remote(func) for w in self.remote_workers()])
ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, ray::RolloutWorker.__init__() (pid=51255, ip=127.0.0.1, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7fcf388b17c0>)
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/evaluation/rollout_worker.py", line 613, in __init__
self._build_policy_map(
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/evaluation/rollout_worker.py", line 1784, in _build_policy_map
self.policy_map.create_policy(
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/policy/policy_map.py", line 123, in create_policy
self[policy_id] = create_policy_for_framework(
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/utils/policy.py", line 80, in create_policy_for_framework
return policy_class(observation_space, action_space, merged_config)
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/algorithms/ppo/ppo_torch_policy.py", line 66, in __init__
self._initialize_loss_from_dummy_batch()
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/policy/policy.py", line 1050, in _initialize_loss_from_dummy_batch
actions, state_outs, extra_outs = self.compute_actions_from_input_dict(
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/policy/torch_policy_v2.py", line 483, in compute_actions_from_input_dict
return self._compute_action_helper(
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/utils/threading.py", line 24, in wrapper
return func(self, *a, **k)
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/policy/torch_policy_v2.py", line 1016, in _compute_action_helper
dist_inputs, state_out = self.model(input_dict, state_batches, seq_lens)
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/models/modelv2.py", line 259, in __call__
res = self.forward(restored, state or [], seq_lens)
File "tmp.py", line 53, in forward
action_mask = input_dict["obs"]["action_mask"]
KeyError: 'action_mask'
During handling of the above exception, another exception occurred:
ray::PPO.__init__() (pid=51191, ip=127.0.0.1, repr=PPO)
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/algorithms/algorithm.py", line 308, in __init__
super().__init__(config=config, logger_creator=logger_creator, **kwargs)
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/tune/trainable/trainable.py", line 157, in __init__
self.setup(copy.deepcopy(self.config))
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/algorithms/algorithm.py", line 443, in setup
raise e.args[0].args[2]
KeyError: 'action_mask'
(PPO pid=51191) 2022-10-06 20:54:42,575 ERROR worker.py:756 -- Exception raised in creation task: The actor died because of an error raised in its creation task, ray::PPO.__init__() (pid=51191, ip=127.0.0.1, repr=PPO)
(PPO pid=51191) File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/evaluation/worker_set.py", line 125, in __init__
(PPO pid=51191) self.add_workers(
(PPO pid=51191) File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/evaluation/worker_set.py", line 269, in add_workers
(PPO pid=51191) self.foreach_worker(lambda w: w.assert_healthy())
(PPO pid=51191) File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/evaluation/worker_set.py", line 391, in foreach_worker
(PPO pid=51191) remote_results = ray.get([w.apply.remote(func) for w in self.remote_workers()])
(PPO pid=51191) ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, ray::RolloutWorker.__init__() (pid=51255, ip=127.0.0.1, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7fcf388b17c0>)
(PPO pid=51191) File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/evaluation/rollout_worker.py", line 613, in __init__
(PPO pid=51191) self._build_policy_map(
(PPO pid=51191) File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/evaluation/rollout_worker.py", line 1784, in _build_policy_map
(PPO pid=51191) self.policy_map.create_policy(
(PPO pid=51191) File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/policy/policy_map.py", line 123, in create_policy
(PPO pid=51191) self[policy_id] = create_policy_for_framework(
(PPO pid=51191) File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/utils/policy.py", line 80, in create_policy_for_framework
(PPO pid=51191) return policy_class(observation_space, action_space, merged_config)
(PPO pid=51191) File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/algorithms/ppo/ppo_torch_policy.py", line 66, in __init__
(PPO pid=51191) self._initialize_loss_from_dummy_batch()
(PPO pid=51191) File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/policy/policy.py", line 1050, in _initialize_loss_from_dummy_batch
(PPO pid=51191) actions, state_outs, extra_outs = self.compute_actions_from_input_dict(
(PPO pid=51191) File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/policy/torch_policy_v2.py", line 483, in compute_actions_from_input_dict
(PPO pid=51191) return self._compute_action_helper(
(PPO pid=51191) File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/utils/threading.py", line 24, in wrapper
(PPO pid=51191) return func(self, *a, **k)
(PPO pid=51191) File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/policy/torch_policy_v2.py", line 1016, in _compute_action_helper
(PPO pid=51191) dist_inputs, state_out = self.model(input_dict, state_batches, seq_lens)
(PPO pid=51191) File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/models/modelv2.py", line 259, in __call__
(PPO pid=51191) res = self.forward(restored, state or [], seq_lens)
(PPO pid=51191) File "tmp.py", line 53, in forward
(PPO pid=51191) action_mask = input_dict["obs"]["action_mask"]
(PPO pid=51191) KeyError: 'action_mask'
(PPO pid=51191)
(PPO pid=51191) During handling of the above exception, another exception occurred:
(PPO pid=51191)
(PPO pid=51191) ray::PPO.__init__() (pid=51191, ip=127.0.0.1, repr=PPO)
(PPO pid=51191) File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/algorithms/algorithm.py", line 308, in __init__
(PPO pid=51191) super().__init__(config=config, logger_creator=logger_creator, **kwargs)
(PPO pid=51191) File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/tune/trainable/trainable.py", line 157, in __init__
(PPO pid=51191) self.setup(copy.deepcopy(self.config))
(PPO pid=51191) File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/rllib/algorithms/algorithm.py", line 443, in setup
(PPO pid=51191) raise e.args[0].args[2]
(PPO pid=51191) KeyError: 'action_mask'
Traceback (most recent call last):
File "tmp.py", line 171, in <module>
tune.run(
File "/Users/zhan/Applications/anaconda3/envs/python38/lib/python3.8/site-packages/ray/tune/tune.py", line 752, in run
raise TuneError("Trials did not complete", incomplete_trials)
ray.tune.error.TuneError: ('Trials did not complete', [PPO_dummyEnv_971fc_00000])
I’m running this with:
python==3.8.13
gym==0.21.0
ray==2.0.0
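For what it’s worth, the space printed as original_space above is already the flattened Box of shape (12,) derived from DummyEnv’s Dict (10 + 1 + 1), not a Dict containing observations/action_mask, which is why input_dict["obs"]["action_mask"] raises the KeyError. I suspect this is because PaddedEnv only overrides observation() and never declares its own observation_space, so RLlib builds the policy spaces from the inner env’s Dict. A minimal sketch of what I mean (untested on my side; the bounds, dtypes, and the fixed sizes 15/17 are just placeholders matching the padding above):

class PaddedEnv(gym.ObservationWrapper):
    def __init__(self, env):
        super().__init__(env)
        # Declare the padded Dict space so RLlib sees 'observations' and
        # 'action_mask' as the original (pre-flattening) space.
        self.observation_space = spaces.Dict({
            # obs1 padded to 15 values + obs2 (1) + obs3 (1) = 17 values
            'observations': spaces.Box(low=-1, high=100, shape=(17,), dtype=np.float32),
            # one mask entry per padded slot of obs1
            'action_mask': spaces.Box(low=0, high=1, shape=(15,), dtype=np.float32),
        })

This still wouldn’t address the randomly sized action space (Discrete(len(obs1)) vs. a 15-long mask); it’s only meant to illustrate where I think the Dict gets lost.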