I try to use example with action masking function but I don’t know why tre trainer "can’t see " that some actions are masked.
import gym
from gym.spaces import Box, Dict, Discrete
import numpy as np
import random
from ray import tune
from ray.tune.registry import register_env
class ParametricActionsCartPoleNoEmbeddings(gym.Env):
def __init__(self, max_avail_actions):
# Randomly set which two actions are valid and available.
self.left_idx, self.right_idx = random.sample(range(max_avail_actions), 2)
self.valid_avail_actions_mask = np.array(
[0.0] * max_avail_actions, dtype=np.float32
)
self.valid_avail_actions_mask[self.left_idx] = 1
self.valid_avail_actions_mask[self.right_idx] = 1
self.action_space = Discrete(max_avail_actions)
self.wrapped = gym.make("CartPole-v0")
self.observation_space = Dict(
{
"action_mask": Box(0, 1, shape=(max_avail_actions,)),
"observation": self.wrapped.observation_space,
}
)
self._skip_env_checking = True
def reset(self):
return {
"action_mask": self.valid_avail_actions_mask,
"observation": self.wrapped.reset(),
}
def step(self, action):
if action == self.left_idx:
actual_action = 0
elif action == self.right_idx:
actual_action = 1
else:
raise ValueError(
"Chosen action was not one of the non-zero action embeddings",
action,
self.valid_avail_actions_mask,
self.left_idx,
self.right_idx,
)
orig_obs, rew, done, info = self.wrapped.step(actual_action)
obs = {
"action_mask": self.valid_avail_actions_mask,
"observation": orig_obs,
}
return obs, rew, done, info
if __name__ == "__main__":
def env_creator(env_config={}):
return ParametricActionsCartPoleNoEmbeddings(max_avail_actions=6)
register_env("my_env", env_creator)
tune.run("PPO",
# algorithm specific configuration
config={"env": "my_env",
"num_gpus": 0,
"num_workers": 7,}, )
It gets the error:
ValueError: ('Chosen action was not one of the non-zero action embeddings', 2, array([0., 1., 0., 0., 1., 0.], dtype=float32), 4, 1)
By the way, I think that in some examples in github good idea is to split examples for so basic parts as possible. Putting embedding example and masking example in the same file can be a little confusing for beginners. Maybe basic example should be as simple as possible to run specified functionality.