How severe does this issue affect your experience of using Ray?
- High: It blocks me from completing my task.
I have created a PettingZoo parallel env to manage a traffic-light intersection where each agent is a traffic signal (using the micro traffic simulator SUMO). Each agent has 3 possible actions: [green, yellow, red], but I need to put some constraints on them, such as a minimum green duration and a fixed yellow duration. I implemented these as an action mask in the following way:
def compute_action_mask(self) -> np.ndarray:
    """Return the action mask for the traffic signal.

    The mask is an ``np.int8`` array of shape ``(3,)`` indexed as
    ``[green, yellow, red]``, where
    1 : valid action
    0 : invalid action

    Raises:
        Exception: if the yellow phase has already lasted longer than the
            fixed yellow duration — this should be unreachable when the
            mask is enforced by the policy.
    """
    current_state = self.get_current_couleur()
    if current_state == "g":
        # ctr green min: green must be held for at least `min_green_time`
        # before switching to yellow becomes legal.
        if self.consecutive_durations["g"] < self.env.min_green_time:
            return np.array([1, 0, 0], dtype=np.int8)
        return np.array([1, 1, 0], dtype=np.int8)
    if current_state == "y":
        # ctr yellow: yellow has a fixed duration — stay yellow until it
        # is reached, then force the transition to red.
        if self.consecutive_durations["y"] < self.env.yellow_time:
            return np.array([0, 1, 0], dtype=np.int8)
        if self.consecutive_durations["y"] == self.env.yellow_time:
            return np.array([0, 0, 1], dtype=np.int8)
        raise Exception(f"Impossible to have more than 3 sec of yellow.\nself.consecutive_durations : {self.consecutive_durations['y']}")
    # ctr red max: force a switch back to green once `max_red_time` is hit.
    if self.consecutive_durations["r"] >= self.env.max_red_time:
        return np.array([1, 0, 0], dtype=np.int8)
    return np.array([1, 0, 1], dtype=np.int8)
When I test my env using the parallel_api_test and api_test functions from PettingZoo, it works fine and all constraints are respected, but when I try to train it using this code:
import os

import ray
from ray import tune
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.env.wrappers.pettingzoo_env import ParallelPettingZooEnv
from ray.tune.registry import register_env

from FpfEnv_v2 import FeuParFeuEnv

ray.init()

net_file = r"D:\travail\data\networks\2way-single-intersection\single-intersection.net.xml"
route_file = r"D:\travail\data\networks\2way-single-intersection\single-intersection.rou.xml"


def _env_creator(env_config):
    """Build the SUMO traffic-light env and wrap it for RLlib."""
    env = FeuParFeuEnv(
        net_file=net_file,
        route_file=route_file,
        reward_fn_name='arrived_vehicles',
        with_gui=True,
        num_seconds=3600,
    )
    return ParallelPettingZooEnv(env)


register_env('fpf', _env_creator)

# One train batch = one full fragment from every rollout worker.
n_workers = 4
fragment_length = 128

config = (
    PPOConfig()
    .environment(env='fpf', clip_actions=True)
    .rollouts(num_rollout_workers=n_workers, rollout_fragment_length=fragment_length)
    .training(
        train_batch_size=n_workers * fragment_length,
        lr=2e-5,
        gamma=0.99,
        lambda_=0.9,
        use_gae=True,
        clip_param=0.4,
        grad_clip=None,
        entropy_coeff=0.1,
        vf_loss_coeff=0.25,
        sgd_minibatch_size=64,
        num_sgd_iter=10,
    )
    .debugging(log_level="ERROR")
    .framework(framework="torch")
    .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0")))
)

# Shorter run on CI, longer locally.
episode_budget = 2 if os.environ.get("CI") else 5

tune.run(
    "PPO",
    name="PPO",
    stop={"num_episodes": episode_budget},
    checkpoint_freq=50,
    local_dir=r"D:\travail\outputs\fpf",
    config=config.to_dict(),
)
The following error is raised each time after a couple of timesteps, and the other constraints, such as the minimum green time, do not work either:
raise Exception(f"Impossible to have more than 3 sec of yellow....)
My observation space :
def observation_space(self) -> spaces.Dict:
    """Return the observation space.

    A dict space with the raw observation (a 2-float feature vector in
    [0, 100]) under "observation" and a 3-bit mask under "action_mask".
    """
    features = spaces.Box(low=0, high=100, shape=(2,), dtype=np.float32)
    mask = spaces.MultiBinary(3)
    return spaces.Dict({"observation": features, "action_mask": mask})
Where is the problem?
Thanks