I was wondering if anyone has had any luck implementing a parameterised action space using RLlib, or which steps would be necessary to get it working, as I’m unfamiliar with the library. My action space is shown below. It is hierarchical: the model first chooses from a discrete action space, and each of these discrete actions has its own continuous values.
import gymnasium
import numpy as np
from gymnasium.spaces import Space
class ActionSpace(Space):
    """Hierarchical (parameterised) action space.

    An action is a pair ``(name, params)``: ``name`` is one of ``'Create'``,
    ``'Move'``, ``'Resize'``, ``'Rotate'`` or ``'Delete'``, and ``params`` is
    a point in the ``Box`` that belongs to that action. Each action has its
    own ``Box`` with its own dimensionality and bounds.

    Attributes:
        create_shape: parameter Box for ``'Create'`` (5 values).
        move_shape: parameter Box for ``'Move'`` (5 values).
        resize_shape: parameter Box for ``'Resize'`` (3 values).
        rotate_shape: parameter Box for ``'Rotate'`` (2 values).
        delete_shape: parameter Box for ``'Delete'`` (1 value).
        choices: Discrete space used to pick which action to sample.
    """

    # Action name -> attribute holding that action's parameter Box.
    # Insertion order defines the discrete choice index (0 = 'Create', ...,
    # 4 = 'Delete'), so sample() and contains() share a single table.
    _PARAM_ATTRS = {
        'Create': 'create_shape',
        'Move': 'move_shape',
        'Resize': 'resize_shape',
        'Rotate': 'rotate_shape',
        'Delete': 'delete_shape',
    }
    _ACTION_NAMES = tuple(_PARAM_ATTRS)

    def __init__(self):
        """Build the per-action parameter boxes and the discrete selector."""
        # The original passed an undefined class name (IllustratorActionSpace)
        # to super(), which raised NameError on construction. The
        # zero-argument form always resolves to this class.
        super().__init__()
        self.create_shape = gymnasium.spaces.Box(
            low=np.array([
                0,  # shape_type
                0, 0,  # shape_x, shape_y
                0, 0,  # shape_width, shape_height
            ]),
            high=np.array([
                1,  # shape_type
                1, 1,  # shape_x, shape_y
                1, 1,  # shape_width, shape_height
            ]),
            dtype=np.float32,
        )
        # NOTE(review): 'Move' carries width/height in addition to dx/dy,
        # exactly as in the original definition -- confirm this is intended.
        self.move_shape = gymnasium.spaces.Box(
            low=np.array([
                0,  # item_index
                0, 0,  # dx, dy
                0, 0,  # shape_width, shape_height
            ]),
            high=np.array([
                39,  # item_index
                1, 1,  # dx, dy
                1, 1,  # shape_width, shape_height
            ]),
            dtype=np.float32,
        )
        self.resize_shape = gymnasium.spaces.Box(
            low=np.array([
                0,  # item_index
                0, 0,  # shape_width, shape_height
            ]),
            high=np.array([
                39,  # item_index
                1, 1,  # shape_width, shape_height
            ]),
            dtype=np.float32,
        )
        self.rotate_shape = gymnasium.spaces.Box(
            low=np.array([
                0,  # item_index
                -1,  # shape_angle
            ]),
            high=np.array([
                39,  # item_index
                1,  # shape_angle
            ]),
            dtype=np.float32,
        )
        self.delete_shape = gymnasium.spaces.Box(
            low=np.array([
                0,  # item_index
            ]),
            high=np.array([
                39,  # item_index
            ]),
            dtype=np.float32,
        )
        # One discrete choice per action name (5 in total).
        self.choices = gymnasium.spaces.Discrete(len(self._ACTION_NAMES))

    def sample(self):
        """Draw a random action.

        Returns:
            A tuple ``(name, params)`` where ``name`` is the action name
            picked via ``self.choices`` and ``params`` is a sample from that
            action's parameter Box.
        """
        name = self._ACTION_NAMES[int(self.choices.sample())]
        return (name, getattr(self, self._PARAM_ATTRS[name]).sample())

    def contains(self, x):
        """Return whether ``x`` is a valid ``(name, params)`` action.

        Args:
            x: candidate action; must be a 2-element tuple or list.

        Returns:
            ``True`` if ``x[0]`` is a known action name and ``x[1]`` lies in
            that action's parameter Box, ``False`` otherwise.
        """
        if not isinstance(x, (tuple, list)) or len(x) != 2:
            return False
        choice, action = x
        # Reject non-string tags up front so an unhashable or array-valued
        # tag yields False instead of an exception during lookup.
        if not isinstance(choice, str):
            return False
        attr = self._PARAM_ATTRS.get(choice)
        if attr is None:
            return False
        return getattr(self, attr).contains(action)
I’m aware I could completely flatten the space to make it all continuous, but ideally I’d want to experiment with multiple approaches. I’m assuming I will need to implement my own action distribution and model for this to work, if it is even possible?