class RLAgent_v0(MultiAgentEnv):
    """Multi-agent env exposing a (16, 256, 10) float32 observation space
    and 5 discrete actions.

    NOTE(review): step()/reset() are not visible in this chunk; only the
    spaces are configured here. `return_agent_actions` and `part` are
    accepted but unused in the visible code — confirm against callers.
    """

    def __init__(self, return_agent_actions=False, part=False):
        super().__init__()  # initialize the MultiAgentEnv base class
        # BUG FIX: np.full with an int fill value yields an int64 array,
        # which mismatches the Box's declared dtype=np.float32. Build the
        # bounds as float32 explicitly.
        low = np.full((16, 256, 10), -1000, dtype=np.float32)
        high = np.full((16, 256, 10), 1000, dtype=np.float32)
        self.obs = []  # observation buffer (unused in visible code)
        self.action_space = gym.spaces.Discrete(5)
        self.observation_space = gym.spaces.Box(
            low=low, high=high, shape=(16, 256, 10), dtype=np.float32
        )
class KerasModel(TFModelV2):
    """Custom TF2 Keras model for policy-gradient algorithms.

    Produces `num_outputs` policy logits plus a scalar value estimate, as
    the RLlib TFModelV2 contract requires (forward() must return a
    [BATCH, num_outputs] tensor; value_function() a [BATCH] tensor).
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        super(KerasModel, self).__init__(obs_space, action_space,
                                         num_outputs, model_config, name)
        # tf imported locally to keep the module importable without TF at
        # definition time (matches the original's local-import style).
        import tensorflow as tf

        self.inputs = tf.keras.layers.Input(
            shape=obs_space.shape, name="observations")
        layer_1 = tf.keras.layers.Conv2D(
            2, 3,
            name="layer1",
            padding='same',
            activation=tf.nn.relu,
            kernel_initializer=normc_initializer(1.0))(self.inputs)
        layer_2 = tf.keras.layers.Flatten()(layer_1)
        # BUG FIX: the original emitted the raw flattened features as the
        # policy output, whose width is whatever Conv2D/Flatten produce —
        # not `num_outputs`. RLlib expects exactly `num_outputs` logits
        # from forward(), so project the features through a linear head.
        layer_out = tf.keras.layers.Dense(
            num_outputs,
            name="layer_out",
            activation=None,  # raw logits; RLlib applies the distribution
            kernel_initializer=normc_initializer(0.01))(layer_2)
        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=tf.nn.relu,
            kernel_initializer=normc_initializer(0.01))(layer_2)
        self.base_model = tf.keras.Model(self.inputs,
                                         [layer_out, value_out])
        self.register_variables(self.base_model.variables)
        self.base_model.summary()

    def forward(self, input_dict, state, seq_lens):
        """Run the base model; cache the value branch for value_function()."""
        model_out, self._value_out = self.base_model(input_dict["obs"])
        return model_out, state

    def value_function(self):
        """Return the cached value estimate flattened to shape [BATCH]."""
        import tensorflow as tf
        return tf.reshape(self._value_out, [-1])

    def metrics(self):
        """Extra custom metrics reported back to RLlib."""
        import tensorflow as tf
        return {"foo": tf.constant(42.0)}
# This is the environment and the model I have defined.