CustomModelPPO
I am trying to use a custom model for the FetchReach-v1 environment of OpenAI Gym. I used the following custom model, which should be equivalent to RLlib's standard PPO model. I get good results with the standard RLlib PPO model, but not with this custom model:
class MyKerasModel(TFModelV2):
    """Custom Keras actor-critic model for RLlib PPO.

    Builds two independent fully connected towers (two 256-unit tanh layers
    each): one producing the policy logits (``num_outputs``) and one producing
    a scalar value estimate.

    NOTE(review): RLlib's built-in fully connected model is additionally
    controlled by config options such as ``vf_share_layers`` and
    ``free_log_std`` — this model hard-codes separate towers and no free
    log-std, so results may differ from the default model depending on that
    config. Confirm the PPO config when comparing.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        super(MyKerasModel, self).__init__(
            obs_space, action_space, num_outputs, model_config, name
        )
        # forward() feeds input_dict["obs_flat"], so the input layer must be
        # sized to the *flattened* observation, not obs_space.shape — the two
        # differ for Dict/Tuple spaces (FetchReach uses a goal-based Dict
        # observation). Using obs_space.shape directly here was the bug.
        self.inputs = tf.keras.layers.Input(
            shape=(int(np.product(obs_space.shape)),), name="observations"
        )

        # Actor/Policy network: 256-256 tanh, linear logits output.
        fc_1 = tf.keras.layers.Dense(
            256,
            name="fc_1",
            activation=tf.nn.tanh,
            kernel_initializer=normc_initializer(1.0),
        )(self.inputs)
        fc_2 = tf.keras.layers.Dense(
            256,
            name="fc_2",
            activation=tf.nn.tanh,
            kernel_initializer=normc_initializer(1.0),
        )(fc_1)
        fc_out = tf.keras.layers.Dense(
            num_outputs,
            name="fc_out",
            activation=tf.keras.activations.linear,
            # Small init for the output layer keeps the initial policy
            # close to uniform (matches RLlib's default model).
            kernel_initializer=normc_initializer(0.01),
        )(fc_2)

        # Critic/Value network: separate 256-256 tanh tower, scalar output.
        fc_value_1 = tf.keras.layers.Dense(
            256,
            name="fc_value_1",
            activation=tf.nn.tanh,
            kernel_initializer=normc_initializer(1.0),
        )(self.inputs)
        fc_value_2 = tf.keras.layers.Dense(
            256,
            name="fc_value_2",
            activation=tf.nn.tanh,
            kernel_initializer=normc_initializer(1.0),
        )(fc_value_1)
        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=tf.keras.activations.linear,
            kernel_initializer=normc_initializer(0.01),
        )(fc_value_2)

        self.base_model = tf.keras.Model(self.inputs, [fc_out, value_out])

        # Diagnostic only: plot_model needs pydot + graphviz installed.
        # A missing optional dependency must not break model construction,
        # so this is deliberately best-effort.
        try:
            plot_model(
                self.base_model,
                to_file="model_plot.png",
                show_shapes=True,
                show_layer_names=True,
                rankdir="TB",
                expand_nested=False,
            )
        except Exception:
            pass

    def forward(self, input_dict, state, seq_lens):
        """Run the model on a batch of flattened observations.

        Caches the value head output for value_function() and returns the
        policy logits plus the (unused) recurrent state.
        """
        model_out, self._value_out = self.base_model(input_dict["obs_flat"])
        return model_out, state

    def value_function(self):
        """Return the cached value estimates as a flat [batch] tensor."""
        return tf.reshape(self._value_out, [-1])

    def metrics(self):
        """Custom metrics reported in training results (demo constant)."""
        return {"foo": tf.constant(42.0)}