RLLIB- FetchReach-v1

CustomModelPPO
I am trying to use a custom model for the FetchReach-v1 environment of OpenAI Gym. I used the following custom model, which should be the same as the standard PPO model in RLlib. I get good results with RLlib's standard PPO model, but not with this custom model:

class MyKerasModel(TFModelV2):
    """Custom Keras model with separate policy and value branches.

    Both branches read the same input layer and each consists of two
    256-unit tanh layers followed by a linear output layer:

    * policy branch: ``fc_1`` -> ``fc_2`` -> ``fc_out`` (``num_outputs`` units)
    * value branch:  ``fc_value_1`` -> ``fc_value_2`` -> ``value_out`` (1 unit)

    No layers are shared between the two branches.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        """Build the two-branch Keras model.

        Args:
            obs_space: Observation space; only its ``shape`` is read here.
            action_space: Action space, forwarded to the base class.
            num_outputs: Number of units of the policy output layer.
            model_config: Model config, forwarded to the base class.
            name: Model name, forwarded to the base class.
        """
        super().__init__(obs_space, action_space, num_outputs, model_config, name)

        # forward() feeds input_dict["obs_flat"] into the network, so the
        # input layer is declared with the flattened observation size rather
        # than obs_space.shape. For 1-D spaces both are identical; for
        # multi-dimensional spaces obs_space.shape would not match a flat
        # input (assumes "obs_flat" is [batch, prod(shape)] -- TODO confirm).
        flat_obs_dim = 1
        for dim in obs_space.shape:
            flat_obs_dim *= int(dim)
        self.inputs = tf.keras.layers.Input(shape=(flat_obs_dim,))

        # Actor/Policy Network
        fc_1 = tf.keras.layers.Dense(
            256,
            name="fc_1",
            activation=tf.nn.tanh,
            kernel_initializer=normc_initializer(1.0),
        )(self.inputs)

        fc_2 = tf.keras.layers.Dense(
            256,
            name="fc_2",
            activation=tf.nn.tanh,
            kernel_initializer=normc_initializer(1.0),
        )(fc_1)

        fc_out = tf.keras.layers.Dense(
            num_outputs,
            name="fc_out",
            activation=tf.keras.activations.linear,
            kernel_initializer=normc_initializer(0.01),
        )(fc_2)

        # Critic/Value Network (own hidden layers, fed from the same input)
        fc_value_1 = tf.keras.layers.Dense(
            256,
            name="fc_value_1",
            activation=tf.nn.tanh,
            kernel_initializer=normc_initializer(1.0),
        )(self.inputs)

        fc_value_2 = tf.keras.layers.Dense(
            256,
            name="fc_value_2",
            activation=tf.nn.tanh,
            kernel_initializer=normc_initializer(1.0),
        )(fc_value_1)

        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=tf.keras.activations.linear,
            kernel_initializer=normc_initializer(0.01),
        )(fc_value_2)

        self.base_model = tf.keras.Model(self.inputs, [fc_out, value_out])

        # NOTE(review): this writes 'model_plot.png' every time a model
        # instance is constructed; consider removing it once the architecture
        # has been inspected.
        plot_model(
            self.base_model,
            to_file='model_plot.png',
            show_shapes=True,
            show_layer_names=True,
            rankdir="TB",
            expand_nested=False,
        )

    def forward(self, input_dict, state, seq_lens):
        """Run the base model on ``input_dict["obs_flat"]``.

        The value-branch output is cached on ``self._value_out`` so that
        ``value_function()`` can return it afterwards.

        Returns:
            Tuple of (policy-branch output, ``state`` passed through unchanged).
        """
        model_out, self._value_out = self.base_model(input_dict["obs_flat"])
        return model_out, state

    def value_function(self):
        """Return the value output cached by the last ``forward()``, reshaped to 1-D.

        ``forward()`` must have been called first; otherwise ``self._value_out``
        does not exist.
        """
        return tf.reshape(self._value_out, [-1])

    def metrics(self):
        """Return a constant placeholder metric (``foo`` = 42.0)."""
        return {"foo": tf.constant(42.0)}