How can i fix this Error?
ValueError: Shape must be rank 2 but is rank 3 for ‘{{node default_policy_wk2/categorical/Multinomial}} = Multinomial[T=DT_FLOAT, output_dtype=DT_INT64, seed=0, seed2=0](default_policy_wk2/truediv, default_policy_wk2/categorical/Multinomial/num_samples)’ with input shapes: [?,25,2], .
(RolloutWorker pid=810) 2022-08-07 15:34:13,735 ERROR worker.py:451 – Exception raised in creation task: The actor died because of an error raised in its creation task, ray::RolloutWorker.init() (pid=810, ip=172.24.188.14, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7fcff9d27850>)
(RolloutWorker pid=810) File “/home/sb/anaconda3/envs/trade/lib/python3.7/site-packages/ray/rllib/evaluation/rollout_worker.py”, line 634, in init
(RolloutWorker pid=810) seed=seed,
…
(RolloutWorker pid=810) raise e.with_traceback(filtered_tb) from None
(RolloutWorker pid=810) File “/home/sb/anaconda3/envs/trade/lib/python3.7/site-packages/tensorflow/python/framework/ops.py”, line 1963, in _create_c_op
(RolloutWorker pid=810) raise ValueError(e.message)
(RolloutWorker pid=810) ValueError: Shape must be rank 2 but is rank 3 for ‘{{node default_policy_wk4/categorical/Multinomial}} = Multinomial[T=DT_FLOAT, output_dtype=DT_INT64, seed=0, seed2=0](default_policy_wk4/truediv, default_policy_wk4/categorical/Multinomial/num_samples)’ with input shapes: [?,25,2], .
class MyKerasModel(TFModelV2):
"""Custom model for policy gradient algorithms."""
def __init__(self, obs_space, action_space, num_outputs, model_config, name):
super(MyKerasModel, self).__init__(
obs_space, action_space, num_outputs, model_config, name
)
self.inputs = tf.keras.layers.Input(shape=obs_space.shape, name="observations")
layer_1 = tf.keras.layers.Dense(
128,
name="my_layer1",
activation=tf.nn.relu,
kernel_initializer=normc_initializer(1.0),
)(self.inputs)
layer_out = tf.keras.layers.Dense(
num_outputs,
name="my_out",
activation=None,
kernel_initializer=normc_initializer(0.01),
)(layer_1)
value_out = tf.keras.layers.Dense(
1,
name="value_out",
activation=None,
kernel_initializer=normc_initializer(0.01),
)(layer_1)
self.base_model = tf.keras.Model(self.inputs, [layer_out, value_out])
print(self.base_model.summary())
def forward(self, input_dict, state, seq_lens):
model_out, self._value_out = self.base_model(input_dict["obs"])
return model_out, state
def value_function(self):
return tf.reshape(self._value_out, [-1])
def metrics(self):
return {"foo": tf.constant(42.0)}
from ray import tune
analysis = tune.run(
"PPO",
stop={
"episode_reward_mean": 100
},
config={
"env": "My_Env",
"env_config": {
"window_size": 25
},
"model":{
#"fcnet_hiddens": [256, 256],
"custom_model": MyKerasModel
},
"log_level": "DEBUG",
"framework": "tf2",
"ignore_worker_failures": True,
"num_workers": 7,
"num_gpus": 0,
"clip_rewards": True,
"lr": 8e-6,
"lr_schedule": [
[0, 1e-1],
[int(1e2), 1e-2],
[int(1e3), 1e-3],
[int(1e4), 1e-4],
[int(1e5), 1e-5],
[int(1e6), 1e-6],
[int(1e7), 1e-7]
],
"gamma": 0,
"observation_filter": "MeanStdFilter",
"lambda": 0.72,
"vf_loss_coeff": 0.5,
"entropy_coeff": 0.01,
"framework":"tf"
},
checkpoint_at_end=True,
fail_fast=True,
#verbose=1,
)