How severe does this issue affect your experience of using Ray?
- High: It blocks me from completing my task.
I'm trying to train a custom TensorFlow model with PG (policy gradient), initialized from a pretrained TF model. The model file looks like this:
```python
"""A custom model created for RLlib policy gradient implementation."""
import importlib
import os
import sys
import typing as T
from pathlib import Path

import gym
import numpy as np
import yaml
from azureml.core import Run
from azureml.core.model import Model
from keras.models import load_model
from keras.optimizers import Adam
from ray.rllib.models.modelv2 import ModelV2
from ray.rllib.models.tf.tf_modelv2 import TFModelV2
from ray.rllib.utils.annotations import override
from ray.rllib.utils.framework import try_import_tf

tf1, tf, tfv = try_import_tf()

# Assumed base directory for the downloaded model (defined elsewhere in the
# original module, not shown in this snippet).
BASE_DIR = os.getcwd()


class RBCModel(TFModelV2):
    """Using RBC Model implementation and freezing final layers."""

    def __init__(
        self,
        observation_space: gym.spaces.Space,
        action_space: gym.spaces.Space,
        num_outputs: int,
        model_config: dict,
        name: str,
    ):
        """Init method for custom RLlib model."""
        super(RBCModel, self).__init__(
            observation_space, action_space, num_outputs, model_config, name
        )
        # Download the pretrained model from the AzureML model registry.
        model_name = model_config["custom_model_config"]["model_name"]
        model_version = model_config["custom_model_config"]["model_version"]
        trainable_upto = model_config["custom_model_config"]["last_trainable_layer"]
        trainable_upto_block = model_config["custom_model_config"]["last_trainable_block"]
        model_dir = os.path.join(BASE_DIR, model_name)
        if not os.path.exists(model_dir):
            run = Run.get_context()
            ws = run.experiment.workspace
            rbc_model = Model(workspace=ws, name=model_name, version=model_version)
            rbc_model.download(target_dir=os.getcwd())

        # Load the pretrained Keras model and compile it with the loss/metric
        # classes described in the accompanying YAML config.
        self.base_model = load_model(model_dir, compile=False)
        with Path(model_dir).joinpath("loss_function.yaml").open("r") as f:
            loss_config = yaml.safe_load(f)
        self.base_model.compile(
            optimizer=Adam(learning_rate=1e-4),
            loss=getattr(
                importlib.import_module(f".{loss_config['module_name']}", package=model_name),
                loss_config["loss_function"],
            )(**loss_config["loss_init_args"]),
            metrics=getattr(
                importlib.import_module(f".{loss_config['module_name']}", package=model_name),
                loss_config["metric_function"],
            )(**loss_config["metric_init_args"]),
        )
        self.base_model.summary()

    @override(ModelV2)
    def forward(self, input_dict: dict, state: T.Any, seq_lens: T.Any):
        """Perform one forward pass over the base model."""
        dim = input_dict["obs"]["speed"].shape[0]
        model_inputs = (
            input_dict["obs"]["image"],
            tf.reshape(input_dict["obs"]["speed"], (dim, 1)),
            tf.reshape(input_dict["obs"]["angle_prev"], (dim, 1)),
            tf.reshape(input_dict["obs"]["throttle_prev"], (dim, 1)),
            tf.reshape(input_dict["obs"]["brake_prev"], (dim, 1)),
            tf.reshape(input_dict["obs"]["target_speed"], (dim, 1)),
        )
        model_out = self.base_model(model_inputs)
        # Debug print of the raw model output and its shape.
        tf.print("Model output is", model_out, ", shape is", model_out.shape)
        return model_out, []

    @override(ModelV2)
    def trainable_variables(self, as_dict: bool = False):
        # Expose only the base model's trainable variables to RLlib.
        variable_list = self.base_model.trainable_variables
        if not as_dict:
            return variable_list
        return {var.name: var for var in variable_list}
```
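For context, this is roughly how I register the model and configure PG; it is a simplified sketch, and the environment name and `custom_model_config` values below are placeholders rather than my exact training script:

```python
from ray.rllib.algorithms.pg import PGConfig
from ray.rllib.models import ModelCatalog

# Register the custom model so RLlib can look it up by name.
ModelCatalog.register_custom_model("rbc_model", RBCModel)

config = (
    PGConfig()
    .environment("MyDrivingEnv-v0")  # placeholder env name
    .framework("tf2")
    .training(
        model={
            "custom_model": "rbc_model",
            "custom_model_config": {
                "model_name": "rbc_model_registry_name",  # placeholder
                "model_version": 1,                       # placeholder
                "last_trainable_layer": "dense_2",        # placeholder
                "last_trainable_block": "block_3",        # placeholder
            },
        }
    )
)
algo = config.build()
algo.train()
```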
While the forward pass outputs the expected values, the same values do not show up in the action returned by the `compute_actions` method.
Model output:
- Tensor size: [1, 2]
- Datatype: float32
- Example: [[0.5, 0.5]]

Action space: `Box(np.array([-1.0, -1.0]), np.array([1.0, 1.0]), dtype=np.float32)`

Computed action:
- Tensor size: [1, 2]
- Datatype: float32
I have also set `explore=False` to verify this. For a model output of `[[0.5, 0.6]]` I expect the action to be `[0.5, 0.6]`, but it always comes back as `[0.5, 0.5]`: the first element of the model output is repeated and the second is lost.
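For reference, this is roughly how I check the computed action against the model output (a simplified sketch; `algo` is the PG algorithm built from the config above, and the observation shapes/values are placeholders matching my Dict observation space):

```python
import numpy as np

# Placeholder observation matching the env's Dict observation space
# (shapes and values are illustrative only).
obs = {
    "image": np.zeros((120, 160, 3), dtype=np.float32),
    "speed": np.array([0.0], dtype=np.float32),
    "angle_prev": np.array([0.0], dtype=np.float32),
    "throttle_prev": np.array([0.0], dtype=np.float32),
    "brake_prev": np.array([0.0], dtype=np.float32),
    "target_speed": np.array([0.3], dtype=np.float32),
}

# With exploration disabled I expect the computed action to match the model
# output, e.g. model output [[0.5, 0.6]] -> action [0.5, 0.6],
# but I always get [0.5, 0.5].
action = algo.compute_single_action(obs, explore=False)
print("Computed action:", action)
```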
Versions / Dependencies
protobuf==3.20.0
ray[rllib]==2.7.0
scp==0.14.5
tensorboard==2.9.0
fsspec==2023.6.0
tensorflow-gpu==2.9
azureml-sdk==1.52.0
psutil==5.9.0
matplotlib==3.7.2