RLLIB Custom_keras_model

Seyar_Barez · July 24, 2022, 7:44pm

Hi, I ran the custom_keras_model.py example code with:
OS: Ubuntu 18.04
Ray: 1.13

Here is the code from the RLlib GitHub repository:
"""Example of using a custom ModelV2 Keras-style model."""

import argparse
import os

import ray
from ray import tune
from ray.rllib.agents.dqn.distributional_q_tf_model import DistributionalQTFModel
from ray.rllib.models import ModelCatalog
from ray.rllib.models.tf.misc import normc_initializer
from ray.rllib.models.tf.tf_modelv2 import TFModelV2
from ray.rllib.models.tf.visionnet import VisionNetwork as MyVisionNetwork
from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.metrics.learner_info import LEARNER_INFO, LEARNER_STATS_KEY

tf1, tf, tfv = try_import_tf()

# Command-line options for the example script. Option names must start with
# two ASCII hyphens and use straight quotes; the forum paste had converted
# them to an en dash and typographic quotes, which is not valid Python.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--run", type=str, default="DQN", help="The RLlib-registered algorithm to use."
)
parser.add_argument(
    "--stop",
    type=int,
    default=200,
    help="Stop training once `episode_reward_mean` reaches this value.",
)
parser.add_argument(
    "--use-vision-network",
    action="store_true",
    help="Register RLlib's VisionNetwork instead of the custom Keras models.",
)
parser.add_argument(
    "--num-cpus",
    type=int,
    default=0,
    help="Number of CPUs passed to `ray.init`; 0 lets Ray decide.",
)

class MyKerasModel(TFModelV2):
    """Custom model for policy gradient algorithms.

    Builds a small Keras network: one shared 128-unit ReLU layer feeding two
    linear heads, one with `num_outputs` units and one with a single unit
    that is exposed through `value_function()`.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        """Create the Keras layers and wrap them in `self.base_model`.

        All arguments are forwarded unchanged to the `TFModelV2` constructor;
        `obs_space.shape` defines the input shape and `num_outputs` the width
        of the main output head.
        """
        super(MyKerasModel, self).__init__(
            obs_space, action_space, num_outputs, model_config, name
        )
        self.inputs = tf.keras.layers.Input(shape=obs_space.shape, name="observations")
        # Shared hidden layer used by both output heads.
        layer_1 = tf.keras.layers.Dense(
            128,
            name="my_layer1",
            activation=tf.nn.relu,
            kernel_initializer=normc_initializer(1.0),
        )(self.inputs)
        # Main output head (no activation).
        layer_out = tf.keras.layers.Dense(
            num_outputs,
            name="my_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01),
        )(layer_1)
        # Single-unit head read back by `value_function()`.
        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01),
        )(layer_1)
        self.base_model = tf.keras.Model(self.inputs, [layer_out, value_out])

    def forward(self, input_dict, state, seq_lens):
        """Run `input_dict["obs"]` through the base model.

        Stores the value-head output on `self._value_out` and returns the
        main head's output together with `state`, which is passed through
        unchanged. `seq_lens` is not used.
        """
        model_out, self._value_out = self.base_model(input_dict["obs"])
        return model_out, state

    def value_function(self):
        """Return the value-head output of the last `forward()` call, flattened to 1-D.

        `self._value_out` is only assigned inside `forward()`, so `forward()`
        has to run first.
        """
        return tf.reshape(self._value_out, [-1])

    def metrics(self):
        """Return a constant custom metric `foo` = 42.0."""
        return {"foo": tf.constant(42.0)}

class MyKerasQModel(DistributionalQTFModel):
    """Custom model for DQN.

    Provides the core feature network (two dense ReLU layers); the remaining
    output heads come from the `DistributionalQTFModel` base class.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name, **kw):
        """Create the core Keras layers and wrap them in `self.base_model`.

        All positional arguments and any extra keyword arguments in `kw` are
        forwarded unchanged to the `DistributionalQTFModel` constructor.
        """
        super(MyKerasQModel, self).__init__(
            obs_space, action_space, num_outputs, model_config, name, **kw
        )

        # Define the core model layers which will be used by the other
        # output heads of DistributionalQModel
        self.inputs = tf.keras.layers.Input(shape=obs_space.shape, name="observations")
        layer_1 = tf.keras.layers.Dense(
            128,
            name="my_layer1",
            activation=tf.nn.relu,
            kernel_initializer=normc_initializer(1.0),
        )(self.inputs)
        layer_out = tf.keras.layers.Dense(
            num_outputs,
            name="my_out",
            activation=tf.nn.relu,
            kernel_initializer=normc_initializer(1.0),
        )(layer_1)
        self.base_model = tf.keras.Model(self.inputs, layer_out)

    # Implement the core forward method.
    def forward(self, input_dict, state, seq_lens):
        """Run `input_dict["obs"]` through the base model.

        Returns the model output together with `state`, which is passed
        through unchanged. `seq_lens` is not used.
        """
        model_out = self.base_model(input_dict["obs"])
        return model_out, state

    def metrics(self):
        """Return a constant custom metric `foo` = 42.0."""
        return {"foo": tf.constant(42.0)}

if __name__ == "__main__":
    args = parser.parse_args()
    # `--num-cpus 0` (the default) becomes None so that Ray picks the CPU count.
    ray.init(num_cpus=args.num_cpus or None)
    # Register both model names; with --use-vision-network each name maps to
    # RLlib's VisionNetwork instead of the custom Keras classes.
    ModelCatalog.register_custom_model(
        "keras_model", MyVisionNetwork if args.use_vision_network else MyKerasModel
    )
    ModelCatalog.register_custom_model(
        "keras_q_model", MyVisionNetwork if args.use_vision_network else MyKerasQModel
    )

    # Tests https://github.com/ray-project/ray/issues/7293
    def check_has_custom_metric(result):
        """Assert that the model's custom `foo` metric appears in the learner info."""
        r = result["result"]["info"][LEARNER_INFO]
        if DEFAULT_POLICY_ID in r:
            # Stats may be nested under LEARNER_STATS_KEY; otherwise fall back
            # to the per-policy dict itself.
            r = r[DEFAULT_POLICY_ID].get(LEARNER_STATS_KEY, r[DEFAULT_POLICY_ID])
        assert r["model"]["foo"] == 42, result

    if args.run == "DQN":
        extra_config = {"learning_starts": 0}
    else:
        extra_config = {}

    tune.run(
        args.run,
        stop={"episode_reward_mean": args.stop},
        config=dict(
            extra_config,
            **{
                "env": "BreakoutNoFrameskip-v4"
                if args.use_vision_network
                else "CartPole-v0",
                # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
                "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
                # NOTE: Ray 1.13 logs a DeprecationWarning for this dict-style
                # callbacks interface (see the log output in this post).
                "callbacks": {
                    "on_train_result": check_has_custom_metric,
                },
                "model": {
                    "custom_model": "keras_q_model"
                    if args.run == "DQN"
                    else "keras_model"
                },
                "framework": "tf",
            }
        ),
    )

I get this error:
(gym) sb@DES:~/Lab$ python3 custom_keras.py --run PPO --num-cpus 6
2022-07-24 21:38:26,178 INFO services.py:1476 – View the Ray dashboard at http://127.0.0.1:8265
(PPOTrainer pid=14368) 2022-07-24 21:38:33,018 INFO trainer.py:2333 – Your framework setting is ‘tf’, meaning you are using static-graph mode. Set framework=‘tf2’ to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
(PPOTrainer pid=14368) 2022-07-24 21:38:33,018 INFO ppo.py:415 – In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn’t work for you.
(PPOTrainer pid=14368) 2022-07-24 21:38:33,019 WARNING deprecation.py:47 – DeprecationWarning: callbacks dict interface has been deprecated. Use a class extending rllib.agents.callbacks.DefaultCallbacks instead. This will raise an error in the future!
(PPOTrainer pid=14368) 2022-07-24 21:38:33,019 INFO trainer.py:906 – Current log_level is WARN. For more information, set ‘log_level’: ‘INFO’ / ‘DEBUG’ or use the -v and -vv flags.
(RolloutWorker pid=14440) 2022-07-24 21:38:37,191 WARNING deprecation.py:47 – DeprecationWarning: callbacks dict interface has been deprecated. Use a class extending rllib.agents.callbacks.DefaultCallbacks instead. This will raise an error in the future!
(PPOTrainer pid=14368) 2022-07-24 21:38:37,257 ERROR worker.py:451 – Exception raised in creation task: The actor died because of an error raised in its creation task, ray::PPOTrainer.init() (pid=14368, ip=172.27.130.120, repr=PPOTrainer)
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/agents/trainer.py”, line 1074, in _init
(PPOTrainer pid=14368) raise NotImplementedError
(PPOTrainer pid=14368) NotImplementedError
(PPOTrainer pid=14368)
(PPOTrainer pid=14368) During handling of the above exception, another exception occurred:
(PPOTrainer pid=14368)
(PPOTrainer pid=14368) ray::PPOTrainer.init() (pid=14368, ip=172.27.130.120, repr=PPOTrainer)
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/agents/trainer.py”, line 871, in init
(PPOTrainer pid=14368) config, logger_creator, remote_checkpoint_dir, sync_function_tpl
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/tune/trainable.py”, line 156, in init
(PPOTrainer pid=14368) self.setup(copy.deepcopy(self.config))
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/agents/trainer.py”, line 957, in setup
(PPOTrainer pid=14368) logdir=self.logdir,
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/evaluation/worker_set.py”, line 144, in init
(PPOTrainer pid=14368) lambda p, pid: (pid, p.observation_space, p.action_space)
(PPOTrainer pid=14368) ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, ray::RolloutWorker.init() (pid=14440, ip=172.27.130.120, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7f64865492e8>)
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/evaluation/rollout_worker.py”, line 634, in init
(PPOTrainer pid=14368) seed=seed,
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/evaluation/rollout_worker.py”, line 1789, in _build_policy_map
(PPOTrainer pid=14368) name, orig_cls, obs_space, act_space, conf, merged_conf
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/policy/policy_map.py”, line 141, in create_policy
(PPOTrainer pid=14368) observation_space, action_space, merged_config
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/policy/tf_policy_template.py”, line 270, in init
(PPOTrainer pid=14368) get_batch_divisibility_req=get_batch_divisibility_req,
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/policy/dynamic_tf_policy.py”, line 211, in init
(PPOTrainer pid=14368) framework=“tf”,
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/models/catalog.py”, line 541, in get_model_v2
(PPOTrainer pid=14368) **customized_model_kwargs,
(PPOTrainer pid=14368) File “custom_keras.py”, line 41, in init
(PPOTrainer pid=14368) )(self.inputs)
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py”, line 824, in call
(PPOTrainer pid=14368) self._maybe_build(inputs)
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py”, line 2146, in _maybe_build
(PPOTrainer pid=14368) self.build(input_shapes)
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/keras/layers/core.py”, line 1021, in build
(PPOTrainer pid=14368) trainable=True)
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py”, line 529, in add_weight
(PPOTrainer pid=14368) aggregation=aggregation)
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/training/tracking/base.py”, line 712, in _add_variable_with_custom_getter
(PPOTrainer pid=14368) **kwargs_for_getter)
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer_utils.py”, line 139, in make_variable
(PPOTrainer pid=14368) shape=variable_shape if variable_shape else None)
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/ops/variables.py”, line 258, in call
(PPOTrainer pid=14368) return cls._variable_v1_call(*args, **kwargs)
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/ops/variables.py”, line 219, in _variable_v1_call
(PPOTrainer pid=14368) shape=shape)
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/ops/variables.py”, line 65, in getter
(PPOTrainer pid=14368) return captured_getter(captured_previous, **kwargs)
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/models/catalog.py”, line 518, in track_var_creation
(PPOTrainer pid=14368) created.add(v.ref())
(PPOTrainer pid=14368) AttributeError: ‘ResourceVariable’ object has no attribute ‘ref’
(RolloutWorker pid=14440) WARNING:tensorflow:From /home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/ops/resource_variable_ops.py:1630: calling BaseResourceVariable.init (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.
(RolloutWorker pid=14440) Instructions for updating:
(RolloutWorker pid=14440) If using Keras pass *_constraint arguments to layers.
(RolloutWorker pid=14440) 2022-07-24 21:38:37,243 ERROR worker.py:451 – Exception raised in creation task: The actor died because of an error raised in its creation task, ray::RolloutWorker.init() (pid=14440, ip=172.27.130.120, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7f64865492e8>)
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/evaluation/rollout_worker.py”, line 634, in init
(RolloutWorker pid=14440) seed=seed,
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/evaluation/rollout_worker.py”, line 1789, in _build_policy_map
(RolloutWorker pid=14440) name, orig_cls, obs_space, act_space, conf, merged_conf
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/policy/policy_map.py”, line 141, in create_policy
(RolloutWorker pid=14440) observation_space, action_space, merged_config
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/policy/tf_policy_template.py”, line 270, in init
(RolloutWorker pid=14440) get_batch_divisibility_req=get_batch_divisibility_req,
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/policy/dynamic_tf_policy.py”, line 211, in init
(RolloutWorker pid=14440) framework=“tf”,
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/models/catalog.py”, line 541, in get_model_v2
(RolloutWorker pid=14440) **customized_model_kwargs,
(RolloutWorker pid=14440) File “custom_keras.py”, line 41, in init
(RolloutWorker pid=14440) )(self.inputs)
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py”, line 824, in call
(RolloutWorker pid=14440) self._maybe_build(inputs)
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py”, line 2146, in _maybe_build
(RolloutWorker pid=14440) self.build(input_shapes)
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/keras/layers/core.py”, line 1021, in build
(RolloutWorker pid=14440) trainable=True)
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py”, line 529, in add_weight
(RolloutWorker pid=14440) aggregation=aggregation)
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/training/tracking/base.py”, line 712, in _add_variable_with_custom_getter
(RolloutWorker pid=14440) **kwargs_for_getter)
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer_utils.py”, line 139, in make_variable
(RolloutWorker pid=14440) shape=variable_shape if variable_shape else None)
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/ops/variables.py”, line 258, in call
(RolloutWorker pid=14440) return cls._variable_v1_call(*args, **kwargs)
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/ops/variables.py”, line 219, in _variable_v1_call
(RolloutWorker pid=14440) shape=shape)
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/ops/variables.py”, line 65, in getter
(RolloutWorker pid=14440) return captured_getter(captured_previous, **kwargs)
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/models/catalog.py”, line 518, in track_var_creation
(RolloutWorker pid=14440) created.add(v.ref())
(RolloutWorker pid=14440) AttributeError: ‘ResourceVariable’ object has no attribute ‘ref’
(RolloutWorker pid=14441) 2022-07-24 21:38:37,237 WARNING deprecation.py:47 – DeprecationWarning: callbacks dict interface has been deprecated. Use a class extending rllib.agents.callbacks.DefaultCallbacks instead. This will raise an error in the future!
(RolloutWorker pid=14441) WARNING:tensorflow:From /home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/ops/resource_variable_ops.py:1630: calling BaseResourceVariable.init (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.
(RolloutWorker pid=14441) Instructions for updating:
(RolloutWorker pid=14441) If using Keras pass *_constraint arguments to layers.
(RolloutWorker pid=14441) 2022-07-24 21:38:37,301 ERROR worker.py:451 – Exception raised in creation task: The actor died because of an error raised in its creation task, ray::RolloutWorker.init() (pid=14441, ip=172.27.130.120, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7f2bf82ce2e8>)
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/evaluation/rollout_worker.py”, line 634, in init
(RolloutWorker pid=14441) seed=seed,
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/evaluation/rollout_worker.py”, line 1789, in _build_policy_map
(RolloutWorker pid=14441) name, orig_cls, obs_space, act_space, conf, merged_conf
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/policy/policy_map.py”, line 141, in create_policy
(RolloutWorker pid=14441) observation_space, action_space, merged_config
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/policy/tf_policy_template.py”, line 270, in init
(RolloutWorker pid=14441) get_batch_divisibility_req=get_batch_divisibility_req,
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/policy/dynamic_tf_policy.py”, line 211, in init
(RolloutWorker pid=14441) framework=“tf”,
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/models/catalog.py”, line 541, in get_model_v2
(RolloutWorker pid=14441) **customized_model_kwargs,
(RolloutWorker pid=14441) File “custom_keras.py”, line 41, in init
(RolloutWorker pid=14441) )(self.inputs)
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py”, line 824, in call
(RolloutWorker pid=14441) self._maybe_build(inputs)
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py”, line 2146, in _maybe_build
(RolloutWorker pid=14441) self.build(input_shapes)
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/keras/layers/core.py”, line 1021, in build
(RolloutWorker pid=14441) trainable=True)
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py”, line 529, in add_weight
(RolloutWorker pid=14441) aggregation=aggregation)
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/training/tracking/base.py”, line 712, in _add_variable_with_custom_getter
(RolloutWorker pid=14441) **kwargs_for_getter)
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer_utils.py”, line 139, in make_variable
(RolloutWorker pid=14441) shape=variable_shape if variable_shape else None)
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/ops/variables.py”, line 258, in call
(RolloutWorker pid=14441) return cls._variable_v1_call(*args, **kwargs)
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/ops/variables.py”, line 219, in _variable_v1_call
(RolloutWorker pid=14441) shape=shape)
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/ops/variables.py”, line 65, in getter
(RolloutWorker pid=14441) return captured_getter(captured_previous, **kwargs)
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/models/catalog.py”, line 518, in track_var_creation
(RolloutWorker pid=14441) created.add(v.ref())
(RolloutWorker pid=14441) AttributeError: ‘ResourceVariable’ object has no attribute ‘ref’

Seyar_Barez · July 24, 2022, 9:06pm

I updated TensorFlow from 1.15 to TF2, and now I don’t have this problem!

Topic		Replies	Views
Customized model training bug RLlib	1	260	January 8, 2024
RLLib Computing random actions that don't match model output RLlib	0	180	November 15, 2023
How to give inputs to a model and get output of the model? RLlib	3	621	May 8, 2021
Cannot understand how to create custom model for DQN RLlib	2	1492	April 29, 2022
[RLlib] PPO custom model only get flattened observations RLlib	5	1579	May 5, 2021

RLLIB Custom_keras_model

Related topics