Hi, I ran the custom_keras_model.py example code with:
OS: Ubuntu 18.04
Ray: 1.13
Here is your RLlib GitHub code:
"""Example of using a custom ModelV2 Keras-style model."""
import argparse
import os
import ray
from ray import tune
from ray.rllib.agents.dqn.distributional_q_tf_model import DistributionalQTFModel
from ray.rllib.models import ModelCatalog
from ray.rllib.models.tf.misc import normc_initializer
from ray.rllib.models.tf.tf_modelv2 import TFModelV2
from ray.rllib.models.tf.visionnet import VisionNetwork as MyVisionNetwork
from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.metrics.learner_info import LEARNER_INFO, LEARNER_STATS_KEY
tf1, tf, tfv = try_import_tf()
# Command-line options for the example script.
# NOTE: flag names use a double ASCII hyphen ("--run"), and all string
# literals use plain ASCII quotes; typographic quotes/dashes are not valid
# Python and would not match the flags typed on the command line.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--run", type=str, default="DQN", help="The RLlib-registered algorithm to use."
)
# Value used below as the `episode_reward_mean` stopping threshold.
parser.add_argument("--stop", type=int, default=200)
# Selects the vision-network model and the Breakout env instead of CartPole.
parser.add_argument("--use-vision-network", action="store_true")
# 0 (the default) is turned into `None` when passed to `ray.init`.
parser.add_argument("--num-cpus", type=int, default=0)
class MyKerasModel(TFModelV2):
    """Custom model for policy gradient algorithms.

    Builds a small Keras network: one shared 128-unit ReLU hidden layer that
    feeds two linear heads, an output head of width ``num_outputs`` and a
    single-unit value head.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        """Build the Keras base model.

        Args:
            obs_space: Observation space; its ``shape`` defines the input layer.
            action_space: Action space, forwarded to ``TFModelV2.__init__``.
            num_outputs: Number of units in the ``my_out`` output head.
            model_config: Model config, forwarded to ``TFModelV2.__init__``.
            name: Model name, forwarded to ``TFModelV2.__init__``.
        """
        super(MyKerasModel, self).__init__(
            obs_space, action_space, num_outputs, model_config, name
        )
        self.inputs = tf.keras.layers.Input(shape=obs_space.shape, name="observations")
        # Shared hidden layer used by both heads.
        layer_1 = tf.keras.layers.Dense(
            128,
            name="my_layer1",
            activation=tf.nn.relu,
            kernel_initializer=normc_initializer(1.0),
        )(self.inputs)
        # Linear output head of width `num_outputs`.
        layer_out = tf.keras.layers.Dense(
            num_outputs,
            name="my_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01),
        )(layer_1)
        # Linear single-unit value head.
        value_out = tf.keras.layers.Dense(
            1,
            name="value_out",
            activation=None,
            kernel_initializer=normc_initializer(0.01),
        )(layer_1)
        self.base_model = tf.keras.Model(self.inputs, [layer_out, value_out])

    def forward(self, input_dict, state, seq_lens):
        """Run the base model on ``input_dict["obs"]``.

        The value-head output is cached on ``self._value_out`` so that
        ``value_function()`` can return it afterwards.

        Returns:
            Tuple of the output-head tensor and the unchanged ``state``.
        """
        model_out, self._value_out = self.base_model(input_dict["obs"])
        return model_out, state

    def value_function(self):
        """Return the cached value-head output flattened to one dimension.

        Reads ``self._value_out``, which is only set by ``forward()``.
        """
        return tf.reshape(self._value_out, [-1])

    def metrics(self):
        """Return a constant custom metric ``{"foo": 42.0}``."""
        return {"foo": tf.constant(42.0)}
class MyKerasQModel(DistributionalQTFModel):
    """Custom model for DQN.

    Builds a two-layer Keras network (128-unit ReLU hidden layer followed by
    a ReLU layer of width ``num_outputs``) that serves as the core model.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name, **kw):
        """Build the Keras base model.

        Args:
            obs_space: Observation space; its ``shape`` defines the input layer.
            action_space: Action space, forwarded to the parent constructor.
            num_outputs: Number of units in the ``my_out`` layer.
            model_config: Model config, forwarded to the parent constructor.
            name: Model name, forwarded to the parent constructor.
            **kw: Extra keyword arguments, forwarded unchanged to
                ``DistributionalQTFModel.__init__``.
        """
        super(MyKerasQModel, self).__init__(
            obs_space, action_space, num_outputs, model_config, name, **kw
        )

        # Define the core model layers which will be used by the other
        # output heads of DistributionalQModel
        self.inputs = tf.keras.layers.Input(shape=obs_space.shape, name="observations")
        layer_1 = tf.keras.layers.Dense(
            128,
            name="my_layer1",
            activation=tf.nn.relu,
            kernel_initializer=normc_initializer(1.0),
        )(self.inputs)
        layer_out = tf.keras.layers.Dense(
            num_outputs,
            name="my_out",
            activation=tf.nn.relu,
            kernel_initializer=normc_initializer(1.0),
        )(layer_1)
        self.base_model = tf.keras.Model(self.inputs, layer_out)

    # Implement the core forward method.
    def forward(self, input_dict, state, seq_lens):
        """Run the base model on ``input_dict["obs"]``.

        Returns:
            Tuple of the base model's output tensor and the unchanged ``state``.
        """
        model_out = self.base_model(input_dict["obs"])
        return model_out, state

    def metrics(self):
        """Return a constant custom metric ``{"foo": 42.0}``."""
        return {"foo": tf.constant(42.0)}
# Script entry point. The guard must compare the dunder `__name__` against
# "__main__"; a bare `name == "main"` raises NameError.
if __name__ == "__main__":
    args = parser.parse_args()
    # `--num-cpus 0` (the default) is passed to Ray as `None`.
    ray.init(num_cpus=args.num_cpus or None)

    # Register both custom model names; with --use-vision-network both names
    # map to the vision network instead of the Keras models defined above.
    ModelCatalog.register_custom_model(
        "keras_model", MyVisionNetwork if args.use_vision_network else MyKerasModel
    )
    ModelCatalog.register_custom_model(
        "keras_q_model", MyVisionNetwork if args.use_vision_network else MyKerasQModel
    )

    # Tests https://github.com/ray-project/ray/issues/7293
    def check_has_custom_metric(result):
        """Assert the model's custom ``foo`` metric appears in the learner info."""
        r = result["result"]["info"][LEARNER_INFO]
        if DEFAULT_POLICY_ID in r:
            # Use the nested learner-stats dict when present, else the
            # per-policy dict itself.
            r = r[DEFAULT_POLICY_ID].get(LEARNER_STATS_KEY, r[DEFAULT_POLICY_ID])
        assert r["model"]["foo"] == 42, result

    if args.run == "DQN":
        extra_config = {"learning_starts": 0}
    else:
        extra_config = {}

    tune.run(
        args.run,
        stop={"episode_reward_mean": args.stop},
        config=dict(
            extra_config,
            **{
                "env": "BreakoutNoFrameskip-v4"
                if args.use_vision_network
                else "CartPole-v0",
                # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
                "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
                "callbacks": {
                    "on_train_result": check_has_custom_metric,
                },
                "model": {
                    "custom_model": "keras_q_model"
                    if args.run == "DQN"
                    else "keras_model"
                },
                "framework": "tf",
            }
        ),
    )
I get this error:
(gym) sb@DES:~/Lab$ python3 custom_keras.py --run PPO --num-cpus 6
2022-07-24 21:38:26,178 INFO services.py:1476 – View the Ray dashboard at http://127.0.0.1:8265
(PPOTrainer pid=14368) 2022-07-24 21:38:33,018 INFO trainer.py:2333 – Your framework setting is ‘tf’, meaning you are using static-graph mode. Set framework=‘tf2’ to enable eager execution with tf2.x. You may also then want to set eager_tracing=True in order to reach similar execution speed as with static-graph mode.
(PPOTrainer pid=14368) 2022-07-24 21:38:33,018 INFO ppo.py:415 – In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn’t work for you.
(PPOTrainer pid=14368) 2022-07-24 21:38:33,019 WARNING deprecation.py:47 – DeprecationWarning: callbacks dict interface
has been deprecated. Use a class extending rllib.agents.callbacks.DefaultCallbacks
instead. This will raise an error in the future!
(PPOTrainer pid=14368) 2022-07-24 21:38:33,019 INFO trainer.py:906 – Current log_level is WARN. For more information, set ‘log_level’: ‘INFO’ / ‘DEBUG’ or use the -v and -vv flags.
(RolloutWorker pid=14440) 2022-07-24 21:38:37,191 WARNING deprecation.py:47 – DeprecationWarning: callbacks dict interface
has been deprecated. Use a class extending rllib.agents.callbacks.DefaultCallbacks
instead. This will raise an error in the future!
(PPOTrainer pid=14368) 2022-07-24 21:38:37,257 ERROR worker.py:451 – Exception raised in creation task: The actor died because of an error raised in its creation task, ray::PPOTrainer.init() (pid=14368, ip=172.27.130.120, repr=PPOTrainer)
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/agents/trainer.py”, line 1074, in _init
(PPOTrainer pid=14368) raise NotImplementedError
(PPOTrainer pid=14368) NotImplementedError
(PPOTrainer pid=14368)
(PPOTrainer pid=14368) During handling of the above exception, another exception occurred:
(PPOTrainer pid=14368)
(PPOTrainer pid=14368) ray::PPOTrainer.init() (pid=14368, ip=172.27.130.120, repr=PPOTrainer)
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/agents/trainer.py”, line 871, in init
(PPOTrainer pid=14368) config, logger_creator, remote_checkpoint_dir, sync_function_tpl
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/tune/trainable.py”, line 156, in init
(PPOTrainer pid=14368) self.setup(copy.deepcopy(self.config))
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/agents/trainer.py”, line 957, in setup
(PPOTrainer pid=14368) logdir=self.logdir,
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/evaluation/worker_set.py”, line 144, in init
(PPOTrainer pid=14368) lambda p, pid: (pid, p.observation_space, p.action_space)
(PPOTrainer pid=14368) ray.exceptions.RayActorError: The actor died because of an error raised in its creation task, ray::RolloutWorker.init() (pid=14440, ip=172.27.130.120, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7f64865492e8>)
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/evaluation/rollout_worker.py”, line 634, in init
(PPOTrainer pid=14368) seed=seed,
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/evaluation/rollout_worker.py”, line 1789, in _build_policy_map
(PPOTrainer pid=14368) name, orig_cls, obs_space, act_space, conf, merged_conf
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/policy/policy_map.py”, line 141, in create_policy
(PPOTrainer pid=14368) observation_space, action_space, merged_config
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/policy/tf_policy_template.py”, line 270, in init
(PPOTrainer pid=14368) get_batch_divisibility_req=get_batch_divisibility_req,
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/policy/dynamic_tf_policy.py”, line 211, in init
(PPOTrainer pid=14368) framework=“tf”,
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/models/catalog.py”, line 541, in get_model_v2
(PPOTrainer pid=14368) **customized_model_kwargs,
(PPOTrainer pid=14368) File “custom_keras.py”, line 41, in init
(PPOTrainer pid=14368) )(self.inputs)
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py”, line 824, in call
(PPOTrainer pid=14368) self._maybe_build(inputs)
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py”, line 2146, in _maybe_build
(PPOTrainer pid=14368) self.build(input_shapes)
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/keras/layers/core.py”, line 1021, in build
(PPOTrainer pid=14368) trainable=True)
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py”, line 529, in add_weight
(PPOTrainer pid=14368) aggregation=aggregation)
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/training/tracking/base.py”, line 712, in _add_variable_with_custom_getter
(PPOTrainer pid=14368) **kwargs_for_getter)
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer_utils.py”, line 139, in make_variable
(PPOTrainer pid=14368) shape=variable_shape if variable_shape else None)
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/ops/variables.py”, line 258, in call
(PPOTrainer pid=14368) return cls._variable_v1_call(*args, **kwargs)
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/ops/variables.py”, line 219, in _variable_v1_call
(PPOTrainer pid=14368) shape=shape)
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/ops/variables.py”, line 65, in getter
(PPOTrainer pid=14368) return captured_getter(captured_previous, **kwargs)
(PPOTrainer pid=14368) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/models/catalog.py”, line 518, in track_var_creation
(PPOTrainer pid=14368) created.add(v.ref())
(PPOTrainer pid=14368) AttributeError: ‘ResourceVariable’ object has no attribute ‘ref’
(RolloutWorker pid=14440) WARNING:tensorflow:From /home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/ops/resource_variable_ops.py:1630: calling BaseResourceVariable.init (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.
(RolloutWorker pid=14440) Instructions for updating:
(RolloutWorker pid=14440) If using Keras pass *_constraint arguments to layers.
(RolloutWorker pid=14440) 2022-07-24 21:38:37,243 ERROR worker.py:451 – Exception raised in creation task: The actor died because of an error raised in its creation task, ray::RolloutWorker.init() (pid=14440, ip=172.27.130.120, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7f64865492e8>)
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/evaluation/rollout_worker.py”, line 634, in init
(RolloutWorker pid=14440) seed=seed,
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/evaluation/rollout_worker.py”, line 1789, in _build_policy_map
(RolloutWorker pid=14440) name, orig_cls, obs_space, act_space, conf, merged_conf
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/policy/policy_map.py”, line 141, in create_policy
(RolloutWorker pid=14440) observation_space, action_space, merged_config
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/policy/tf_policy_template.py”, line 270, in init
(RolloutWorker pid=14440) get_batch_divisibility_req=get_batch_divisibility_req,
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/policy/dynamic_tf_policy.py”, line 211, in init
(RolloutWorker pid=14440) framework=“tf”,
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/models/catalog.py”, line 541, in get_model_v2
(RolloutWorker pid=14440) **customized_model_kwargs,
(RolloutWorker pid=14440) File “custom_keras.py”, line 41, in init
(RolloutWorker pid=14440) )(self.inputs)
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py”, line 824, in call
(RolloutWorker pid=14440) self._maybe_build(inputs)
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py”, line 2146, in _maybe_build
(RolloutWorker pid=14440) self.build(input_shapes)
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/keras/layers/core.py”, line 1021, in build
(RolloutWorker pid=14440) trainable=True)
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py”, line 529, in add_weight
(RolloutWorker pid=14440) aggregation=aggregation)
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/training/tracking/base.py”, line 712, in _add_variable_with_custom_getter
(RolloutWorker pid=14440) **kwargs_for_getter)
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer_utils.py”, line 139, in make_variable
(RolloutWorker pid=14440) shape=variable_shape if variable_shape else None)
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/ops/variables.py”, line 258, in call
(RolloutWorker pid=14440) return cls._variable_v1_call(*args, **kwargs)
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/ops/variables.py”, line 219, in _variable_v1_call
(RolloutWorker pid=14440) shape=shape)
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/ops/variables.py”, line 65, in getter
(RolloutWorker pid=14440) return captured_getter(captured_previous, **kwargs)
(RolloutWorker pid=14440) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/models/catalog.py”, line 518, in track_var_creation
(RolloutWorker pid=14440) created.add(v.ref())
(RolloutWorker pid=14440) AttributeError: ‘ResourceVariable’ object has no attribute ‘ref’
(RolloutWorker pid=14441) 2022-07-24 21:38:37,237 WARNING deprecation.py:47 – DeprecationWarning: callbacks dict interface
has been deprecated. Use a class extending rllib.agents.callbacks.DefaultCallbacks
instead. This will raise an error in the future!
(RolloutWorker pid=14441) WARNING:tensorflow:From /home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/ops/resource_variable_ops.py:1630: calling BaseResourceVariable.init (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.
(RolloutWorker pid=14441) Instructions for updating:
(RolloutWorker pid=14441) If using Keras pass *_constraint arguments to layers.
(RolloutWorker pid=14441) 2022-07-24 21:38:37,301 ERROR worker.py:451 – Exception raised in creation task: The actor died because of an error raised in its creation task, ray::RolloutWorker.init() (pid=14441, ip=172.27.130.120, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7f2bf82ce2e8>)
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/evaluation/rollout_worker.py”, line 634, in init
(RolloutWorker pid=14441) seed=seed,
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/evaluation/rollout_worker.py”, line 1789, in _build_policy_map
(RolloutWorker pid=14441) name, orig_cls, obs_space, act_space, conf, merged_conf
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/policy/policy_map.py”, line 141, in create_policy
(RolloutWorker pid=14441) observation_space, action_space, merged_config
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/policy/tf_policy_template.py”, line 270, in init
(RolloutWorker pid=14441) get_batch_divisibility_req=get_batch_divisibility_req,
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/policy/dynamic_tf_policy.py”, line 211, in init
(RolloutWorker pid=14441) framework=“tf”,
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/models/catalog.py”, line 541, in get_model_v2
(RolloutWorker pid=14441) **customized_model_kwargs,
(RolloutWorker pid=14441) File “custom_keras.py”, line 41, in init
(RolloutWorker pid=14441) )(self.inputs)
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py”, line 824, in call
(RolloutWorker pid=14441) self._maybe_build(inputs)
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py”, line 2146, in _maybe_build
(RolloutWorker pid=14441) self.build(input_shapes)
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/keras/layers/core.py”, line 1021, in build
(RolloutWorker pid=14441) trainable=True)
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py”, line 529, in add_weight
(RolloutWorker pid=14441) aggregation=aggregation)
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/training/tracking/base.py”, line 712, in _add_variable_with_custom_getter
(RolloutWorker pid=14441) **kwargs_for_getter)
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer_utils.py”, line 139, in make_variable
(RolloutWorker pid=14441) shape=variable_shape if variable_shape else None)
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/ops/variables.py”, line 258, in call
(RolloutWorker pid=14441) return cls._variable_v1_call(*args, **kwargs)
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/ops/variables.py”, line 219, in _variable_v1_call
(RolloutWorker pid=14441) shape=shape)
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/tensorflow_core/python/ops/variables.py”, line 65, in getter
(RolloutWorker pid=14441) return captured_getter(captured_previous, **kwargs)
(RolloutWorker pid=14441) File “/home/sb/anaconda3/envs/gym/lib/python3.6/site-packages/ray/rllib/models/catalog.py”, line 518, in track_var_creation
(RolloutWorker pid=14441) created.add(v.ref())
(RolloutWorker pid=14441) AttributeError: ‘ResourceVariable’ object has no attribute ‘ref’