I want to create a custom model for my ppo agent, and it seemed like it should be easy enough. But I have had some problems which I don’t manage to debug myself so I tried to create a mwe in hope that someone here had a clue.
The initial problem seems to be that my environment has Tuple state spaces, and I read that __call__
for the TFModelV2
will flatten the state and make it available as input_dict["obs_flat"]
, while the original observation should be in input_dict["obs"]
. This seems to not be true, and I seems to get the same flattened state in both. I don’t understand why and have tracked the flattened observation to come from the function _get_input_dict_and_dummy_batch
in dynamic_tf_policy.py
.
I then tried to run it with just some Dense network assuming the flattened observation, but then I get other errors from keras that I’m not really understanding, the keras model runs find by itself so I assume it is still some error with the interaction with Ray that is the problem.
import numpy as np
import tensorflow as tf
import gym
import ray
import ray.tune as tune
from ray.rllib.models.tf.tf_modelv2 import TFModelV2
from ray.rllib.models.modelv2 import ModelV2
from ray.rllib.utils.annotations import override
from ray.rllib.agents.ppo import PPOTrainer
from ray.rllib.agents.ppo.ppo_tf_policy import PPOTFPolicy
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.models.modelv2 import restore_original_dimensions
from ray.rllib.models import ModelCatalog
class TestEnv(gym.Env):
def __init__(self, config):
self.n_items = config["n_items"]
self.action_space = gym.spaces.Tuple((gym.spaces.Discrete(self.n_items), gym.spaces.Box(-1, 1, shape=(1,))))
self.observation_space = gym.spaces.Tuple((
gym.spaces.Box(-100, 100, shape=(self.n_items,)),
gym.spaces.Box(-100, 100, shape=(self.n_items,)),
gym.spaces.Box(-100, 100, shape=(2,)),
))
def reset(self):
self.items_feat1 = np.zeros(self.n_items)
self.items_feat2 = np.zeros(self.n_items)
return (self.items_feat1, self.items_feat2, np.array([np.sum(self.items_feat1), np.sum(self.items_feat2)]))
def step(self, action):
i, a = action
self.items_feat2[i] += a
self.items_feat1 += self.items_feat2
state = (self.items_feat1, self.items_feat2, np.array([np.sum(self.items_feat1), np.sum(self.items_feat2)]))
reward = np.sum(np.abs(1 - self.items_feat1))
return state, reward, False, {}
class MyConvNetwork(TFModelV2):
def __init__(self, obs_space, action_space, num_outputs, model_config, name, n_items):
self.original_space = obs_space.original_space if \
hasattr(obs_space, "original_space") else obs_space
assert isinstance(self.original_space, (gym.spaces.Tuple)), \
"`obs_space.original_space` must be Tuple!"
super().__init__(self.original_space, action_space, num_outputs,
model_config, name)
## Test 1: Assuming input is tuples
inputs_conv1 = tf.keras.layers.Input(shape=(n_items,))
inputs_conv2 = tf.keras.layers.Input(shape=(n_items,))
inputs_rest = tf.keras.layers.Input(shape=(2,))
inputs = [inputs_conv1, inputs_conv2, inputs_rest]
inputs_concat = tf.keras.layers.Concatenate()(inputs)
conv1 = tf.keras.layers.Reshape((n_items, 1))(inputs_conv1)
conv2 = tf.keras.layers.Reshape((n_items, 1))(inputs_conv2)
convall = tf.keras.layers.Concatenate()([conv1, conv2])
prob_conv = tf.keras.layers.Conv1D(1, 1)(convall)
prob_reshaped = tf.keras.layers.Reshape((-1,))(prob_conv)
dense = tf.keras.layers.Dense(256, activation='relu')(inputs_concat)
dense_out = tf.keras.layers.Dense(2)(dense)
action_out = tf.keras.layers.Concatenate()([prob_reshaped, dense_out])
dense = tf.keras.layers.Dense(256, activation='relu')(inputs_concat)
value_out = tf.keras.layers.Dense(1)(dense)
## Test 2: Assuming input is flattened, and making net simpler
# inputs = tf.keras.layers.Input(shape=(2 * n_items + 2,))
# catprob = tf.keras.layers.Dense(n_items)(inputs)
# dense_out = tf.keras.layers.Dense(2)(inputs) # 2 outputs for Gaussian
# action_out = tf.keras.layers.Concatenate()([catprob, dense_out])
# dense = tf.keras.layers.Dense(256, activation='relu')(inputs)
# value_out = tf.keras.layers.Dense(1)(dense)
## This is the same for both
self.base_model = tf.keras.Model(inputs=inputs, outputs=[action_out, value_out])
self.base_model.summary()
self.register_variables(self.base_model.variables)
@override(ModelV2)
def forward(self, input_dict, state, seq_lens):
# obs = restore_original_dimensions(input_dict["obs"], self.obs_space)
obs = input_dict["obs"]
logit_tuple, values = self.base_model(obs)
self._value_out = tf.reshape(values, [-1])
return logit_tuple, state
@override(ModelV2)
def value_function(self):
return self._value_out
ModelCatalog.register_custom_model("my_cnn_model", MyConvNetwork)
if __name__ == "__main__":
ray.init(address="auto")
trainer = PPOTrainer(
env=TestEnv,
config={
"env_config": {
"n_items": 10
},
"model": {
"custom_model": "my_cnn_model",
"custom_model_config": {
"n_items": 10
}
}
})
for i in range(1000):
result = trainer.train()
Error from Test 1:
2021-04-28 16:02:27.686484: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2021-04-28 16:02:29,703 INFO worker.py:656 -- Connecting to existing Ray cluster at address: 10.10.124.35:6379
2021-04-28 16:02:29,769 INFO trainer.py:616 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
Traceback (most recent call last):
File "mwe.py", line 101, in <module>
trainer = PPOTrainer(
File "/home/ubuntu/.local/lib/python3.8/site-packages/ray/rllib/agents/trainer_template.py", line 106, in __init__
Trainer.__init__(self, config, env, logger_creator)
File "/home/ubuntu/.local/lib/python3.8/site-packages/ray/rllib/agents/trainer.py", line 465, in __init__
super().__init__(config, logger_creator)
File "/home/ubuntu/.local/lib/python3.8/site-packages/ray/tune/trainable.py", line 96, in __init__
self.setup(copy.deepcopy(self.config))
File "/home/ubuntu/.local/lib/python3.8/site-packages/ray/rllib/agents/trainer.py", line 629, in setup
self._init(self.config, self.env_creator)
File "/home/ubuntu/.local/lib/python3.8/site-packages/ray/rllib/agents/trainer_template.py", line 133, in _init
self.workers = self._make_workers(
File "/home/ubuntu/.local/lib/python3.8/site-packages/ray/rllib/agents/trainer.py", line 700, in _make_workers
return WorkerSet(
File "/home/ubuntu/.local/lib/python3.8/site-packages/ray/rllib/evaluation/worker_set.py", line 79, in __init__
remote_spaces = ray.get(self.remote_workers(
File "/home/ubuntu/.local/lib/python3.8/site-packages/ray/worker.py", line 1379, in get
raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(ValueError): ray::RolloutWorker.foreach_policy() (pid=33668, ip=10.10.124.35)
File "python/ray/_raylet.pyx", line 422, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 456, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 459, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 463, in ray._raylet.execute_task
File "python/ray/_raylet.pyx", line 415, in ray._raylet.execute_task.function_executor
File "/home/ubuntu/.local/lib/python3.8/site-packages/ray/rllib/evaluation/rollout_worker.py", line 460, in __init__
self._build_policy_map(policy_dict, policy_config)
File "/home/ubuntu/.local/lib/python3.8/site-packages/ray/rllib/evaluation/rollout_worker.py", line 1077, in _build_policy_map
policy_map[name] = cls(obs_space, act_space, merged_conf)
File "/home/ubuntu/.local/lib/python3.8/site-packages/ray/rllib/policy/tf_policy_template.py", line 217, in __init__
DynamicTFPolicy.__init__(
File "/home/ubuntu/.local/lib/python3.8/site-packages/ray/rllib/policy/dynamic_tf_policy.py", line 282, in __init__
dist_inputs, self._state_out = self.model(
File "/home/ubuntu/.local/lib/python3.8/site-packages/ray/rllib/models/modelv2.py", line 209, in __call__
res = self.forward(restored, state or [], seq_lens)
File "mwe.py", line 89, in forward
logit_tuple, values = self.base_model(obs)
File "/home/ubuntu/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer_v1.py", line 760, in __call__
input_spec.assert_input_compatibility(self.input_spec, inputs,
File "/home/ubuntu/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/input_spec.py", line 204, in assert_input_compatibility
raise ValueError('Layer ' + layer_name + ' expects ' +
ValueError: Layer model expects 3 input(s), but it received 1 input tensors. Inputs received: [<tf.Tensor 'default_policy/Placeholder:0' shape=(?, 22) dtype=float32>]
Error from Test 2:
2021-04-28 15:49:19.241016: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2021-04-28 15:49:21,260 INFO worker.py:656 -- Connecting to existing Ray cluster at address: 10.10.124.35:6379
2021-04-28 15:49:21,322 INFO trainer.py:616 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
2021-04-28 15:49:24.836316: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-04-28 15:49:24.837247: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2021-04-28 15:49:24.843727: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties:
pciBusID: 0000:82:00.0 name: GeForce GTX 1080 Ti computeCapability: 6.1
coreClock: 1.582GHz coreCount: 28 deviceMemorySize: 10.92GiB deviceMemoryBandwidth: 451.17GiB/s
2021-04-28 15:49:24.843790: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2021-04-28 15:49:24.846549: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11
2021-04-28 15:49:24.846595: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublasLt.so.11
2021-04-28 15:49:24.847591: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcufft.so.10
2021-04-28 15:49:24.847924: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcurand.so.10
2021-04-28 15:49:24.850834: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusolver.so.10
2021-04-28 15:49:24.851528: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusparse.so.11
2021-04-28 15:49:24.851704: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.8
2021-04-28 15:49:24.852989: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1862] Adding visible gpu devices: 0
2021-04-28 15:49:24.853550: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-04-28 15:49:24.855360: I tensorflow/compiler/jit/xla_gpu_device.cc:99] Not creating XLA devices, tf_xla_enable_xla_devices not set
2021-04-28 15:49:24.856966: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties:
pciBusID: 0000:82:00.0 name: GeForce GTX 1080 Ti computeCapability: 6.1
coreClock: 1.582GHz coreCount: 28 deviceMemorySize: 10.92GiB deviceMemoryBandwidth: 451.17GiB/s
2021-04-28 15:49:24.857017: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2021-04-28 15:49:24.857077: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.11
2021-04-28 15:49:24.857129: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublasLt.so.11
2021-04-28 15:49:24.857177: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcufft.so.10
2021-04-28 15:49:24.857225: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcurand.so.10
2021-04-28 15:49:24.857273: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusolver.so.10
2021-04-28 15:49:24.857321: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcusparse.so.11
2021-04-28 15:49:24.857369: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.8
2021-04-28 15:49:24.860264: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1862] Adding visible gpu devices: 0
2021-04-28 15:49:24.860345: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
2021-04-28 15:49:25.508508: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1261] Device interconnect StreamExecutor with strength 1 edge matrix:
2021-04-28 15:49:25.508561: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1267] 0
2021-04-28 15:49:25.508585: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1280] 0: N
2021-04-28 15:49:25.510938: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1406] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 9575 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:82:00.0, compute capability: 6.1)
Traceback (most recent call last):
File "mwe.py", line 101, in <module>
trainer = PPOTrainer(
File "/home/ubuntu/.local/lib/python3.8/site-packages/ray/rllib/agents/trainer_template.py", line 106, in __init__
Trainer.__init__(self, config, env, logger_creator)
File "/home/ubuntu/.local/lib/python3.8/site-packages/ray/rllib/agents/trainer.py", line 465, in __init__
super().__init__(config, logger_creator)
File "/home/ubuntu/.local/lib/python3.8/site-packages/ray/tune/trainable.py", line 96, in __init__
self.setup(copy.deepcopy(self.config))
File "/home/ubuntu/.local/lib/python3.8/site-packages/ray/rllib/agents/trainer.py", line 629, in setup
self._init(self.config, self.env_creator)
File "/home/ubuntu/.local/lib/python3.8/site-packages/ray/rllib/agents/trainer_template.py", line 133, in _init
self.workers = self._make_workers(
File "/home/ubuntu/.local/lib/python3.8/site-packages/ray/rllib/agents/trainer.py", line 700, in _make_workers
return WorkerSet(
File "/home/ubuntu/.local/lib/python3.8/site-packages/ray/rllib/evaluation/worker_set.py", line 87, in __init__
self._local_worker = self._make_worker(
File "/home/ubuntu/.local/lib/python3.8/site-packages/ray/rllib/evaluation/worker_set.py", line 315, in _make_worker
worker = cls(
File "/home/ubuntu/.local/lib/python3.8/site-packages/ray/rllib/evaluation/rollout_worker.py", line 462, in __init__
self.policy_map, self.preprocessors = self._build_policy_map(
File "/home/ubuntu/.local/lib/python3.8/site-packages/ray/rllib/evaluation/rollout_worker.py", line 1077, in _build_policy_map
policy_map[name] = cls(obs_space, act_space, merged_conf)
File "/home/ubuntu/.local/lib/python3.8/site-packages/ray/rllib/policy/eager_tf_policy.py", line 251, in __init__
self.model = ModelCatalog.get_model_v2(
File "/home/ubuntu/.local/lib/python3.8/site-packages/ray/rllib/models/catalog.py", line 347, in get_model_v2
raise e
File "/home/ubuntu/.local/lib/python3.8/site-packages/ray/rllib/models/catalog.py", line 332, in get_model_v2
instance = model_cls(obs_space, action_space,
File "mwe.py", line 73, in __init__
catprob = tf.keras.layers.Dense(n_items)(inputs)
File "/home/ubuntu/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py", line 951, in __call__
return self._functional_construction_call(inputs, args, kwargs,
File "/home/ubuntu/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py", line 1090, in _functional_construction_call
outputs = self._keras_tensor_symbolic_call(
File "/home/ubuntu/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py", line 822, in _keras_tensor_symbolic_call
return self._infer_output_signature(inputs, args, kwargs, input_masks)
File "/home/ubuntu/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py", line 862, in _infer_output_signature
self._maybe_build(inputs)
File "/home/ubuntu/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py", line 2710, in _maybe_build
self.build(input_shapes) # pylint:disable=not-callable
File "/home/ubuntu/.local/lib/python3.8/site-packages/tensorflow/python/keras/layers/core.py", line 1185, in build
self.kernel = self.add_weight(
File "/home/ubuntu/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py", line 623, in add_weight
variable = self._add_variable_with_custom_getter(
File "/home/ubuntu/.local/lib/python3.8/site-packages/tensorflow/python/training/tracking/base.py", line 805, in _add_variable_with_custom_getter
new_variable = getter(
File "/home/ubuntu/.local/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer_utils.py", line 130, in make_variable
return tf_variables.VariableV1(
File "/home/ubuntu/.local/lib/python3.8/site-packages/tensorflow/python/ops/variables.py", line 260, in __call__
return cls._variable_v1_call(*args, **kwargs)
File "/home/ubuntu/.local/lib/python3.8/site-packages/tensorflow/python/ops/variables.py", line 206, in _variable_v1_call
return previous_getter(
File "/home/ubuntu/.local/lib/python3.8/site-packages/tensorflow/python/ops/variables.py", line 67, in getter
return captured_getter(captured_previous, **kwargs)
File "/home/ubuntu/.local/lib/python3.8/site-packages/ray/rllib/models/catalog.py", line 324, in track_var_creation
created.add(v)
File "/home/ubuntu/.local/lib/python3.8/site-packages/tensorflow/python/ops/variables.py", line 1081, in __hash__
raise TypeError("Variable is unhashable. "
TypeError: Variable is unhashable. Instead, use tensor.ref() as the key.