Hi, I have been running DreamerV3 with different environments, but I keep getting similar errors, all of which include:
cuDNN launch failure : input shape ([1,1,512,1])
[[{{node mlp/layer_normalization_8/FusedBatchNormV3}}]] [Op:__inference_forward_train_9051]
Call arguments received by layer ‘dreamer_model’ (type DreamerModel):
• inputs=None
• observations=tf.Tensor(shape=(1, 16, 64, 64, 3), dtype=float32)
• actions=tf.Tensor(shape=(1, 16, 6), dtype=float32)
• is_first=tf.Tensor(shape=(1, 16), dtype=bool)
• start_is_terminated_BxT=tf.Tensor(shape=(16,), dtype=bool)
which is not informative to me at all. Any ideas on this?
My code:
from ray import air, tune
from ray.rllib.algorithms.dreamerv3.dreamerv3 import DreamerV3Config
from ray.tune.logger import pretty_print
from ray.tune.registry import register_env
# Number of GPUs to run on.
num_gpus = 4

# DreamerV3 configuration for the "DMC/walker/walk" environment with pixel
# observations enabled via `from_pixels`.
config = (
    DreamerV3Config()
    .environment(env="DMC/walker/walk", env_config={"from_pixels": True})
    .resources(
        num_learner_workers=4,
        # Give each learner worker one GPU, or none when running GPU-less.
        num_gpus_per_learner_worker=1 if num_gpus else 0,
        num_cpus_for_local_worker=1,
    )
    .rollouts(
        # Scale the number of envs per worker with the GPU count (min. 1).
        num_envs_per_worker=1 * (num_gpus or 1),
        remote_worker_envs=True,
    )
    .reporting(
        metrics_num_episodes_for_smoothing=(num_gpus or 1),
        report_images_and_videos=False,
        report_dream_data=False,
        report_individual_batch_item_stats=False,
    )
    .training(
        batch_size_B=16,
        horizon_H=15,
        batch_length_T=16,
        model_size="S",
        symlog_obs=True,
        use_float16=False,
    )
    .evaluation(evaluation_num_workers=1)
)

# NOTE(review): the original script also called `algo = config.build()` at
# this point. The resulting object was never used, and the Tuner below is
# given the same config independently, so that extra build looked redundant
# (and would presumably hold learner/GPU resources while Tune runs). It has
# been dropped -- confirm that no standalone Algorithm instance is needed.
tuner = tune.Tuner(
    trainable="DreamerV3",
    # run_config=train.RunConfig(
    #     stop={"episode_reward_mean": 20},
    # ),
    param_space=config,
)
tuner.fit()