I can't get my custom network to work

How severely does this issue affect your experience of using Ray?

  • High: It blocks me from completing my task.

Hi all,

First I set up the default PPO implementation with my custom env, did some troubleshooting, and it was working. Then I built a custom neural network that subclasses TorchRLModule and, for now, overrides only the _forward method. I'm using the new API stack, and this is my PPO config:

        config = (
            PPOConfig()
            .api_stack(
                enable_env_runner_and_connector_v2=True,
                enable_rl_module_and_learner=True,
            )
            .environment("custom-env-69-v0")
            .framework("torch")
            .env_runners(
                num_env_runners=2,
                num_envs_per_env_runner=1,
                num_cpus_per_env_runner=2,
                num_gpus_per_env_runner=0,
                rollout_fragment_length="auto",
            )
            .learners(
                num_learners=1,
                num_gpus_per_learner=1,
            )
            .rl_module(
                rl_module_spec=RLModuleSpec(
                    module_class=NeuralNetwork,
                    observation_space=self.env.observation_space,
                    action_space=self.env.action_space,
                    inference_only=False,
                    model_config={},
                    catalog_class=PPOCatalog,
                )
            )
            .training(
                lr=0.001,
                gamma=0.99,
                train_batch_size_per_learner=8,
                num_epochs=20,
                minibatch_size=2,
            )
            .evaluation(
                evaluation_num_env_runners=1,
                evaluation_duration=10,
                evaluation_interval=2,
                evaluation_duration_unit="episodes",
                evaluation_parallel_to_training=True,
            )
        )
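
The module itself is roughly this shape (a trimmed-down sketch rather than my exact code; the layer sizes and the discrete-action assumption are just placeholders):

    import torch.nn as nn

    from ray.rllib.core.columns import Columns
    from ray.rllib.core.rl_module.torch.torch_rl_module import TorchRLModule


    class NeuralNetwork(TorchRLModule):
        def setup(self):
            # Placeholder sizes; the real network is bigger.
            obs_dim = self.observation_space.shape[0]
            num_actions = self.action_space.n  # assumes a Discrete action space
            self._net = nn.Sequential(
                nn.Linear(obs_dim, 64),
                nn.ReLU(),
                nn.Linear(64, num_actions),
            )

        # Only _forward is overridden; there is no value-function output anywhere.
        def _forward(self, batch, **kwargs):
            return {Columns.ACTION_DIST_INPUTS: self._net(batch[Columns.OBS])}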

My problem is that I'm not sure whether I'm using RLModule, Catalog, and ConnectorV2 properly, so I haven't been able to fix this error:

ray.exceptions.RayTaskError(KeyError): ray::_WrappedExecutable.apply() (pid=24266, ip=192.168.0.17, actor_id=ab82a87e1b792b29a914c9d701000000, repr=<ray.train._internal.worker_group._WrappedExecutable object at 0x7636ca5ca1d0>)
  File "/home/alyy/PycharmProjects/RLProject/.venv/lib/python3.11/site-packages/ray/rllib/core/learner/learner.py", line 1588, in apply
    return func(self, *_args, **_kwargs)
  File "/home/alyy/PycharmProjects/RLProject/.venv/lib/python3.11/site-packages/ray/rllib/core/learner/learner_group.py", line 383, in _learner_update
    result = _learner.update_from_episodes(
  File "/home/alyy/PycharmProjects/RLProject/.venv/lib/python3.11/site-packages/ray/rllib/core/learner/learner.py", line 1073, in update_from_episodes
    return self._update_from_batch_or_episodes(
  File "/home/alyy/PycharmProjects/RLProject/.venv/lib/python3.11/site-packages/ray/rllib/core/learner/learner.py", line 1400, in _update_from_batch_or_episodes
    fwd_out, loss_per_module, tensor_metrics = self._update(
  File "/home/alyy/PycharmProjects/RLProject/.venv/lib/python3.11/site-packages/ray/rllib/core/learner/torch/torch_learner.py", line 495, in _update
    return self._possibly_compiled_update(batch)
  File "/home/alyy/PycharmProjects/RLProject/.venv/lib/python3.11/site-packages/ray/rllib/core/learner/torch/torch_learner.py", line 146, in _uncompiled_update
    loss_per_module = self.compute_losses(fwd_out=fwd_out, batch=batch)
  File "/home/alyy/PycharmProjects/RLProject/.venv/lib/python3.11/site-packages/ray/rllib/core/learner/learner.py", line 913, in compute_losses
    loss = self.compute_loss_for_module(
  File "/home/alyy/PycharmProjects/RLProject/.venv/lib/python3.11/site-packages/ray/rllib/algorithms/ppo/torch/ppo_torch_learner.py", line 85, in compute_loss_for_module
    batch[Postprocessing.ADVANTAGES] * logp_ratio,
  File "/home/alyy/PycharmProjects/RLProject/.venv/lib/python3.11/site-packages/ray/rllib/policy/sample_batch.py", line 973, in __getitem__
    value = dict.__getitem__(self, key)
KeyError: 'advantages'

So the batch doesn't have a key named 'advantages'; I'm assuming this is related to my implementation of the algorithm/network.

I'm only customizing the environment and the neural network for now.
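
In case it matters, this is the kind of standalone check I was planning to run to see which keys the module actually returns (only a sketch, untested; I'm not sure spec.build() is the intended way to construct the module outside the algorithm):

    import numpy as np
    import torch

    from ray.rllib.core.columns import Columns
    from ray.rllib.core.rl_module import RLModuleSpec

    from Environment import CustomEnv
    from ewq import NeuralNetwork

    env = CustomEnv()
    spec = RLModuleSpec(
        module_class=NeuralNetwork,
        observation_space=env.observation_space,
        action_space=env.action_space,
        inference_only=False,
        model_config={},
    )
    module = spec.build()

    # Fake a tiny batch of observations (assumes a flat Box observation space)
    # and inspect the keys the train forward pass returns.
    obs = torch.as_tensor(
        np.stack([env.observation_space.sample() for _ in range(4)]),
        dtype=torch.float32,
    )
    out = module.forward_train({Columns.OBS: obs})
    print(list(out.keys()))  # I expect to see at least 'action_dist_inputs' here.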

@mAlyy0 thanks for raising this issue and welcome to the forum.

Could you provide a simple repro so that I can dig into it? It looks to me like one of the connectors is not correctly loaded, and I want to investigate why.

Thank you for the warm welcome. This is my main.py; apart from this, I only have my custom neural network, environment, and config, nothing else. Is there anything specific I need to do beyond setting it to True to enable ConnectorV2? By the way, my neural network doesn't involve any operations related to the PPO algorithm. I guess I'm subclassing the wrong RLModule, so my network lacks some PPO-specific implementation (I've put a sketch of what I mean below, after the code). Also, changing the batch size doesn't seem to do anything.

from ray.air.integrations.wandb import WandbLoggerCallback
from ewq import NeuralNetwork, set_global_seed
from ray.rllib.core.rl_module import RLModuleSpec
from ray.air import RunConfig, CheckpointConfig
from ray.rllib.algorithms.ppo import PPOConfig
from Environment import CustomEnv
from ray.rllib.algorithms.ppo.ppo_catalog import PPOCatalog
from ray.tune import register_env
from ray import tune
import logging
import ray


logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('training.log'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)


def env_creator(env_config):
    env = CustomEnv()
    return env


class Optimizer:
    def __init__(self):
        self.env = CustomEnv()

        set_global_seed(69)
        register_env("custom-env-69-v0", env_creator)


    def run(self):
        try:
            ray.init(num_cpus=7, num_gpus=1)

            config = (
                PPOConfig()
                .api_stack(
                    enable_env_runner_and_connector_v2=True,
                    enable_rl_module_and_learner=True,
                )
                .environment("custom-env-69-v0")
                .framework("torch")
                .env_runners(
                    num_env_runners=2,
                    num_envs_per_env_runner=2,
                    num_cpus_per_env_runner=2,
                    num_gpus_per_env_runner=0,
                    rollout_fragment_length=2,
                )
                .learners(
                    num_learners=1,
                    num_gpus_per_learner=1,
                )
                .rl_module(
                    rl_module_spec=RLModuleSpec(
                        module_class=NeuralNetwork,
                        observation_space=self.env.observation_space,
                        action_space=self.env.action_space,
                        inference_only=False,
                        model_config={},
                        catalog_class=PPOCatalog,
                    )
                )
                .training(
                    lr=1e-3,
                    gamma=0.99,
                    train_batch_size_per_learner=8,
                    num_epochs=20,
                    minibatch_size=2,
                )
                .evaluation(
                    evaluation_num_env_runners=1,
                    evaluation_duration=10,
                    evaluation_interval=2,
                    evaluation_duration_unit="episodes",
                    evaluation_parallel_to_training=True,
                )
                .debugging(log_level="DEBUG")
            )

            tuner = tune.Tuner(
                "PPO",
                param_space=config,
                tune_config=tune.TuneConfig(
                    trial_name_creator=lambda trial: f"trial_{trial.trial_id[:8]}",
                    trial_dirname_creator=lambda trial: f"trial_{trial.trial_id[:8]}",
                    max_concurrent_trials=4,
                ),
                run_config=RunConfig(
                    callbacks=[WandbLoggerCallback(project="RLProject", entity="maligkbs0")],
                    storage_path="/home/alyy/PycharmProjects/RLProject/ray_results",
                    name="PPO",
                    checkpoint_config=CheckpointConfig(
                        num_to_keep=5,
                        checkpoint_score_attribute="training_iteration",
                        checkpoint_score_order="max",
                        checkpoint_frequency=1,
                        checkpoint_at_end=True,
                    )
                )
            ).fit()

        except Exception as e:
            logger.error(f"Error during training: {str(e)}", exc_info=True)
            raise
        finally:
            ray.shutdown()


if __name__ == "__main__":
    Optimizer().run()
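
And this is the kind of change I suspect is missing, i.e. giving the module a value-function output so PPO can compute advantages during the learner update (only a sketch, assuming a recent Ray version where ValueFunctionAPI is exposed under ray.rllib.core.rl_module.apis; please correct me if PPO expects a different interface):

    import torch.nn as nn

    from ray.rllib.core.columns import Columns
    from ray.rllib.core.rl_module.apis import ValueFunctionAPI
    from ray.rllib.core.rl_module.torch.torch_rl_module import TorchRLModule


    class NeuralNetwork(TorchRLModule, ValueFunctionAPI):
        def setup(self):
            obs_dim = self.observation_space.shape[0]
            num_actions = self.action_space.n  # assumes a Discrete action space
            self._encoder = nn.Sequential(nn.Linear(obs_dim, 64), nn.ReLU())
            self._pi_head = nn.Linear(64, num_actions)
            # Extra value head so the learner can get value estimates.
            self._vf_head = nn.Linear(64, 1)

        def _forward(self, batch, **kwargs):
            embeddings = self._encoder(batch[Columns.OBS])
            return {Columns.ACTION_DIST_INPUTS: self._pi_head(embeddings)}

        # My guess: PPO needs this to compute value targets/advantages.
        def compute_values(self, batch, embeddings=None):
            if embeddings is None:
                embeddings = self._encoder(batch[Columns.OBS])
            return self._vf_head(embeddings).squeeze(-1)

If that's not the right API to implement for the new stack, a pointer to the expected interface would be great.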