Can't Create Config from PPOConfig

  • High: It blocks me from completing my task.

I’ve been stuck on this for a while now. I keep receiving the error message that something is wrong with my config file, but I have no idea what it could be, especially since I don’t modify PPOConfig. I’ve attached the error message as well as my code.

2023-08-28 19:15:21,931	WARNING -- Cannot create PPOConfig from given `config_dict`! Property __stdout_file__ not supported.
2023-08-28 19:15:21,955	WARNING -- Setting `exploration_config={}` because you set `_enable_rl_module_api=True`. When RLModule API are enabled, exploration_config can not be set. If you want to implement custom exploration behaviour, please modify the `forward_exploration` method of the RLModule at hand. On configs that have a default exploration config, this must be done with `config.exploration_config={}`.
2023-08-28 19:15:42,121	WARNING -- Setting `exploration_config={}` because you set `_enable_rl_module_api=True`. When RLModule API are enabled, exploration_config can not be set. If you want to implement custom exploration behaviour, please modify the `forward_exploration` method of the RLModule at hand. On configs that have a default exploration config, this must be done with `config.exploration_config={}`.
import click
import random
from ray import tune, air
import gymnasium as gym
import ray.rllib.algorithms.ppo as ppo
from spr_rl.envs.spr_env import SprEnv
import os
import inspect
import numpy as np
from ray.tune.registry import register_env
import ray.air
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.policy.policy import PolicySpec
from ray.tune.stopper import MaximumIterationStopper
from ray.rllib.algorithms.algorithm import Algorithm

# Set TensorFlow's C++ log-level variable at module load, before the training
# code runs (level '2' presumably hides INFO and WARNING output -- confirm
# against the TensorFlow documentation).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Click decorators
# TODO: Add testing flag (timestamp and seed). Already set in params
@click.command()
@click.argument('network', type=click.Path(exists=True))
@click.argument('simulator_config', type=click.Path(exists=True))
@click.argument('services', type=click.Path(exists=True))
@click.argument('training_duration', type=int)
@click.option('-s', '--seed', type=int, help="Set the agent's seed", default=None)
@click.option('-t', '--test', help="Path to test timestamp and seed", default=None)
@click.option('-a', '--append_test', help="test after training", is_flag=True)
@click.option('-m', '--model_path', help="path to a model zip file", default=None)
@click.option('-ss', '--sim-seed', type=int, help="simulator seed", default=None)
@click.option('-b', '--best', help="Select the best agent", is_flag=True)
def main(network, simulator_config, services, training_duration,
         seed, test, append_test, model_path, sim_seed, best):
    """SPR-RL DRL Scaling and Placement main executable.

    Registers the SprEnv environment, trains a PPO agent through Ray Tune and
    replays one episode with the best checkpoint.

    Args:
        network: Path to the network file (must exist).
        simulator_config: Path to the simulator configuration (must exist).
        services: Path to the services file (must exist).
        training_duration: Training duration; forwarded to the environment
            configuration.
        seed: Agent seed; a random value in [0, 9999] is drawn when omitted.
        test: Path to test timestamp and seed; forwarded as ``test_mode``.
        append_test: Flag "test after training"; accepted but not used here.
        model_path: Path to a model zip file; accepted but not used here.
        sim_seed: Simulator seed; forwarded to the environment configuration.
        best: Flag "select the best agent"; forwarded to the environment
            configuration.
    """
    # NOTE(review): the pasted script lost several lines (the bodies of the
    # gym.register / register_env / ray.init / PPOConfig / Tuner calls). They
    # are rebuilt below from the surviving comments and the names the file
    # imports -- confirm each against the original script.

    # Get or set a seed
    if seed is None:
        seed = random.randint(0, 9999)

    # Seed random generators
    os.environ['PYTHONHASHSEED'] = str(seed)
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    # An empty device list, presumably to force CPU-only execution.
    os.environ["CUDA_VISIBLE_DEVICES"] = ""

    print(f"Creating agent with seed: {seed}")
    print(f"Using network: {network}")

    param_config = {
        "seed": seed,
        "sim_config": simulator_config,
        "network": network,
        "services": services,
        "training_duration": training_duration,
        "test_mode": test,
        "sim_seed": sim_seed,
        "best": best,
    }

    # settings used for both stable baselines and rllib
    env_name = "SprEnv-v0"
    train_steps = 10000
    learning_rate = 1e-3
    save_dir = "saved_models"

    def register_custom_env():
        """Register SprEnv with gymnasium so ``gym.make(env_name)`` resolves."""
        gym.register(
            id=env_name,  # Change this ID to your preference
            entry_point='spr_rl.envs:SprEnv',  # Specify the module and class name
            kwargs={'config': param_config},
        )

    register_custom_env()

    # Register the environment with the modified config
    # NOTE(review): the lambda body was lost; SprEnv(config=...) mirrors the
    # kwargs passed to gym.register above -- confirm the constructor signature.
    register_env(env_name, lambda config: SprEnv(config=param_config))

    ray.init(
        num_cpus=2,  # change to your available number of CPUs
    )

    # NOTE(review): the original chain carried the comment "Here, we configure
    # all agents to share the same policy", but its multi-agent settings could
    # not be recovered; the default single policy is used here -- confirm.
    # NOTE(review): passing learning_rate and seed into the config is an
    # assumption based on the otherwise unused settings above -- confirm.
    config = (
        PPOConfig()
        .environment(env=env_name)
        .training(lr=learning_rate)
        .debugging(seed=seed)
        # RLlib needs +1 CPU than configured below (for the driver/trainer)
        .rollouts(num_rollout_workers=1)
    )

    # 1 iter = 4000 time steps by default, so round the step budget up to
    # whole iterations.
    # NOTE(review): deriving the iteration count from train_steps is an
    # assumption; the original stop criterion was lost.
    steps_per_iteration = 4000
    max_iterations = max(1, -(-train_steps // steps_per_iteration))

    # Create the Trainer/Tuner and define how long to train
    tuner = ray.tune.Tuner(
        "PPO",
        run_config=air.RunConfig(
            # Save the training progress and checkpoints locally under the specified subfolder.
            storage_path=os.path.abspath(save_dir),
            # Control training length by setting the number of iterations.
            stop=MaximumIterationStopper(max_iter=max_iterations),
            # A final checkpoint is required for Algorithm.from_checkpoint below.
            checkpoint_config=air.CheckpointConfig(checkpoint_at_end=True),
        ),
        param_space=config,
    )

    # Run training and save the result
    result_grid = tuner.fit()

    best_result = result_grid.get_best_result(metric="episode_reward_mean", mode="max")
    # Named `trained_algo` so the module-level `ppo` import is not shadowed.
    trained_algo = Algorithm.from_checkpoint(best_result.checkpoint)
    env = gym.make(env_name)
    obs, info = env.reset()
    done = False

    # run one episode with the trained model
    while not done:
        action = trained_algo.compute_single_action(obs)
        obs, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated

if __name__ == "__main__":
    # Positional CLI arguments in the order the command expects them. Every
    # entry is a string because it is handed to click as an argv list.
    positional_args = [
        "inputs/networks/interroute-in2-eg1-rand-cap0-2.graphml",  # network
        "inputs/config/simulator/mmpp-12-8.yaml",  # simulator_config
        "inputs/services/abc-start_delay0.yaml",  # services
        "200000",  # training_duration
    ]

    # Default run: fixed agent seed, with the "test after training" flag set.
    main([*positional_args, '-a', '-s', '8443'])

    # Other invocations kept from the original script for reference (they
    # additionally passed an `agent_config` path that is not defined here):
    #   '-t', '2020-12-03_13:17:26_seed9834'
    #   '--best'
    #   '-t', 'best', '-m', 'results/models/poisson/'

There seems to be an issue with the `render_modes` that gymnasium expects from your custom environment. If your environment implements any rendering, make sure you define the `metadata` attribute at class level in the form gymnasium requires:


class SprEnv(gymnasium.Env):
    # Illustrative fragment: only the class-level metadata declaration is shown.

    # Declared on the class itself (not per instance); lists "human" as the
    # only render mode and sets "render_fps" to 4.
    metadata = {"render_modes": ["human"], "render_fps": 4}