How severely does this issue affect your experience of using Ray?
- High: It blocks me from completing my task.
I’ve been stuck on this for a while now. I keep receiving a message saying that something is wrong with my config, but I have no idea what it could be, especially since I don’t modify PPOConfig myself. I’ve included the full output as well as my code below.
Setting `exploration_config={}` because you set `_enable_rl_module_api=True`. When RLModule API are enabled, exploration_config can not be set. If you want to implement custom exploration behaviour, please modify the `forward_exploration` method of the RLModule at hand. On configs that have a default exploration config, this must be done with `config.exploration_config={}`.
2023-08-28 19:15:21,931 WARNING -- Cannot create PPOConfig from given `config_dict`! Property __stdout_file__ not supported.
2023-08-28 19:15:21,955 WARNING -- Setting `exploration_config={}` because you set `_enable_rl_module_api=True`. When RLModule API are enabled, exploration_config can not be set. If you want to implement custom exploration behaviour, please modify the `forward_exploration` method of the RLModule at hand. On configs that have a default exploration config, this must be done with `config.exploration_config={}`.
/home/tails/.conda/envs/ray/lib/python3.9/site-packages/ray/rllib/algorithms/ RayDeprecationWarning: This API is deprecated and may be removed in future Ray releases. You could suppress this warning by setting env variable PYTHONWARNINGS="ignore::DeprecationWarning"
`UnifiedLogger` will be removed in Ray 2.7.
return UnifiedLogger(config, logdir, loggers=None)
/home/tails/.conda/envs/ray/lib/python3.9/site-packages/ray/tune/logger/ RayDeprecationWarning: This API is deprecated and may be removed in future Ray releases. You could suppress this warning by setting env variable PYTHONWARNINGS="ignore::DeprecationWarning"
The `JsonLogger interface is deprecated in favor of the `ray.tune.json.JsonLoggerCallback` interface and will be removed in Ray 2.7.
self._loggers.append(cls(self.config, self.logdir, self.trial))
/home/tails/.conda/envs/ray/lib/python3.9/site-packages/ray/tune/logger/ RayDeprecationWarning: This API is deprecated and may be removed in future Ray releases. You could suppress this warning by setting env variable PYTHONWARNINGS="ignore::DeprecationWarning"
The `CSVLogger interface is deprecated in favor of the `ray.tune.csv.CSVLoggerCallback` interface and will be removed in Ray 2.7.
self._loggers.append(cls(self.config, self.logdir, self.trial))
/home/tails/.conda/envs/ray/lib/python3.9/site-packages/ray/tune/logger/ RayDeprecationWarning: This API is deprecated and may be removed in future Ray releases. You could suppress this warning by setting env variable PYTHONWARNINGS="ignore::DeprecationWarning"
The `TBXLogger interface is deprecated in favor of the `ray.tune.tensorboardx.TBXLoggerCallback` interface and will be removed in Ray 2.7.
self._loggers.append(cls(self.config, self.logdir, self.trial))
2023-08-28 19:15:42,121 WARNING -- Setting `exploration_config={}` because you set `_enable_rl_module_api=True`. When RLModule API are enabled, exploration_config can not be set. If you want to implement custom exploration behaviour, please modify the `forward_exploration` method of the RLModule at hand. On configs that have a default exploration config, this must be done with `config.exploration_config={}`.
2023-08-28 19:15:42,161 WARNING -- DeprecationWarning: `ValueNetworkMixin` has been deprecated. This will raise an error in the future!
2023-08-28 19:15:42,161 WARNING -- DeprecationWarning: `LearningRateSchedule` has been deprecated. This will raise an error in the future!
2023-08-28 19:15:42,161 WARNING -- DeprecationWarning: `EntropyCoeffSchedule` has been deprecated. This will raise an error in the future!
2023-08-28 19:15:42,161 WARNING -- DeprecationWarning: `KLCoeffMixin` has been deprecated. This will raise an error in the future!
/home/tails/.conda/envs/ray/lib/python3.9/site-packages/gymnasium/envs/ UserWarning: WARN: The environment creator metadata doesn't include `render_modes`, contains: ['render.modes']
import click
import random
from ray import tune, air
import click
import random
import gymnasium as gym
import ray.rllib.algorithms.ppo as ppo
from spr_rl.envs.spr_env import SprEnv
import os
import inspect
import numpy as np
from ray.tune.registry import register_env
import ray.air
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.policy.policy import PolicySpec
from ray.tune.stopper import MaximumIterationStopper
from ray.rllib.algorithms.algorithm import Algorithm
# Set TensorFlow's native (C++) log-level variable to '2' before any training
# code runs, to reduce console noise.
os.environ.update({'TF_CPP_MIN_LOG_LEVEL': '2'})
# Click decorators
# NOTE(review): this snippet lost its indentation when it was pasted, and no
# `@click.command()` line is visible above the decorators below -- confirm
# against the real file before relying on the structure shown here.
# TODO: Add testing flag (timestamp and seed). Already set in params
@click.argument('network', type=click.Path(exists=True))
@click.argument('simulator_config', type=click.Path(exists=True))
@click.argument('services', type=click.Path(exists=True))
@click.argument('training_duration', type=int)
@click.option('-s', '--seed', type=int, help="Set the agent's seed", default=None)
@click.option('-t', '--test', help="Path to test timestamp and seed", default=None)
@click.option('-a', '--append_test', help="test after training", is_flag=True)
@click.option('-m', '--model_path', help="path to a model zip file", default=None)
@click.option('-ss', '--sim-seed', type=int, help="simulator seed", default=None)
@click.option('-b', '--best', help="Select the best agent", is_flag=True)
def main(network, simulator_config, services, training_duration,
seed, test, append_test, model_path, sim_seed, best):
# NOTE(review): the next line reads like the function's docstring with its
# triple quotes stripped by the paste.
SPR-RL DRL Scaling and Placement main executable
# Get or set a seed
if seed is None:
# No seed was given on the command line: draw one in [0, 9999].
seed = random.randint(0, 9999)
# Seed random generators
# NOTE(review): only environment variables are set here; no RNG (`random`,
# `numpy`) is actually seeded in the visible lines. PYTHONHASHSEED is normally
# read at interpreter start-up, so assigning it here presumably only affects
# child processes -- confirm this is the intent.
os.environ['PYTHONHASHSEED'] = str(seed)
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# Empty device list presumably hides all GPUs so everything runs on CPU -- TODO confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = ""
print(f"Creating agent with seed: {seed}")
print(f"Using network: {network}")
# Bundle the CLI inputs into one dict; it is handed to the environment as its
# `config` kwarg further down. `append_test` and `model_path` are not included
# and are not used anywhere else in the visible lines.
param_config = {
"seed": seed,
"sim_config": simulator_config,
"network": network,
"services": services,
"training_duration": training_duration,
"test_mode": test,
"sim_seed": sim_seed,
"best": best
# NOTE(review): the closing brace of `param_config` is missing from the paste.
# settings used for both stable baselines and rllib
# NOTE(review): none of these four names is referenced again in the visible lines.
env_name = "SprEnv-v0"
train_steps = 10000
learning_rate = 1e-3
save_dir = "saved_models"
def register_custom_env():
# NOTE(review): the call that owns the keyword arguments below (presumably a
# gymnasium registration call) is missing from the paste.
id='SprEnv-v0', # Change this ID to your preference
entry_point='spr_rl.envs:SprEnv', # Specify the module and class name
kwargs={'config': param_config}
# Register the environment with the modified config
# NOTE(review): the registration call and the body of the lambda below are
# missing from the paste; only the lambda header survived.
lambda config: (
# NOTE(review): `num_cpus=2` looks like an argument to a Ray initialisation
# call whose opening line was lost -- confirm.
num_cpus=2, # change to your available number of CPUs
# NOTE(review): the algorithm-config builder expression that should follow
# `config = (` is missing; only its comments remain.
config = (
# Here, we configure all agents to share the same policy.
# RLlib needs one more CPU than configured below (for the driver/trainer?)
# Create the Trainer/Tuner and define how long to train
# NOTE(review): the Tuner arguments and closing parenthesis are missing from the paste.
tuner = ray.tune.Tuner(
# Save the training progress and checkpoints locally under the specified subfolder.
# Control training length by setting the number of iterations. 1 iter = 4000 time steps by default.
# Run training and save the result
# NOTE(review): the right-hand side of this assignment was lost in the paste
# (presumably the call that runs the tuner) -- confirm.
result_grid =
# Pick the trial with the highest mean episode reward and restore an Algorithm
# from its checkpoint.
best_result = result_grid.get_best_result(metric="episode_reward_mean", mode="max")
# NOTE(review): this rebinding shadows the `ppo` module alias imported at the
# top of the file (`import ray.rllib.algorithms.ppo as ppo`).
ppo = Algorithm.from_checkpoint(best_result.checkpoint)
env = gym.make("SprEnv-v0")
obs, info = env.reset()
done = False
# run one episode with the trained model
while not done:
action = ppo.compute_single_action(obs)
# `reward` and `info` are received but not used in the visible lines.
obs, reward, terminated, truncated, info = env.step(action)
# The episode ends when the environment reports termination or truncation.
done = terminated or truncated
if __name__ == "__main__":
    # Example input files used when the script is launched directly.
    network = "inputs/networks/interroute-in2-eg1-rand-cap0-2.graphml"
    services = "inputs/services/abc-start_delay0.yaml"
    sim_config = "inputs/config/simulator/mmpp-12-8.yaml"
    # Passed as a string because the values below form a CLI-style argument list.
    training_duration = "200000"

    # Positional arguments first, then the flags: test after training ('-a')
    # with a fixed agent seed ('-s 8443').
    cli_args = [network, sim_config, services, training_duration, '-a', '-s', '8443']
    main(cli_args)

    # Alternative invocations kept for reference (note that they mention an
    # `agent_config` name that is not defined in this block):
    # main([network, agent_config, sim_config, services, training_duration, '-t', '2020-12-03_13:17:26_seed9834'])
    # main([network, agent_config, sim_config, services, training_duration, '--best'])
    # main([network, agent_config, sim_config, services, training_duration, '-t', 'best',
    # '-m', 'results/models/poisson/'])