How severely does this issue affect your experience of using Ray?
- High: It blocks me from completing my task.
Hi guys, I'm a hardware engineer trying to use RL with Ray and Gymnasium to accelerate analog circuit design.
Currently, I have a custom Gymnasium env that passes the gymnasium.utils.env_checker, and I tried to use this custom env with Ray RLlib. I wrote the training script based on the provided example. The package list (Python 3.10.13) and the script are shown below.
# Imports omitted from this post.
# NOTE(review): this excerpt is incomplete as posted. The option fragments
# below have no `parser.add_argument(` opener and no closing parenthesis, and
# several option names are not shown at all, so the snippet does not parse.
# Optional framework handles (TensorFlow and PyTorch).
tf1, tf, tfv = try_import_tf()
torch, nn = try_import_torch()
# Command-line options; later code reads args.stop_iters, args.stop_timesteps,
# args.stop_reward, args.no_tune and args.as_test.
parser = argparse.ArgumentParser()
"--run", type=str, default="PPO", help="The RLlib-registered algorithm to use."
# NOTE(review): option name missing; presumably `--framework` (the command
# line quoted later in the post passes `--framework torch`) -- confirm.
choices=["tf", "tf2", "torch"],
help="The DL framework specifier.",
# NOTE(review): option name missing; presumably `--as-test` (args.as_test is
# read at the end of the script) -- confirm.
help="Whether this script should be run as a test: --stop-reward must "
"be achieved within --stop-timesteps AND --stop-iters.",
"--stop-iters", type=int, default=50, help="Number of iterations to train."
"--stop-timesteps", type=int, default=100000, help="Number of timesteps to train."
"--stop-reward", type=float, default=0.1, help="Reward at which we stop training."
# NOTE(review): option name missing; presumably `--no-tune` (args.no_tune is
# read in the main section) -- confirm.
help="Run without Tune using a manual train loop instead. In this case,"
"use PPO without grid search and no TensorBoard.",
# NOTE(review): option name missing; presumably `--local-mode` (passed on the
# command line quoted later in the post) -- confirm.
help="Init Ray in local mode for easier debugging.",
class OpampEnv(gym.Env):
metadata = {"render_modes": ["human"], "render_fps": 30}
# Build the environment: initialise bookkeeping fields, then define the action
# and observation spaces from helper functions.
#
# render_mode: stored as-is on the instance (default "human"); it is not
# validated because the assert near the end of this method is commented out.
#
# NOTE(review): indentation was lost when the code was posted; every line down
# to the next `def` belongs to this method body.
def __init__(self, render_mode="human"):
# Load Essential YAML files
# Ignored
# NOTE(review): `value_range_yaml_path` and `config_performance_path` used
# below, and the attributes step() reads (self.config_value_range,
# self.config_value_template, self.config_simulation, self.unassigned_netlist,
# self.config_performance_metric), are not assigned in the visible code;
# presumably they come from the omitted YAML-loading section -- confirm.
# Init variables
# History list that render() plots and step() prints.
self.reward = []
# Reward of the most recent step.
self.curr_reward = 0
# Number of step() calls counted so far.
self.step_count = 0
# Root directory handed to create_working_directory() in step().
self.working_root_dict = "/simulation"
# Define action space
variable_space, self.variable_magnitude = define_variable_space(value_range_yaml_path)
self.action_space = create_action_space(variable_space)
# NOTE(review): the failing `kl` frame in the traceback iterates `self._cats`,
# which suggests a multi-categorical action distribution (e.g. a MultiDiscrete
# space) is produced here -- confirm against the printed action space.
print(f"""Action space: {self.action_space}""")
print(f"""Variable magnitude: {self.variable_magnitude}""")
# Define observation space
self.obs_dim, self.obs_typical_vals = define_param_space(config_performance_path)
self.observation_space = create_obs_space(self.obs_dim)
# assert render_mode is None or render_mode in self.metadata["render_modes"]
self.render_mode = render_mode
def reset(self, seed=None, options=None):
    """Begin a new episode and return the initial ``(observation, info)`` pair.

    Args:
        seed: Optional seed forwarded to the base class so that
            ``self.np_random`` is (re)seeded.
        options: Accepted for API compatibility; not used here.

    Returns:
        A tuple ``(observation, info)``: a float32 array created with
        ``size=(self.obs_dim,)`` and sampled uniformly between -1.0 and 1.0,
        and an empty dict.
    """
    # We need the following line to seed self.np_random. The posted snippet
    # kept only this comment, so the `seed` argument was silently ignored.
    super().reset(seed=seed)

    # Randomly select the observation
    observation = self.np_random.uniform(
        low=-1.0, high=1.0, size=(self.obs_dim,)
    ).astype(np.float32)
    info = {}

    # NOTE(review): the body of this branch is missing from the posted snippet.
    # Calling render() follows the usual Gymnasium "human" mode convention --
    # confirm against the real script.
    if self.render_mode == "human":
        self.render()

    # Always return, regardless of render mode.
    return observation, info
# Run one environment step: write the chosen design variables to a config file,
# run the Spectre simulations, compute the reward from the saved results and
# return the normalised results as the observation.
#
# Returns (observation, reward, terminated, truncated, info); truncated is
# always False and info is always an empty dict.
#
# NOTE(review): indentation was lost when the code was posted; every line down
# to the next `def` belongs to this method body.
def step(self, action):
self.step_count += 1
print(f"Step {self.step_count}! Action: {action}")
# Step 1: Create a new working directory
working_dir = create_working_directory(self.working_root_dict)
print(f"Create working directory: {working_dir}")
# Step 2: Translate action from index to variable values,
# Update config_value.yaml based on template and variable_dict
assigned_netlist_config_path = working_dir + '/config_value_assign.yaml'
# NOTE(review): the call below is cut off in the post -- its remaining
# arguments and closing parenthesis are missing. Presumably
# `assigned_netlist_config_path` is one of them, since nothing else visible
# uses it -- confirm.
generate_assign_config(action, self.config_value_range, self.config_value_template,
# Step 3: Run Spectre simulations
sim_results = run_spectre_simulations(working_dir, self.config_simulation, self.unassigned_netlist)
print(f"Results: {sim_results}")
# Step 4: Replace possible None with nan
sim_results = replace_none_with_nan(sim_results)
print(f"Results(replaced): {sim_results}")
save_results(sim_results, working_dir)
# Step 5: Calculate the reward
# NOTE(review): presumably save_results() wrote this file into working_dir --
# confirm.
result_path = working_dir + "/results.yaml"
self.curr_reward = calculate_reward(result_path, self.config_performance_metric)
print(f"Reward: {self.curr_reward}")
# NOTE(review): no visible line appends self.curr_reward to self.reward, so
# this list (and the plot in render()) stays empty as posted -- confirm whether
# an append was lost.
print(f"Reward list: {self.reward}")
# Step 6: Normalize the observation
observation_norm = normalize_results(sim_results, self.obs_typical_vals)
observation_norm = observation_norm.astype(np.float32)
print(f"Normalized Result: {observation_norm}")
# Step 7: Check if the episode is done
# The episode terminates only when the reward is exactly 10.
# NOTE(review): if calculate_reward() returns a float, exact equality may
# never hold; together with truncated=False below, an episode would then never
# end -- confirm.
terminated = self.curr_reward == 10
if terminated:
print(f"Episode terminated! Reward: {self.curr_reward}, Step count: {self.step_count}")
# NOTE(review): with indentation lost it is unclear whether this reset of the
# counter sits inside the `if terminated:` branch.
self.step_count = 0
info = {}
# NOTE(review): the body of this branch is missing from the post (presumably a
# render call); as written, the return below would be its body -- confirm.
if self.render_mode == "human":
return observation_norm, self.curr_reward, terminated, False, info
def render(self):
    """Scatter-plot the reward history against its index.

    Prints a notice and does nothing else when ``self.reward`` is empty.

    Returns:
        None in every case.
    """
    if not self.reward:
        print("No reward to plot!")
        return None
    # x axis is the position of each reward in the history list.
    # NOTE(review): nothing visible here shows or saves the figure -- confirm
    # whether a display call was lost from the posted snippet.
    plt.scatter(range(len(self.reward)), self.reward)
    return None
# Script entry point: parse the CLI options, build the algorithm config and
# stop criteria, then train either with a manual loop (--no-tune) or with Tune.
#
# NOTE(review): this section is incomplete as posted -- indentation is lost and
# several expressions are cut off (marked below), so it does not parse.
if __name__ == "__main__":
args = parser.parse_args()
print(f"Running with following CLI options: {args}")
# Can also register the env creator function explicitly with:
# register_env("corridor", lambda config: SimpleCorridor(config))
# NOTE(review): the start of this expression (the config object and any
# environment/framework settings) and its closing parenthesis are missing.
config = (
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
.resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0")))
# Stop criteria built from the CLI options.
# NOTE(review): the closing brace of this dict is missing.
stop = {
"training_iteration": args.stop_iters,
"timesteps_total": args.stop_timesteps,
"episode_reward_mean": args.stop_reward,
if args.no_tune:
# manual training with train loop using PPO and fixed learning rate
# NOTE(review): the left operand is missing; presumably `args.run` -- confirm.
if != "PPO":
raise ValueError("Only support --run PPO with --no-tune.")
print("Running manual train loop without Ray Tune.")
# NOTE(review): the trailing `= 1e-3` in the next comment looks like the
# remains of an assignment that set the learning rate -- confirm.
# use fixed learning rate instead of grid search (needs tune) = 1e-3
# NOTE(review): the right-hand side is missing.
algo =
# run manual training loop and print results after each iteration
for _ in range(args.stop_iters):
result = algo.train()
# stop training of the target train steps or reward are reached
# NOTE(review): the end of this condition and the statement it guards are
# missing, as is the `else:` that should introduce the Tune branch below.
if (
result["timesteps_total"] >= args.stop_timesteps
or result["episode_reward_mean"] >= args.stop_reward
# automated run with Tune and grid search and TensorBoard
print("Training automatically with Ray Tune")
# NOTE(review): the Tuner arguments and the right-hand side of `results` are
# missing.
tuner = tune.Tuner(,
results =
if args.as_test:
print("Checking if learning goals were achieved")
check_learning_achieved(results, args.stop_reward)
I ran the above script from the command line with `python <script>.py --run PPO --framework torch --stop-iters 200 --stop-timesteps 100 --stop-reward -0.02 --local-mode`, and several problems occurred, which are listed below:
- Trials did not complete; the related log is shown below:
[2023-10-08 00:06:43,408 E 36878 36878] Pushed Error with JobID: 01000000 of type: task with message: ray::PPO.train() (pid=36878, ip=, actor_id=145d89b07c4158a9a05419c101000000, repr=PPO)
File "/home/userName/anaconda3/envs/gym_env/lib/python3.10/site-packages/ray/tune/trainable/", line 400, in train
raise skipped from exception_cause(skipped)
File "/home/userName/anaconda3/envs/gym_env/lib/python3.10/site-packages/ray/tune/trainable/", line 397, in train
result = self.step()
File "/home/userName/anaconda3/envs/gym_env/lib/python3.10/site-packages/ray/rllib/algorithms/", line 853, in step
results, train_iter_ctx = self._run_one_training_iteration()
File "/home/userName/anaconda3/envs/gym_env/lib/python3.10/site-packages/ray/rllib/algorithms/", line 2838, in _run_one_training_iteration
results = self.training_step()
File "/home/userName/anaconda3/envs/gym_env/lib/python3.10/site-packages/ray/rllib/algorithms/ppo/", line 448, in training_step
train_results = self.learner_group.update(
File "/home/userName/anaconda3/envs/gym_env/lib/python3.10/site-packages/ray/rllib/core/learner/", line 184, in update
File "/home/userName/anaconda3/envs/gym_env/lib/python3.10/site-packages/ray/rllib/core/learner/", line 1304, in update
) = self._update(nested_tensor_minibatch)
File "/home/userName/anaconda3/envs/gym_env/lib/python3.10/site-packages/ray/rllib/core/learner/torch/", line 365, in _update
return self._possibly_compiled_update(batch)
File "/home/userName/anaconda3/envs/gym_env/lib/python3.10/site-packages/ray/rllib/core/learner/torch/", line 123, in _uncompiled_update
loss_per_module = self.compute_loss(fwd_out=fwd_out, batch=batch)
File "/home/userName/anaconda3/envs/gym_env/lib/python3.10/site-packages/ray/rllib/core/learner/", line 1024, in compute_loss
loss = self.compute_loss_for_module(
File "/home/userName/anaconda3/envs/gym_env/lib/python3.10/site-packages/ray/rllib/algorithms/ppo/torch/", line 87, in compute_loss_for_module
action_kl = prev_action_dist.kl(curr_action_dist)
File "/home/userName/anaconda3/envs/gym_env/lib/python3.10/site-packages/ray/rllib/models/torch/", line 327, in kl
for cat, oth_cat in zip(self._cats, other.cats)
AttributeError: '<class 'ray.rllib.models.torch.torch_distributions' object has no attribute 'cats' at time: 1.69669e+09
Trial status: 1 RUNNING
Current time: 2023-10-08 00:06:43. Total running time: 7hr 53min 53s
Logical resource usage: 41.0/48 CPUs, 0/0 GPUs
│ Trial name status │
│ PPO_OpampEnv_4e58f_00000 RUNNING │
2023-10-08 00:06:43,440 ERROR -- Trial task failed for trial PPO_OpampEnv_4e58f_00000
Traceback (most recent call last):
File "/home/userName/anaconda3/envs/gym_env/lib/python3.10/site-packages/ray/air/execution/_internal/", line 110, in resolve_future
result = ray.get(future)
File "/home/userName/anaconda3/envs/gym_env/lib/python3.10/site-packages/ray/_private/", line 24, in auto_init_wrapper
return fn(*args, **kwargs)
File "/home/userName/anaconda3/envs/gym_env/lib/python3.10/site-packages/ray/_private/", line 103, in wrapper
return func(*args, **kwargs)
File "/home/userName/anaconda3/envs/gym_env/lib/python3.10/site-packages/ray/_private/", line 2547, in get
raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(AttributeError): ray::PPO.train() (pid=36878, ip=, actor_id=145d89b07c4158a9a05419c101000000, repr=PPO)
File "/home/userName/anaconda3/envs/gym_env/lib/python3.10/site-packages/ray/tune/trainable/", line 400, in train
raise skipped from exception_cause(skipped)
File "/home/userName/anaconda3/envs/gym_env/lib/python3.10/site-packages/ray/tune/trainable/", line 397, in train
result = self.step()
File "/home/userName/anaconda3/envs/gym_env/lib/python3.10/site-packages/ray/rllib/algorithms/", line 853, in step
results, train_iter_ctx = self._run_one_training_iteration()
File "/home/userName/anaconda3/envs/gym_env/lib/python3.10/site-packages/ray/rllib/algorithms/", line 2838, in _run_one_training_iteration
results = self.training_step()
File "/home/userName/anaconda3/envs/gym_env/lib/python3.10/site-packages/ray/rllib/algorithms/ppo/", line 448, in training_step
train_results = self.learner_group.update(
File "/home/userName/anaconda3/envs/gym_env/lib/python3.10/site-packages/ray/rllib/core/learner/", line 184, in update
File "/home/userName/anaconda3/envs/gym_env/lib/python3.10/site-packages/ray/rllib/core/learner/", line 1304, in update
) = self._update(nested_tensor_minibatch)
File "/home/userName/anaconda3/envs/gym_env/lib/python3.10/site-packages/ray/rllib/core/learner/torch/", line 365, in _update
return self._possibly_compiled_update(batch)
File "/home/userName/anaconda3/envs/gym_env/lib/python3.10/site-packages/ray/rllib/core/learner/torch/", line 123, in _uncompiled_update
loss_per_module = self.compute_loss(fwd_out=fwd_out, batch=batch)
File "/home/userName/anaconda3/envs/gym_env/lib/python3.10/site-packages/ray/rllib/core/learner/", line 1024, in compute_loss
loss = self.compute_loss_for_module(
File "/home/userName/anaconda3/envs/gym_env/lib/python3.10/site-packages/ray/rllib/algorithms/ppo/torch/", line 87, in compute_loss_for_module
action_kl = prev_action_dist.kl(curr_action_dist)
File "/home/userName/anaconda3/envs/gym_env/lib/python3.10/site-packages/ray/rllib/models/torch/", line 327, in kl
for cat, oth_cat in zip(self._cats, other.cats)
AttributeError: '<class 'ray.rllib.models.torch.torch_distributions' object has no attribute 'cats'
Trial PPO_OpampEnv_4e58f_00000 errored after 0 iterations at 2023-10-08 00:06:43. Total running time: 7hr 53min 53s
Error file: /home/userName/ray_results/PPO_2023-10-07_16-12-49/PPO_OpampEnv_4e58f_00000_0_2023-10-07_16-12-50/error.txt
Trial status: 1 ERROR
Current time: 2023-10-08 00:06:43. Total running time: 7hr 53min 53s
Logical resource usage: 0/48 CPUs, 0/0 GPUs
│ Trial name status │
│ PPO_OpampEnv_4e58f_00000 ERROR │
Number of errored trials: 1
│ Trial name # failures error file │
│ PPO_OpampEnv_4e58f_00000 1 /home/userName/ray_results/PPO_2023-10-07_16-12-49/PPO_OpampEnv_4e58f_00000_0_2023-10-07_16-12-50/error.txt │
2023-10-08 00:06:43,806 ERROR -- Trials did not complete: [PPO_OpampEnv_4e58f_00000]
- Although I can see the reward and the agent being updated in the “print” output, TensorBoard cannot find any data for this training run. The folder structure looks like this:
├── PPO_OpampEnv_ce6ef_00000_0_2023-10-07_16-02-06
│ ├── events.out.tfevents.1696666029.eex
│ ├── params.json
│ ├── params.pkl
│ ├── result.json
├── basic-variant-state-2023-10-07_16-02-05.json
├── experiment_state-2023-10-07_16-02-05.json
└── tuner.pkl
Since no one on our team has attempted anything similar before, I don’t have a promising idea of how to solve these problems.
Thanks in advance:)