How severely does this issue affect your experience of using Ray?
- High: It blocks me from completing my task.
This is a custom environment that I have registered with Gymnasium. It works fine in Gymnasium, but when I test it in Ray with check_env, it returns the following error:
Code executed:
import ray
from ray.rllib.utils import check_env
import gymnasium as gym
from envs.tiger_env import TigerEnv
tiger_env = gym.make("Tiger-v0")
check_env([tiger_env])
Error:
python3.8/site-packages/ray/rllib/utils/pre_checks/env.py", line 73, in check_env
raise ValueError(
ValueError: Env must be of one of the following supported types: BaseEnv, gymnasium.Env, gym.Env, MultiAgentEnv, VectorEnv, RemoteBaseEnv, ExternalMultiAgentEnv, ExternalEnv, but instead is of type <class 'list'>.
The above error has been found in your environment! We've added a module for checking your custom environments. It may cause your experiment to fail if your environment is not set up correctly. You can disable this behavior via calling `config.environment(disable_env_checking=True)`. You can run the environment checking module standalone by calling ray.rllib.utils.check_env([your env]).
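For reference, I read the last suggestion in that message as something like the following on an algorithm config (just a sketch using PPOConfig and the same "Tiger-v0" ID; I have not verified it, and I would rather fix the underlying problem than silence the checker):

from ray.rllib.algorithms.ppo import PPOConfig

# Sketch only: disable RLlib's environment pre-check, as suggested by the
# error message above. Assumes "Tiger-v0" resolves the same way it does
# for gym.make() in my test script.
config = PPOConfig().environment(env="Tiger-v0", disable_env_checking=True)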
These are my library versions, running on Ubuntu 22.04 with Python 3.8.18:
ray == 2.9.1
gymnasium == 0.28.1
This is the custom environment:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
# Observation encoding
obs_start = 0
obs_growl_left = 1
obs_growl_right = 2
obs_map = {obs_start: "start",
           obs_growl_left: "growl_left",
           obs_growl_right: "growl_right"}

# Action encoding
action_none = -1
action_open_left = 0
action_open_right = 1
action_listen = 2
action_map = {action_none: "none",
              action_open_left: "open_left",
              action_open_right: "open_right",
              action_listen: "listen"}


class TigerEnv(gym.Env):
    """Tiger problem as a Gymnasium environment."""

    def __init__(self, reward_tiger=-100, reward_gold=10, reward_listen=-1,
                 obs_acc=0.85, max_steps_per_epi=100):
        self.reward_tiger = reward_tiger
        self.reward_gold = reward_gold
        self.reward_listen = reward_listen
        self.obs_acc = obs_acc
        self.max_steps_per_epi = max_steps_per_epi
        self.current_episode = -1
        self.action_episode_memory = []
        self.obs_episode_memory = []
        self.reward_episode_memory = []
        self.current_step = 0
        self.reset()
        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Discrete(3)

    def step(self, action):
        terminated = self.open_left or self.open_right
        should_reset = self.take_action(action)
        reward = self.get_reward()
        self.action_episode_memory[self.current_episode].append(action)
        obs = self.get_obs()
        self.obs_episode_memory[self.current_episode].append(obs)
        self.reward_episode_memory[self.current_episode].append(reward)
        self.current_step += 1
        truncated = self.current_step >= self.max_steps_per_epi
        if should_reset:
            self.step_reset()
        info = {}
        return obs, reward, terminated, truncated, info

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        np.random.seed(seed)
        self.current_episode += 1
        self.current_step = 0
        self.open_left = False
        self.open_right = False
        self.tiger_left = np.random.randint(0, 2)
        self.tiger_right = 1 - self.tiger_left
        initial_obs = obs_start
        self.action_episode_memory.append([-1])
        self.obs_episode_memory.append([initial_obs])
        self.reward_episode_memory.append([0])
        info = {}
        return initial_obs, info

    def render(self):
        pass

    def close(self):
        pass

    def read_obs(self, obs):
        if obs not in obs_map:
            raise ValueError("Invalid observation: {}".format(obs))
        else:
            return obs_map[obs]

    def read_action(self, action):
        if action not in action_map:
            raise ValueError("Invalid action: {}".format(action))
        else:
            return action_map[action]

    def take_action(self, action):
        # Opening either door ends the round and triggers a state reset.
        should_reset = False
        if action == action_open_left:
            self.open_left = True
            should_reset = True
        elif action == action_open_right:
            self.open_right = True
            should_reset = True
        elif action == action_listen:
            pass
        else:
            raise ValueError("Invalid action: {}".format(action))
        return should_reset

    def get_reward(self):
        if not (self.open_left or self.open_right):
            return self.reward_listen
        if self.open_left:
            if self.tiger_left:
                return self.reward_tiger
            else:
                return self.reward_gold
        if self.open_right:
            if self.tiger_right:
                return self.reward_tiger
            else:
                return self.reward_gold
        raise ValueError("Invalid state: open_left={}, open_right={}".format(
            self.open_left, self.open_right))

    def get_obs(self):
        # If the last action was not a listen, the growl matches the true
        # tiger position; after a listen it is correct with probability obs_acc.
        last_action = self.action_episode_memory[self.current_episode][-1]
        if last_action != action_listen:
            if self.tiger_left:
                return obs_growl_left
            else:
                return obs_growl_right
        if np.random.rand() < self.obs_acc:
            if self.tiger_left:
                return obs_growl_left
            else:
                return obs_growl_right
        else:
            if self.tiger_left:
                return obs_growl_right
            else:
                return obs_growl_left

    def step_reset(self):
        self.open_left = False
        self.open_right = False
        self.tiger_left = np.random.randint(0, 2)
        self.tiger_right = 1 - self.tiger_left
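In case it is relevant, the Gymnasium registration is essentially the following (a sketch; the entry point is inferred from the import in my test script above):

from gymnasium.envs.registration import register

# Register the custom env with Gymnasium so that gym.make("Tiger-v0") works.
register(
    id="Tiger-v0",
    entry_point="envs.tiger_env:TigerEnv",
)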
I really don’t understand what I am doing wrong here. Do I need to register the environment again separately for Ray?
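For what it is worth, if a separate Ray-side registration is needed, I assume it would look roughly like this (based on my reading of the RLlib docs; the name "tiger_env" is my own choice):

from ray.tune.registry import register_env
from envs.tiger_env import TigerEnv

# Register the environment with Ray's registry so RLlib workers can build it by name.
register_env("tiger_env", lambda env_config: TigerEnv(**env_config))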