[check_env] Custom Gymnasium environment returns error

How severe does this issue affect your experience of using Ray?

  • High: It blocks me from completing my task.

This is a custom environment that I’ve registered with Gymnasium. It works fine in plain Gymnasium, but when I test it in Ray with check_env, it returns this error:

Code executed:

import ray
from ray.rllib.utils import check_env
import gymnasium as gym

from envs.tiger_env import TigerEnv

tiger_env = gym.make("Tiger-v0")
check_env([tiger_env])

Error:

python3.8/site-packages/ray/rllib/utils/pre_checks/env.py", line 73, in check_env
    raise ValueError(
ValueError: Env must be of one of the following supported types: BaseEnv, gymnasium.Env, gym.Env, MultiAgentEnv, VectorEnv, RemoteBaseEnv, ExternalMultiAgentEnv, ExternalEnv, but instead is of type <class 'list'>.

The above error has been found in your environment! We've added a module for checking your custom environments. It may cause your experiment to fail if your environment is not set up correctly. You can disable this behavior via calling `config.environment(disable_env_checking=True)`. You can run the environment checking module standalone by calling ray.rllib.utils.check_env([your env]).

These are my library versions, running on Ubuntu 22.04 with Python 3.8.18:
ray == 2.9.1
gymnasium == 0.28.1

This is the custom environment:

import gymnasium as gym
from gymnasium import spaces
import numpy as np

obs_start = 0
obs_growl_left = 1
obs_growl_right = 2
obs_map = {obs_start: "start", 
           obs_growl_left: "growl_left", 
           obs_growl_right: "growl_right"}

action_none = -1
action_open_left = 0
action_open_right = 1
action_listen = 2
action_map = {action_none: "none", 
              action_open_left: "open_left", 
              action_open_right: "open_right", 
              action_listen: "listen"}

class TigerEnv(gym.Env):
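    """Classic Tiger POMDP: listen for growls to locate the tiger, then open the door hiding the gold."""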

    def __init__(self, reward_tiger=-100, reward_gold=10, reward_listen=-1,
                 obs_acc=0.85, max_steps_per_epi=100):
        self.reward_tiger = reward_tiger
        self.reward_gold = reward_gold
        self.reward_listen = reward_listen
        self.obs_acc = obs_acc
        self.max_steps_per_epi = max_steps_per_epi

        self.current_episode = -1
        self.action_episode_memory = []
        self.obs_episode_memory = []
        self.reward_episode_memory = []

        self.current_step = 0

        self.reset()

        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Discrete(3)

    def step(self, action):
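        # Note: open_left/open_right are cleared by step_reset() before step()
        # returns, so terminated is always False here; episodes end only by
        # truncation at max_steps_per_epi.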
        terminated = self.open_left or self.open_right
        should_reset = self.take_action(action)
        reward = self.get_reward()
        self.action_episode_memory[self.current_episode].append(action)
        obs = self.get_obs()
        self.obs_episode_memory[self.current_episode].append(obs)
        self.reward_episode_memory[self.current_episode].append(reward)

        self.current_step += 1
        truncated = self.current_step >= self.max_steps_per_epi

        if should_reset:
            self.step_reset()
        
        info = {}

        return obs, reward, terminated, truncated, info
    
    def reset(self, seed=None, options=None):

        super().reset(seed=seed)
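        # Note: np.random.seed seeds NumPy's global RNG; Gymnasium's convention
        # would be to draw from self.np_random, which super().reset() seeds.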
        np.random.seed(seed)

        self.current_episode += 1
        self.current_step = 0
        self.open_left = False
        self.open_right = False
        self.tiger_left = np.random.randint(0,2)
        self.tiger_right = 1 - self.tiger_left
        initial_obs = obs_start
        self.action_episode_memory.append([-1])
        self.obs_episode_memory.append([initial_obs])
        self.reward_episode_memory.append([0])

        info = {}
        return initial_obs, info
    
    def render(self):
        pass

    def close(self):
        pass

    def read_obs(self, obs):
        if obs not in obs_map:
            raise ValueError("Invalid observation: {}".format(obs))
        else:
            return obs_map[obs]
        
    def read_action(self, action):
        if action not in action_map:
            raise ValueError("Invalid action: {}".format(action))
        else:
            return action_map[action]
        
    def take_action(self, action):
        should_reset = False
        if action == action_open_left:
            self.open_left = True
            should_reset = True
        elif action == action_open_right:
            self.open_right = True
            should_reset = True
        elif action == action_listen:
            pass
        else:
            raise ValueError("Invalid action: {}".format(action))
        return should_reset
    
    def get_reward(self):
        if not (self.open_left or self.open_right):
            return self.reward_listen
        
        if self.open_left:
            if self.tiger_left:
                return self.reward_tiger
            else:
                return self.reward_gold
            
        if self.open_right:
            if self.tiger_right:
                return self.reward_tiger
            else:
                return self.reward_gold
            
        raise ValueError("Invalid state: open_left={}, open_right={}".format(self.open_left, self.open_right))
    
    def get_obs(self):

        last_action = self.action_episode_memory[self.current_episode][-1]
        if last_action != action_listen:
            if self.tiger_left:
                return obs_growl_left
            else:
                return obs_growl_right
            
        if np.random.rand() < self.obs_acc:
            if self.tiger_left:
                return obs_growl_left
            else:
                return obs_growl_right
            
        else:
            if self.tiger_left:
                return obs_growl_right
            else:
                return obs_growl_left
            
    def step_reset(self):
        self.open_left = False
        self.open_right = False
        self.tiger_left = np.random.randint(0,2)
        self.tiger_right = 1 - self.tiger_left

I really don’t understand what I am doing wrong here. Do I need to register the env again separately for Ray?

You provided a list to the check_env function. The `[your env]` in the error message is just a placeholder, not literal list syntax; check_env expects the environment instance itself. Try check_env(tiger_env) and see if it works.
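For reference, a corrected version of the snippet (assuming, as in the original post, that importing envs.tiger_env registers Tiger-v0 with Gymnasium):

import gymnasium as gym
from ray.rllib.utils import check_env

from envs.tiger_env import TigerEnv  # assumed to register Tiger-v0 on import

tiger_env = gym.make("Tiger-v0")
check_env(tiger_env)  # pass the environment instance itself, not a list

As for registering again for Ray: check_env only needs the instance, so nothing extra is required for the check itself. If you later want to train on the env with RLlib, a minimal sketch of the usual pattern (the name "tiger" here is illustrative):

from ray.tune.registry import register_env
from envs.tiger_env import TigerEnv

# Each RLlib worker calls this creator to build its own env instance.
register_env("tiger", lambda env_config: TigerEnv(**env_config))
# An algorithm config can then refer to the env by name, e.g.
# config.environment("tiger")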

:man_facepalming:

Yes, thank you. I figured it out several hours later.