Env with SuperSuit is not working properly

I tried applying SuperSuit (2.6.6) with Ray 2.0.0.dev0 to CartPole-v0/v1 to see whether it works. No luck so far with RLlib: the model doesn't improve.

A custom environment without SuperSuit (env_config = {'supersuit': False}) works well with both RLlib and Stable-Baselines3 under the default configuration; the agent easily learns to play. But env_config = {'supersuit': True} works only with SB3. I've tried several variations, and they all failed with RLlib. MyEnv itself seems OK, but… Any ideas or suggestions?
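
For context, here is what the frame stacking alone does to the spaces (a quick sketch, assuming SuperSuit 2.6.6 on the classic gym API; with a stack size of 2 I'd expect the 4-dim CartPole observation to widen to 8 dims):

import gym
import supersuit as ss

base = gym.make('CartPole-v1')
stacked = ss.frame_stack_v1(base, 2)
print(base.observation_space)     # Box with shape (4,)
print(stacked.observation_space)  # expected: Box with shape (8,), two frames side by side

The full script: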

import gym
import torch
import ray
from copy import deepcopy
from ray.rllib.agents.ppo import PPOTrainer, DEFAULT_CONFIG
# from ray.rllib.agents.pg import PGTrainer, DEFAULT_CONFIG
from ray.tune.logger import pretty_print
from ray.tune.registry import register_env
import supersuit as ss

# versions: torch 1.9.0+cu102, ray 2.0.0.dev0
class MyEnv(gym.Env):
    def __init__(self, env_config):
        self.env = gym.make('CartPole-v1')
        self.supersuit = env_config.get('supersuit', False)
        if self.supersuit:
            # stack the last `num_stacks` observations into one
            self.num_stacks = env_config.get('num_stacks', 2)
            self.env = ss.frame_stack_v1(self.env, self.num_stacks)
        # expose the (possibly stacked) spaces of the wrapped env
        self.observation_space = self.env.observation_space
        self.action_space = self.env.action_space
        
    def reset(self):
        obs = self.env.reset()
        if self.supersuit:
            # frame_stack_v1 resets the stack to copies of the initial frame;
            # step with random actions so the stack holds distinct frames
            for _ in range(self.num_stacks - 1):
                obs, reward, done, info = self.step(self.action_space.sample())
                if done:  # very unlikely this early in CartPole, but be safe
                    obs = self.env.reset()
        return obs
    
    def step(self, action):
        return self.env.step(action)
    
    def render(self):
        return self.env.render()
    
    def close(self):
        self.env.close()
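
A quick smoke test of the wrapper path (not part of the training run; with num_stacks=2 I'd expect both shapes below to come out as (8,)):

env = MyEnv({'supersuit': True, 'num_stacks': 2})
obs = env.reset()
print(env.observation_space.shape, obs.shape)  # should match, e.g. (8,) and (8,)
env.close()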

ray.init()
config = deepcopy(DEFAULT_CONFIG)
config['framework'] = 'torch'
env_config = {
    'supersuit': True,
    'num_stacks': 2,
}
config.update({
    'env_config': env_config
})

agent = PPOTrainer(config, env=MyEnv)

for i in range(1, 11):
    result = agent.train()
    print(f"{i:02d} reward: {result['episode_reward_mean']:>6.1f}")
#     print(pretty_print(result))

env = MyEnv(env_config)
obs = env.reset()
for i in range(1000):
    action = agent.compute_single_action(obs)
    obs, reward, done, info = env.step(action)
    env.render()
    if done:
        obs = env.reset()

env.close()
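
Side note: register_env is imported above but unused; registering the env under a string id should be equivalent to passing the class directly (a sketch; 'my_env' is an arbitrary name):

register_env('my_env', lambda cfg: MyEnv(cfg))
config['env'] = 'my_env'
# agent = PPOTrainer(config)  # should behave the same as PPOTrainer(config, env=MyEnv)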


################# Stable-Baselines3
from stable_baselines3 import PPO

env = MyEnv(env_config)
print(env.observation_space)

model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=20000)

obs = env.reset()
for i in range(1000):
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)
    env.render()
    if done:
        obs = env.reset()

env.close()