Error with custom environments

from ray.rllib.algorithms.ppo import PPOConfig
from ray.tune.registry import register_env

# CustomEnv, BATCH_SIZE, and logger are defined elsewhere in my module

def initialize_ppo_trainer():
    global ppo_trainer

    register_env("custom_env", lambda config2: CustomEnv(config2))

    # Configure PPO
    ppo_config = (
        PPOConfig()
        .environment("custom_env")  # Name of the registered environment
        .framework("torch")  # Use torch or tf depending on your setup
        .rollouts(num_rollout_workers=0, create_env_on_local_worker=True)  # Adjust number of workers
        .training(
            train_batch_size=BATCH_SIZE,
            sgd_minibatch_size=min(32, BATCH_SIZE),  # Minibatch size <= train batch size
        )
        .resources(num_gpus=0)  # Adjust GPU resources as needed
    )

    # Build the PPO trainer
    ppo_trainer = ppo_config.build()
    logger.info("PPO trainer initialized.")

CustomEnv:

import gym
import numpy as np
from gym import spaces
from typing import Any, Dict

# ACTIONS, STATE_FEATURES, and logger are defined elsewhere in my module

class CustomEnv(gym.Env):
    """
    Custom Environment for Incident Response using the OpenAI Gym interface.
    """

    metadata = {'render.modes': ['human']}

    def __init__(self, config: Dict[str, Any] = None):
        super(CustomEnv, self).__init__()

        # Store the config first so reset() can rely on it below
        self.config = config if config is not None else {}

        # Define action and observation space
        # Actions: Discrete actions corresponding to incident response strategies
        self.action_space = spaces.Discrete(len(ACTIONS))  # len(ACTIONS) = 9

        # Observation space: 77 continuous features, normalized between 0 and 1
        # Adjust the low and high values based on actual data ranges for better normalization
        self.observation_space = spaces.Box(
            low=0.0,
            high=1.0,
            shape=(len(STATE_FEATURES),),
            dtype=np.float32
        )

        print(f"CustomEnv Initialized: Observation space - {self.observation_space}, Action space - {self.action_space}")
        logger.info(f"CustomEnv Initialized: Observation space - {self.observation_space}, Action space - {self.action_space}")

        # Initialize state
        self.state = self.reset()

ValueError: observation_space not provided in PolicySpec for default_policy and env does not have an observation space OR no spaces received from other workers' env(s) OR no observation_space specified in config!
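One thing I am not sure about is the gym vs. gymnasium API: as far as I know, newer RLlib releases (roughly Ray 2.3+) expect gymnasium-style environments, where reset() returns (obs, info) and step() returns a five-element tuple. A minimal sketch of that interface, for reference (this is an assumption on my part, not code from my project):

import gymnasium
import numpy as np
from gymnasium import spaces

class MinimalEnv(gymnasium.Env):
    """Toy env showing the gymnasium-style reset()/step() signatures."""

    def __init__(self, config=None):
        super().__init__()
        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Box(0.0, 1.0, shape=(4,), dtype=np.float32)

    def reset(self, *, seed=None, options=None):
        super().reset(seed=seed)
        obs = np.zeros(4, dtype=np.float32)
        return obs, {}  # gymnasium: (observation, info)

    def step(self, action):
        obs = np.zeros(4, dtype=np.float32)
        reward = 0.0
        terminated = False  # episode finished naturally
        truncated = False   # episode cut off (e.g. time limit)
        return obs, reward, terminated, truncated, {}  # gymnasium: 5-tuple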

Even this simple code gives me the same error:

import ray
from ray.rllib.algorithms.ppo import PPOConfig
from ray.tune.registry import register_env
import gym

# Initialize Ray
ray.init(ignore_reinit_error=True)

# Define the default gym environment creator
def env_creator(env_config):
    return gym.make("CartPole-v1")

# Register the environment
register_env("cartpole_env", env_creator)

# Create a PPO configuration and train the agent
config = PPOConfig().environment(env="cartpole_env").framework("torch").rollouts(num_rollout_workers=1)

# Build the PPO trainer using the config
trainer = config.build()

# Train for a few iterations
for i in range(3):
    result = trainer.train()
    print(f"Iteration {i + 1}: reward = {result['episode_reward_mean']}")

# Cleanup Ray
ray.shutdown()
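If the gym vs. gymnasium mismatch mentioned above is the issue, I suppose the repro would change roughly like this (again an assumption on my side, untested):

import ray
import gymnasium as gym  # assumption: newer RLlib expects gymnasium, not classic gym
from ray.rllib.algorithms.ppo import PPOConfig
from ray.tune.registry import register_env

ray.init(ignore_reinit_error=True)

def env_creator(env_config):
    return gym.make("CartPole-v1")  # gymnasium's CartPole-v1

register_env("cartpole_env", env_creator)

config = PPOConfig().environment(env="cartpole_env").framework("torch").rollouts(num_rollout_workers=1)
trainer = config.build()

for i in range(3):
    result = trainer.train()
    print(f"Iteration {i + 1}: reward = {result['episode_reward_mean']}")

ray.shutdown()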