RLlib: errors creating AlphaZero environment with custom Gym environment

Following the example here https://github.com/ray-project/ray/blob/master/rllib/contrib/alpha_zero/environments/cartpole.py, I have created a wrapper for my custom Gym environment (AmlRlEnvRllib_alphazero) and ran the tune.run command below.

import time

from ray import tune
from ray.rllib.models import ModelCatalog
from ray.rllib.contrib.alpha_zero.models.custom_torch_models import DenseModel

ModelCatalog.register_custom_model("dense_model", DenseModel)

start = time.time()
analysis = tune.run(
    "contrib/AlphaZero",
    metric="episode_reward_mean",
    mode="max",
    stop={"training_iteration": 10},
    config={
        "env": AmlRlEnvRllib_alphazero(dict_acct_type=acct_type, dict_env_accts=env_accts2,
                                       action_multiple=5, action_power=3,
                                       lookback_period=2, rule_run_frequency=1,
                                       rule_threshold_amt=10000, rule_dict=rule_dict),  # CartPole, # select_env,
        "lr": tune.loguniform(1e-3, 1e-5),
        "horizon": tune.choice([20]),
        "ranked_rewards": {
            "enable": True,
        },
        "model": {
            "custom_model": "dense_model",
        },
    },
    num_samples=8,
    # checkpoint_score_attr="min-episode_len_mean",
    checkpoint_freq=1,
    checkpoint_at_end=True,
)

from gym.spaces import Box, Dict


class AmlRlEnvRllib_alphazero:
    """
    Wrapper for the gym AmlRlEnvRllib environment where the reward
    is accumulated to the end.
    """

    def __init__(self, dict_acct_type=None, dict_env_accts=None, action_multiple=5, action_power=2, time_step=24,
                 all_channels=['CASH', 'MI', 'WIRE'], rule_threshold_amt=6000, magnitude_action_penalty=-0.5,
                 lookback_period=14, time_step_limit_per_episode=120, rule_dict=None,
                 add_rule_penalty=True, rule_run_frequency=7):
        select_env = "rl_env-v2"
        from ray.tune import register_env
        register_env(select_env, lambda config: AmlRlEnvRllib(dict_acct_type=dict_acct_type, dict_env_accts=dict_env_accts,
                                                              action_multiple=action_multiple,
                                                              action_power=action_power, time_step=time_step,
                                                              all_channels=all_channels,
                                                              rule_threshold_amt=rule_threshold_amt,
                                                              magnitude_action_penalty=magnitude_action_penalty,
                                                              lookback_period=lookback_period,
                                                              time_step_limit_per_episode=time_step_limit_per_episode,
                                                              rule_dict=rule_dict, add_rule_penalty=add_rule_penalty,
                                                              rule_run_frequency=rule_run_frequency))

        self.env = AmlRlEnvRllib(dict_acct_type=dict_acct_type, dict_env_accts=dict_env_accts,
                                 action_multiple=action_multiple, action_power=action_power, time_step=time_step,
                                 all_channels=all_channels, rule_threshold_amt=rule_threshold_amt,
                                 magnitude_action_penalty=magnitude_action_penalty,
                                 lookback_period=lookback_period, time_step_limit_per_episode=time_step_limit_per_episode,
                                 rule_dict=rule_dict, add_rule_penalty=add_rule_penalty, rule_run_frequency=rule_run_frequency)

        self.observation_space = Dict({
            "obs": self.env.observation_space,
            "action_mask": Box(low=0, high=1, shape=(self.env.action_space.n, ))
        })
        self.running_reward = 0
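(Side note: the cartpole.py example linked above also gives the wrapper an action_space attribute and reset, step, get_state and set_state methods, which AlphaZero's MCTS uses to snapshot and restore the environment. A minimal sketch of those pieces, assuming AmlRlEnvRllib follows the standard gym API and every action is always legal; the _get_obs accessor is hypothetical and env-specific:)

import numpy as np
from copy import deepcopy

class AmlRlEnvRllib_alphazero:
    # ... __init__ as above; also set:
    #     self.action_space = self.env.action_space

    def reset(self):
        self.running_reward = 0
        return {"obs": self.env.reset(),
                "action_mask": np.ones(self.env.action_space.n)}

    def step(self, action):
        obs, rew, done, info = self.env.step(action)
        # Accumulate the reward and only pay it out at episode end,
        # as the class docstring describes.
        self.running_reward += rew
        score = self.running_reward if done else 0
        return ({"obs": obs, "action_mask": np.ones(self.env.action_space.n)},
                score, done, info)

    def get_state(self):
        # Snapshot for MCTS: a copy of the env plus the accumulated reward.
        return deepcopy(self.env), self.running_reward

    def set_state(self, state):
        self.env = deepcopy(state[0])
        self.running_reward = state[1]
        # Rebuilding the observation is env-specific; the CartPole example
        # reads self.env.unwrapped.state here. _get_obs is a hypothetical
        # accessor on AmlRlEnvRllib.
        obs = self.env._get_obs()
        return {"obs": obs, "action_mask": np.ones(self.env.action_space.n)}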

It threw the following error:

2021-05-21 14:01:29,331 ERROR ray_trial_executor.py:532 -- Trial contrib_AlphaZero_<ofs_aml_rl_env.envs.aml_rl_env_rllib.AmlRlEnvRllib_alphazero object at 0x00000173E2669A88>_913e2_00000: Unexpected error starting runner.
Traceback (most recent call last):
  File "C:\Users\saararor\AppData\Local\Programs\Python\Python37\lib\site-packages\ray\tune\ray_trial_executor.py", line 522, in start_trial
    return self._start_trial(trial, checkpoint, train=train)
  File "C:\Users\saararor\AppData\Local\Programs\Python\Python37\lib\site-packages\ray\tune\ray_trial_executor.py", line 424, in _start_trial
    runner = self._setup_remote_runner(trial)
  File "C:\Users\saararor\AppData\Local\Programs\Python\Python37\lib\site-packages\ray\tune\ray_trial_executor.py", line 253, in _setup_remote_runner
    trial.init_logdir()
  File "C:\Users\saararor\AppData\Local\Programs\Python\Python37\lib\site-packages\ray\tune\trial.py", line 419, in init_logdir
    self.local_dir)
  File "C:\Users\saararor\AppData\Local\Programs\Python\Python37\lib\site-packages\ray\tune\trial.py", line 146, in create_logdir
    os.makedirs(logdir, exist_ok=True)
  File "C:\Users\saararor\AppData\Local\Programs\Python\Python37\lib\os.py", line 223, in makedirs
    mkdir(name, mode)
OSError: [WinError 123] The filename, directory name, or volume label syntax is incorrect: 'C:\Users\saararor\ray_results\contrib/AlphaZero\contrib_AlphaZero_<ofs_aml_rl_env.envs.aml_rl_env_rllib.AmlRlEnvRllib_alphazero object at 0x00000173E2669A88>_913e2_00000_0_horizo_2021-05-21_14-01-29'

I get the same error even if I register the environment AmlRlEnvRllib_alphazero outside the class AmlRlEnvRllib_alphazero (a sketch of that setup follows below). Can anyone help with this issue?
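(For reference, registering the wrapper outside the class would look roughly like the sketch below, with aml_alphazero_env as a hypothetical registered name. Note that registration alone does not change the trial name: as long as an env instance is still passed to config["env"], its repr still ends up in the log-directory path, which is consistent with the error staying the same.)

from ray import tune
from ray.tune import register_env

# Register the AlphaZero wrapper once, at module level; the lambda builds
# a fresh env on each worker.
register_env("aml_alphazero_env",
             lambda config: AmlRlEnvRllib_alphazero(dict_acct_type=acct_type, dict_env_accts=env_accts2,
                                                    action_multiple=5, action_power=3,
                                                    lookback_period=2, rule_run_frequency=1,
                                                    rule_threshold_amt=10000, rule_dict=rule_dict))

analysis = tune.run(
    "contrib/AlphaZero",
    stop={"training_iteration": 10},
    config={
        # A registered string name instead of an env instance keeps the
        # object repr out of the trial / log-directory name.
        "env": "aml_alphazero_env",
        "model": {"custom_model": "dense_model"},
    },
)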

Try changing this to

analysis = tune.run(
    "contrib\AlphaZero",

You should also provide the __repr__ dunder for your custom environment to replace this <ofs_aml_rl_env.envs.aml_rl_env_rllib.AmlRlEnvRllib_alphazero object at 0x00000173E2669A88> with something that will be a legal file path.
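A minimal sketch of that, assuming Tune only needs a short, filesystem-safe string when it stringifies the env for the trial name:

class AmlRlEnvRllib_alphazero:
    # ... rest of the class as above ...

    def __repr__(self):
        # Tune embeds str(env) in the trial and log-directory names, and the
        # default repr ("<... object at 0x...>") contains characters that are
        # illegal in Windows paths. Return something path-safe instead.
        return "AmlRlEnvRllib_alphazero"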


Thanks @mannyv. What are the steps to provide the repr dunder for a custom environment?
