I followed the code here to record evaluation videos. However, no video folder is created.
Does anyone have any idea what I'm missing?
This is my code:
import ray
from ray import air, tune
from ray.air.config import CheckpointConfig
from ray.rllib.algorithms import ppo
from ray.tune.stopper import MaximumIterationStopper
from ray.tune.registry import register_env
from HideAndSeek import HideAndSeek
ray.init()
filename = '../Env_Linux_Nav_1_Seekers_Topdown/Hide and Seek'
register_env("HAS", lambda config: HideAndSeek(config))
config = ppo.DEFAULT_CONFIG.copy()
config["num_workers"] = 3
config["num_envs_per_worker"] = 2
config["num_gpus"] = 1
config["preprocessor_pref"] = "rllib"
config["evaluation_duration"] = 2
config["evaluation_config"] = {"record_env": True}
config["evaluation_parallel_to_training"] = True
config["evaluation_num_workers"] = 1
config["evaluation_interval"] = 1
config["env_config"] = {"file_name": filename, "worker_id": 0}
config["model"] = {"use_lstm": True, "lstm_cell_size": 256, "max_seq_len": 20,
"fcnet_hiddens": [],
"conv_filters": [[32, [8, 8], 4], [64, [4, 4], 2], [64, [3, 3], 1],
[512, [7, 7], 1]],
}
config["framework"] = "torch"
config["lr"] = 0.00025
config["kl_coeff"] = 0
config["grad_clip"] = tune.grid_search([1, 5, 20, 40])
config["clip_param"] = 0.2
config["vf_clip_param"] = 30
config["normalize_actions"] = False
config["batch_mode"] = 'complete_episodes'
config["rollout_fragment_length"] = 300
config["train_batch_size"] = 300
config["sgd_minibatch_size"] = config["train_batch_size"]
config["env"] = "HAS"
stopper = MaximumIterationStopper(300)
checkpoint_config = CheckpointConfig(num_to_keep=20, checkpoint_frequency=25,
                                     checkpoint_at_end=True)
results = tune.Tuner(
    ppo.PPO,
    param_space=config,
    run_config=air.RunConfig(
        local_dir="ray_results/",
        stop=stopper,
        checkpoint_config=checkpoint_config,
    )).fit()
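As far as I can tell, record_env can also be set to a directory path instead of True; an explicit folder would at least make it obvious where the videos are supposed to end up, so I may also try something like this (the "videos" name is just a placeholder of mine):

config["evaluation_config"] = {
    # Assumption: a string is treated as the directory (relative to the
    # trial's results dir) where the evaluation videos get written.
    "record_env": "videos",
}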
This is my environment, in case something is wrong here:
import gym
import mlagents_envs
from mlagents_envs.environment import UnityEnvironment
from unity_wrappers.envs import MultiUnityWrapper
from gym.spaces.discrete import Discrete
import numpy as np
class HideAndSeek(gym.Env):
    def __init__(self, env_config):
        super().__init__()
        worker_id = env_config["worker_id"]
        file_name = env_config["file_name"]
        # Keep trying worker ids until we find a free Unity worker port.
        while True:
            self.worker_id = worker_id
            try:
                self.unity_env = UnityEnvironment(
                    file_name=file_name,
                    worker_id=worker_id,
                )
            except mlagents_envs.exception.UnityWorkerInUseException as e:
                worker_id += 1
                # Hard limit.
                if worker_id > 100:
                    raise e
            else:
                break
        self.env = MultiUnityWrapper(unity_env=self.unity_env, uint8_visual=True, allow_multiple_obs=True)
        self.agent_id = tuple(self.env.agent_id_to_behaviour_name.keys())[0]
        # Flatten the MultiDiscrete action space into a single Discrete space.
        self.action_shape = self.env.action_space[self.agent_id].nvec
        self.action_space = Discrete(np.prod(self.action_shape))
        self.observation_space = self.env.observation_space[self.agent_id][0]

    def reset(self):
        return self.env.reset()[self.agent_id][0]

    def step(self, action):
        # Map the flat Discrete action back to the original MultiDiscrete action.
        action = np.array(np.unravel_index(action, self.action_shape))
        (obs_dict, reward_dict, done_dict, info_dict) = self.env.step({self.agent_id: action})
        obs, reward, done = obs_dict[self.agent_id][0], reward_dict[self.agent_id], done_dict[self.agent_id]
        if obs_dict[self.agent_id][1][0] == 0:
            done = True
        return obs, reward, done, {}
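One thing I am not sure about: I believe the recording path goes through gym's video recorder, which expects the env to implement render(mode="rgb_array") and to advertise that mode in its metadata. My wrapper does not implement render() at all, so maybe roughly these additions to the class are needed (the self._last_obs attribute is hypothetical; reset()/step() would have to store the latest visual observation there, and I am not sure what frame format MultiUnityWrapper actually provides):

class HideAndSeek(gym.Env):
    # Assumption: the recorder checks metadata to see that rgb_array frames
    # are available before it starts writing video files.
    metadata = {"render.modes": ["rgb_array"]}

    def render(self, mode="rgb_array"):
        # Hypothetical: return the most recent visual observation as an
        # HxWx3 uint8 array, stored by reset()/step().
        return self._last_obs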