I am configuring the SAC DRL algorithm with GPU to train my custom environment. The same configuration for the workers and GPU allocation works fine with PPO but it terminates early with SAC.
Any help will be appreciated!
Below is the code and configuration I am using to train agents for my environment.
System and library configuration:
Ray version: 1.4.0
Python Version: 3.7.10
Tensorflow version: 1.14.0
OS: Fedora 34
#!/usr/bin/env python
# encoding: utf-8
from NR_IES.envs.NR_IES_env import NR_IES_v0
from ray.tune.registry import register_env
import gym
import os
import ray
from ray.rllib.agents.sac.sac import SACTrainer, DEFAULT_CONFIG
import shutil
from ray import tune
import logging
def main ():
chkpt_root = "sac_training_26oct/NR_IES"
shutil.rmtree(chkpt_root, ignore_errors=True, onerror=None)
ray.init(local_mode=True, include_dashboard= False, logging_level=logging.DEBUG)
# custom environment registration
select_env = "NR_IES-v0"
register_env(select_env, lambda config: NR_IES_v0())
# create agent and environment configuration
config = DEFAULT_CONFIG.copy()
config["log_level"] = "WARN"
config['num_workers'] = 10
config['num_gpus'] = 1
config['num_gpus_per_worker'] = (1-0.0001)/10
agent = SACTrainer(config, env=select_env)
status = "{:2d} reward {:6.2f}/{:6.2f}/{:6.2f} len {:4.2f} saved {}"
n_iter = 1000000
for n in range(n_iter):
result = agent.train()
chkpt_file = agent.save(chkpt_root)
print(status.format(
n + 1,
result["episode_reward_min"],
result["episode_reward_mean"],
result["episode_reward_max"],
result["episode_len_mean"],
chkpt_file
))
if __name__ == "__main__":
main()