Hi, below is my code. Previously, with Ray 2.0.0, eval.py worked fine. Now that I have moved to Ray 2.2.0, I can no longer load the trained model correctly.
This is how I save the checkpoints:
ray.init()
agent = ppo.PPOTrainer(config, env="MA_env")
n_iter = 2800
for n in range(n_iter):
    result = agent.train()
    print(pretty_print(result))
    if n % 5 == 0:
        checkpoint = agent.save()  # returns the path of the saved checkpoint
        print("checkpoint saved at", checkpoint)
Next is the evaluation. It fails with an error saying TensorFlow is not installed, even though I set the framework to torch and torch is available in my conda environment.
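My current guess (unverified) is that the restore path first builds an Algorithm with the default config, and the default framework is still tf, so my framework="torch" setting never gets applied. A sketch of the workaround I am considering, restoring into an explicitly torch-configured trainer (the checkpoint path is a placeholder for the one printed by agent.save()):

agent = ppo.PPOTrainer(config=config, env="MA_env")  # config sets framework="torch"
agent.restore("/path/to/checkpoint_002800/")  # placeholder checkpoint path

Anyway, here is the full eval.py: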
import os, pdb, matplotlib, tempfile, sys
import numpy as np
import matplotlib.pyplot as plt
import time
from datetime import datetime
from sklearn.metrics import mean_squared_error
import inspect
from pathlib import Path
import gym, ray, natsort
from gym import spaces
from scipy.spatial import distance
from ase import Atoms
# from gpaw import GPAW, PW, FD
from ase.optimize import QuasiNewton, BFGS
from ase.io.trajectory import Trajectory
from ase.io import read, write
from ase.build import minimize_rotation_and_translation
import torch
import torchani
from ray import tune
from typing import Dict
from ray.tune.logger import pretty_print
from ray.tune.logger import Logger, UnifiedLogger
from ray.rllib.utils.annotations import override
from ray.rllib.models import ModelCatalog
from ray.tune.registry import register_env
import ray.rllib.agents.ppo as ppo  # note: ray.rllib.agents is deprecated in Ray 2.x in favor of ray.rllib.algorithms
from ray.rllib.env import BaseEnv
from ray.rllib.policy import Policy
# from ray.rllib.agents.callbacks import DefaultCallbacks
from ray.rllib.evaluation import MultiAgentEpisode, RolloutWorker  # MultiAgentEpisode was renamed to Episode in newer RLlib versions
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.utils.schedules import ConstantSchedule, LinearSchedule, ExponentialSchedule, PiecewiseSchedule
from ray.rllib.algorithms.algorithm import Algorithm
from eval_utils import do_optim, read_traj, calc_rmsd
import ma_env  # module that defines MA_env (my multi-agent environment)
model_A = PolicyNetwork  # my custom model class, defined elsewhere
ModelCatalog.register_custom_model("modelA", model_A)
act_space = spaces.Box(low=-0.05,high=0.05, shape=(3,))
obs_space = spaces.Box(low=-1000,high=1000, shape=(128+3+3+1,))
def gen_policy(atom):
    model = "model{}".format(atom)
    config = {"model": {"custom_model": model}}
    return (None, obs_space, act_space, config)
policies = {"policy_A": gen_policy("A")}
policy_ids = list(policies.keys())
def policy_mapping_fn(agent_id, **kwargs):
    return "policy_A"
def env_creator(env_config):
    return ma_env.MA_env(env_config)  # return an env instance
register_env("MA_env", env_creator)
config = ppo.DEFAULT_CONFIG.copy()
config["multiagent"] = {
"policy_mapping_fn": policy_mapping_fn,
"policies": policies,
"policies_to_train": ["policy_A"],#, "policy_N", "policy_O", "policy_H"],
}
config["in_evaluation"] = True
config["explore"] = False
config["log_level"] = "WARN"
config["framework"] = "torch"
# config["num_gpus"] = int(os.environ.get("RLLIB_NUM_GPUS", "0"))
config["num_gpus"] = 1
config["num_workers"] = 1
config["env_config"] = {"atoms":["C", "H"]}
config["rollout_fragment_length"] = 200
config["vf_share_layers"] = True
def custom_log_creator(custom_path, custom_str):
    timestr = datetime.today().strftime("%Y-%m-%d_%H-%M-%S")
    logdir_prefix = "{}_{}".format(custom_str, timestr)

    def logger_creator(config):
        if not os.path.exists(custom_path):
            os.makedirs(custom_path)
        logdir = tempfile.mkdtemp(prefix=logdir_prefix, dir=custom_path)
        return UnifiedLogger(config, logdir, loggers=None)

    return logger_creator
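In case it is relevant: custom_log_creator is meant to be passed to the trainer through the logger_creator argument, which I believe the constructor still accepts in 2.2. A sketch (the log path and prefix are placeholders):

agent = ppo.PPOTrainer(
    config,
    env="MA_env",
    logger_creator=custom_log_creator("/tmp/ray_logs", "ppo_MA_env"),  # placeholder path/prefix
)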
ray.init()
# the following lines run inside my evaluator class, hence "self";
# model_restore is the base path to my saved checkpoints
checkpoint_dir = f"checkpoint_{self.iteration_num:06d}/"
# from_checkpoint is a classmethod, so call it on the class rather than on a fresh Algorithm() instance
self.agent = Algorithm.from_checkpoint(model_restore + checkpoint_dir)
self.env = ma_env.MA_env({})
obs = self.env.reset()
for i in range(20):
    # while done["__all__"] != True:
    action = {}
    for agent_id, agent_obs in obs.items():
        policy_id = config["multiagent"]["policy_mapping_fn"](agent_id)
        # compute_action was replaced by compute_single_action in newer RLlib
        action[agent_id] = self.agent.compute_single_action(agent_obs, policy_id=policy_id)
    obs, rew, done, info = self.env.step(action)
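If restoring the full Algorithm keeps dragging in tf, I may fall back to restoring only the policy weights. My understanding is that Ray 2.2 added Policy.from_checkpoint, which, given an Algorithm checkpoint directory, returns a dict mapping policy IDs to Policy objects. A sketch under that assumption (same paths as above):

# Policy is already imported above (from ray.rllib.policy import Policy)
restored = Policy.from_checkpoint(model_restore + checkpoint_dir)
policy_A = restored["policy_A"]

obs = self.env.reset()
action = {}
for agent_id, agent_obs in obs.items():
    # unlike Algorithm.compute_single_action, Policy.compute_single_action
    # returns an (action, state_outs, extra_info) tuple
    action[agent_id] = policy_A.compute_single_action(agent_obs)[0]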