How severe does this issue affect your experience of using Ray?
- High: It blocks me from completing my task.
I’ve got a reasonable amount of experience with RL, RLlib, Gym, etc., but I’m running into an issue transitioning into the AIR framework.
I have a testing suite set up in a containerized environment; I’m using the following package versions:
ray[default]==2.3.0 # Now at 2.2.0
ray[tune]==2.3.0
ray[rllib]==2.3.0
ray[air]==2.3.0
torch>=1.13.1
# This lets us investigate ppo training during development
tensorboard>=2.12.0
# This is required even though we're using Torch.
tensorflow_probability>=0.19.0
xgboost>=1.7.4
xgboost_ray>=0.1.15
My workflow creates data (validated, tests pass), fits an XGBoost model to the data (validated, tests pass), then spins up the RL experiment (Nonfunctional.)
The offending code:
####################################################################
# Test RL Environments #
# ---------------------------------------------------------------- #
# Test the reinforcement learning environments. #
####################################################################
####################################################################
# Test RL Environments                                             #
# ---------------------------------------------------------------- #
# Test the reinforcement learning environments.                    #
####################################################################
@pytest.mark.usefixtures("environment", "logged_model", "raycluster", "tempfolder")
@pytest.mark.parametrize(
    "simulator",
    [("Sim1", 'model'), ("Sim2", None)],
    ids=["Sim1", "Sim2"],
)
def test_rllib_on_env(environment, simulator, logged_model, metadata, raycluster, tempfolder):
    """Smoke-test PPO training on a registered custom environment.

    Parametrized over two simulators: "Sim1" restores a previously
    logged surrogate model from artifacts and passes it to the env;
    "Sim2" runs the env with no surrogate model at all.

    Parameters (all fixtures except ``simulator``):
        environment:   (env_name, env_class) pair to register with RLlib.
        simulator:     parametrized (name, model-flag) tuple.
        logged_model:  handle to the model artifacts logged by an
                       earlier test.
        metadata:      project metadata store (datasets, artifacts).
        raycluster:    running Ray cluster fixture.
        tempfolder:    scratch directory for results.
    """
    #############################
    # Prepare data and simulator#
    #############################
    train_dataset = metadata.get_dataset("TRAIN_DATASET")
    model_folder = os.path.join(tempfolder, 'rllibtest', 'model')
    # Unpack the parametrized pair into distinct names instead of
    # shadowing the ``simulator`` parameter with its own first element.
    sim_name, sim_model = simulator
    env_kwargs = {}
    if sim_model is not None:
        # Redefine the model structure. This doesn't actually fit
        # anything, it just reuses the structure!
        env_kwargs['model'] = CompetitionModel(
            # Reuse the dataset fetched above rather than re-querying.
            training_dataset=train_dataset,
            testing_dataset=metadata.get_dataset("TEST_DATASET"),
            target_name="TARGET",
            artifact_dir=os.path.join(tempfolder, "rayresults"),
            random_state=10,
        )
        # Go get the artifacts *for this guy*.
        # BUG FIX: the original referenced the undefined name
        # ``logged_lapse_model`` here, raising NameError on the "Sim1"
        # case; the fixture injected into this test is ``logged_model``.
        metadata.get_artifacts(logged_model, model_folder)
        # And spin him back up.
        env_kwargs['model'].fit(restore_dir=os.path.join(model_folder, 'artifacts'))
    ############################
    # Register the Environment #
    ############################
    env_name, env_class = environment
    # Bind ``env_class`` as a default argument so the factory does not
    # depend on late-binding closure semantics.
    register_env(env_name, lambda config, _cls=env_class: _cls(**config))
    #########################################
    # Declare an RL Algorithm Configuration #
    #########################################
    _env_config = {
        "dataset": train_dataset,
        "target_column": "TARGET",
        "current_premium_column": "FEATURE_1",
        "indicated_premium_column": "FEATURE_3",
        "simulator": sim_name,
        **env_kwargs,
    }
    config = (
        get_trainable_cls("PPO")
        .get_default_config()
        .environment(
            env_name,
            env_config=_env_config,
        )
        .framework("torch")
        .rollouts(num_rollout_workers=1)
    )
    ########################################
    # Declare Experiment Stopping Criteria #
    ########################################
    # Any one of these terminates the run (OR semantics in Tune).
    stop = {
        "training_iteration": 1,
        "timesteps_total": 10,
        "episode_reward_mean": 100,
    }
    ppo_folder = os.path.join(tempfolder, 'rllibtest', sim_name, 'ppo')
    os.makedirs(ppo_folder, exist_ok=True)
    tuner = tune.Tuner(
        "PPO",
        param_space=config.to_dict(),
        run_config=air.RunConfig(
            name='ppotest',
            local_dir=ppo_folder,
            stop=stop,
            verbose=2,
            log_to_file=True,
            # Surface worker failures immediately instead of retrying.
            failure_config=air.FailureConfig(
                max_failures=0,
                fail_fast=True,
            ),
        ),
    )
    results = tuner.fit()
    print("Checking if learning goals were achieved")
    print(check_learning_achieved(results, 100))
    # Deliberate sentinel: fail loudly with the resolved config so the
    # full PPO configuration is visible in the pytest report.
    raise Exception("WE BE DONE!", config.to_dict())
This runs (apparently endlessly) and logs no events to tensorboard.
Is there a way to force the framework to dump verbose logs that I’m not using already? I’m trying to troubleshoot what’s going on in the PPO training algorithm to cause it to silently die.