Hi @cade
Sorry for my late reply.
I’ve just been running the code below:
import os
import numpy as np
import gymnasium
from gymnasium.spaces import Discrete, Box
from gymnasium.wrappers import ResizeObservation
import ray
from ray.tune.registry import register_env
import ray.rllib.algorithms.impala as impala
from ray.tune.logger import pretty_print
config = {
"observation_space": Box(
low=0,
high=255,
shape=(72, 128, 3),
dtype=np.uint8),
"action_space": Discrete(5),
"p_terminated": 1e-4, # to prevent early termination - decrease if needed
"max_episode_len":495,
# "sleeping":1.0 # to mimic slow env - increase if needed
}
def env_creator(env_config):
    kwargs = {}
    env = gymnasium.make('my_random:my_random/MyRandomEnv-v0', config=env_config, **kwargs)
    env = ResizeObservation(env, 84)
    env = gymnasium.wrappers.FrameStack(env, 4)
    return env
os.environ["RAY_DEDUP_LOGS"] = "0"
ray.init()
register_env("myrandomenv", env_creator)
algo = (
    impala.ImpalaConfig()
    .training(
        lr=5e-4,
        lr_schedule=[[0, 5e-4], [200e6, 0.0]],
        vf_loss_coeff=0.5,
        train_batch_size=600,  # 1200,
        entropy_coeff=5e-3,
        entropy_coeff_schedule=[[0, 5e-3], [100e6, 1e-3], [200e6, 5e-5]],
        grad_clip=40.0)
    .environment(env="myrandomenv", env_config=config)
    .framework(framework="tf2", eager_tracing=True)
    .rollouts(
        num_rollout_workers=6,  # 6 for single machine
        num_envs_per_worker=4,
        rollout_fragment_length=100,
        remote_worker_envs=True,
        remote_env_batch_wait_ms=10,
        preprocessor_pref=None,
    )
    .resources(num_gpus=1, num_cpus_per_worker=5)
    .fault_tolerance(recreate_failed_workers=True, restart_failed_sub_environments=True)
    .build()
)
for i in range(10):
    result = algo.train()
    print(pretty_print(result))
    if i % 5 == 0:
        checkpoint_dir = algo.save("./ray_30_test")
        print(f"Checkpoint saved in directory {checkpoint_dir}")
I’m using the environment found here.
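For context, it is essentially a random-observation env along these lines (a rough sketch with hypothetical internals, not the exact code at the link):

import gymnasium


class MyRandomEnv(gymnasium.Env):
    # Sketch only: emits random observations, terminates with probability
    # p_terminated per step, and truncates at max_episode_len.
    def __init__(self, config=None):
        config = config or {}
        self.observation_space = config["observation_space"]
        self.action_space = config["action_space"]
        self.p_terminated = config.get("p_terminated", 1e-4)
        self.max_episode_len = config.get("max_episode_len", 495)
        self.t = 0

    def reset(self, *, seed=None, options=None):
        super().reset(seed=seed)
        self.t = 0
        return self.observation_space.sample(), {}

    def step(self, action):
        self.t += 1
        terminated = bool(self.np_random.random() < self.p_terminated)
        truncated = self.t >= self.max_episode_len
        return self.observation_space.sample(), 0.0, terminated, truncated, {}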
And I get this output:
2023-05-25 10:02:12,852 INFO worker.py:1616 -- Started a local Ray instance. View the dashboard at 127.0.0.1:8265
2023-05-25 10:02:13,333 INFO algorithm_config.py:3307 -- Executing eagerly (framework='tf2'), with eager_tracing=True. For production workloads, make sure to set eager_tracing=True in order to match the speed of tf-static-graph (framework='tf'). For debugging purposes, `eager_tracing=False` is the best choice.
2023-05-25 10:02:13,340 INFO algorithm.py:527 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
(RolloutWorker pid=42162) ###################################
(RolloutWorker pid=42162) Seeding succeded using env_config
(RolloutWorker pid=42162) ###################################
(RolloutWorker pid=42162) I'm now being reset by a worker
(RolloutWorker pid=42162) I'm now being reset by a worker
(RolloutWorker pid=42162) /home/novelty/miniconda3/envs/ray240/lib/python3.9/site-packages/gymnasium/spaces/box.py:230: UserWarning: WARN: Casting input x to numpy array.
(RolloutWorker pid=42162) gym.logger.warn("Casting input x to numpy array.")
(_RemoteSingleAgentEnv pid=43065) ################################### [repeated 58x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/ray-logging.html#log-deduplication for more options.)
(_RemoteSingleAgentEnv pid=43065) Seeding succeded using env_config [repeated 29x across cluster]
(_RemoteSingleAgentEnv pid=42950) I'm now being reset by a worker [repeated 83x across cluster]
(_RemoteSingleAgentEnv pid=43041) I'm now being reset by a worker [repeated 3x across cluster]
agent_timesteps_total: 7200
connector_metrics:
  StateBufferConnector_ms: 0.011156002680460611
  ViewRequirementAgentConnector_ms: 0.2080609401067098
counters:
  num_agent_steps_sampled: 7200
  num_agent_steps_trained: 7200
  num_env_steps_sampled: 7200
  num_env_steps_trained: 7200
  num_samples_added_to_queue: 7200
  num_training_step_calls_since_last_synch_worker_weights: 1171
  num_weight_broadcasts: 13
I’ve also tried setting the variable when starting Ray from the command line:
env RAY_DEDUP_LOGS=0 ray start --head
but that doesn’t fix it either.
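The only other variant I can think of is exporting the variable before ray is imported at all; here is a minimal sketch of what I mean (my assumption being that setting it after import ray may already be too late):

import os

# Set the variable before importing ray so the driver process and any
# workers it launches inherit it (assumption: setting it later is ignored).
os.environ["RAY_DEDUP_LOGS"] = "0"

import ray  # imported only after the variable is set

ray.init()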
BR
Jorgen