@arturn Does RLlib populate this timesteps_this_iter field at all? I can't seem to find it. Btw, these are all the fields I see in a random PPO result:
Result for PPO_CartPole-v0_364df_00000:
  agent_timesteps_total: 4000
  counters:
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_env_steps_sampled: 4000
    num_env_steps_trained: 4000
  custom_metrics: {}
  date: 2022-11-09_16-40-56
  done: false
  episode_len_mean: 23.48235294117647
  episode_media: {}
  episode_reward_max: 75.0
  episode_reward_mean: 23.48235294117647
  episode_reward_min: 9.0
  episodes_this_iter: 170
  episodes_total: 170
  experiment_id: 8bffa69bf5e04857aa83536c8ef3736a
  hostname: xw
  info:
    learner:
      default_policy:
        custom_metrics: {}
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 0.665932297706604
          entropy_coeff: 0.0
          kl: 0.027817677706480026
          policy_loss: -0.039548929780721664
          total_loss: 8.858890533447266
          vf_explained_var: 0.008993614464998245
          vf_loss: 8.892877578735352
        num_agent_steps_trained: 125.0
    num_agent_steps_sampled: 4000
    num_agent_steps_trained: 4000
    num_env_steps_sampled: 4000
    num_env_steps_trained: 4000
  iterations_since_restore: 1
  node_ip: 127.0.0.1
  num_agent_steps_sampled: 4000
  num_agent_steps_trained: 4000
  num_env_steps_sampled: 4000
  num_env_steps_sampled_this_iter: 4000
  num_env_steps_trained: 4000
  num_env_steps_trained_this_iter: 4000
  num_faulty_episodes: 0
  num_healthy_workers: 2
  num_recreated_workers: 0
  num_steps_trained_this_iter: 4000
  perf:
    cpu_util_percent: 14.412244897959184
    ram_util_percent: 55.08775510204081
  pid: 43619
  policy_reward_max: {}
  policy_reward_mean: {}
  policy_reward_min: {}
  sampler_perf:
    mean_action_processing_ms: 0.06086085844030104
    mean_env_render_ms: 0.0
    mean_env_wait_ms: 0.06126414658226595
    mean_inference_ms: 3.9059675275660926
    mean_raw_obs_processing_ms: 0.33546808511207116
  sampler_results:
    custom_metrics: {}
    episode_len_mean: 23.48235294117647
    episode_media: {}
    episode_reward_max: 75.0
    episode_reward_mean: 23.48235294117647
    episode_reward_min: 9.0
    episodes_this_iter: 170
    hist_stats:
      episode_lengths: [17, 15, 20, 15, 19, 15, 18, 15, 18, 15, 25, 23, 14, 20, 75,
        30, 10, 20, 54, 28, 31, 44, 50, 10, 28, 16, 34, 12, 17, 44, 26, 41, 9, 51, 71,
        16, 12, 19, 29, 14, 27, 23, 38, 11, 24, 30, 40, 15, 21, 11, 25, 36, 30, 58,
        19, 18, 37, 29, 24, 32, 17, 13, 26, 13, 22, 25, 14, 25, 21, 15, 13, 15, 9, 24,
        17, 34, 35, 18, 19, 21, 14, 18, 40, 15, 14, 17, 23, 17, 10, 15, 17, 15, 38,
        33, 15, 26, 12, 16, 13, 24, 15, 41, 13, 20, 31, 18, 18, 17, 45, 25, 25, 18,
        15, 12, 18, 33, 30, 12, 22, 50, 13, 23, 25, 18, 56, 13, 11, 24, 22, 67, 23,
        13, 21, 30, 15, 26, 25, 26, 14, 59, 27, 45, 19, 34, 11, 16, 15, 12, 10, 26,
        12, 17, 15, 10, 23, 13, 39, 28, 58, 17, 14, 21, 12, 25, 19, 20, 14, 14, 12,
        16]
      episode_reward: [17.0, 15.0, 20.0, 15.0, 19.0, 15.0, 18.0, 15.0, 18.0, 15.0, 25.0,
        23.0, 14.0, 20.0, 75.0, 30.0, 10.0, 20.0, 54.0, 28.0, 31.0, 44.0, 50.0, 10.0,
        28.0, 16.0, 34.0, 12.0, 17.0, 44.0, 26.0, 41.0, 9.0, 51.0, 71.0, 16.0, 12.0,
        19.0, 29.0, 14.0, 27.0, 23.0, 38.0, 11.0, 24.0, 30.0, 40.0, 15.0, 21.0, 11.0,
        25.0, 36.0, 30.0, 58.0, 19.0, 18.0, 37.0, 29.0, 24.0, 32.0, 17.0, 13.0, 26.0,
        13.0, 22.0, 25.0, 14.0, 25.0, 21.0, 15.0, 13.0, 15.0, 9.0, 24.0, 17.0, 34.0,
        35.0, 18.0, 19.0, 21.0, 14.0, 18.0, 40.0, 15.0, 14.0, 17.0, 23.0, 17.0, 10.0,
        15.0, 17.0, 15.0, 38.0, 33.0, 15.0, 26.0, 12.0, 16.0, 13.0, 24.0, 15.0, 41.0,
        13.0, 20.0, 31.0, 18.0, 18.0, 17.0, 45.0, 25.0, 25.0, 18.0, 15.0, 12.0, 18.0,
        33.0, 30.0, 12.0, 22.0, 50.0, 13.0, 23.0, 25.0, 18.0, 56.0, 13.0, 11.0, 24.0,
        22.0, 67.0, 23.0, 13.0, 21.0, 30.0, 15.0, 26.0, 25.0, 26.0, 14.0, 59.0, 27.0,
        45.0, 19.0, 34.0, 11.0, 16.0, 15.0, 12.0, 10.0, 26.0, 12.0, 17.0, 15.0, 10.0,
        23.0, 13.0, 39.0, 28.0, 58.0, 17.0, 14.0, 21.0, 12.0, 25.0, 19.0, 20.0, 14.0,
        14.0, 12.0, 16.0]
    num_faulty_episodes: 0
    policy_reward_max: {}
    policy_reward_mean: {}
    policy_reward_min: {}
    sampler_perf:
      mean_action_processing_ms: 0.06086085844030104
      mean_env_render_ms: 0.0
      mean_env_wait_ms: 0.06126414658226595
      mean_inference_ms: 3.9059675275660926
      mean_raw_obs_processing_ms: 0.33546808511207116
  time_since_restore: 34.13855314254761
  time_this_iter_s: 34.13855314254761
  time_total_s: 34.13855314254761
  timers:
    learn_throughput: 157.822
    learn_time_ms: 25345.008
    synch_weights_time_ms: 3.063
    training_iteration_time_ms: 34133.688
  timestamp: 1668040856
  timesteps_since_restore: 0
  timesteps_total: 4000
  training_iteration: 1
  trial_id: 364df_00000
  warmup_time: 9.969342947006226
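For reference, here is a minimal sketch (assuming the Ray 2.x PPOConfig API) of how a result dict like the one above can be produced and checked for the missing key:

```python
from ray.rllib.algorithms.ppo import PPOConfig

# Small PPO setup on CartPole with two rollout workers
# (matching num_healthy_workers: 2 in the dump above).
algo = (
    PPOConfig()
    .environment("CartPole-v0")
    .rollouts(num_rollout_workers=2)
    .build()
)

# One call to train() returns the result dict that gets printed above.
result = algo.train()

# Check whether the field in question is populated at all.
print("timesteps_this_iter" in result)
# List the timestep-related keys that do show up, e.g.
# timesteps_total and timesteps_since_restore.
print(sorted(k for k in result if "timestep" in k))
```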