I’m trying to evaluate model inference performance in RLlib. To do so, I use the rollout.py script to evaluate previously trained PPO models on the gym env Pong-v0 and measure this time. I read that rollout.py doesn’t allow parallel execution and that creating more workers doesn’t improve its performance. Looking through several sources, I saw that the latest commit for this script ([RLlib] Allow `rllib rollout` to run distributed via evaluation worke… · ray-project/ray@d001af3 · GitHub) added evaluation workers to perform rollouts (in the sense of evaluating a model), so I tried to adapt these scripts, adding some timers. I tried both options: first the rollout.py script from ray 1.2.0 (which calls agent.compute_action()), and second the version from the commit mentioned above (which calls agent._evaluate()). With the first option, since increasing the number of workers didn’t improve performance, I tried giving more resources to the driver (2 GPUs and 8 CPUs), but the timing results remained the same. With the second option, I tried giving more resources to both the driver and the evaluation workers, and I didn’t get any improvement either.
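To make this concrete, here is roughly the kind of config override I mean by “giving more resources” when restoring the trained agent. This is a minimal sketch with placeholder values and a placeholder checkpoint path, not my exact setup:

```python
# Minimal sketch (placeholder values, not my exact setup) of how I restore the
# trained PPO agent and assign resources to the driver / evaluation workers.
import ray
from ray.rllib.agents.ppo import PPOTrainer

ray.init()

config = {
    # Resources for the driver, i.e. the process that runs compute_action().
    "num_gpus": 2,
    "num_cpus_for_driver": 8,
    # Evaluation workers used by the _evaluate() path from the commit above.
    "evaluation_num_workers": 4,
    "evaluation_num_episodes": 10,
    "num_cpus_per_worker": 1,
    "num_gpus_per_worker": 0,
}

agent = PPOTrainer(config=config, env="Pong-v0")
agent.restore("/path/to/checkpoint/checkpoint-100")  # placeholder path
```

The evaluation_* keys are the ones I mean in my second question below.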
So, my questions are:
- Am I right in thinking that rollout.py from ray 1.2.0 doesn’t parallelize tasks, so it doesn’t make sense to increase the number of workers? In that case, should I see time improvements when adding resources to the driver (up to 2 GPUs and 8 CPUs)?
- When using the version from the commit mentioned above, does it make sense to increase the evaluation workers’ resources? Should I observe some improvement when doing so? (A quick resource check I run is shown right after this list.)
- Are both versions evaluating the same thing, with the only difference being that the second one parallelizes the tasks?
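For reference, this is the quick check mentioned in the second question. It only confirms how many CPUs/GPUs Ray actually sees; it’s a standalone snippet, not part of rollout.py:

```python
# Standalone check (not part of rollout.py): confirm which resources Ray sees,
# since adding GPUs/CPUs did not change my timings at all.
import ray

ray.init(ignore_reinit_error=True)
print("Cluster resources:   ", ray.cluster_resources())
print("Available resources: ", ray.available_resources())
```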
Thank you so much in advance for your answers. I’m new to Ray and I need to know how to properly measure model inference times for a university project.
P.S.: Here is how I measure time in both of the options mentioned above.
- Modified `rollout.py` from ray 1.2.0:
```python
steps = 0
episodes = 0
######################################
model_times_totals_per_episode = []
steps_per_episode = []
model_times_per_episode = []
results = []
######################################
while keep_going(steps, num_steps, episodes, num_episodes):
    mapping_cache = {}  # in case policy_agent_mapping is stochastic
    saver.begin_rollout()
    obs = env.reset()
    agent_states = DefaultMapping(
        lambda agent_id: state_init[mapping_cache[agent_id]])
    prev_actions = DefaultMapping(
        lambda agent_id: action_init[mapping_cache[agent_id]])
    prev_rewards = collections.defaultdict(lambda: 0.)
    done = False
    reward_total = 0.0
    ###################################
    this_episode_time = 0.0
    steps_this_episode = 0
    model_times_this_episode = []
    results_this_episode = {}
    ###################################
    while not done and keep_going(steps, num_steps, episodes,
                                  num_episodes):
        multi_obs = obs if multiagent else {_DUMMY_AGENT_ID: obs}
        action_dict = {}
        for agent_id, a_obs in multi_obs.items():
            if a_obs is not None:
                policy_id = mapping_cache.setdefault(
                    agent_id, policy_agent_mapping(agent_id))
                p_use_lstm = use_lstm[policy_id]
                # This is where we start applying the model to see which action to take.
                ################
                t0 = time.time()
                ################
                if p_use_lstm:
                    a_action, p_state, _ = agent.compute_action(
                        a_obs,
                        state=agent_states[agent_id],
                        prev_action=prev_actions[agent_id],
                        prev_reward=prev_rewards[agent_id],
                        policy_id=policy_id)
                    agent_states[agent_id] = p_state
                else:
                    a_action = agent.compute_action(
                        a_obs,
                        prev_action=prev_actions[agent_id],
                        prev_reward=prev_rewards[agent_id],
                        policy_id=policy_id)
                ########################
                t1 = time.time()
                model_times_this_episode.append(t1 - t0)
                this_episode_time += (t1 - t0)
                ########################
                a_action = flatten_to_single_ndarray(a_action)
                action_dict[agent_id] = a_action
                prev_actions[agent_id] = a_action
        action = action_dict
        action = action if multiagent else action[_DUMMY_AGENT_ID]
        next_obs, reward, done, info = env.step(action)
        if multiagent:
            for agent_id, r in reward.items():
                prev_rewards[agent_id] = r
        else:
            prev_rewards[_DUMMY_AGENT_ID] = reward
        if multiagent:
            done = done["__all__"]
            reward_total += sum(
                r for r in reward.values() if r is not None)
        else:
            reward_total += reward
        if not no_render:
            env.render()
        saver.append_step(obs, action, next_obs, reward, done, info)
        steps += 1
        ######################
        steps_this_episode += 1
        ######################
        obs = next_obs
    saver.end_rollout()
    print("Episode #{}: reward: {}".format(episodes, reward_total))
    ####################################################################
    print("Episode #{}: model_time: {}".format(episodes, this_episode_time))
    print("Episode #{}: steps: {}".format(episodes, steps_this_episode))
    print("Episode #{}: average model time per step: {}".format(episodes, (this_episode_time / steps_this_episode)))
    print("-------------------------------------------------------------")
    model_times_totals_per_episode.append(this_episode_time)
    steps_per_episode.append(steps_this_episode)
    model_times_per_episode.append(model_times_this_episode)
    results_this_episode['episode'] = episodes
    results_this_episode['total_model_time'] = this_episode_time
    results_this_episode['num_steps'] = steps_this_episode
    results_this_episode['average_model_time_per_step'] = this_episode_time / steps_this_episode
    results_this_episode['reward'] = reward_total
    results.append(results_this_episode)
    ####################################################################
    if done:
        episodes += 1
########################
print("Episodes times:")
print(model_times_totals_per_episode)
print("Total model time: {}".format(sum(model_times_totals_per_episode)))
print("Average model time per episode: {}".format(sum(model_times_totals_per_episode) / episodes))
print("Average model time per step: {}".format(sum(model_times_totals_per_episode) / sum(steps_per_episode)))
print("Average steps per episode: {}".format(sum(steps_per_episode) / episodes))
```
- Modified `rollout.py` from [RLlib] Allow `rllib rollout` to run distributed via evaluation worke… · ray-project/ray@d001af3 · GitHub:
```python
steps = 0
episodes = 0
model_times_totals_per_episode = []
steps_per_episode = []
results = []
while keep_going(steps, num_steps, episodes, num_episodes):
    saver.begin_rollout()
    results_this_episode = {}
    ################
    t0 = time.time()
    ################
    eval_result = agent._evaluate()["evaluation"]
    ########################
    t1 = time.time()
    this_episode_time = (t1 - t0)
    ########################
    # Increase timestep and episode counters.
    eps = agent.config["evaluation_num_episodes"]
    episodes += eps
    steps_this_episode = eps * eval_result["episode_len_mean"]
    steps += eps * eval_result["episode_len_mean"]
    # Print out results and continue.
    print("Episode #{}: reward: {}".format(episodes, eval_result["episode_reward_mean"]))
    saver.end_rollout()
    print("Episode #{}: model_time: {}".format(episodes, this_episode_time))
    print("Episode #{}: steps: {}".format(episodes, steps_this_episode))
    print("Episode #{}: average model time per step: {}".format(episodes, (this_episode_time / steps_this_episode)))
    print("-------------------------------------------------------------")
    model_times_totals_per_episode.append(this_episode_time)
    steps_per_episode.append(steps_this_episode)
    results_this_episode['episode'] = episodes
    results_this_episode['total_model_time'] = this_episode_time
    results_this_episode['num_steps'] = steps_this_episode
    results_this_episode['average_model_time_per_step'] = this_episode_time / steps_this_episode
    results_this_episode['reward'] = eval_result["episode_reward_mean"]
    results.append(results_this_episode)
########################
print("Total model time: {}".format(sum(model_times_totals_per_episode)))
print("Average model time per episode: {}".format(sum(model_times_totals_per_episode) / episodes))
print("Average model time per step: {}".format(sum(model_times_totals_per_episode) / sum(steps_per_episode)))
print("Average steps per episode: {}".format(sum(steps_per_episode) / episodes))
return
```
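For completeness, the collected results list could then be dumped to a file to compare the two options offline. This snippet is not in either script above and the filename is arbitrary:

```python
# Not in either script above: dump the collected per-episode results so the
# two options can be compared offline. The filename is arbitrary.
import json

with open("inference_timings.json", "w") as f:
    json.dump(results, f, indent=2)
```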