Parallelize generating experience from a ray trained agent

How severe does this issue affect your experience of using Ray?

  • High: It blocks me to complete my task.

I am trying to create a dataset by loading a checkpoint of a trained agent. The agent was trained with parallel rollouts (num_rollout_workers = 4). Below is my code.

ray version = 2.5.1

from ray.rllib.algorithms.algorithm import Algorithm
from ray.tune.registry import register_env
from scipy.special import softmax
import gym
import pandas as pd
import glob
import gym_breastcancer
import ray

# Build the environment once and register it under a name RLlib can resolve.
# (Original paste used curly “smart” quotes, which are a SyntaxError in Python.)
env = gym.make('myenv-v3')
register_env("myEnv", lambda config: env)

# NOTE(review): num_cpus=1 leaves no CPU headroom for the 100 remote tasks
# launched below, so they will run essentially serially — raise num_cpus if
# the rollouts should actually run in parallel.
ray.init(num_cpus=1, object_store_memory=10**9)

@ray.remote
def generate_episode(x, agent_type):
    """Roll out one full episode with a trained agent and record it column-wise.

    Args:
        x: Episode id stored with every transition (and the terminal row).
        agent_type: Restored RLlib Algorithm used to pick actions
            (must expose ``compute_single_action``).

    Returns:
        dict of parallel lists — one entry per transition plus a terminal
        sentinel row (None action/probabilities/reward/transition_number)
        marking the episode end. ``features`` holds one more entry than the
        other columns because it includes the initial observation.
    """
    # The original paste had empty dict values ({'action' : , ...}), which is
    # invalid syntax — every column must start as an empty list.
    dataset = {
        'action': [],
        'all_action_probabilities': [],
        'episode_id': [],
        'episode_name': [],
        'reward': [],
        'transition_number': [],
        'features': [],
    }

    state, _ = env.reset()
    done = False
    transition_number = 0
    dataset['features'].append(state)
    while not done:
        dataset['transition_number'].append(transition_number)
        transition_number += 1
        # Act on the observation returned by reset/step. The original read
        # env.state (a custom attribute) and never advanced it, so the agent
        # kept seeing a possibly stale observation.
        action, _, extra = agent_type.compute_single_action(state, full_fetch=True)
        dataset['action'].append(action)
        # gym >= 0.26 returns (obs, reward, terminated, truncated, info);
        # the original discarded `truncated`, so time-limit-truncated
        # episodes would loop forever.
        n_state, reward, terminated, truncated, _ = env.step(action)
        done = terminated or truncated
        dataset['features'].append(n_state)
        dataset['episode_id'].append(x)
        dataset['episode_name'].append('cancer')
        dataset['reward'].append(reward)
        # Turn the DQN Q-values into a probability distribution over actions.
        probs = softmax(extra['q_values'])
        dataset['all_action_probabilities'].append(list(probs))
        state = n_state  # advance the observation for the next step

        if done:
            # Terminal sentinel row: pads every column so each has one entry
            # per recorded feature state.
            dataset['action'].append(None)
            dataset['all_action_probabilities'].append(None)
            dataset['reward'].append(None)
            dataset['transition_number'].append(None)
            dataset['episode_id'].append(x)
            dataset['episode_name'].append('cancer')

    return dataset

# Checkpoint directories of interest end with one of these suffixes.
strings = ['05', '20']
# NOTE(review): `links` is not defined anywhere in this snippet — presumably a
# list of checkpoint paths built earlier (e.g. via glob); confirm upstream.
# Renamed `filter` -> `checkpoints` so the builtin is not shadowed.
checkpoints = [i for i in links for j in strings if i.endswith(j)]

# Put the restored Algorithm in the object store so all tasks share one copy.
# NOTE(review): this line is what produces the pasted error — a restored
# Algorithm does not reliably round-trip through pickle ("'DQN' object has no
# attribute 'local_replay_buffer'"). Prefer passing the checkpoint *path* to
# each task and calling Algorithm.from_checkpoint inside the worker instead.
agent = ray.put(Algorithm.from_checkpoint(checkpoints[0]))

# Fan out 100 episode rollouts and block until all of them finish.
data1_obj = [generate_episode.remote(i, agent) for i in range(0, 100)]
data1 = ray.get(data1_obj)

The above code throws the following error when the ray.get() line runs:

(generate_episode pid=45507) 2023-07-16 20:05:21,923 ERROR serialization.py:387 – ‘DQN’ object has no attribute ‘local_replay_buffer’
(generate_episode pid=45507) Traceback (most recent call last):
(generate_episode pid=45507) File “/home/s/shossain1/miniconda3/envs/ray_new/lib/python3.7/site-packages/ray/_private/serialization.py”, line 385, in deserialize_objects
(generate_episode pid=45507) obj = self._deserialize_object(data, metadata, object_ref)
(generate_episode pid=45507) File “/home/s/shossain1/miniconda3/envs/ray_new/lib/python3.7/site-packages/ray/_private/serialization.py”, line 268, in _deserialize_object
(generate_episode pid=45507) return self._deserialize_msgpack_data(data, metadata_fields)
(generate_episode pid=45507) File “/home/s/shossain1/miniconda3/envs/ray_new/lib/python3.7/site-packages/ray/_private/serialization.py”, line 223, in _deserialize_msgpack_data
(generate_episode pid=45507) python_objects = self._deserialize_pickle5_data(pickle5_data)
(generate_episode pid=45507) File “/home/s/shossain1/miniconda3/envs/ray_new/lib/python3.7/site-packages/ray/_private/serialization.py”, line 211, in _deserialize_pickle5_data
(generate_episode pid=45507) obj = pickle.loads(in_band, buffers=buffers)
(generate_episode pid=45507) File “/home/s/shossain1/miniconda3/envs/ray_new/lib/python3.7/site-packages/ray/rllib/algorithms/algorithm.py”, line 2630, in setstate
(generate_episode pid=45507) if self.local_replay_buffer is not None:
(generate_episode pid=45507) AttributeError: ‘DQN’ object has no attribute ‘local_replay_buffer’