How severe does this issue affect your experience of using Ray?
- High: It blocks me from completing my task.
My env details are:
ray = 1.13.0
gym= 0.15.3
recsim=0.2.4
My code:
import ray
from ray.rllib.agents.slateq import SlateQTrainer
from ray.rllib.examples.env.recommender_system_envs_with_recsim import InterestEvolutionRecSimEnv
config = {
"env": InterestEvolutionRecSimEnv,
"env_config": {
"num_candidates": 10,
"slate_size": 1,
"wrap_for_bandits": False,
"resample_documents": True,
"convert_to_discrete_action_space": False,
},
"framework": "torch",
}
trainer = SlateQTrainer(config=config)
I keep getting an error that `observation_space["doc"]` has no `len()`.
Error message here:
NotImplementedError Traceback (most recent call last)
~/SageMaker/python_for_rl_venv/lib/python3.7/site-packages/ray/rllib/agents/trainer.py in setup(self, config)
934 try:
--> 935 self._init(self.config, self.env_creator)
936 # New design: Override `Trainable.setup()` (as indented by Trainable)
~/SageMaker/python_for_rl_venv/lib/python3.7/site-packages/ray/rllib/agents/trainer.py in _init(self, config, env_creator)
1073 def _init(self, config: TrainerConfigDict, env_creator: EnvCreator) -> None:
-> 1074 raise NotImplementedError
1075
NotImplementedError:
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
/tmp/ipykernel_82885/3574079892.py in <module>
14 "framework": "torch",
15 }
---> 16 trainer = SlateQTrainer(config=config)
17
18
~/SageMaker/python_for_rl_venv/lib/python3.7/site-packages/ray/rllib/agents/trainer.py in __init__(self, config, env, logger_creator, remote_checkpoint_dir, sync_function_tpl)
869
870 super().__init__(
--> 871 config, logger_creator, remote_checkpoint_dir, sync_function_tpl
872 )
873
~/SageMaker/python_for_rl_venv/lib/python3.7/site-packages/ray/tune/trainable.py in __init__(self, config, logger_creator, remote_checkpoint_dir, sync_function_tpl)
154 start_time = time.time()
155 self._local_ip = self.get_current_ip()
--> 156 self.setup(copy.deepcopy(self.config))
157 setup_time = time.time() - start_time
158 if setup_time > SETUP_TIME_THRESHOLD:
~/SageMaker/python_for_rl_venv/lib/python3.7/site-packages/ray/rllib/agents/trainer.py in setup(self, config)
955 num_workers=self.config["num_workers"],
956 local_worker=True,
--> 957 logdir=self.logdir,
958 )
959 # By default, collect metrics for all remote workers.
~/SageMaker/python_for_rl_venv/lib/python3.7/site-packages/ray/rllib/evaluation/worker_set.py in __init__(self, env_creator, validate_env, policy_class, trainer_config, num_workers, local_worker, logdir, _setup)
176 num_workers=num_workers,
177 config=self._local_config,
--> 178 spaces=spaces,
179 )
180
~/SageMaker/python_for_rl_venv/lib/python3.7/site-packages/ray/rllib/evaluation/worker_set.py in _make_worker(self, cls, env_creator, validate_env, policy_cls, worker_index, num_workers, recreated_worker, config, spaces)
670 extra_python_environs=extra_python_environs,
671 spaces=spaces,
--> 672 disable_env_checking=config["disable_env_checking"],
673 )
674
~/SageMaker/python_for_rl_venv/lib/python3.7/site-packages/ray/rllib/evaluation/rollout_worker.py in __init__(self, env_creator, validate_env, policy_spec, policy_mapping_fn, policies_to_train, tf_session_creator, rollout_fragment_length, count_steps_by, batch_mode, episode_horizon, preprocessor_pref, sample_async, compress_observations, num_envs, observation_fn, observation_filter, clip_rewards, normalize_actions, clip_actions, env_config, model_config, policy_config, worker_index, num_workers, recreated_worker, record_env, log_dir, log_level, callbacks, input_creator, input_evaluation, output_creator, remote_worker_envs, remote_env_batch_wait_ms, soft_horizon, no_done_at_end, seed, extra_python_environs, fake_sampler, spaces, policy, monitor_path, disable_env_checking)
632 policy_config,
633 session_creator=tf_session_creator,
--> 634 seed=seed,
635 )
636
~/SageMaker/python_for_rl_venv/lib/python3.7/site-packages/ray/rllib/evaluation/rollout_worker.py in _build_policy_map(self, policy_dict, policy_config, session_creator, seed)
1787 # Create the actual policy object.
1788 self.policy_map.create_policy(
-> 1789 name, orig_cls, obs_space, act_space, conf, merged_conf
1790 )
1791
~/SageMaker/python_for_rl_venv/lib/python3.7/site-packages/ray/rllib/policy/policy_map.py in create_policy(self, policy_id, policy_cls, observation_space, action_space, config_override, merged_config)
150 else:
151 class_ = policy_cls
--> 152 self[policy_id] = class_(observation_space, action_space, merged_config)
153
154 # Store spec (class, obs-space, act-space, and config overrides) such
~/SageMaker/python_for_rl_venv/lib/python3.7/site-packages/ray/rllib/policy/policy_template.py in __init__(self, obs_space, action_space, config)
277 elif make_model_and_action_dist:
278 self.model, dist_class = make_model_and_action_dist(
--> 279 self, obs_space, action_space, config
280 )
281 # Use default model and default action dist.
~/SageMaker/python_for_rl_venv/lib/python3.7/site-packages/ray/rllib/agents/slateq/slateq_torch_policy.py in build_slateq_model_and_distribution(policy, obs_space, action_space, config)
53 model_config=config["model"],
54 name="slateq_model",
---> 55 fcnet_hiddens_per_candidate=config["fcnet_hiddens_per_candidate"],
56 )
57
~/SageMaker/python_for_rl_venv/lib/python3.7/site-packages/ray/rllib/agents/slateq/slateq_torch_model.py in __init__(self, obs_space, action_space, num_outputs, model_config, name, fcnet_hiddens_per_candidate, double_q)
169 self.choice_model = UserChoiceModel()
170
--> 171 self.q_model = QValueModel(self.obs_space, fcnet_hiddens_per_candidate)
172
173 def get_q_values(self, user: TensorType, docs: List[TensorType]) -> TensorType:
~/SageMaker/python_for_rl_venv/lib/python3.7/site-packages/ray/rllib/agents/slateq/slateq_torch_model.py in __init__(self, obs_space, fcnet_hiddens_per_candidate)
33 self.orig_obs_space = obs_space
34 self.embedding_size = self.orig_obs_space["doc"]["0"].shape[0]
---> 35 self.num_candidates = len(self.orig_obs_space["doc"])
36 assert self.orig_obs_space["user"].shape[0] == self.embedding_size
37
**TypeError: object of type 'Dict' has no len()**
I am also getting same error when using BanditLinUCBTrainer with same recsim env.
Code:
from ray.rllib.agents.bandit import BanditLinUCBTrainer
from ray.rllib.examples.env.recommender_system_envs_with_recsim import InterestEvolutionRecSimEnv
bandits_config = {
"env": InterestEvolutionRecSimEnv,
"env_config": {
"num_candidates": 10, # action_space = Discrete(10) -> int 0-9
"slate_size": 1, # only 1 item recommendation
# Set to False to re-use the same candidate documents each timestep.
# Set True to change the candidate documents at each time step
"resample_documents": True,
# Convert MultiDiscrete actions to Discrete (flatten action space).
# e.g. slate_size=2 and num_candidates=10 -> MultiDiscrete([10, 10]) -> Discrete(100) # 10x10
"convert_to_discrete_action_space": True,
# Convert "doc" key into "item" key.
"wrap_for_bandits": True,
"seed": 0,
},
# trainer information now
"seed": 0,
# rewards are smoothed mean over last these many episodes
'metrics_num_episodes_for_smoothing': 100,
# Generate a result dict every single time step.
"timesteps_per_iteration": 1,
}
bandits_trainer = BanditLinUCBTrainer(config = bandits_config)
bandits_trainer
Error is:
---------------------------------------------------------------------------
NotImplementedError Traceback (most recent call last)
~/SageMaker/python_for_rl_venv/lib/python3.7/site-packages/ray/rllib/agents/trainer.py in setup(self, config)
934 try:
--> 935 self._init(self.config, self.env_creator)
936 # New design: Override `Trainable.setup()` (as indented by Trainable)
~/SageMaker/python_for_rl_venv/lib/python3.7/site-packages/ray/rllib/agents/trainer.py in _init(self, config, env_creator)
1073 def _init(self, config: TrainerConfigDict, env_creator: EnvCreator) -> None:
-> 1074 raise NotImplementedError
1075
NotImplementedError:
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
/tmp/ipykernel_82885/1175594674.py in <module>
26 }
27
---> 28 bandits_trainer = BanditLinUCBTrainer(config = bandits_config)
29 bandits_trainer
~/SageMaker/python_for_rl_venv/lib/python3.7/site-packages/ray/rllib/agents/trainer.py in __init__(self, config, env, logger_creator, remote_checkpoint_dir, sync_function_tpl)
869
870 super().__init__(
--> 871 config, logger_creator, remote_checkpoint_dir, sync_function_tpl
872 )
873
~/SageMaker/python_for_rl_venv/lib/python3.7/site-packages/ray/tune/trainable.py in __init__(self, config, logger_creator, remote_checkpoint_dir, sync_function_tpl)
154 start_time = time.time()
155 self._local_ip = self.get_current_ip()
--> 156 self.setup(copy.deepcopy(self.config))
157 setup_time = time.time() - start_time
158 if setup_time > SETUP_TIME_THRESHOLD:
~/SageMaker/python_for_rl_venv/lib/python3.7/site-packages/ray/rllib/agents/trainer.py in setup(self, config)
955 num_workers=self.config["num_workers"],
956 local_worker=True,
--> 957 logdir=self.logdir,
958 )
959 # By default, collect metrics for all remote workers.
~/SageMaker/python_for_rl_venv/lib/python3.7/site-packages/ray/rllib/evaluation/worker_set.py in __init__(self, env_creator, validate_env, policy_class, trainer_config, num_workers, local_worker, logdir, _setup)
176 num_workers=num_workers,
177 config=self._local_config,
--> 178 spaces=spaces,
179 )
180
~/SageMaker/python_for_rl_venv/lib/python3.7/site-packages/ray/rllib/evaluation/worker_set.py in _make_worker(self, cls, env_creator, validate_env, policy_cls, worker_index, num_workers, recreated_worker, config, spaces)
670 extra_python_environs=extra_python_environs,
671 spaces=spaces,
--> 672 disable_env_checking=config["disable_env_checking"],
673 )
674
~/SageMaker/python_for_rl_venv/lib/python3.7/site-packages/ray/rllib/evaluation/rollout_worker.py in __init__(self, env_creator, validate_env, policy_spec, policy_mapping_fn, policies_to_train, tf_session_creator, rollout_fragment_length, count_steps_by, batch_mode, episode_horizon, preprocessor_pref, sample_async, compress_observations, num_envs, observation_fn, observation_filter, clip_rewards, normalize_actions, clip_actions, env_config, model_config, policy_config, worker_index, num_workers, recreated_worker, record_env, log_dir, log_level, callbacks, input_creator, input_evaluation, output_creator, remote_worker_envs, remote_env_batch_wait_ms, soft_horizon, no_done_at_end, seed, extra_python_environs, fake_sampler, spaces, policy, monitor_path, disable_env_checking)
504 ):
505 # Run the `env_creator` function passing the EnvContext.
--> 506 self.env = env_creator(copy.deepcopy(self.env_context))
507
508 if self.env is not None:
~/SageMaker/python_for_rl_venv/lib/python3.7/site-packages/ray/rllib/agents/trainer.py in <lambda>(cfg)
2773 register_env(name, lambda cfg: _wrapper.remote(cfg))
2774 else:
-> 2775 register_env(name, lambda cfg: env_object(cfg))
2776 return name
2777 elif env_object is None:
~/SageMaker/python_for_rl_venv/lib/python3.7/site-packages/ray/rllib/env/wrappers/recsim.py in __init__(self, config)
258 gym_env,
259 config["convert_to_discrete_action_space"],
--> 260 config["wrap_for_bandits"],
261 )
262 # Call the super (Wrapper constructor) passing it the created env.
~/SageMaker/python_for_rl_venv/lib/python3.7/site-packages/ray/rllib/env/wrappers/recsim.py in recsim_gym_wrapper(recsim_gym_env, convert_to_discrete_action_space, wrap_for_bandits)
193 env = MultiDiscreteToDiscreteActionWrapper(env)
194 if wrap_for_bandits:
--> 195 env = RecSimObservationBanditWrapper(env)
196 return env
197
~/SageMaker/python_for_rl_venv/lib/python3.7/site-packages/ray/rllib/env/wrappers/recsim.py in __init__(self, env)
80 obs_space = self.env.observation_space
81
---> 82 num_items = len(obs_space["doc"])
83 embedding_dim = next(iter(obs_space["doc"].values())).shape[-1]
84 self.observation_space = Dict(
TypeError: object of type 'Dict' has no len()
I am not sure what I am doing wrong here.