Hi Rohan,
I installed the latest nightly Ray build from this wheel file: ray-3.0.0.dev0-cp39-cp39-manylinux2014_x86_64.whl
Then I installed the RLlib and Tune dependencies with:
pip install ray[rllib]==3.0.0.dev0
pip install ray[tune]==3.0.0.dev0
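To confirm the environment, I double-checked the installed build from Python (a quick sanity check, nothing more):

import ray

# Verify which nightly build is actually importable in this env.
print(ray.__version__)  # expecting 3.0.0.dev0
print(ray.__commit__)   # the nightly commit hash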
But when I try to replicate your off-policy estimation example from rllib-offline.rst (master branch of the ray-project/ray GitHub repo), the following error occurs:
Traceback (most recent call last):
File "/home/stefan/PycharmProjects/RLInvManagement2/ope/off_policy_estimation.py", line 54, in <module>
algo = config.build()
File "/home/stefan/anaconda3/envs/py39_ray_nightly2/lib/python3.9/site-packages/ray/rllib/algorithms/algorithm_config.py", line 307, in build
return self.algo_class(
File "/home/stefan/anaconda3/envs/py39_ray_nightly2/lib/python3.9/site-packages/ray/rllib/algorithms/algorithm.py", line 308, in __init__
super().__init__(config=config, logger_creator=logger_creator, **kwargs)
File "/home/stefan/anaconda3/envs/py39_ray_nightly2/lib/python3.9/site-packages/ray/tune/trainable/trainable.py", line 157, in __init__
self.setup(copy.deepcopy(self.config))
File "/home/stefan/anaconda3/envs/py39_ray_nightly2/lib/python3.9/site-packages/ray/rllib/algorithms/algorithm.py", line 580, in setup
self.reward_estimators[name] = method_type(
File "/home/stefan/anaconda3/envs/py39_ray_nightly2/lib/python3.9/site-packages/ray/rllib/offline/estimators/direct_method.py", line 55, in __init__
self.model = model_cls(
TypeError: __init__() got an unexpected keyword argument 'tau'
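For what it's worth, this is how I checked which keyword arguments the FQETorchModel constructor accepts on my install (plain inspect, nothing RLlib-specific):

import inspect

from ray.rllib.offline.estimators.fqe_torch_model import FQETorchModel

# List the accepted constructor parameters to see whether "tau" is among them.
print(inspect.signature(FQETorchModel.__init__))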
I then commented out the lines that set the tau parameter in the q_model_config dictionaries, but the following error occurs instead. The complete sample code (with the tau entries still shown) is included below.
Traceback (most recent call last):
File "/home/stefan/anaconda3/envs/py39_ray_nightly2/lib/python3.9/site-packages/ray/rllib/policy/torch_policy.py", line 1065, in _worker
self._loss(self, model, self.dist_class, sample_batch)
File "/home/stefan/anaconda3/envs/py39_ray_nightly2/lib/python3.9/site-packages/ray/rllib/algorithms/dqn/dqn_torch_policy.py", line 270, in build_q_losses
{"obs": train_batch[SampleBatch.NEXT_OBS]},
File "/home/stefan/anaconda3/envs/py39_ray_nightly2/lib/python3.9/site-packages/ray/rllib/policy/sample_batch.py", line 705, in __getitem__
value = dict.__getitem__(self, key)
KeyError: 'new_obs'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/stefan/PycharmProjects/RLInvManagement2/ope/off_policy_estimation.py", line 57, in <module>
algo.train()
File "/home/stefan/anaconda3/envs/py39_ray_nightly2/lib/python3.9/site-packages/ray/tune/trainable/trainable.py", line 347, in train
result = self.step()
File "/home/stefan/anaconda3/envs/py39_ray_nightly2/lib/python3.9/site-packages/ray/rllib/algorithms/algorithm.py", line 661, in step
results, train_iter_ctx = self._run_one_training_iteration()
File "/home/stefan/anaconda3/envs/py39_ray_nightly2/lib/python3.9/site-packages/ray/rllib/algorithms/algorithm.py", line 2378, in _run_one_training_iteration
num_recreated += self.try_recover_from_step_attempt(
File "/home/stefan/anaconda3/envs/py39_ray_nightly2/lib/python3.9/site-packages/ray/rllib/algorithms/algorithm.py", line 2190, in try_recover_from_step_attempt
raise error
File "/home/stefan/anaconda3/envs/py39_ray_nightly2/lib/python3.9/site-packages/ray/rllib/algorithms/algorithm.py", line 2373, in _run_one_training_iteration
results = self.training_step()
File "/home/stefan/anaconda3/envs/py39_ray_nightly2/lib/python3.9/site-packages/ray/rllib/algorithms/dqn/dqn.py", line 400, in training_step
train_results = multi_gpu_train_one_step(self, train_batch)
File "/home/stefan/anaconda3/envs/py39_ray_nightly2/lib/python3.9/site-packages/ray/rllib/execution/train_ops.py", line 176, in multi_gpu_train_one_step
results = policy.learn_on_loaded_batch(
File "/home/stefan/anaconda3/envs/py39_ray_nightly2/lib/python3.9/site-packages/ray/rllib/policy/torch_policy.py", line 564, in learn_on_loaded_batch
return self.learn_on_batch(batch)
File "/home/stefan/anaconda3/envs/py39_ray_nightly2/lib/python3.9/site-packages/ray/rllib/utils/threading.py", line 24, in wrapper
return func(self, *a, **k)
File "/home/stefan/anaconda3/envs/py39_ray_nightly2/lib/python3.9/site-packages/ray/rllib/policy/torch_policy.py", line 462, in learn_on_batch
grads, fetches = self.compute_gradients(postprocessed_batch)
File "/home/stefan/anaconda3/envs/py39_ray_nightly2/lib/python3.9/site-packages/ray/rllib/policy/policy_template.py", line 386, in compute_gradients
return parent_cls.compute_gradients(self, batch)
File "/home/stefan/anaconda3/envs/py39_ray_nightly2/lib/python3.9/site-packages/ray/rllib/utils/threading.py", line 24, in wrapper
return func(self, *a, **k)
File "/home/stefan/anaconda3/envs/py39_ray_nightly2/lib/python3.9/site-packages/ray/rllib/policy/torch_policy.py", line 645, in compute_gradients
tower_outputs = self._multi_gpu_parallel_grad_calc([postprocessed_batch])
File "/home/stefan/anaconda3/envs/py39_ray_nightly2/lib/python3.9/site-packages/ray/rllib/policy/torch_policy.py", line 1149, in _multi_gpu_parallel_grad_calc
raise last_result[0] from last_result[1]
ValueError: new_obs
Traceback (most recent call last):
File "/home/stefan/anaconda3/envs/py39_ray_nightly2/lib/python3.9/site-packages/ray/rllib/policy/torch_policy.py", line 1065, in _worker
self._loss(self, model, self.dist_class, sample_batch)
File "/home/stefan/anaconda3/envs/py39_ray_nightly2/lib/python3.9/site-packages/ray/rllib/algorithms/dqn/dqn_torch_policy.py", line 270, in build_q_losses
{"obs": train_batch[SampleBatch.NEXT_OBS]},
File "/home/stefan/anaconda3/envs/py39_ray_nightly2/lib/python3.9/site-packages/ray/rllib/policy/sample_batch.py", line 705, in __getitem__
value = dict.__getitem__(self, key)
KeyError: 'new_obs'
In tower 0 on device cpu
Exception ignored in: <function RolloutWorker.__del__ at 0x7f35a8754b80>
Traceback (most recent call last):
File "/home/stefan/anaconda3/envs/py39_ray_nightly2/lib/python3.9/site-packages/ray/util/tracing/tracing_helper.py", line 465, in _resume_span
TypeError: 'NoneType' object is not callable
Exception ignored in: <function RolloutWorker.__del__ at 0x7f35cfd0adc0>
Traceback (most recent call last):
File "/home/stefan/anaconda3/envs/py39_ray_nightly2/lib/python3.9/site-packages/ray/util/tracing/tracing_helper.py", line 465, in _resume_span
TypeError: 'NoneType' object is not callable
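To rule out the data itself, I also checked whether the batches logged by the PG run actually contain a new_obs field. A quick inspection sketch (the *.json glob just picks up whatever the output writer produced in /tmp/cartpole-out):

import glob
import json

# Read the first JSON-encoded SampleBatch from the offline output
# and list its keys; "new_obs" should be among them.
path = sorted(glob.glob("/tmp/cartpole-out/*.json"))[0]
with open(path) as f:
    batch = json.loads(f.readline())
print(sorted(batch.keys()))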
Create training data:
--------------------------
rllib train \
--run=PG \
--env=CartPole-v0 \
--config='{"output": "/tmp/cartpole-out", "output_max_file_size": 5000000}' \
--stop='{"timesteps_total": 100000}'
Create evaluation data:
---------------------------------
rllib train \
--run=PG \
--env=CartPole-v0 \
--config='{"output": "/tmp/cartpole-eval", "output_max_file_size": 5000000}' \
--stop='{"timesteps_total": 10000}'
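Both runs completed and wrote JSON output; I sanity-checked that the directories are populated before pointing the config at them:

import os

# Confirm the offline datasets exist where the config expects them.
print(os.listdir("/tmp/cartpole-out"))
print(os.listdir("/tmp/cartpole-eval"))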
Sample code:
-------------------
from ray.rllib.algorithms.dqn import DQNConfig
from ray.rllib.offline.estimators import (
ImportanceSampling,
WeightedImportanceSampling,
DirectMethod,
DoublyRobust,
)
from ray.rllib.offline.estimators.fqe_torch_model import FQETorchModel
# DQN is trained on the PG-generated offline data; the evaluation
# workers read the separate eval dataset and run four OPE estimators.
config = (
    DQNConfig()
    .environment(env="CartPole-v0")
    .framework("torch")
    .offline_data(input_="/tmp/cartpole-out")
    .evaluation(
        evaluation_interval=1,
        evaluation_duration=10,
        evaluation_num_workers=1,
        evaluation_duration_unit="episodes",
        evaluation_config={"input": "/tmp/cartpole-eval"},
        off_policy_estimation_methods={
            "is": {"type": ImportanceSampling},
            "wis": {"type": WeightedImportanceSampling},
            "dm_fqe": {
                "type": DirectMethod,
                # Removing "tau" here avoids the first TypeError above.
                "q_model_config": {"type": FQETorchModel, "tau": 0.05},
            },
            "dr_fqe": {
                "type": DoublyRobust,
                "q_model_config": {"type": FQETorchModel, "tau": 0.05},
            },
        },
    )
)

algo = config.build()  # raises the TypeError about "tau" posted above
for _ in range(100):
    algo.train()  # with "tau" removed, this raises KeyError: 'new_obs'
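Once build() and train() succeed, my plan was to read the OPE results from the returned metrics, roughly like this (the exact nesting of the result keys is my guess from the docs, not verified since training never gets that far):

results = algo.train()
# OPE estimates should appear under the evaluation metrics,
# keyed by the estimator names ("is", "wis", "dm_fqe", "dr_fqe").
print(results["evaluation"]["off_policy_estimator"])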
What am I doing wrong?
Thanks,
Stefan