### What happened + What you expected to happen
I want to train a PPO agent in my custom environment called RankingEnv, but I'm encountering several errors and warnings that cause the trial to fail. The main errors and warnings are:
```
observation = OrderedDict(sorted(observation.items()))
AttributeError: 'NoneType' object has no attribute 'items'
```
```
WARNING deprecation.py:50 -- DeprecationWarning: `ValueNetworkMixin` has been deprecated. This will raise an error in the future!
WARNING deprecation.py:50 -- DeprecationWarning: `LearningRateSchedule` has been deprecated. This will raise an error in the future!
WARNING deprecation.py:50 -- DeprecationWarning: `EntropyCoeffSchedule` has been deprecated. This will raise an error in the future!
WARNING deprecation.py:50 -- DeprecationWarning: `KLCoeffMixin` has been deprecated. This will raise an error in the future!
```
```
DeprecationWarning: `DirectStepOptimizer` has been deprecated. This will raise an error in the future!
```
```
WARNING algorithm_config.py:672 -- Cannot create PPOConfig from given `config_dict`! Property __stdout_file__ not supported.
```
### Custom Environment
```python
import gymnasium as gym
import numpy as np
from typing import Optional


class RankingEnv(gym.Env):

    def __init__(self, config: dict):
        super().__init__()
        self.coins = config['df']['tic'].unique()
        features_col = config['df'].columns.difference(['date', 'tic', 'score', 'close_growth(%)'])
        self.df = config['df'].groupby('date')
        self.dates = list(self.df.groups.keys())
        self.observation_space = gym.spaces.Dict({coin: gym.spaces.Box(low=-np.inf, high=np.inf, shape=(len(features_col),), dtype=np.float64) for coin in self.coins})
        self.action_space = gym.spaces.Dict({coin: gym.spaces.Box(low=np.float64(1.0), high=np.float64(len(self.coins)), shape=(1,), dtype=np.float64) for coin in self.coins})  # actions are the scores

    def step(self, action):
        group = self.df.get_group(self.dates[self.time])
        true_scores = group.set_index('tic')['score'].to_dict()
        # The keys are ordered by predicted score and the values are the true scores
        # (i.e. the true scores sorted by predicted score).
        scores = [true_scores[coin] for coin in sorted(action, key=action.get, reverse=True)]
        ideal_scores = sorted(true_scores.values(), reverse=True)  # the true scores as a sorted list
        dcg = self.calculate_dcg(scores)
        idcg = self.calculate_dcg(ideal_scores)
        reward = dcg / idcg  # reward = NDCG
        self.time += 1
        terminated = self.time >= len(self.dates)
        info = {}
        return self._get_obs() if not terminated else None, reward, terminated, False, info

    def calculate_dcg(self, scores):
        dcg = 0.0
        for i in range(len(scores)):
            dcg += (2 ** scores[i] - 1) / np.log2(i + 2)
        return dcg

    def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
        super().reset(seed=seed, options=options)
        self.time = 0
        info = {}
        return self._get_obs(), info

    def _get_obs(self):
        group = self.df.get_group(self.dates[self.time])
        obs = group.drop(['date', 'score', 'close_growth(%)'], axis=1)
        obs = obs.set_index('tic').agg(lambda x: np.array(x.tolist()), axis=1).to_dict()
        return obs

    def render(self):
        pass

    def close(self):
        pass
```
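Judging from the traceback at the end of this report, the failure happens when RLlib's `DictFlatteningPreprocessor` receives the observation produced by `step()`: on the terminal step the environment returns `None`, so `observation.items()` fails. A minimal sketch of a possible workaround (my assumption, not a verified fix) is to keep returning the last valid observation dict even when `terminated` is `True`:

```python
# Sketch only: assumes repeating the last date's observation on the terminal
# step is acceptable for this ranking task (replacement for RankingEnv.step).
def step(self, action):
    group = self.df.get_group(self.dates[self.time])
    true_scores = group.set_index('tic')['score'].to_dict()
    scores = [true_scores[coin] for coin in sorted(action, key=action.get, reverse=True)]
    reward = self.calculate_dcg(scores) / self.calculate_dcg(sorted(true_scores.values(), reverse=True))

    self.time += 1
    terminated = self.time >= len(self.dates)
    # Clamp the index so _get_obs() still returns a valid observation dict
    # (the last date's features) instead of None on the terminal step.
    self.time = min(self.time, len(self.dates) - 1)
    return self._get_obs(), reward, terminated, False, {}
```

This only repeats the final date's features; whether returning `None` as the terminal observation is supposed to be supported by RLlib is exactly what I am unsure about.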
### Agent
```python
class DRLlibv2:

    def __init__(
        self,
        trainable: str | Any,
        params: dict,
        train_env=None,
        run_name: str = "tune_run",
        local_dir: str = "tune_results",
        search_alg=None,
        concurrent_trials: int = 0,
        num_samples: int = 0,
        scheduler_=None,
        num_cpus: float | int = 2,
        dataframe_save: str = "tune.csv",
        metric: str = "episode_reward_mean",
        mode: str | list[str] = "max",
        max_failures: int = 0,
        training_iterations: int = 100,
        checkpoint_num_to_keep: None | int = None,
        checkpoint_freq: int = 0,
        reuse_actors: bool = True,
    ):
        self.params = params
        self.train_env = train_env
        self.run_name = run_name
        self.local_dir = local_dir
        self.search_alg = search_alg
        if concurrent_trials != 0:
            self.search_alg = ConcurrencyLimiter(
                self.search_alg, max_concurrent=concurrent_trials
            )
        self.scheduler_ = scheduler_
        self.num_samples = num_samples
        self.trainable = trainable
        if isinstance(self.trainable, str):
            self.trainable = self.trainable.upper()
        self.num_cpus = num_cpus
        self.dataframe_save = dataframe_save
        self.metric = metric
        self.mode = mode
        self.max_failures = max_failures
        self.training_iterations = training_iterations
        self.checkpoint_freq = checkpoint_freq
        self.checkpoint_num_to_keep = checkpoint_num_to_keep
        self.reuse_actors = reuse_actors

    def train_tune_model(self):
        if ray.is_initialized():
            ray.shutdown()
        ray.init(num_cpus=self.num_cpus, num_gpus=self.params['num_gpus'], ignore_reinit_error=True)
        if self.train_env is not None:
            register_env(self.params['env'], lambda env_config: self.train_env)
        tuner = tune.Tuner(
            self.trainable,
            param_space=self.params,
            tune_config=TuneConfig(
                search_alg=self.search_alg,
                scheduler=self.scheduler_,
                num_samples=self.num_samples,
                # metric=self.metric,
                # mode=self.mode,
                **({'metric': self.metric, 'mode': self.mode} if self.scheduler_ is None else {}),
                reuse_actors=self.reuse_actors,
            ),
            run_config=RunConfig(
                name=self.run_name,
                storage_path=self.local_dir,
                failure_config=FailureConfig(
                    max_failures=self.max_failures, fail_fast=False
                ),
                stop={"training_iteration": self.training_iterations},
                checkpoint_config=CheckpointConfig(
                    num_to_keep=self.checkpoint_num_to_keep,
                    checkpoint_score_attribute=self.metric,
                    checkpoint_score_order=self.mode,
                    checkpoint_frequency=self.checkpoint_freq,
                    checkpoint_at_end=True,
                ),
                verbose=3,  # verbosity: 0 = silent, 1 = default, 2 = verbose, 3 = detailed
            ),
        )
        self.results = tuner.fit()
        if self.search_alg is not None:
            self.search_alg.save_to_dir(self.local_dir)
        # ray.shutdown()
        return self.results

    def infer_results(self, to_dataframe: str = None, mode: str = "a"):
        results_df = self.results.get_dataframe()
        if to_dataframe is None:
            to_dataframe = self.dataframe_save
        results_df.to_csv(to_dataframe, mode=mode)
        best_result = self.results.get_best_result()
        # best_metric = best_result.metrics
        # best_checkpoint = best_result.checkpoint
        # best_trial_dir = best_result.log_dir
        return results_df, best_result

    def restore_agent(
        self,
        checkpoint_path: str = "",
        restore_search: bool = False,
        resume_unfinished: bool = True,
        resume_errored: bool = False,
        restart_errored: bool = False,
    ):
        # if restore_search:
        #     self.search_alg = self.search_alg.restore_from_dir(self.local_dir)
        if checkpoint_path == "":
            checkpoint_path = self.results.get_best_result().checkpoint._local_path
        restored_agent = tune.Tuner.restore(
            checkpoint_path,
            restart_errored=restart_errored,
            resume_unfinished=resume_unfinished,
            resume_errored=resume_errored,
        )
        print(restored_agent)
        self.results = restored_agent.fit()
        if self.search_alg is not None:
            self.search_alg.save_to_dir(self.local_dir)
        return self.results

    def get_test_agent(self, test_env_name: str, test_env=None, checkpoint=None):
        # if test_env is not None:
        #     register_env(test_env_name, lambda config: [test_env])
        if checkpoint is None:
            checkpoint = self.results.get_best_result().checkpoint
        testing_agent = Algorithm.from_checkpoint(checkpoint)
        # testing_agent.config['env'] = test_env_name
        return testing_agent
```
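The imports this class relies on are not shown in the snippet; for Ray 2.7 I would expect them to look roughly like this (the exact module paths are my assumption, adjust if your notebook imports them from elsewhere):

```python
# Imports assumed by the DRLlibv2 class above (Ray 2.7 module paths).
from typing import Any

import ray
from ray import tune
from ray.air.config import CheckpointConfig, FailureConfig, RunConfig
from ray.rllib.algorithms.algorithm import Algorithm
from ray.tune import TuneConfig
from ray.tune.registry import register_env
from ray.tune.search import ConcurrencyLimiter
```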
### Versions / Dependencies
- Operating system: Google Colab
- Python version: 3.10.12
- Ray version: 2.7.0
### Reproduction script
```python
train_env_config = {'df': train_data}

train_config = (
    PPOConfig()
    .training(
        lr=tune.loguniform(5e-5, 0.001),
        entropy_coeff=tune.loguniform(1e-8, 0.1),
        sgd_minibatch_size=tune.choice([32, 64, 128, 256, 512]),
        lambda_=tune.choice([0.1, 0.3, 0.5, 0.7, 0.9, 1.0]),
    )
    .resources(num_gpus=0)
    .debugging(log_level="DEBUG", seed=1234)
    .rollouts(num_rollout_workers=1)
    .framework("torch")
    .environment(env=RankingEnv, disable_env_checking=True, env_config=train_env_config)
)
train_config.model['fcnet_hiddens'] = [256, 256]

search_alg = OptunaSearch(metric="episode_reward_mean", mode="max")  # what if metric=step_reward??
# max_t: maximum budget per trial (training iterations, the default time_attr).
# grace_period: minimum number of iterations a trial runs before it can be stopped early.
scheduler_ = ASHAScheduler(metric="episode_reward_mean", mode="max", max_t=5, grace_period=1, reduction_factor=2)
# wandb_callback = WandbLoggerCallback(project="Ray Tune Trial Run", log_config=True, save_checkpoints=True)

drl_agent = DRLlibv2(
    trainable="PPO",
    # train_env=RankingEnv,
    run_name="PPO_TRAIN",
    local_dir="/content/PPO_TRAIN",
    params=train_config.to_dict(),
    num_samples=1,  # number of hyperparameter configurations to sample
    training_iterations=5,
    checkpoint_freq=5,
    # scheduler_=scheduler_,
    search_alg=search_alg,
    metric="episode_reward_mean",
    mode="max",
    # callbacks=[wandb_callback]
)

res = drl_agent.train_tune_model()
results_df, best_result = drl_agent.infer_results()
```
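To isolate the environment from Ray, a plain rollout loop like the one below should show the same behaviour: on the terminal step the observation comes back as `None`, which is the value RLlib's preprocessor later trips over. This is a hypothetical check I am sketching here, not part of the original run; it assumes `train_data` is the same DataFrame used above.

```python
# Hypothetical sanity check: step RankingEnv with random actions outside RLlib
# and inspect what the terminal step returns.
env = RankingEnv({'df': train_data})
obs, info = env.reset()
terminated = False
while not terminated:
    action = env.action_space.sample()  # random scores for each coin
    obs, reward, terminated, truncated, info = env.step(action)
print(type(obs), terminated)  # obs is None on the terminal step
```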
```
2023-09-22 12:20:05,276 INFO worker.py:1633 -- Started a local Ray instance. View the dashboard at 127.0.0.1:8265
2023-09-22 12:20:08,561 INFO tune.py:654 -- [output] This will use the new output engine with verbosity 2. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/ray/issues/36949
2023-09-22 12:20:08,640 WARNING deprecation.py:50 -- DeprecationWarning: `build_tf_policy` has been deprecated. This will raise an error in the future!
2023-09-22 12:20:08,650 WARNING deprecation.py:50 -- DeprecationWarning: `build_policy_class` has been deprecated. This will raise an error in the future!
2023-09-22 12:20:08,737 WARNING algorithm_config.py:2578 -- Setting `exploration_config={}` because you set `_enable_rl_module_api=True`. When RLModule API are enabled, exploration_config can not be set. If you want to implement custom exploration behaviour, please modify the `forward_exploration` method of the RLModule at hand. On configs that have a default exploration config, this must be done with `config.exploration_config={}`.
/usr/local/lib/python3.10/dist-packages/gymnasium/spaces/box.py:130: UserWarning: WARN: Box bound precision lowered by casting to float32
gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
/usr/local/lib/python3.10/dist-packages/gymnasium/utils/passive_env_checker.py:164: UserWarning: WARN: The obs returned by the `reset()` method was expecting numpy array dtype to be float32, actual type: float64
logger.warn(
/usr/local/lib/python3.10/dist-packages/gymnasium/utils/passive_env_checker.py:188: UserWarning: WARN: The obs returned by the `reset()` method is not within the observation space.
logger.warn(f"{pre} is not within the observation space.")
2023-09-22 12:20:08,838 WARNING algorithm_config.py:2578 -- Setting `exploration_config={}` because you set `_enable_rl_module_api=True`. When RLModule API are enabled, exploration_config can not be set. If you want to implement custom exploration behaviour, please modify the `forward_exploration` method of the RLModule at hand. On configs that have a default exploration config, this must be done with `config.exploration_config={}`.
[I 2023-09-22 12:20:08,896] A new study created in memory with name: optuna
2023-09-22 12:20:08,944 WARNING algorithm_config.py:2578 -- Setting `exploration_config={}` because you set `_enable_rl_module_api=True`. When RLModule API are enabled, exploration_config can not be set. If you want to implement custom exploration behaviour, please modify the `forward_exploration` method of the RLModule at hand. On configs that have a default exploration config, this must be done with `config.exploration_config={}`.
+--------------------------------------------------+
| Configuration for experiment PPO_TRAIN |
+--------------------------------------------------+
| Search algorithm SearchGenerator |
| Scheduler FIFOScheduler |
| Number of trials 1 |
+--------------------------------------------------+
View detailed results here: /content/PPO_TRAIN/PPO_TRAIN
To visualize your results with TensorBoard, run: `tensorboard --logdir /root/ray_results/PPO_TRAIN`
Trial status: 1 PENDING
Current time: 2023-09-22 12:20:09. Total running time: 0s
Logical resource usage: 0/2 CPUs, 0/0 GPUs
+------------------------------------------------------------------------------------------------------+
| Trial name status lr sgd_minibatch_size entropy_coeff lambda |
+------------------------------------------------------------------------------------------------------+
| PPO_RankingEnv_8689c516 PENDING 0.000166759 256 4.82692e-07 0.1 |
+------------------------------------------------------------------------------------------------------+
(pid=2755) /usr/local/lib/python3.10/dist-packages/tensorflow_probability/python/__init__.py:57: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.
(pid=2755) if (distutils.version.LooseVersion(tf.__version__) <
(pid=2755) DeprecationWarning: `DirectStepOptimizer` has been deprecated. This will raise an error in the future!
(pid=2755) /usr/local/lib/python3.10/dist-packages/google/rpc/__init__.py:20: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google.rpc')`.
(pid=2755) Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
(pid=2755) pkg_resources.declare_namespace(__name__)
(pid=2755) /usr/local/lib/python3.10/dist-packages/pkg_resources/__init__.py:2349: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google')`.
(pid=2755) Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
(pid=2755) declare_namespace(parent)
(PPO pid=2755) 2023-09-22 12:20:18,560 WARNING algorithm_config.py:2578 -- Setting `exploration_config={}` because you set `_enable_rl_module_api=True`. When RLModule API are enabled, exploration_config can not be set. If you want to implement custom exploration behaviour, please modify the `forward_exploration` method of the RLModule at hand. On configs that have a default exploration config, this must be done with `config.exploration_config={}`.
(PPO pid=2755) 2023-09-22 12:20:18,561 WARNING algorithm_config.py:672 -- Cannot create PPOConfig from given `config_dict`! Property __stdout_file__ not supported.
(pid=2819) /usr/local/lib/python3.10/dist-packages/tensorflow_probability/python/__init__.py:57: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.
(pid=2819) if (distutils.version.LooseVersion(tf.__version__) <
(pid=2819) DeprecationWarning: `DirectStepOptimizer` has been deprecated. This will raise an error in the future!
(pid=2819) /usr/local/lib/python3.10/dist-packages/google/rpc/__init__.py:20: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google.rpc')`.
(pid=2819) Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
(pid=2819) pkg_resources.declare_namespace(__name__)
(pid=2819) /usr/local/lib/python3.10/dist-packages/pkg_resources/__init__.py:2349: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google')`.
(pid=2819) Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
(pid=2819) declare_namespace(parent)
(RolloutWorker pid=2819) 2023-09-22 12:20:29,813 DEBUG rollout_worker.py:1761 -- Creating policy for default_policy
(RolloutWorker pid=2819) 2023-09-22 12:20:29,813 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float64)
(RolloutWorker pid=2819) 2023-09-22 12:20:29,814 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float64)
(RolloutWorker pid=2819) 2023-09-22 12:20:29,814 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float64)
(RolloutWorker pid=2819) 2023-09-22 12:20:29,815 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float64)
(RolloutWorker pid=2819) 2023-09-22 12:20:29,815 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float64)
(RolloutWorker pid=2819) 2023-09-22 12:20:29,815 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float64)
(RolloutWorker pid=2819) 2023-09-22 12:20:29,816 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float64)
(RolloutWorker pid=2819) 2023-09-22 12:20:29,816 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float64)
(RolloutWorker pid=2819) 2023-09-22 12:20:29,816 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float64)
(RolloutWorker pid=2819) 2023-09-22 12:20:29,816 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float64)
(RolloutWorker pid=2819) 2023-09-22 12:20:29,817 DEBUG catalog.py:789 -- Created preprocessor <ray.rllib.models.preprocessors.DictFlatteningPreprocessor object at 0x7e19968ff280>: Dict('AAVEUSDT': Box(-inf, inf, (25,), float64), 'AVAXUSDT': Box(-inf, inf, (25,), float64), 'BTCUSDT': Box(-inf, inf, (25,), float64), 'ETHUSDT': Box(-inf, inf, (25,), float64), 'LINKUSDT': Box(-inf, inf, (25,), float64), 'LTCUSDT': Box(-inf, inf, (25,), float64), 'MATICUSDT': Box(-inf, inf, (25,), float64), 'NEARUSDT': Box(-inf, inf, (25,), float64), 'SOLUSDT': Box(-inf, inf, (25,), float64), 'UNIUSDT': Box(-inf, inf, (25,), float64)) -> (250,)
(RolloutWorker pid=2819) 2023-09-22 12:20:29,820 WARNING algorithm_config.py:2578 -- Setting `exploration_config={}` because you set `_enable_rl_module_api=True`. When RLModule API are enabled, exploration_config can not be set. If you want to implement custom exploration behaviour, please modify the `forward_exploration` method of the RLModule at hand. On configs that have a default exploration config, this must be done with `config.exploration_config={}`.
(RolloutWorker pid=2819) 2023-09-22 12:20:29,885 INFO policy.py:1294 -- Policy (worker=1) running on CPU.
(RolloutWorker pid=2819) 2023-09-22 12:20:29,886 INFO torch_policy_v2.py:113 -- Found 0 visible cuda devices.
(RolloutWorker pid=2819) 2023-09-22 12:20:29,886 WARNING deprecation.py:50 -- DeprecationWarning: `ValueNetworkMixin` has been deprecated. This will raise an error in the future!
(RolloutWorker pid=2819) 2023-09-22 12:20:29,886 WARNING deprecation.py:50 -- DeprecationWarning: `LearningRateSchedule` has been deprecated. This will raise an error in the future!
(RolloutWorker pid=2819) 2023-09-22 12:20:29,886 WARNING deprecation.py:50 -- DeprecationWarning: `EntropyCoeffSchedule` has been deprecated. This will raise an error in the future!
(RolloutWorker pid=2819) 2023-09-22 12:20:29,886 WARNING deprecation.py:50 -- DeprecationWarning: `KLCoeffMixin` has been deprecated. This will raise an error in the future!
(RolloutWorker pid=2819) 2023-09-22 12:20:30,074 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float64)
(RolloutWorker pid=2819) 2023-09-22 12:20:30,075 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float64)
(RolloutWorker pid=2819) 2023-09-22 12:20:30,075 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float64)
(RolloutWorker pid=2819) 2023-09-22 12:20:30,075 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float64)
(RolloutWorker pid=2819) 2023-09-22 12:20:30,075 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float64)
(RolloutWorker pid=2819) 2023-09-22 12:20:30,075 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float64)
(RolloutWorker pid=2819) 2023-09-22 12:20:30,076 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float64)
(RolloutWorker pid=2819) 2023-09-22 12:20:30,076 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float64)
(RolloutWorker pid=2819) 2023-09-22 12:20:30,076 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float64)
(RolloutWorker pid=2819) 2023-09-22 12:20:30,076 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float64)
(RolloutWorker pid=2819) 2023-09-22 12:20:30,077 INFO util.py:118 -- Using connectors:
(RolloutWorker pid=2819) 2023-09-22 12:20:30,077 INFO util.py:119 -- AgentConnectorPipeline
(RolloutWorker pid=2819) ObsPreprocessorConnector
(RolloutWorker pid=2819) StateBufferConnector
(RolloutWorker pid=2819) ViewRequirementAgentConnector
(RolloutWorker pid=2819) 2023-09-22 12:20:30,078 INFO util.py:120 -- ActionConnectorPipeline
(RolloutWorker pid=2819) ConvertToNumpyConnector
(RolloutWorker pid=2819) NormalizeActionsConnector
(RolloutWorker pid=2819) ImmutableActionsConnector
(RolloutWorker pid=2819) 2023-09-22 12:20:30,078 DEBUG rollout_worker.py:645 -- Created rollout worker with env <ray.rllib.env.vector_env.VectorEnvWrapper object at 0x7e19969901f0> (<RankingEnv instance>), policies <PolicyMap lru-caching-capacity=100 policy-IDs=['default_policy']>
(PPO pid=2755) 2023-09-22 12:20:30,116 INFO worker_set.py:297 -- Inferred observation/action spaces from remote worker (local worker has no env): {'default_policy': (Dict('AAVEUSDT': Box(-inf, inf, (25,), float64), 'AVAXUSDT': Box(-inf, inf, (25,), float64), 'BTCUSDT': Box(-inf, inf, (25,), float64), 'ETHUSDT': Box(-inf, inf, (25,), float64), 'LINKUSDT': Box(-inf, inf, (25,), float64), 'LTCUSDT': Box(-inf, inf, (25,), float64), 'MATICUSDT': Box(-inf, inf, (25,), float64), 'NEARUSDT': Box(-inf, inf, (25,), float64), 'SOLUSDT': Box(-inf, inf, (25,), float64), 'UNIUSDT': Box(-inf, inf, (25,), float64)), Dict('AAVEUSDT': Box(1.0, 10.0, (1,), float64), 'AVAXUSDT': Box(1.0, 10.0, (1,), float64), 'BTCUSDT': Box(1.0, 10.0, (1,), float64), 'ETHUSDT': Box(1.0, 10.0, (1,), float64), 'LINKUSDT': Box(1.0, 10.0, (1,), float64), 'LTCUSDT': Box(1.0, 10.0, (1,), float64), 'MATICUSDT': Box(1.0, 10.0, (1,), float64), 'NEARUSDT': Box(1.0, 10.0, (1,), float64), 'SOLUSDT': Box(1.0, 10.0, (1,), float64), 'UNIUSDT': Box(1.0, 10.0, (1,), float64))), '__env__': (Dict('AAVEUSDT': Box(-inf, inf, (25,), float64), 'AVAXUSDT': Box(-inf, inf, (25,), float64), 'BTCUSDT': Box(-inf, inf, (25,), float64), 'ETHUSDT': Box(-inf, inf, (25,), float64), 'LINKUSDT': Box(-inf, inf, (25,), float64), 'LTCUSDT': Box(-inf, inf, (25,), float64), 'MATICUSDT': Box(-inf, inf, (25,), float64), 'NEARUSDT': Box(-inf, inf, (25,), float64), 'SOLUSDT': Box(-inf, inf, (25,), float64), 'UNIUSDT': Box(-inf, inf, (25,), float64)), Dict('AAVEUSDT': Box(1.0, 10.0, (1,), float64), 'AVAXUSDT': Box(1.0, 10.0, (1,), float64), 'BTCUSDT': Box(1.0, 10.0, (1,), float64), 'ETHUSDT': Box(1.0, 10.0, (1,), float64), 'LINKUSDT': Box(1.0, 10.0, (1,), float64), 'LTCUSDT': Box(1.0, 10.0, (1,), float64), 'MATICUSDT': Box(1.0, 10.0, (1,), float64), 'NEARUSDT': Box(1.0, 10.0, (1,), float64), 'SOLUSDT': Box(1.0, 10.0, (1,), float64), 'UNIUSDT': Box(1.0, 10.0, (1,), float64)))}
(PPO pid=2755) 2023-09-22 12:20:30,151 INFO policy.py:1294 -- Policy (worker=local) running on CPU.
(PPO pid=2755) 2023-09-22 12:20:30,173 INFO rollout_worker.py:1742 -- Built policy map: <PolicyMap lru-caching-capacity=100 policy-IDs=['default_policy']>
(PPO pid=2755) 2023-09-22 12:20:30,173 INFO rollout_worker.py:1743 -- Built preprocessor map: {'default_policy': None}
(PPO pid=2755) 2023-09-22 12:20:30,173 INFO rollout_worker.py:550 -- Built filter map: defaultdict(<class 'ray.rllib.utils.filter.NoFilter'>, {})
(PPO pid=2755) 2023-09-22 12:20:30,173 DEBUG rollout_worker.py:645 -- Created rollout worker with env None (None), policies <PolicyMap lru-caching-capacity=100 policy-IDs=['default_policy']>
Trial PPO_RankingEnv_8689c516 started with configuration:
+---------------------------------------------------------------------------+
| Trial PPO_RankingEnv_8689c516 config |
+---------------------------------------------------------------------------+
| _AlgorithmConfig__prior_exploration_config/type StochasticSampling |
| _disable_action_flattening False |
| _disable_execution_plan_api True |
| _disable_initialize_loss_from_dummy_batch False |
| _disable_preprocessor_api False |
| _enable_learner_api True |
| _enable_rl_module_api True |
| _fake_gpus False |
| _is_atari |
| _learner_class |
| _tf_policy_handles_more_than_one_loss False |
| action_mask_key action_mask |
| action_space |
| actions_in_input_normalized False |
| always_attach_evaluation_results False |
| auto_wrap_old_gym_envs True |
| batch_mode truncate_episodes |
| callbacks ...efaultCallbacks'> |
| checkpoint_trainable_policies_only False |
| clip_actions False |
| clip_param 0.3 |
| clip_rewards |
| compress_observations False |
| count_steps_by env_steps |
| create_env_on_driver False |
| custom_eval_function |
| delay_between_worker_restarts_s 60. |
| disable_env_checking True |
| eager_max_retraces 20 |
| eager_tracing True |
| enable_async_evaluation False |
| enable_connectors True |
| enable_tf1_exec_eagerly False |
| entropy_coeff 0. |
| entropy_coeff_schedule |
| env ...in__.RankingEnv'> |
| env_config/df ...ows x 29 columns] |
| env_runner_cls |
| env_task_fn |
| evaluation_config |
| evaluation_duration 10 |
| evaluation_duration_unit episodes |
| evaluation_interval |
| evaluation_num_workers 0 |
| evaluation_parallel_to_training False |
| evaluation_sample_timeout_s 180. |
| explore True |
| export_native_model_files False |
| fake_sampler False |
| framework torch |
| gamma 0.99 |
| grad_clip |
| grad_clip_by global_norm |
| ignore_worker_failures False |
| in_evaluation False |
| input sampler |
| keep_per_episode_custom_metrics False |
| kl_coeff 0.2 |
| kl_target 0.01 |
| lambda 0.1 |
| local_gpu_idx 0 |
| local_tf_session_args/inter_op_parallelism_threads 8 |
| local_tf_session_args/intra_op_parallelism_threads 8 |
| log_level DEBUG |
| log_sys_usage True |
| logger_config |
| logger_creator |
| lr 0.00017 |
| lr_schedule |
| max_num_worker_restarts 1000 |
| max_requests_in_flight_per_sampler_worker 2 |
| metrics_episode_collection_timeout_s 60. |
| metrics_num_episodes_for_smoothing 100 |
| min_sample_timesteps_per_iteration 0 |
| min_time_s_per_iteration |
| min_train_timesteps_per_iteration 0 |
| model/_disable_action_flattening False |
| model/_disable_preprocessor_api False |
| model/_time_major False |
| model/_use_default_native_models -1 |
| model/always_check_shapes False |
| model/attention_dim 64 |
| model/attention_head_dim 32 |
| model/attention_init_gru_gate_bias 2.0 |
| model/attention_memory_inference 50 |
| model/attention_memory_training 50 |
| model/attention_num_heads 1 |
| model/attention_num_transformer_units 1 |
| model/attention_position_wise_mlp_dim 32 |
| model/attention_use_n_prev_actions 0 |
| model/attention_use_n_prev_rewards 0 |
| model/conv_activation relu |
| model/conv_filters |
| model/custom_action_dist |
| model/custom_model |
| model/custom_preprocessor |
| model/dim 84 |
| model/encoder_latent_dim |
| model/fcnet_activation tanh |
| model/fcnet_hiddens [256, 256] |
| model/framestack True |
| model/free_log_std False |
| model/grayscale False |
| model/lstm_cell_size 256 |
| model/lstm_use_prev_action False |
| model/lstm_use_prev_action_reward -1 |
| model/lstm_use_prev_reward False |
| model/max_seq_len 20 |
| model/no_final_linear False |
| model/post_fcnet_activation relu |
| model/post_fcnet_hiddens [] |
| model/use_attention False |
| model/use_lstm False |
| model/vf_share_layers False |
| model/zero_mean True |
| normalize_actions True |
| num_consecutive_worker_failures_tolerance 100 |
| num_cpus_for_driver 1 |
| num_cpus_per_learner_worker 1 |
| num_cpus_per_worker 1 |
| num_envs_per_worker 1 |
| num_gpus 0 |
| num_gpus_per_learner_worker 0 |
| num_gpus_per_worker 0 |
| num_learner_workers 0 |
| num_sgd_iter 30 |
| num_workers 1 |
| observation_filter NoFilter |
| observation_fn |
| observation_space |
| offline_sampling False |
| ope_split_batch_by_episode True |
| output |
| output_compress_columns ['obs', 'new_obs'] |
| output_max_file_size 67108864 |
| placement_strategy PACK |
| policies/default_policy ...None, None, None) |
| policies_to_train |
| policy_map_cache -1 |
| policy_map_capacity 100 |
| policy_mapping_fn ...t 0x7ad1d496b910> |
| policy_states_are_swappable False |
| postprocess_inputs False |
| preprocessor_pref deepmind |
| recreate_failed_workers False |
| remote_env_batch_wait_ms 0 |
| remote_worker_envs False |
| render_env False |
| replay_sequence_length |
| restart_failed_sub_environments False |
| rl_module_spec |
| rollout_fragment_length auto |
| sample_async False |
| sample_collector ...leListCollector'> |
| sampler_perf_stats_ema_coef |
| seed 1234 |
| sgd_minibatch_size 256 |
| shuffle_buffer_size 0 |
| shuffle_sequences True |
| simple_optimizer -1 |
| sync_filters_on_rollout_workers_timeout_s 60. |
| synchronize_filters -1 |
| tf_session_args/allow_soft_placement True |
| tf_session_args/device_count/CPU 1 |
| tf_session_args/gpu_options/allow_growth True |
| tf_session_args/inter_op_parallelism_threads 2 |
| tf_session_args/intra_op_parallelism_threads 2 |
| tf_session_args/log_device_placement False |
| torch_compile_learner False |
| torch_compile_learner_dynamo_backend inductor |
| torch_compile_learner_dynamo_mode |
| torch_compile_learner_what_to_compile ...ile.FORWARD_TRAIN |
| torch_compile_worker False |
| torch_compile_worker_dynamo_backend onnxrt |
| torch_compile_worker_dynamo_mode |
| train_batch_size 4000 |
| update_worker_filter_stats True |
| use_critic True |
| use_gae True |
| use_kl_loss True |
| use_worker_filter_stats True |
| validate_workers_after_construction True |
| vf_clip_param 10. |
| vf_loss_coeff 1. |
| vf_share_layers -1 |
| worker_cls -1 |
| worker_health_probe_timeout_s 60 |
| worker_restore_timeout_s 1800 |
+---------------------------------------------------------------------------+
(PPO pid=2755) Trainable.setup took 11.358 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
(PPO pid=2755) Install gputil for GPU system monitoring.
(RolloutWorker pid=2819) 2023-09-22 12:20:30,361 INFO rollout_worker.py:690 -- Generating sample batch of size 4000
2023-09-22 12:20:30,496 ERROR tune_controller.py:1502 -- Trial task failed for trial PPO_RankingEnv_8689c516
Traceback (most recent call last):
File "/usr/local/lib/python3.10/dist-packages/ray/air/execution/_internal/event_manager.py", line 110, in resolve_future
result = ray.get(future)
File "/usr/local/lib/python3.10/dist-packages/ray/_private/auto_init_hook.py", line 24, in auto_init_wrapper
return fn(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/ray/_private/client_mode_hook.py", line 103, in wrapper
return func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/ray/_private/worker.py", line 2547, in get
raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(AttributeError): ray::PPO.train() (pid=2755, ip=172.28.0.12, actor_id=334c1673a75a460bc3f3ad2101000000, repr=PPO)
File "/usr/local/lib/python3.10/dist-packages/ray/tune/trainable/trainable.py", line 400, in train
raise skipped from exception_cause(skipped)
File "/usr/local/lib/python3.10/dist-packages/ray/tune/trainable/trainable.py", line 397, in train
result = self.step()
File "/usr/local/lib/python3.10/dist-packages/ray/rllib/algorithms/algorithm.py", line 853, in step
results, train_iter_ctx = self._run_one_training_iteration()
File "/usr/local/lib/python3.10/dist-packages/ray/rllib/algorithms/algorithm.py", line 2838, in _run_one_training_iteration
results = self.training_step()
File "/usr/local/lib/python3.10/dist-packages/ray/rllib/algorithms/ppo/ppo.py", line 429, in training_step
train_batch = synchronous_parallel_sample(
File "/usr/local/lib/python3.10/dist-packages/ray/rllib/execution/rollout_ops.py", line 85, in synchronous_parallel_sample
sample_batches = worker_set.foreach_worker(
File "/usr/local/lib/python3.10/dist-packages/ray/rllib/evaluation/worker_set.py", line 680, in foreach_worker
handle_remote_call_result_errors(remote_results, self._ignore_worker_failures)
File "/usr/local/lib/python3.10/dist-packages/ray/rllib/evaluation/worker_set.py", line 76, in handle_remote_call_result_errors
raise r.get()
ray.exceptions.RayTaskError(AttributeError): ray::RolloutWorker.apply() (pid=2819, ip=172.28.0.12, actor_id=fc63482e3750d98323a830f401000000, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7e19975be8c0>)
File "/usr/local/lib/python3.10/dist-packages/ray/rllib/utils/actor_manager.py", line 185, in apply
raise e
File "/usr/local/lib/python3.10/dist-packages/ray/rllib/utils/actor_manager.py", line 176, in apply
return func(self, *args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/ray/rllib/execution/rollout_ops.py", line 86, in <lambda>
lambda w: w.sample(), local_worker=False, healthy_only=True
File "/usr/local/lib/python3.10/dist-packages/ray/rllib/evaluation/rollout_worker.py", line 696, in sample
batches = [self.input_reader.next()]
File "/usr/local/lib/python3.10/dist-packages/ray/rllib/evaluation/sampler.py", line 92, in next
batches = [self.get_data()]
File "/usr/local/lib/python3.10/dist-packages/ray/rllib/evaluation/sampler.py", line 277, in get_data
item = next(self._env_runner)
File "/usr/local/lib/python3.10/dist-packages/ray/rllib/evaluation/env_runner_v2.py", line 344, in run
outputs = self.step()
File "/usr/local/lib/python3.10/dist-packages/ray/rllib/evaluation/env_runner_v2.py", line 370, in step
active_envs, to_eval, outputs = self._process_observations(
File "/usr/local/lib/python3.10/dist-packages/ray/rllib/evaluation/env_runner_v2.py", line 637, in _process_observations
processed = policy.agent_connectors(acd_list)
File "/usr/local/lib/python3.10/dist-packages/ray/rllib/connectors/agent/pipeline.py", line 41, in __call__
ret = c(ret)
File "/usr/local/lib/python3.10/dist-packages/ray/rllib/connectors/connector.py", line 265, in __call__
return [self.transform(d) for d in acd_list]
File "/usr/local/lib/python3.10/dist-packages/ray/rllib/connectors/connector.py", line 265, in <listcomp>
return [self.transform(d) for d in acd_list]
File "/usr/local/lib/python3.10/dist-packages/ray/rllib/connectors/agent/obs_preproc.py", line 58, in transform
d[SampleBatch.NEXT_OBS] = self._preprocessor.transform(
File "/usr/local/lib/python3.10/dist-packages/ray/rllib/models/preprocessors.py", line 319, in transform
self.write(observation, array, 0)
File "/usr/local/lib/python3.10/dist-packages/ray/rllib/models/preprocessors.py", line 325, in write
observation = OrderedDict(sorted(observation.items()))
AttributeError: 'NoneType' object has no attribute 'items'
(PPO pid=2755) 2023-09-22 12:20:30,490 ERROR actor_manager.py:500 -- Ray error, taking actor 1 out of service. ray::RolloutWorker.apply() (pid=2819, ip=172.28.0.12, actor_id=fc63482e3750d98323a830f401000000, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7e19975be8c0>)
(PPO pid=2755) File "/usr/local/lib/python3.10/dist-packages/ray/rllib/utils/actor_manager.py", line 185, in apply
(PPO pid=2755) raise e
(PPO pid=2755) File "/usr/local/lib/python3.10/dist-packages/ray/rllib/utils/actor_manager.py", line 176, in apply
(PPO pid=2755) return func(self, *args, **kwargs)
(PPO pid=2755) File "/usr/local/lib/python3.10/dist-packages/ray/rllib/execution/rollout_ops.py", line 86, in <lambda>
(PPO pid=2755) lambda w: w.sample(), local_worker=False, healthy_only=True
(PPO pid=2755) File "/usr/local/lib/python3.10/dist-packages/ray/rllib/evaluation/rollout_worker.py", line 696, in sample
(PPO pid=2755) batches = [self.input_reader.next()]
(PPO pid=2755) File "/usr/local/lib/python3.10/dist-packages/ray/rllib/evaluation/sampler.py", line 92, in next
(PPO pid=2755) batches = [self.get_data()]
(PPO pid=2755) File "/usr/local/lib/python3.10/dist-packages/ray/rllib/evaluation/sampler.py", line 277, in get_data
(PPO pid=2755) item = next(self._env_runner)
(PPO pid=2755) File "/usr/local/lib/python3.10/dist-packages/ray/rllib/evaluation/env_runner_v2.py", line 344, in run
(PPO pid=2755) outputs = self.step()
(PPO pid=2755) File "/usr/local/lib/python3.10/dist-packages/ray/rllib/evaluation/env_runner_v2.py", line 370, in step
(PPO pid=2755) active_envs, to_eval, outputs = self._process_observations(
(PPO pid=2755) File "/usr/local/lib/python3.10/dist-packages/ray/rllib/evaluation/env_runner_v2.py", line 637, in _process_observations
(PPO pid=2755) processed = policy.agent_connectors(acd_list)
(PPO pid=2755) File "/usr/local/lib/python3.10/dist-packages/ray/rllib/connectors/agent/pipeline.py", line 41, in __call__
(PPO pid=2755) ret = c(ret)
(PPO pid=2755) File "/usr/local/lib/python3.10/dist-packages/ray/rllib/connectors/connector.py", line 265, in __call__
(PPO pid=2755) return [self.transform(d) for d in acd_list]
(PPO pid=2755) File "/usr/local/lib/python3.10/dist-packages/ray/rllib/connectors/connector.py", line 265, in <listcomp>
(PPO pid=2755) return [self.transform(d) for d in acd_list]
(PPO pid=2755) File "/usr/local/lib/python3.10/dist-packages/ray/rllib/connectors/agent/obs_preproc.py", line 58, in transform
(PPO pid=2755) d[SampleBatch.NEXT_OBS] = self._preprocessor.transform(
(PPO pid=2755) File "/usr/local/lib/python3.10/dist-packages/ray/rllib/models/preprocessors.py", line 319, in transform
(PPO pid=2755) self.write(observation, array, 0)
(PPO pid=2755) File "/usr/local/lib/python3.10/dist-packages/ray/rllib/models/preprocessors.py", line 325, in write
(PPO pid=2755) observation = OrderedDict(sorted(observation.items()))
(PPO pid=2755) AttributeError: 'NoneType' object has no attribute 'items'
2023-09-22 12:20:30,568 WARNING experiment_state.py:371 -- Experiment checkpoint syncing has been triggered multiple times in the last 30.0 seconds. A sync will be triggered whenever a trial has checkpointed more than `num_to_keep` times since last sync or if 300 seconds have passed since last sync. If you have set `num_to_keep` in your `CheckpointConfig`, consider increasing the checkpoint frequency or keeping more checkpoints. You can supress this warning by changing the `TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S` environment variable.
Trial PPO_RankingEnv_8689c516 errored after 0 iterations at 2023-09-22 12:20:30. Total running time: 21s
Error file: /root/ray_results/PPO_TRAIN/PPO_RankingEnv_8689c516_1_type=StochasticSampling,disable_action_flattening=False,disable_execution_plan_api=True,disable_initiali_2023-09-22_12-20-08/error.txt
Trial status: 1 ERROR
Current time: 2023-09-22 12:20:30. Total running time: 21s
Logical resource usage: 2.0/2 CPUs, 0/0 GPUs
+------------------------------------------------------------------------------------------------------+
| Trial name status lr sgd_minibatch_size entropy_coeff lambda |
+------------------------------------------------------------------------------------------------------+
| PPO_RankingEnv_8689c516 ERROR 0.000166759 256 4.82692e-07 0.1 |
+------------------------------------------------------------------------------------------------------+
Number of errored trials: 1
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Trial name # failures error file |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| PPO_RankingEnv_8689c516 1 /root/ray_results/PPO_TRAIN/PPO_RankingEnv_8689c516_1_type=StochasticSampling,disable_action_flattening=False,disable_execution_plan_api=True,disable_initiali_2023-09-22_12-20-08/error.txt |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
2023-09-22 12:20:30,939 ERROR tune.py:1139 -- Trials did not complete: [PPO_RankingEnv_8689c516]
```
### Issue Severity
High: It blocks me from completing my task.