I tried to pass my custom environment following your example, but I am getting many warnings that eventually cause the agent to die.
import gymnasium as gym
import numpy as np
from typing import Optional


class RankingEnv(gym.Env):
    def __init__(self, config: dict):
        super().__init__()
        self.coins = config['df']['tic'].unique()
        features_col = config['df'].columns.difference(['date', 'tic', 'score', 'close_growth(%)'])
        # self.time = 0
        self.df = config['df'].groupby('date')
        self.dates = list(self.df.groups.keys())
        # group = self.df.get_group(self.dates[self.time])
        # self.coins = group['tic'].unique()
        # features_col = group.columns.difference(['tic', 'score', 'close_growth(%)'])
        self.observation_space = gym.spaces.Dict({coin: gym.spaces.Box(low=-np.inf, high=np.inf, shape=(len(features_col),), dtype=np.float32) for coin in self.coins})
        # The actions are the predicted scores.
        self.action_space = gym.spaces.Dict({coin: gym.spaces.Box(low=np.float32(1.0), high=np.float32(len(self.coins)), shape=(1,), dtype=np.float32) for coin in self.coins})
        # self.action_space = gym.spaces.Dict({coin: gym.spaces.Discrete(len(self.coins), start=1) for coin in self.coins})

    def step(self, action):
        group = self.df.get_group(self.dates[self.time])
        # self.mask = {coin: coin in group['tic'].unique() for coin in self.coins}  # masking method!
        true_scores = group.set_index('tic')['score'].to_dict()
        # Sort the true scores by the predicted scores: the order follows the
        # predicted ranking, the values are the true scores.
        scores = [true_scores[coin] for coin in sorted(action, key=action.get, reverse=True)]
        ideal_scores = sorted(true_scores.values(), reverse=True)  # the true scores as a list
        dcg = self.calculate_dcg(scores)
        idcg = self.calculate_dcg(ideal_scores)
        reward = dcg / idcg  # reward = NDCG
        self.time += 1
        terminated = self.time >= len(self.dates)
        info = {}
        return self._get_obs() if not terminated else None, reward, terminated, False, info

    def calculate_dcg(self, scores):
        dcg = 0.0
        for i in range(len(scores)):
            dcg += (2 ** scores[i] - 1) / np.log2(i + 2)
        return dcg

    def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
        super().reset(seed=seed, options=options)
        self.time = 0
        info = {}
        return self._get_obs(), info

    def _get_obs(self):
        group = self.df.get_group(self.dates[self.time])
        obs = group.drop(['date', 'score', 'close_growth(%)'], axis=1)
        obs = obs.set_index('tic').agg(list, axis=1).to_dict()
        return obs

    def render(self):
        pass

    def close(self):
        pass
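
The dtype warnings in the log below (obs expected to be float32 but float64, obs not within the observation space) most likely come from `_get_obs`: `.agg(list, axis=1)` returns plain Python lists of float64 values, while the `Box` spaces declare `dtype=np.float32`. A minimal sketch of a cast that should satisfy the space check (my assumption, not yet verified against RLlib):

def _get_obs(self):  # drop-in replacement inside RankingEnv, assuming the same columns as above
    group = self.df.get_group(self.dates[self.time])
    obs = group.drop(['date', 'score', 'close_growth(%)'], axis=1).set_index('tic')
    # Cast each row to a float32 numpy array so it matches the Box dtype.
    return {tic: row.to_numpy(dtype=np.float32) for tic, row in obs.iterrows()}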
# @Author: Astarag Mohapatra
from __future__ import annotations
import ray
assert (
    ray.__version__ > "2.0.0"
), "Please install ray 2.2.0 via 'pip install ray[rllib] ray[tune] lz4'; lz4 is needed for population-based tuning"
from pprint import pprint
from ray import tune
from ray.tune.search import ConcurrencyLimiter
from ray.rllib.algorithms import Algorithm
from ray.tune import register_env
from ray.air import RunConfig, FailureConfig, ScalingConfig
from ray.tune.tune_config import TuneConfig
from ray.air.config import CheckpointConfig
import psutil
psutil_memory_in_bytes = psutil.virtual_memory().total
ray._private.utils.get_system_memory = lambda: psutil_memory_in_bytes
from typing import Dict, Optional, Any, List, Union
class DRLlibv2:  # TODO: migrate everything to gymnasium
    """
    Instantiates an RLlib model with Ray Tune functionality.

    Params
    -------------------------------------
    trainable:
        Any trainable class that takes a config as parameter
    train_env:
        Training environment instance
    train_env_name: str
        Name of the training environment
    params: dict
        Hyperparameter dictionary
    run_name: str
        Tune run name
    framework: str
        "torch" or "tf" for TensorFlow
    local_dir: str
        Directory to save the results and TensorBoard plots
    num_workers: int
        Number of workers
    search_alg:
        Search algorithm for hyperparameters
    concurrent_trials: int
        Number of concurrent hyperparameter trials to run
    num_samples: int
        Number of hyperparameter configs to sample
    scheduler:
        Scheduler for stopping suboptimal trials
    log_level: str = "WARN"
        Verbosity: "DEBUG" for detailed output
    num_gpus: Union[float, int] = 0
        GPUs per trial
    num_cpus: Union[float, int] = 2
        CPUs for rollout collection
    dataframe_save: str
        Path for saving the Tune results
    metric: str
        Metric for hyperparameter optimization in Bayesian methods
    mode: str
        Whether to maximize or minimize the metric
    max_failures: int
        Number of failures tolerated before raising TuneError
    training_iterations: int
        Number of times session.report() is called
    checkpoint_num_to_keep: int
        Number of checkpoints to keep
    checkpoint_freq: int
        Checkpoint frequency in training iterations
    reuse_actors: bool
        Whether to reuse actors between trials

    Methods
    -------------------------------------
    train_tune_model: takes the params dictionary and fits our trainable class in sklearn style
    restore_agent: restores previously errored or stopped trials or experiments
    infer_results: returns the results dataframe and trial information
    get_test_agent: returns the testing agent for inference

    Example
    ---------------------------------------
    def sample_ppo_params():
        return {
            "entropy_coeff": tune.loguniform(0.00000001, 0.1),
            "lr": tune.loguniform(5e-5, 0.001),
            "sgd_minibatch_size": tune.choice([32, 64, 128, 256, 512]),
            "lambda": tune.choice([0.1, 0.3, 0.5, 0.7, 0.9, 1.0]),
        }

    optuna_search = OptunaSearch(
        metric="episode_reward_mean",
        mode="max")

    drl_agent = DRLlibv2(
        trainable="PPO",
        train_env=env(train_env_config),
        train_env_name="StockTrading_train",
        framework="torch",
        num_workers=1,
        log_level="DEBUG",
        run_name='test',
        local_dir="test",
        params=sample_ppo_params(),
        num_samples=1,
        num_gpus=1,
        training_iterations=10,
        search_alg=optuna_search,
        checkpoint_freq=5
    )

    # Tune or train the model
    res = drl_agent.train_tune_model()

    # Get the tune results
    results_df, best_result = drl_agent.infer_results()

    # Get the best testing agent
    test_agent = drl_agent.get_test_agent('StockTrading_testenv', test_env_instance)
    """
    def __init__(
        self,
        trainable: str | Any,
        params: dict,
        train_env=None,  # env creator callable; referenced below but missing from the original signature
        run_name: str = "tune_run",
        local_dir: str = "tune_results",
        search_alg=None,
        concurrent_trials: int = 0,
        num_samples: int = 0,
        scheduler_=None,
        num_cpus: float | int = 2,
        dataframe_save: str = "tune.csv",
        metric: str = "episode_reward_mean",
        mode: str | list[str] = "max",
        max_failures: int = 0,
        training_iterations: int = 100,
        checkpoint_num_to_keep: None | int = None,
        checkpoint_freq: int = 0,
        reuse_actors: bool = False,
    ):
        self.params = params
        self.train_env = train_env
        self.run_name = run_name
        self.local_dir = local_dir
        self.search_alg = search_alg
        if concurrent_trials != 0:
            self.search_alg = ConcurrencyLimiter(
                self.search_alg, max_concurrent=concurrent_trials
            )
        self.scheduler_ = scheduler_
        self.num_samples = num_samples
        self.trainable = trainable
        if isinstance(self.trainable, str):
            self.trainable = self.trainable.upper()
        self.num_cpus = num_cpus
        self.dataframe_save = dataframe_save
        self.metric = metric
        self.mode = mode
        self.max_failures = max_failures
        self.training_iterations = training_iterations
        self.checkpoint_freq = checkpoint_freq
        self.checkpoint_num_to_keep = checkpoint_num_to_keep
        self.reuse_actors = reuse_actors
    def train_tune_model(self):
        if ray.is_initialized():
            ray.shutdown()
        ray.init(num_cpus=self.num_cpus, num_gpus=self.params['num_gpus'], ignore_reinit_error=True)
        if self.train_env is not None:
            register_env(self.params['env'], self.train_env)
        tuner = tune.Tuner(
            self.trainable,
            param_space=self.params,
            tune_config=TuneConfig(
                search_alg=self.search_alg,
                scheduler=self.scheduler_,
                num_samples=self.num_samples,
                # metric/mode may only be set here when the scheduler does not define them
                **({'metric': self.metric, 'mode': self.mode} if self.scheduler_ is None else {}),
                reuse_actors=self.reuse_actors,
            ),
            run_config=RunConfig(
                name=self.run_name,
                storage_path=self.local_dir,
                failure_config=FailureConfig(
                    max_failures=self.max_failures, fail_fast=False
                ),
                stop={"training_iteration": self.training_iterations},
                checkpoint_config=CheckpointConfig(
                    num_to_keep=self.checkpoint_num_to_keep,
                    checkpoint_score_attribute=self.metric,
                    checkpoint_score_order=self.mode,
                    checkpoint_frequency=self.checkpoint_freq,
                    checkpoint_at_end=True,
                ),
                verbose=3,  # 0 = silent, 1 = default, 2 = verbose, 3 = detailed
            ),
        )
        self.results = tuner.fit()
        if self.search_alg is not None:
            self.search_alg.save_to_dir(self.local_dir)
        # ray.shutdown()
        return self.results
    def infer_results(self, to_dataframe: Optional[str] = None, mode: str = "a"):
        results_df = self.results.get_dataframe()
        if to_dataframe is None:
            to_dataframe = self.dataframe_save
        results_df.to_csv(to_dataframe, mode=mode)
        best_result = self.results.get_best_result()
        return results_df, best_result
    def restore_agent(
        self,
        checkpoint_path: str = "",
        restore_search: bool = False,
        resume_unfinished: bool = True,
        resume_errored: bool = False,
        restart_errored: bool = False,
    ):
        # if restore_search:
        #     self.search_alg = self.search_alg.restore_from_dir(self.local_dir)
        if checkpoint_path == "":
            checkpoint_path = self.results.get_best_result().checkpoint._local_path
        restored_agent = tune.Tuner.restore(
            checkpoint_path,
            restart_errored=restart_errored,
            resume_unfinished=resume_unfinished,
            resume_errored=resume_errored,
        )
        print(restored_agent)
        self.results = restored_agent.fit()
        if self.search_alg is not None:
            self.search_alg.save_to_dir(self.local_dir)
        return self.results
    def get_test_agent(self, test_env_name: str, test_env=None, checkpoint=None):
        if checkpoint is None:
            checkpoint = self.results.get_best_result().checkpoint
        testing_agent = Algorithm.from_checkpoint(checkpoint)
        # testing_agent.config['env'] = test_env_name
        return testing_agent
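
For reference, train_tune_model only registers the environment when a train_env creator is supplied; the expected wiring, as far as I understand it, is a creator callable registered under the name stored in params['env'] (a minimal sketch, names illustrative):

from ray.tune import register_env

# Register an env creator; RLlib then resolves the string name in params['env'].
register_env("RankingEnv-v0", lambda env_config: RankingEnv(env_config))
# .environment(env="RankingEnv-v0", env_config=train_env_config) would then
# reference the registered name instead of passing the class directly.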
Reproduction script (fails with "Cannot create PPOConfig from given config_dict! Property __stdout_file__ not supported."):
from ray import tune
from ray.rllib.algorithms.ppo import PPOConfig
from ray.tune.search.optuna import OptunaSearch
from ray.tune.schedulers import ASHAScheduler

train_env_config = {'df': train_data}
train_config = (
    PPOConfig()
    .training(
        lr=tune.loguniform(5e-5, 0.001),
        entropy_coeff=tune.loguniform(0.00000001, 0.1),
        sgd_minibatch_size=tune.choice([32, 64, 128, 256, 512]),
        lambda_=tune.choice([0.1, 0.3, 0.5, 0.7, 0.9, 1.0]),
    )
    .resources(num_gpus=0)
    .debugging(log_level="DEBUG", seed=1234)
    .rollouts(num_rollout_workers=1)
    .framework("torch")
    .environment(env=RankingEnv, disable_env_checking=True, env_config=train_env_config)
)
train_config.model['fcnet_hiddens'] = [256, 256]

search_alg = OptunaSearch(metric="episode_reward_mean", mode="max")  # what if metric=step_reward??
# max_t: maximum number of training iterations per trial; grace_period: minimum
# number of iterations a trial runs before it can be stopped early.
scheduler_ = ASHAScheduler(metric="episode_reward_mean", mode="max", max_t=5, grace_period=1, reduction_factor=2)
# wandb_callback = WandbLoggerCallback(project="Ray Tune Trial Run", log_config=True, save_checkpoints=True)

drl_agent = DRLlibv2(
    trainable="PPO",
    # train_env=lambda env_config: RankingEnv(env_config),
    run_name="PPO_TRAIN",
    local_dir="/content/PPO_TRAIN",
    params=train_config.to_dict(),
    num_samples=1,  # number of hyperparameter configs to sample
    training_iterations=5,
    checkpoint_freq=5,
    # scheduler_=scheduler_,
    search_alg=search_alg,
    metric="episode_reward_mean",
    mode="max",
    # callbacks=[wandb_callback]
)
res = drl_agent.train_tune_model()
results_df, best_result = drl_agent.infer_results()
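
To reproduce the gymnasium warnings without RLlib in the loop (assuming train_data is defined as above), the standalone environment checker can be run first; this is only a local sanity check, not part of the original script:

from gymnasium.utils.env_checker import check_env

env = RankingEnv({'df': train_data})
# Warns on the same dtype and observation-space mismatches the passive checker reports.
check_env(env, skip_render_check=True)

The full log output from the run follows.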
2023-09-21 12:47:58,847 WARNING deprecation.py:50 -- DeprecationWarning: `build_tf_policy` has been deprecated. This will raise an error in the future!
2023-09-21 12:47:58,859 WARNING deprecation.py:50 -- DeprecationWarning: `build_policy_class` has been deprecated. This will raise an error in the future!
2023-09-21 12:47:58,942 WARNING algorithm_config.py:2578 -- Setting `exploration_config={}` because you set `_enable_rl_module_api=True`. When RLModule API are enabled, exploration_config can not be set. If you want to implement custom exploration behaviour, please modify the `forward_exploration` method of the RLModule at hand. On configs that have a default exploration config, this must be done with `config.exploration_config={}`.
/usr/local/lib/python3.10/dist-packages/gymnasium/spaces/box.py:130: UserWarning: WARN: Box bound precision lowered by casting to float32
gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
/usr/local/lib/python3.10/dist-packages/gymnasium/utils/passive_env_checker.py:164: UserWarning: WARN: The obs returned by the `reset()` method was expecting numpy array dtype to be float32, actual type: float64
logger.warn(
/usr/local/lib/python3.10/dist-packages/gymnasium/utils/passive_env_checker.py:188: UserWarning: WARN: The obs returned by the `reset()` method is not within the observation space.
logger.warn(f"{pre} is not within the observation space.")
2023-09-21 12:47:59,044 WARNING algorithm_config.py:2578 -- Setting `exploration_config={}` because you set `_enable_rl_module_api=True`. When RLModule API are enabled, exploration_config can not be set. If you want to implement custom exploration behaviour, please modify the `forward_exploration` method of the RLModule at hand. On configs that have a default exploration config, this must be done with `config.exploration_config={}`.
[I 2023-09-21 12:47:59,114] A new study created in memory with name: optuna
2023-09-21 12:47:59,152 WARNING algorithm_config.py:2578 -- Setting `exploration_config={}` because you set `_enable_rl_module_api=True`. When RLModule API are enabled, exploration_config can not be set. If you want to implement custom exploration behaviour, please modify the `forward_exploration` method of the RLModule at hand. On configs that have a default exploration config, this must be done with `config.exploration_config={}`.
(pid=1235) /usr/local/lib/python3.10/dist-packages/tensorflow_probability/python/__init__.py:57: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.
(pid=1235) if (distutils.version.LooseVersion(tf.__version__) <
(pid=1235) DeprecationWarning: `DirectStepOptimizer` has been deprecated. This will raise an error in the future!
(pid=1235) /usr/local/lib/python3.10/dist-packages/google/rpc/__init__.py:20: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google.rpc')`.
(pid=1235) Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
(pid=1235) pkg_resources.declare_namespace(__name__)
(pid=1235) /usr/local/lib/python3.10/dist-packages/pkg_resources/__init__.py:2349: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google')`.
(pid=1235) Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
(pid=1235) declare_namespace(parent)
(PPO pid=1235) 2023-09-21 12:48:15,338 WARNING algorithm_config.py:2578 -- Setting `exploration_config={}` because you set `_enable_rl_module_api=True`. When RLModule API are enabled, exploration_config can not be set. If you want to implement custom exploration behaviour, please modify the `forward_exploration` method of the RLModule at hand. On configs that have a default exploration config, this must be done with `config.exploration_config={}`.
(PPO pid=1235) 2023-09-21 12:48:15,339 WARNING algorithm_config.py:672 -- Cannot create PPOConfig from given `config_dict`! Property __stdout_file__ not supported.
(pid=1329) /usr/local/lib/python3.10/dist-packages/tensorflow_probability/python/__init__.py:57: DeprecationWarning: distutils Version classes are deprecated. Use packaging.version instead.
(pid=1329) if (distutils.version.LooseVersion(tf.__version__) <
(pid=1329) DeprecationWarning: `DirectStepOptimizer` has been deprecated. This will raise an error in the future!
(pid=1329) /usr/local/lib/python3.10/dist-packages/google/rpc/__init__.py:20: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google.rpc')`.
(pid=1329) Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
(pid=1329) pkg_resources.declare_namespace(__name__)
(pid=1329) /usr/local/lib/python3.10/dist-packages/pkg_resources/__init__.py:2349: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google')`.
(pid=1329) Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
(pid=1329) declare_namespace(parent)
(RolloutWorker pid=1329) 2023-09-21 12:48:25,634 DEBUG rollout_worker.py:1761 -- Creating policy for default_policy
(RolloutWorker pid=1329) 2023-09-21 12:48:25,634 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float32)
(RolloutWorker pid=1329) 2023-09-21 12:48:25,635 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float32)
(RolloutWorker pid=1329) 2023-09-21 12:48:25,636 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float32)
(RolloutWorker pid=1329) 2023-09-21 12:48:25,636 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float32)
(RolloutWorker pid=1329) 2023-09-21 12:48:25,636 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float32)
(RolloutWorker pid=1329) 2023-09-21 12:48:25,638 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float32)
(RolloutWorker pid=1329) 2023-09-21 12:48:25,638 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float32)
(RolloutWorker pid=1329) 2023-09-21 12:48:25,640 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float32)
(RolloutWorker pid=1329) 2023-09-21 12:48:25,641 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float32)
(RolloutWorker pid=1329) 2023-09-21 12:48:25,641 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float32)
(RolloutWorker pid=1329) 2023-09-21 12:48:25,642 DEBUG catalog.py:789 -- Created preprocessor <ray.rllib.models.preprocessors.DictFlatteningPreprocessor object at 0x7b8028d17070>: Dict('AAVEUSDT': Box(-inf, inf, (25,), float32), 'AVAXUSDT': Box(-inf, inf, (25,), float32), 'BTCUSDT': Box(-inf, inf, (25,), float32), 'ETHUSDT': Box(-inf, inf, (25,), float32), 'LINKUSDT': Box(-inf, inf, (25,), float32), 'LTCUSDT': Box(-inf, inf, (25,), float32), 'MATICUSDT': Box(-inf, inf, (25,), float32), 'NEARUSDT': Box(-inf, inf, (25,), float32), 'SOLUSDT': Box(-inf, inf, (25,), float32), 'UNIUSDT': Box(-inf, inf, (25,), float32)) -> (250,)
(RolloutWorker pid=1329) 2023-09-21 12:48:25,648 WARNING algorithm_config.py:2578 -- Setting `exploration_config={}` because you set `_enable_rl_module_api=True`. When RLModule API are enabled, exploration_config can not be set. If you want to implement custom exploration behaviour, please modify the `forward_exploration` method of the RLModule at hand. On configs that have a default exploration config, this must be done with `config.exploration_config={}`.
(RolloutWorker pid=1329) 2023-09-21 12:48:25,744 INFO policy.py:1294 -- Policy (worker=1) running on CPU.
(RolloutWorker pid=1329) 2023-09-21 12:48:25,745 INFO torch_policy_v2.py:113 -- Found 0 visible cuda devices.
(RolloutWorker pid=1329) 2023-09-21 12:48:25,745 WARNING deprecation.py:50 -- DeprecationWarning: `ValueNetworkMixin` has been deprecated. This will raise an error in the future!
(RolloutWorker pid=1329) 2023-09-21 12:48:25,745 WARNING deprecation.py:50 -- DeprecationWarning: `LearningRateSchedule` has been deprecated. This will raise an error in the future!
(RolloutWorker pid=1329) 2023-09-21 12:48:25,745 WARNING deprecation.py:50 -- DeprecationWarning: `EntropyCoeffSchedule` has been deprecated. This will raise an error in the future!
(RolloutWorker pid=1329) 2023-09-21 12:48:25,745 WARNING deprecation.py:50 -- DeprecationWarning: `KLCoeffMixin` has been deprecated. This will raise an error in the future!
(RolloutWorker pid=1329) 2023-09-21 12:48:25,965 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float32)
(RolloutWorker pid=1329) 2023-09-21 12:48:25,966 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float32)
(RolloutWorker pid=1329) 2023-09-21 12:48:25,966 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float32)
(RolloutWorker pid=1329) 2023-09-21 12:48:25,966 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float32)
(RolloutWorker pid=1329) 2023-09-21 12:48:25,966 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float32)
(RolloutWorker pid=1329) 2023-09-21 12:48:25,967 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float32)
(RolloutWorker pid=1329) 2023-09-21 12:48:25,967 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float32)
(RolloutWorker pid=1329) 2023-09-21 12:48:25,967 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float32)
(RolloutWorker pid=1329) 2023-09-21 12:48:25,967 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float32)
(RolloutWorker pid=1329) 2023-09-21 12:48:25,968 DEBUG preprocessors.py:304 -- Creating sub-preprocessor for Box(-inf, inf, (25,), float32)
(PPO pid=1235) 2023-09-21 12:48:26,010 INFO worker_set.py:297 -- Inferred observation/action spaces from remote worker (local worker has no env): {'default_policy': (Dict('AAVEUSDT': Box(-inf, inf, (25,), float32), 'AVAXUSDT': Box(-inf, inf, (25,), float32), 'BTCUSDT': Box(-inf, inf, (25,), float32), 'ETHUSDT': Box(-inf, inf, (25,), float32), 'LINKUSDT': Box(-inf, inf, (25,), float32), 'LTCUSDT': Box(-inf, inf, (25,), float32), 'MATICUSDT': Box(-inf, inf, (25,), float32), 'NEARUSDT': Box(-inf, inf, (25,), float32), 'SOLUSDT': Box(-inf, inf, (25,), float32), 'UNIUSDT': Box(-inf, inf, (25,), float32)), Dict('AAVEUSDT': Box(1.0, 10.0, (1,), float32), 'AVAXUSDT': Box(1.0, 10.0, (1,), float32), 'BTCUSDT': Box(1.0, 10.0, (1,), float32), 'ETHUSDT': Box(1.0, 10.0, (1,), float32), 'LINKUSDT': Box(1.0, 10.0, (1,), float32), 'LTCUSDT': Box(1.0, 10.0, (1,), float32), 'MATICUSDT': Box(1.0, 10.0, (1,), float32), 'NEARUSDT': Box(1.0, 10.0, (1,), float32), 'SOLUSDT': Box(1.0, 10.0, (1,), float32), 'UNIUSDT': Box(1.0, 10.0, (1,), float32))), '__env__': (Dict('AAVEUSDT': Box(-inf, inf, (25,), float32), 'AVAXUSDT': Box(-inf, inf, (25,), float32), 'BTCUSDT': Box(-inf, inf, (25,), float32), 'ETHUSDT': Box(-inf, inf, (25,), float32), 'LINKUSDT': Box(-inf, inf, (25,), float32), 'LTCUSDT': Box(-inf, inf, (25,), float32), 'MATICUSDT': Box(-inf, inf, (25,), float32), 'NEARUSDT': Box(-inf, inf, (25,), float32), 'SOLUSDT': Box(-inf, inf, (25,), float32), 'UNIUSDT': Box(-inf, inf, (25,), float32)), Dict('AAVEUSDT': Box(1.0, 10.0, (1,), float32), 'AVAXUSDT': Box(1.0, 10.0, (1,), float32), 'BTCUSDT': Box(1.0, 10.0, (1,), float32), 'ETHUSDT': Box(1.0, 10.0, (1,), float32), 'LINKUSDT': Box(1.0, 10.0, (1,), float32), 'LTCUSDT': Box(1.0, 10.0, (1,), float32), 'MATICUSDT': Box(1.0, 10.0, (1,), float32), 'NEARUSDT': Box(1.0, 10.0, (1,), float32), 'SOLUSDT': Box(1.0, 10.0, (1,), float32), 'UNIUSDT': Box(1.0, 10.0, (1,), float32)))}
(PPO pid=1235) 2023-09-21 12:48:26,054 INFO policy.py:1294 -- Policy (worker=local) running on CPU.
(RolloutWorker pid=1329) 2023-09-21 12:48:25,981 INFO util.py:118 -- Using connectors:
(RolloutWorker pid=1329) 2023-09-21 12:48:25,981 INFO util.py:119 -- AgentConnectorPipeline
(RolloutWorker pid=1329) ObsPreprocessorConnector
(RolloutWorker pid=1329) StateBufferConnector
(RolloutWorker pid=1329) ViewRequirementAgentConnector
(RolloutWorker pid=1329) 2023-09-21 12:48:25,981 INFO util.py:120 -- ActionConnectorPipeline
(RolloutWorker pid=1329) ConvertToNumpyConnector
(RolloutWorker pid=1329) NormalizeActionsConnector
(RolloutWorker pid=1329) ImmutableActionsConnector
(RolloutWorker pid=1329) 2023-09-21 12:48:25,982 DEBUG rollout_worker.py:645 -- Created rollout worker with env <ray.rllib.env.vector_env.VectorEnvWrapper object at 0x7b8028d17100> (<RankingEnv instance>), policies <PolicyMap lru-caching-capacity=100 policy-IDs=['default_policy']>