Hi! This is my first time using RLlib, so I'm finding it hard to navigate all the abstractions.
My error:
File "/Users/etrapeznikov/Desktop/eugene/tech/buro/venv/lib/python3.10/site-packages/ray/rllib/policy/policy.py", line 1513, in _initialize_loss_from_dummy_batch
self.loss(self.model, self.dist_class, train_batch)
File "/Users/etrapeznikov/Desktop/eugene/tech/buro/venv/lib/python3.10/site-packages/ray/rllib/algorithms/ppo/ppo_tf_policy.py", line 142, in loss
curr_action_dist.logp(train_batch[SampleBatch.ACTIONS])
File "/Users/etrapeznikov/Desktop/eugene/tech/buro/venv/lib/python3.10/site-packages/ray/rllib/models/tf/tf_action_dist.py", line 357, in logp
tf.math.square((tf.cast(x, tf.float32) - self.mean) / self.std), axis=1
File "/Users/etrapeznikov/Desktop/eugene/tech/buro/venv/lib/python3.10/site-packages/tensorflow/python/util/traceback_utils.py", line 153, in error_handler
raise e.with_traceback(filtered_tb) from None
File "/Users/etrapeznikov/Desktop/eugene/tech/buro/venv/lib/python3.10/site-packages/tensorflow/python/framework/ops.py", line 7262, in raise_from_not_ok_status
raise core._status_to_exception(e) from None # pylint: disable=protected-access
tensorflow.python.framework.errors_impl.InvalidArgumentError: {{function_node __wrapped__Sub_device_/job:localhost/replica:0/task:0/device:CPU:0}} Incompatible shapes: [32,2] vs. [32,32] [Op:Sub]
It looks like the problem is in the action space. [32, 2] seems reasonable: 32 is the batch size and 2 is the number of actions in my PPO env. But where did [32, 32] come from?
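For reference, here is the output-size arithmetic I think is involved (a rough sketch, assuming the default DiagGaussian action distribution for a continuous Box space; none of this is taken from my model code):

import gym
import numpy as np

# For a Box action space of shape (2,) with DiagGaussian, RLlib asks the model
# for num_outputs = 2 * action_dim = 4 outputs (a mean and a log_std per dim).
action_space = gym.spaces.Box(low=0.0, high=1.0, shape=(2,), dtype=np.float32)
expected_num_outputs = 2 * int(np.prod(action_space.shape))
print(expected_num_outputs)  # -> 4

If self.mean in the traceback is [32, 32], the distribution is being fed 32 "mean" values per sample instead of 2, so maybe my custom model's output layer size doesn't match the num_outputs value RLlib passes into its constructor?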
Here is all of the relevant code:
Agent:
import ray
from ray.rllib.algorithms.ppo import PPO, PPOConfig
from ray.rllib.models import ModelCatalog
# NZTModelV1 is project-local; MarketMaker is the env shown below.


class MarketMakerCPPO(object):
    name = 'MarketMakerCPPO'

    def __init__(
            self,
            number_of_training_steps=1e5,
            load_weights=False,
            visualize=False,
            model_file: str = "",
            testing_files: list = [""],
            action_repeats: int = 5,
            model_report_path: str = "",
            **kwargs):
        # Init RLlib agent
        ModelCatalog.register_custom_model(
            NZTModelV1.name, NZTModelV1
        )
        ppo_config = (
            PPOConfig()
            .resources(num_gpus=0)
            .rollouts(num_rollout_workers=0)  # 0 = sample on the local worker
            .environment(MarketMaker, env_config=kwargs)
            .framework("tf2")
            .rl_module(
                _enable_rl_module_api=False,  # required to use a custom model
            )
            .training(
                _enable_learner_api=False,  # required to use a custom model
                train_batch_size=1024,  # try different values
                lr_schedule=[[0, 2e-5], [1e6, 5e-6]],  # [timestep, lr], from the Nagy/Zohren paper
                model={
                    "custom_model": NZTModelV1.name,
                    "custom_model_config": {
                        "lob_model_path": model_file,
                    }
                },
            )
        )
        ray.init(ignore_reinit_error=True)
        self.agent = PPO(config=ppo_config)

    def start(self) -> None:
        """
        Entry point for agent training and testing
        """
        done = False
        while not done:
            result = self.agent.train()
            done = result["done"]
        self.agent.save('../models/ppo_model_agent_v1')
Env:
import gym
import numpy as np
# BaseEnvironment is project-local.


class MarketMaker(BaseEnvironment[float]):
    id = 'market-maker-v0'
    description = "Environment where limit orders are tethered to LOB price levels"

    def __init__(self, config):
        super().__init__(config)
        self.actions = np.zeros(2, dtype=np.float32)
        self.action_space = gym.spaces.Box(low=0., high=1.,
                                           shape=self.actions.shape,
                                           dtype=np.float32)
        self.observation_shape = (50, 68, 1)
        self.observation_space = gym.spaces.Box(low=-10., high=10.,
                                                shape=self.observation_shape,
                                                dtype=np.float32)
And one more question:
Does a single self.agent.train() call step the env train_batch_size times?
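Once the error above is fixed, I was planning to check this empirically by comparing the cumulative env-step counter across two train() calls (a sketch, assuming the standard timesteps_total key in the result dict and the agent built above):

# Sketch: how many env steps does one train() call consume?
# Assumes `agent` is the PPO instance constructed in MarketMakerCPPO.__init__.
before = agent.train()["timesteps_total"]
after = agent.train()["timesteps_total"]
print("env steps sampled by one train() call:", after - before)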