I run the code below:
"""Example of using a custom image env and model.
Both the model and env are trivial (and super-fast), so they are useful
for running perf microbenchmarks.
"""
import argparse
import os
import ray
import ray.tune as tune
from ray.tune import sample_from
from fast_image_env import FastImageEnv
from fast_model import TorchFastModel,TorchCustomFastModel
from ray.rllib.models import ModelCatalog
from ray.rllib.agents.ppo import PPOTrainer
if __name__ == "__main__":
    # Shut down any Ray instance that is already running before
    # initializing a fresh one.
    ray.shutdown()
    ray.init()
    try:
        config = {
            "env": FastImageEnv,
            # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
            # Falls back to 0 (CPU only) when the variable is unset.
            "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")),
            "num_workers": 1,
            "framework": "torch",
        }
        trainer = PPOTrainer(config=config)
        # Print the model object held by the trainer's policy.
        print(trainer.get_policy().model)
    finally:
        # Release Ray even if building the trainer or printing fails.
        ray.shutdown()
The code prints the following model summary:
VisionNetwork(
(_logits): SlimConv2d(
(_model): Sequential(
(0): ZeroPad2d(padding=(0, 0, 0, 0), value=0.0)
(1): Conv2d(256, 2, kernel_size=[1, 1], stride=(1, 1))
)
)
(_convs): Sequential(
(0): SlimConv2d(
(_model): Sequential(
(0): ZeroPad2d(padding=(2, 2, 2, 2), value=0.0)
(1): Conv2d(4, 16, kernel_size=[8, 8], stride=(4, 4))
(2): ReLU()
)
)
(1): SlimConv2d(
(_model): Sequential(
(0): ZeroPad2d(padding=(1, 2, 1, 2), value=0.0)
(1): Conv2d(16, 32, kernel_size=[4, 4], stride=(2, 2))
(2): ReLU()
)
)
(2): SlimConv2d(
(_model): Sequential(
(0): Conv2d(32, 256, kernel_size=[11, 11], stride=(1, 1))
(1): ReLU()
)
)
)
(_value_branch_separate): Sequential(
(0): SlimConv2d(
(_model): Sequential(
(0): ZeroPad2d(padding=(2, 2, 2, 2), value=0.0)
(1): Conv2d(4, 16, kernel_size=[8, 8], stride=(4, 4))
(2): ReLU()
)
)
(1): SlimConv2d(
(_model): Sequential(
(0): ZeroPad2d(padding=(1, 2, 1, 2), value=0.0)
(1): Conv2d(16, 32, kernel_size=[4, 4], stride=(2, 2))
(2): ReLU()
)
)
(2): SlimConv2d(
(_model): Sequential(
(0): Conv2d(32, 256, kernel_size=[11, 11], stride=(1, 1))
(1): ReLU()
)
)
(3): SlimConv2d(
(_model): Sequential(
(0): Conv2d(256, 1, kernel_size=(1, 1), stride=(1, 1))
)
)
How can I pass the env’s observation to the model and get the model’s action and value outputs?