I was doing the “A multi-input capable model for Tuple observation spaces (for PPO)” example and encountered a problem.
The observation space in the environment is initialized to

```python
from gym.spaces import Box, Discrete, Tuple as tp

obs_spaces = {
    "actor1": tp((
        Box(float("-inf"), float("inf"), (84, 84, 1)),
        Discrete(7),
    )),
}
```
The observation returned by the environment is

```python
# image: (84, 84, 1), a: int
return {"actor1": (image, a)}
```
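For context, here is a minimal sketch of the environment side (the class name, action space, and step logic are placeholders, not my actual code):

```python
import numpy as np
import gym
from gym.spaces import Box, Dict, Discrete, Tuple as tp

class MultiInputEnv(gym.Env):
    """Placeholder env emitting a Dict obs that contains a (Box, Discrete) tuple."""

    def __init__(self, config=None):
        self.observation_space = Dict({
            "actor1": tp((
                Box(float("-inf"), float("inf"), (84, 84, 1)),
                Discrete(7),
            )),
        })
        self.action_space = Discrete(2)  # placeholder action space

    def reset(self):
        image = np.zeros((84, 84, 1), dtype=np.float32)
        a = 0  # a sample from Discrete(7)
        return {"actor1": (image, a)}

    def step(self, action):
        obs = self.reset()
        return obs, 0.0, True, {}  # placeholder reward/done/info
```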
The model I use is https://github.com/ray-project/ray/blob/master/rllib/models/torch/complex_input_net.py.
Training file:

```python
class ComplexInputNetwork(TorchModelV2, nn.Module):
    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        ...

    def forward(self, input_dict, state, seq_lens):
        ...

    def value_function(self):
        ...

ModelCatalog.register_custom_model("testmodel", ComplexInputNetwork)
```

and in the trainer config:

```python
"model": {
    "custom_model": "testmodel",
    # Extra kwargs to be passed to your model's c'tor.
    "custom_model_config": {},
},
```
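For completeness, roughly how I wire these pieces together (the env class and the rest of the config are placeholders, and this assumes the old-style `ray.rllib.agents.ppo` API):

```python
import ray
from ray.rllib.agents.ppo import PPOTrainer
from ray.rllib.models import ModelCatalog

ModelCatalog.register_custom_model("testmodel", ComplexInputNetwork)

config = {
    "env": MultiInputEnv,  # placeholder env from the sketch above
    "framework": "torch",
    "model": {
        "custom_model": "testmodel",
        "custom_model_config": {},
    },
}

ray.init()
trainer = PPOTrainer(config=config)
print(trainer.train())
```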
The policy is set to PPO.
Running it fails with:

```
File "C:\ProgramData\Anaconda3\Lib\site-packages\ray\rllib\examples\b_comlex.py", line 250, in forward
    cnn_out, _ = self.cnns[i]({"obs": component})
File "C:\ProgramData\Anaconda3\lib\site-packages\ray\rllib\models\modelv2.py", line 213, in __call__
    res = self.forward(restored, state or [], seq_lens)
File "C:\ProgramData\Anaconda3\lib\site-packages\ray\rllib\models\torch\visionnet.py", line 192, in forward
    self._features = self._features.permute(0, 3, 1, 2)
RuntimeError: number of dims don't match in permute
```
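The error itself is easy to reproduce in isolation: the vision net's `permute` expects a 4-D NHWC image batch, but the component it receives is a flattened 2-D tensor (shapes here are illustrative):

```python
import torch

flat = torch.zeros(32, 7057)      # flattened obs batch: [batch, 84*84*1 + 1]
# flat.permute(0, 3, 1, 2)        # RuntimeError: number of dims don't match in permute

img = torch.zeros(32, 84, 84, 1)  # what the CNN actually expects (NHWC)
nchw = img.permute(0, 3, 1, 2)    # OK: -> [32, 1, 84, 84]
print(nchw.shape)
```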
I printed some intermediate results and found that `obs` had already been flattened by the time it reached `ComplexInputNetwork.forward`. In `ComplexInputNetwork.__init__`, `hasattr(obs_space, "original_space")` is `True`. But when `forward(self, input_dict, state, seq_lens)` is called (through https://github.com/ray-project/ray/blob/master/rllib/models/modelv2.py), the observation arrives flattened. The relevant code is:
```python
def forward(self, input_dict, state, seq_lens):
    # Push image observations through our CNNs.
    outs = []
    for i, component in enumerate(input_dict["obs"]):
        if i in self.cnns:
            cnn_out, _ = self.cnns[i]({"obs": component})
            outs.append(cnn_out)
        elif i in self.one_hot:
            if component.dtype in [torch.int32, torch.int64, torch.uint8]:
                outs.append(
                    one_hot(component, self.original_space.spaces[i]))
            else:
                outs.append(component)
        else:
            outs.append(torch.reshape(component, [-1, self.flatten[i]]))
    # Concat all outputs and the non-image inputs.
    out = torch.cat(outs, dim=1)
    # Push through (optional) FC-stack (this may be an empty stack).
    out, _ = self.post_fc_stack({"obs": out}, [], None)
```
and at that point the input has shape `torch.Size([7057])` (84 * 84 * 1 + 1 = 7057).
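What I am aiming for is something like the following inside my own `forward` (a minimal sketch; I am assuming `restore_original_dimensions` from `ray.rllib.models.modelv2` and the `input_dict["obs_flat"]` key are applicable here, and I have not verified this for a Tuple nested inside a Dict):

```python
from ray.rllib.models.modelv2 import restore_original_dimensions

def forward(self, input_dict, state, seq_lens):
    # Unflatten the [batch, 7057] tensor back into the Dict/Tuple structure.
    obs = restore_original_dimensions(
        input_dict["obs_flat"], self.obs_space, tensorlib="torch")
    image, a = obs["actor1"]  # image: [batch, 84, 84, 1]
    ...
```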
How can I set things up so that the input is not flattened and keeps its original structure ([84, 84, 1], 1)?

Thank you very much.