Hi!

Thanks for visiting this topic! I am trying to create two separate custom models (LSTMs here, for example), and I want to know how to handle the hidden state for the value network in PPO.

```
import numpy as np
import torch
import torch.nn as nn

from ray.rllib.models.modelv2 import ModelV2
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.policy.rnn_sequencing import add_time_dimension
from ray.rllib.utils.annotations import override
from ray.rllib.utils.torch_ops import FLOAT_MIN  # torch_utils in newer Ray


class RecurrentTorchModel(TorchModelV2, nn.Module):
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        nn.Module.__init__(self)
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)

    @override(ModelV2)
    def forward(self, input_dict, state, seq_lens):
        if isinstance(seq_lens, np.ndarray):
            seq_lens = torch.Tensor(seq_lens).int()
        # Fold the flat observation batch into [B, T, obs_size] for the RNN.
        output, new_state = self.forward_rnn(
            add_time_dimension(
                input_dict["obs"]["observation"].float(), seq_lens,
                framework="torch"),
            state, seq_lens)
        # Push the logits of invalid actions towards -inf.
        action_mask = input_dict["obs"]["action_mask"]
        inf_mask = torch.clamp(torch.log(action_mask), min=FLOAT_MIN)
        logits = torch.reshape(output, [-1, self.num_outputs])
        masked_logits = logits + inf_mask
        return masked_logits, new_state

    def forward_rnn(self, inputs, state, seq_lens):
        raise NotImplementedError("You must implement this for an RNN model")


class TorchRNNModel(RecurrentTorchModel):
    def __init__(self,
                 obs_space,
                 action_space,
                 num_outputs,
                 model_config,
                 name,
                 fc_size=40,
                 lstm_state_size=80):
        super().__init__(obs_space, action_space, num_outputs, model_config,
                         name)
        self.obs_size = 16
        self.fc_size = fc_size
        self.lstm_state_size = lstm_state_size
        self._features = None
        self._values = None

        # Build the module from fc + LSTM.
        # Actor net:
        self.actor_fc1 = nn.Linear(self.obs_size, self.fc_size)
        self.actor_lstm = nn.LSTM(
            self.fc_size, self.lstm_state_size, batch_first=True)
        self.action_branch = nn.Linear(self.lstm_state_size, num_outputs)
        # Value net (its own fc + LSTM, independent of the actor):
        self.value_fc1 = nn.Linear(self.obs_size, self.fc_size)
        self.value_lstm = nn.LSTM(
            self.fc_size, self.lstm_state_size, batch_first=True)
        self.value_branch = nn.Linear(self.lstm_state_size, 1)

    @override(ModelV2)
    def value_function(self):
        assert self._values is not None, "must call forward() first"
        return torch.reshape(self.value_branch(self._values), [-1])

    @override(ModelV2)
    def get_initial_state(self):
        # Make the hidden states on the same device as the model:
        # [actor_h, actor_c, value_h, value_c].
        h = [
            self.actor_fc1.weight.new(1, self.lstm_state_size).zero_().squeeze(0),
            self.actor_fc1.weight.new(1, self.lstm_state_size).zero_().squeeze(0),
            self.value_fc1.weight.new(1, self.lstm_state_size).zero_().squeeze(0),
            self.value_fc1.weight.new(1, self.lstm_state_size).zero_().squeeze(0),
        ]
        return h

    @override(RecurrentTorchModel)
    def forward_rnn(self, inputs, state, seq_lens):
        # Actor branch. nn.LSTM expects (h, c) as a tuple of tensors with
        # shape [num_layers, B, size], hence the unsqueeze/squeeze pairs.
        x1 = nn.functional.relu(self.actor_fc1(inputs))
        self._features, (h1, c1) = self.actor_lstm(
            x1, (torch.unsqueeze(state[0], 0),
                 torch.unsqueeze(state[1], 0)))
        action_out = self.action_branch(self._features)
        # Value branch, with its own hidden state.
        x2 = nn.functional.relu(self.value_fc1(inputs))
        self._values, (h2, c2) = self.value_lstm(
            x2, (torch.unsqueeze(state[2], 0),
                 torch.unsqueeze(state[3], 0)))
        return action_out, [
            torch.squeeze(h1, 0), torch.squeeze(c1, 0),
            torch.squeeze(h2, 0), torch.squeeze(c2, 0)
        ]
```
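As background for the unsqueeze/squeeze pairs above: RLlib hands each state tensor to `forward_rnn` with shape `[B, lstm_state_size]`, while `nn.LSTM` expects `(h, c)` with a leading `num_layers` dimension. A minimal standalone sketch of that round trip (the batch size, sequence length, and layer sizes here are just illustrative, not RLlib code):

```
import torch
import torch.nn as nn

# Illustrative sizes only.
B, T, fc_size, lstm_state_size = 4, 7, 40, 80
lstm = nn.LSTM(fc_size, lstm_state_size, batch_first=True)

x = torch.zeros(B, T, fc_size)       # inputs after add_time_dimension: [B, T, fc]
h = torch.zeros(B, lstm_state_size)  # state as delivered by RLlib: [B, size]
c = torch.zeros(B, lstm_state_size)

# nn.LSTM wants (h, c) shaped [num_layers, B, size] ...
out, (h_new, c_new) = lstm(x, (h.unsqueeze(0), c.unsqueeze(0)))
# ... and RLlib wants each state tensor back as [B, size].
state_out = [h_new.squeeze(0), c_new.squeeze(0)]
```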

I concatenate my value-network hidden states with the actor-network hidden states and return them together on each `forward_rnn` call, but I get an assertion error as follows:

```
/usr/local/lib/python3.7/dist-packages/ray/rllib/policy/torch_policy.py in set_state(self, state)
    711         optimizer_vars = state.get("_optimizer_variables", None)
    712         if optimizer_vars:
--> 713             assert len(optimizer_vars) == len(self._optimizers)
    714             for o, s in zip(self._optimizers, optimizer_vars):
    715                 optim_state_dict = convert_to_torch_tensor(

AssertionError
```

The assertion fires in `set_state` when the number of saved `_optimizer_variables` entries does not match `len(self._optimizers)` on the policy being restored. I would appreciate any help so that I can train two custom models, one as the actor network and one as the value network.
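For completeness, this is roughly how the model gets wired into PPO; a minimal sketch, assuming Ray 1.x-style APIs, where `"my_rnn"` is an arbitrary model key and `"MyMaskedEnv-v0"` is a placeholder for an environment whose observations are a dict with `observation` and `action_mask` keys, as the model above expects:

```
import ray
from ray.rllib.agents.ppo import PPOTrainer
from ray.rllib.models import ModelCatalog

# "my_rnn" is an arbitrary key; "MyMaskedEnv-v0" is a placeholder env name.
ModelCatalog.register_custom_model("my_rnn", TorchRNNModel)

config = {
    "framework": "torch",
    "num_workers": 0,
    "model": {
        "custom_model": "my_rnn",
        "max_seq_len": 20,  # chunk length used by add_time_dimension/seq_lens
    },
}

ray.init()
trainer = PPOTrainer(config=config, env="MyMaskedEnv-v0")
print(trainer.train()["episode_reward_mean"])
```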

UPDATE: There is no problem with the model; the error was caused by a small mistake elsewhere.