The trajectory_view_api example uses a discrete action model. I want to change it to a continuous action model, but I keep running into errors after the change.
obs space:
    Tuple((
        Box(-5000, 5000, (18,), dtype=np.float32),
        Box(low=-1.0, high=1.0, shape=(3,), dtype=np.float32),
    ))

action space:
    Box(np.array([-1., -1., -1., -1.]), np.array([+1., +1., +1., +1.]), dtype=np.float32)
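For clarity, these are plain gym spaces; the Tuple observation flattens to 18 + 3 = 21 values and the action Box has 4 dimensions, which is where the 21 and the 4 in the model code below come from:

import numpy as np
from gym.spaces import Box, Tuple

# Observation: a Tuple of two Boxes; flattened it has 18 + 3 = 21 values.
observation_space = Tuple((
    Box(-5000, 5000, (18,), dtype=np.float32),
    Box(low=-1.0, high=1.0, shape=(3,), dtype=np.float32),
))

# Action: a 4-dimensional continuous Box in [-1, 1].
action_space = Box(
    np.array([-1., -1., -1., -1.], dtype=np.float32),
    np.array([+1., +1., +1., +1.], dtype=np.float32),
    dtype=np.float32,
)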
I have the following questions:

1. With discrete actions the model outputs one logit (a probability) per action. For continuous actions, what should the model output instead, and how should I set that up inside the model? (My current understanding is sketched right after these questions.)
2. In the forward function, states is used for RNNs. What is seq_lens for?
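As background for question 1 (please correct me if this is wrong): with RLlib's default diagonal Gaussian distribution for a Box action space, the model is expected to output 2 * action_dim values (8 here), which the distribution splits into mean and log_std and then turns into Normal(mean, exp(log_std)); that is exactly the call that fails in the traceback at the bottom. A minimal sketch of that split, not RLlib source, just the idea:

import torch

# 2 * 4 = 8 outputs per sample: first 4 are means, last 4 are log-stds.
model_out = torch.randn(32, 8)                 # hypothetical batch of model outputs
mean, log_std = torch.chunk(model_out, 2, dim=1)
dist = torch.distributions.Normal(mean, torch.exp(log_std))
action = dist.sample()                         # shape [32, 4], one value per action dim
logp = dist.log_prob(action).sum(-1)           # log-likelihood instead of a discrete probability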
Here is my model code:
import torch
from torch import nn

from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.models.torch.misc import SlimFC
from ray.rllib.policy.view_requirement import ViewRequirement


class TorchFrameStackingCartPoleModelL(TorchModelV2, nn.Module):
    """A simple FC model that takes the last n observations as input."""

    def __init__(self,
                 obs_space,
                 action_space,
                 num_outputs,
                 model_config,
                 name,
                 num_frames=3):
        nn.Module.__init__(self)
        super(TorchFrameStackingCartPoleModelL, self).__init__(
            obs_space, action_space, None, model_config, name)

        self.num_frames = num_frames
        self.num_outputs = num_outputs

        # Construct the actual (very simple) FC model.
        # assert len(obs_space.shape) == 1
        # Per frame: 21 flattened obs dims + 4 action dims + 1 reward.
        in_size = self.num_frames * (21 + 4 + 1)
        self.layer1 = SlimFC(
            in_size=in_size, out_size=256, activation_fn="relu")
        self.layer2 = SlimFC(in_size=256, out_size=256, activation_fn="relu")
        self.out = SlimFC(
            in_size=256, out_size=self.num_outputs, activation_fn="linear")
        self.values = SlimFC(in_size=256, out_size=1, activation_fn="linear")
        self._last_value = None

        # Request the last num_frames observations, rewards, and actions
        # through the trajectory view API.
        self.view_requirements["prev_n_obs"] = ViewRequirement(
            data_col="obs",
            shift="-{}:0".format(num_frames - 1),
            space=obs_space)
        self.view_requirements["prev_n_rewards"] = ViewRequirement(
            data_col="rewards", shift="-{}:-1".format(self.num_frames))
        self.view_requirements["prev_n_actions"] = ViewRequirement(
            data_col="actions",
            shift="-{}:-1".format(self.num_frames),
            space=self.action_space)

    def forward(self, input_dict, states, seq_lens):
        obs = input_dict["prev_n_obs"]
        obs = torch.reshape(obs, [-1, 21 * self.num_frames])
        rewards = torch.reshape(input_dict["prev_n_rewards"],
                                [-1, self.num_frames])
        actions = input_dict["prev_n_actions"]
        actions = torch.reshape(actions, [-1, self.num_frames * 4])
        input_ = torch.cat([obs, actions, rewards], dim=-1)
        features = self.layer1(input_)
        features = self.layer2(features)
        out = self.out(features)
        self._last_value = self.values(features)
        return out, states

    def value_function(self):
        return torch.squeeze(self._last_value, -1)
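I register and run the model roughly like this (PPO is just what I happen to use, and the env name is a placeholder; in older Ray versions "custom_model_config" may be called "custom_options"):

from ray.rllib.agents import ppo
from ray.rllib.models import ModelCatalog

ModelCatalog.register_custom_model(
    "frame_stack_model", TorchFrameStackingCartPoleModelL)

config = {
    "env": "my_continuous_env",   # placeholder for my actual env
    "framework": "torch",
    "model": {
        "custom_model": "frame_stack_model",
        "custom_model_config": {"num_frames": 3},
    },
}
trainer = ppo.PPOTrainer(config=config)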
File "C:\ProgramData\Anaconda3\lib\site-packages\ray\rllib\policy\torch_policy.py", line 376, in _compute_action_helper
action_dist = dist_class(dist_inputs, self.model)
File "C:\ProgramData\Anaconda3\lib\site-packages\ray\rllib\models\torch\torch_action_dist.py", line 186, in __init__
self.dist = torch.distributions.normal.Normal(mean, torch.exp(log_std))
File "C:\ProgramData\Anaconda3\lib\site-packages\torch\distributions\normal.py", line 50, in __init__
super(Normal, self).__init__(batch_shape, validate_args=validate_args)
File "C:\ProgramData\Anaconda3\lib\site-packages\torch\distributions\distribution.py", line 53, in __init__
raise ValueError("The parameter {} has invalid values".format(param))
ValueError: The parameter scale has invalid values
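If I read the traceback right, "scale has invalid values" means the tensor passed as the Normal's scale, i.e. torch.exp(log_std), contains NaN or inf, so the log_std half of my model output must already be NaN or very large (my raw observations range from -5000 to 5000 and are not normalized). A hypothetical debugging snippet I am thinking of adding at the end of forward(), just to confirm this:

# If the model output already contains NaN/inf, exp(log_std) will be an
# invalid scale for torch.distributions.Normal.
if torch.isnan(out).any() or torch.isinf(out).any():
    print("bad model output:", out.min().item(), out.max().item())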