RLlib Parameter Sharing / MARL Communication

Hey,

can someone explain to me what parameter sharing in RLlib really means? Do all agents literally use the same network instance? Or does every agent have an identical copy of the network, with the parameters shared/synchronized between these copies?

I’m asking because I want to implement a MARL algorithm like BiCNet, where all agents use the same network and communicate with each other through a bidirectional RNN (B-RNN) layer.
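
To make the question concrete, this is the setup I would describe as "parameter sharing" (a minimal sketch based on my reading of the multiagent docs; the env name and spaces are placeholders):

import gym
from ray import tune

# placeholder per-agent spaces
obs_space = gym.spaces.Box(-1.0, 1.0, (8,))
act_space = gym.spaces.Box(-1.0, 1.0, (2,))

config = {
    "env": "my_multiagent_env",  # placeholder MultiAgentEnv
    "framework": "torch",
    "multiagent": {
        # a single policy entry: (policy_cls, obs_space, act_space, config);
        # None means "use the algorithm's default policy class"
        "policies": {
            "shared_policy": (None, obs_space, act_space, {}),
        },
        # every agent ID maps to the same policy -> one set of weights for all agents
        "policy_mapping_fn": lambda agent_id, **kwargs: "shared_policy",
    },
}

tune.run("A2C", config=config)

So with this config: is it one network instance that all agents pass through, or per-agent copies that get synchronized?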

My PyTorch model (BiCNet):

import torch
import torch.nn as nn

HIDDEN_DIM = 600  # hidden width (the actual value isn't shown here)


def fanin_init(size, fanin=None):
    # common DDPG-style fan-in initialization (helper not shown above)
    fanin = fanin or size[0]
    v = 1.0 / (fanin ** 0.5)
    return torch.empty(size).uniform_(-v, v)


class Actor(nn.Module):
    def __init__(self, s_dim, a_dim, n_agents):
        super(Actor, self).__init__()

        self.s_dim = s_dim
        self.a_dim = a_dim
        self.n_agents = n_agents

        # input (batch * n_agents, s_dim), output (batch * n_agents, HIDDEN_DIM // 2)
        self.prev_dense = DenseNet(s_dim, HIDDEN_DIM, HIDDEN_DIM // 2, output_activation=None, norm_in=True)
        # bidirectional RNN over the agent dimension:
        # input (batch, n_agents, HIDDEN_DIM // 2), output (batch, n_agents, HIDDEN_DIM)
        self.comm_net = LSTMNet(HIDDEN_DIM // 2, HIDDEN_DIM // 2, num_layers=1)
        # input (batch * n_agents, HIDDEN_DIM + s_dim), output (batch * n_agents, a_dim)
        self.post_dense = DenseNet(HIDDEN_DIM + s_dim, HIDDEN_DIM // 2, a_dim, output_activation=nn.Tanh)

    def forward(self, x):
        x_s = x  # keep the raw observations for the skip connection
        x = x.view(-1, self.s_dim)
        x = self.prev_dense(x)
        x = x.reshape(-1, self.n_agents, HIDDEN_DIM // 2)
        x = self.comm_net(x)
        x = torch.cat((x, x_s), dim=-1)
        x = x.reshape(-1, HIDDEN_DIM + self.s_dim)
        x = self.post_dense(x)
        x = x.view(-1, self.n_agents, self.a_dim)
        return x


class Critic(nn.Module):
    def __init__(self, s_dim, a_dim, n_agents):
        super(Critic, self).__init__()

        self.s_dim = s_dim
        self.a_dim = a_dim
        self.n_agents = n_agents

        # input (batch * n_agents, s_dim + a_dim), output (batch * n_agents, HIDDEN_DIM // 2)
        self.prev_dense = DenseNet(s_dim + a_dim, HIDDEN_DIM, HIDDEN_DIM // 2, output_activation=None, norm_in=True)
        # bidirectional RNN over the agent dimension:
        # input (batch, n_agents, HIDDEN_DIM // 2), output (batch, n_agents, HIDDEN_DIM)
        self.comm_net = LSTMNet(HIDDEN_DIM // 2, HIDDEN_DIM // 2, num_layers=1)
        # input (batch * n_agents, HIDDEN_DIM + s_dim), output (batch * n_agents, 1)
        self.post_dense = DenseNet(HIDDEN_DIM + s_dim, HIDDEN_DIM // 2, 1, output_activation=None)

    def forward(self, x_n, a_n):
        x = torch.cat((x_n, a_n), dim=-1)
        x = x.view(-1, self.s_dim + self.a_dim)
        x = self.prev_dense(x)

        x = x.reshape(-1, self.n_agents, HIDDEN_DIM // 2)
        x = self.comm_net(x)
        x = torch.cat((x, x_n), dim=-1)
        x = x.reshape(-1, HIDDEN_DIM + self.s_dim)

        x = self.post_dense(x)
        x = x.view(-1, self.n_agents, 1)
        return x


class DenseNet(nn.Module):
    def __init__(self, s_dim, hidden_dim, a_dim, norm_in=False, hidden_activation=nn.ReLU, output_activation=None):
        super(DenseNet, self).__init__()

        self._norm_in = norm_in

        if self._norm_in:
            self.norm1 = nn.BatchNorm1d(s_dim)
            self.norm2 = nn.BatchNorm1d(hidden_dim)
            self.norm3 = nn.BatchNorm1d(hidden_dim)
            self.norm4 = nn.BatchNorm1d(hidden_dim)

        self.dense1 = nn.Linear(s_dim, hidden_dim)
        self.dense1.weight.data = fanin_init(self.dense1.weight.data.size())
        self.dense2 = nn.Linear(hidden_dim, hidden_dim)
        self.dense2.weight.data = fanin_init(self.dense2.weight.data.size())
        self.dense3 = nn.Linear(hidden_dim, hidden_dim)
        self.dense3.weight.data.uniform_(-0.003, 0.003)
        self.dense4 = nn.Linear(hidden_dim, a_dim)

        self.hidden_activation = hidden_activation() if hidden_activation else nn.Identity()
        self.output_activation = output_activation() if output_activation else nn.Identity()

    def forward(self, x):
        # skip BatchNorm for single-sample batches, where it is undefined in training mode
        use_norm = self._norm_in and x.shape[0] != 1

        if use_norm:
            x = self.norm1(x)
        x = self.hidden_activation(self.dense1(x))
        if use_norm:
            x = self.norm2(x)
        x = self.hidden_activation(self.dense2(x))
        if use_norm:
            x = self.norm3(x)
        x = self.hidden_activation(self.dense3(x))
        if use_norm:
            x = self.norm4(x)
        x = self.output_activation(self.dense4(x))
        return x


class LSTMNet(nn.Module):
    def __init__(self, input_size, hidden_size,
                 num_layers=1,
                 bias=True,
                 batch_first=True,
                 bidirectional=True):
        super(LSTMNet, self).__init__()

        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            bias=bias,
            batch_first=batch_first,
            bidirectional=bidirectional,
        )

    def forward(self, input):
        # hidden/cell states default to zero; only the per-step outputs are used
        output, (hidden, cell) = self.lstm(input)
        return output
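
A quick smoke test for the actor's shapes (arbitrary dimensions, just to check that the communication layer runs over the agent axis):

actor = Actor(s_dim=8, a_dim=2, n_agents=3)
actor.eval()  # use BatchNorm running stats for this single check

obs = torch.randn(4, 3, 8)  # (batch, n_agents, s_dim)
actions = actor(obs)
print(actions.shape)        # expected: torch.Size([4, 3, 2])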

@sven1977 are you able to answer this?

I also wondered whether it would be smarter to just use the single-agent A2C algorithm with the custom models shown above. That way the communication through the model's B-RNN layer should be guaranteed, shouldn't it? Or is there a problem with the B-RNN layer in RLlib because of its distributed sample processing? :thinking:
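
For that second idea, here is roughly how I would plug the actor in as a custom model. This is only a sketch: the env name and the custom_model_config keys are mine, the value head is a stand-in for the BiCNet critic, and the action-distribution wiring (e.g., mean/std outputs for continuous actions) is glossed over:

import torch.nn as nn
from ray import tune
from ray.rllib.models import ModelCatalog
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2


class BicNetActorModel(TorchModelV2, nn.Module):
    # wraps the Actor above so a single-agent algorithm sees one joint model

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)
        cfg = model_config["custom_model_config"]
        self.n_agents = cfg["n_agents"]  # my own config keys, not RLlib built-ins
        self.s_dim = cfg["s_dim"]        # per-agent observation size
        self.actor = Actor(self.s_dim, num_outputs // self.n_agents, self.n_agents)
        # stand-in value head; a real setup would use the BiCNet Critic here
        self.value_head = nn.Linear(self.n_agents * self.s_dim, 1)
        self._value_out = None

    def forward(self, input_dict, state, seq_lens):
        flat = input_dict["obs_flat"].float()              # (batch, n_agents * s_dim)
        obs = flat.reshape(-1, self.n_agents, self.s_dim)  # (batch, n_agents, s_dim)
        out = self.actor(obs)                              # (batch, n_agents, a_dim)
        self._value_out = self.value_head(flat).squeeze(-1)
        return out.reshape(out.shape[0], -1), state        # flatten per-agent outputs

    def value_function(self):
        return self._value_out


ModelCatalog.register_custom_model("bicnet_actor", BicNetActorModel)

tune.run(
    "A2C",
    config={
        "env": "my_joint_obs_env",  # placeholder env exposing the joint observation
        "framework": "torch",
        "model": {
            "custom_model": "bicnet_actor",
            "custom_model_config": {"n_agents": 3, "s_dim": 8},
        },
    },
)

Would the B-RNN inside a custom model like this play nicely with RLlib's distributed rollout workers, or would I have to treat it as a recurrent model (state / seq_lens)?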