Variable length input(sentences) for customized-LSTM without pack_padded_sequence

Variable length input(sentences) for customized-LSTM without pack_padded_sequence

I would like to do image captioning using a customized LSTM.
My problem is that most LSTM models rely on padding plus the pack_padded_sequence() function to train on batches that contain sentences of different lengths.
I was wondering how I can feed the same variable-length input to a customized LSTM without using pack_padded_sequence().

This is my model:

class FactoredLSTM(nn.Module):
    """Caption decoder built around a hand-written LSTM cell with factored input weights.

    For every gate the input projection is split into ``U(S(V(x)))`` where
    ``V`` maps the embedding to ``factored_dim``, ``S`` is a square
    ``factored_dim x factored_dim`` layer that is applied only when
    ``mode == "factual"``, and ``U`` maps back up to ``hidden_dim``.
    The recurrent projection ``W`` acts on the previous hidden state.

    Variable-length batches are handled WITHOUT ``pack_padded_sequence``:
    the cell is simply unrolled over the padded tensor.  Because step ``t``
    only depends on steps ``<= t``, trailing padding cannot influence the
    outputs at the valid positions of a caption.  The outputs produced at
    padded positions are still returned, so the caller presumably needs to
    exclude them from the loss (e.g. with a length mask) -- that part is
    outside this class.
    """

    def __init__(self, emb_dim, hidden_dim, factored_dim, vocab_size, max_len=30):
        """Set the hyper-parameters and build the layers.

        Args:
            emb_dim: size of the word embeddings (and of the image feature
                that is prepended to the caption in ``forward``).
            hidden_dim: size of the hidden and cell state.
            factored_dim: inner size of the factored input projection.
            vocab_size: number of rows of the embedding table and number of
                output logits per step.
            max_len: stored on the instance; not used by the methods of
                this class.
        """
        super(FactoredLSTM, self).__init__()

        self.hidden_dim = hidden_dim
        self.factored_dim = factored_dim
        self.vocab_size = vocab_size
        self.max_len = max_len

        self.embed = nn.Embedding(vocab_size, emb_dim)
        self.linear = nn.Linear(hidden_dim, vocab_size)

        # factored lstm weights
        # U, V, W for the input (i), forget (f), output (o) gates and the
        # candidate cell state (c)
        self.U_i = nn.Linear(factored_dim, hidden_dim)
        self.V_i = nn.Linear(emb_dim, factored_dim)
        self.W_i = nn.Linear(hidden_dim, hidden_dim)

        self.U_f = nn.Linear(factored_dim, hidden_dim)
        self.V_f = nn.Linear(emb_dim, factored_dim)
        self.W_f = nn.Linear(hidden_dim, hidden_dim)

        self.U_o = nn.Linear(factored_dim, hidden_dim)
        self.V_o = nn.Linear(emb_dim, factored_dim)
        self.W_o = nn.Linear(hidden_dim, hidden_dim)

        self.U_c = nn.Linear(factored_dim, hidden_dim)
        self.V_c = nn.Linear(emb_dim, factored_dim)
        self.W_c = nn.Linear(hidden_dim, hidden_dim)

        # factor matrix
        # S
        self.S_i = nn.Linear(factored_dim, factored_dim)
        self.S_f = nn.Linear(factored_dim, factored_dim)
        self.S_o = nn.Linear(factored_dim, factored_dim)
        self.S_c = nn.Linear(factored_dim, factored_dim)

    def forward_factor(self, embedded, h_0, c_0, mode):
        """Run the factored LSTM cell for a single time step.

        Args:
            embedded: input of the current step, ``[batch, emb_dim]``.
            h_0: previous hidden state, ``[batch, hidden_dim]``.
            c_0: previous cell state, ``[batch, hidden_dim]``.
            mode: the ``S_*`` layers are applied only for ``"factual"``;
                any other value skips them.

        Returns:
            ``(hiddens, h_t, c_t)`` where ``hiddens`` is the same tensor as
            ``h_t``.
        """
        i = self.V_i(embedded)
        f = self.V_f(embedded)
        o = self.V_o(embedded)
        c = self.V_c(embedded)

        if mode == "factual":
            i = self.S_i(i)
            f = self.S_f(f)
            o = self.S_o(o)
            c = self.S_c(c)

        i_t = torch.sigmoid(self.U_i(i) + self.W_i(h_0))
        f_t = torch.sigmoid(self.U_f(f) + self.W_f(h_0))
        o_t = torch.sigmoid(self.U_o(o) + self.W_o(h_0))
        c_tilda = torch.tanh(self.U_c(c) + self.W_c(h_0))

        c_t = f_t * c_0 + i_t * c_tilda
        # NOTE(review): there is no tanh on c_t here, unlike the textbook
        # LSTM (o_t * tanh(c_t)).  Kept exactly as written -- confirm that
        # this is the intended formulation.
        h_t = o_t * c_t

        hiddens = h_t
        return hiddens, h_t, c_t

    def forward(self, features, captions, lengths, mode="factual"):
        """Unroll the cell over a padded batch of captions.

        Args:
            features: fixed vectors from images, ``[batch, emb_dim]``.
            captions: padded token ids, ``[batch, seq_len]``.
            lengths: true caption lengths.  Accepted for interface
                compatibility; the recurrence itself does not need them
                (see the class docstring), so they may be in any order.
            mode: type of caption to generate, forwarded to
                ``forward_factor``.

        Returns:
            Logits of shape ``[batch, seq_len, vocab_size]``.  Step ``t``
            is computed from the image feature (``t == 0``) or from caption
            token ``t - 1``.
        """
        embedded = self.embed(captions)
        # Prepend the image feature as the very first input of the sequence.
        embedded = torch.cat((features.unsqueeze(1), embedded), 1)

        batch_size = embedded.size(0)
        # The last position is never fed in, so there are seq_len steps.
        num_steps = embedded.size(1) - 1

        if num_steps == 0:
            # torch.stack cannot stack an empty list; return an empty
            # result with the regular layout instead of raising.
            return embedded.new_zeros((batch_size, 0, self.vocab_size))

        # initialize hidden state on the same device as the inputs
        h_t, c_t = self.init_hidden_states(batch_size, device=embedded.device)

        outputs = []
        # iterate for length of captions
        for index in range(num_steps):
            emb = embedded[:, index, :]
            hiddens, h_t, c_t = self.forward_factor(emb, h_t, c_t, mode=mode)
            outs = self.linear(hiddens)
            outputs.append(outs)

        outputs = torch.stack(outputs, 1)
        return outputs

    def init_hidden_states(self, batch_size, device=None):
        """Create the initial hidden and cell state.

        Both tensors are filled by ``nn.init.uniform_`` with its default
        bounds, so two calls give different values unless the RNG is seeded.

        Args:
            batch_size: number of sequences in the batch.
            device: where to allocate the states.  Defaults to the device
                of this module's own weights, so the model works on CPU as
                well as on GPU.

        Returns:
            ``(h0, c0)``, each of shape ``[batch_size, hidden_dim]``.
        """
        if device is None:
            device = self.linear.weight.device

        hidden_dim = self.hidden_dim
        h0 = torch.zeros(batch_size, hidden_dim, device=device)
        c0 = torch.zeros(batch_size, hidden_dim, device=device)

        nn.init.uniform_(h0)
        nn.init.uniform_(c0)
        return h0, c0

I am not sure whether this part feeds the correct inputs into the customized LSTM.
The relevant code is as follows:

# Unroll the cell one time step at a time, carrying (h_t, c_t) forward.
# The last position of `embedded` is never used as an input.
for index in range(embedded.size(1) - 1):
    emb = embedded[:, index, :]
    hiddens, h_t, c_t = self.forward_factor(emb, h_t, c_t, mode=mode)
    # Project the hidden state of this step and collect the result.
    outs = self.linear(hiddens)
    outputs.append(outs)

Can anyone give me some advice?
Thanks a lot !!!

0

Thanks for contributing an answer to Stack Overflow!

But avoid …

To learn more, see our tips on writing great answers.

Required, but never shown

By clicking "Post Your Answer", you agree to our terms of service, privacy policy and cookie policy

搜尋此網誌

Dfyjkt

Variable length input(sentences) for customized-LSTM without pack_padded_sequence

Variable length input(sentences) for customized-LSTM without pack_padded_sequence

0

Popular posts from this blog

How do I collapse sections of code in Visual Studio Code for Windows?