Variable length input(sentences) for customized-LSTM without pack_padded_sequence
Variable length input(sentences) for customized-LSTM without pack_padded_sequence
I would like to do image captioning using a customized LSTM.
My problem is that most LSTM models use the pack_padded_sequence() function, together with padding, to train on batches that contain sentences of different lengths.
I was wondering how I can feed the same inputs to a customized LSTM without using pack_padded_sequence().
This is my model:
class FactoredLSTM(nn.Module):
    """LSTM caption decoder with factored input-to-hidden weights.

    For every gate g in {i, f, o, c} the input projection is computed as
    ``U_g(S_g(V_g(x)))`` in ``"factual"`` mode and as ``U_g(V_g(x))`` in any
    other mode, i.e. the factor matrix ``S_g`` is only applied for factual
    captions. The hidden-to-hidden projection ``W_g`` is always applied.
    """

    def __init__(self, emb_dim, hidden_dim, factored_dim, vocab_size, max_len=30):
        """Set the hyper-parameters and build the layers.

        Args:
            emb_dim: size of the word embeddings (and of the image features).
            hidden_dim: size of the hidden and cell states.
            factored_dim: size of the factored (bottleneck) space.
            vocab_size: number of words in the vocabulary.
            max_len: stored on the instance; not used by the methods below.
        """
        super().__init__()
        self.hidden_dim = hidden_dim
        self.factored_dim = factored_dim
        self.vocab_size = vocab_size
        self.max_len = max_len

        self.embed = nn.Embedding(vocab_size, emb_dim)
        # projects a hidden state to vocabulary scores
        self.linear = nn.Linear(hidden_dim, vocab_size)

        # factored lstm weights
        # U: factored -> hidden, V: embedding -> factored, W: hidden -> hidden
        self.U_i = nn.Linear(factored_dim, hidden_dim)
        self.V_i = nn.Linear(emb_dim, factored_dim)
        self.W_i = nn.Linear(hidden_dim, hidden_dim)

        self.U_f = nn.Linear(factored_dim, hidden_dim)
        self.V_f = nn.Linear(emb_dim, factored_dim)
        self.W_f = nn.Linear(hidden_dim, hidden_dim)

        self.U_o = nn.Linear(factored_dim, hidden_dim)
        self.V_o = nn.Linear(emb_dim, factored_dim)
        self.W_o = nn.Linear(hidden_dim, hidden_dim)

        self.U_c = nn.Linear(factored_dim, hidden_dim)
        self.V_c = nn.Linear(emb_dim, factored_dim)
        self.W_c = nn.Linear(hidden_dim, hidden_dim)

        # factor matrix
        # S: factored -> factored, only applied in "factual" mode
        self.S_i = nn.Linear(factored_dim, factored_dim)
        self.S_f = nn.Linear(factored_dim, factored_dim)
        self.S_o = nn.Linear(factored_dim, factored_dim)
        self.S_c = nn.Linear(factored_dim, factored_dim)

    def forward_factor(self, embedded, h_0, c_0, mode):
        """Run a single time step of the factored LSTM cell.

        Args:
            embedded: inputs for this step, [batch, emb_dim].
            h_0: previous hidden state, [batch, hidden_dim].
            c_0: previous cell state, [batch, hidden_dim].
            mode: caption type; ``"factual"`` enables the ``S_*`` layers.

        Returns:
            Tuple ``(hiddens, h_t, c_t)``. ``hiddens`` and ``h_t`` are the
            same tensor (the new hidden state); ``c_t`` is the new cell state.
        """
        i = self.V_i(embedded)
        f = self.V_f(embedded)
        o = self.V_o(embedded)
        c = self.V_c(embedded)

        if mode == "factual":
            i = self.S_i(i)
            f = self.S_f(f)
            o = self.S_o(o)
            c = self.S_c(c)

        i_t = torch.sigmoid(self.U_i(i) + self.W_i(h_0))
        f_t = torch.sigmoid(self.U_f(f) + self.W_f(h_0))
        o_t = torch.sigmoid(self.U_o(o) + self.W_o(h_0))
        c_tilda = torch.tanh(self.U_c(c) + self.W_c(h_0))

        c_t = f_t * c_0 + i_t * c_tilda
        # NOTE: the output gate multiplies c_t directly; no tanh is applied.
        h_t = o_t * c_t

        return h_t, h_t, c_t

    def forward(self, features, captions, lengths, mode="factual"):
        """Compute vocabulary scores for every step of a padded batch.

        The image feature is fed as the first input, followed by the caption
        embeddings; the last caption position is never fed as an input.

        Args:
            features: fixed vectors from images, [batch, emb_dim]
            captions: [batch, seq_len]
            lengths: caption lengths. Kept for interface compatibility; it is
                not used here. Every row is stepped through all positions,
                including padding, so the caller has to exclude the padded
                positions when computing the loss.
            mode: type of caption to generate

        Returns:
            Tensor of scores, [batch, seq_len, vocab_size].
        """
        embedded = self.embed(captions)
        embedded = torch.cat((features.unsqueeze(1), embedded), 1)

        # initialize hidden state; batch size is read from the input itself
        h_t, c_t = self.init_hidden_states(embedded.size(0))

        outputs = []
        # iterate for length of captions
        for index in range(embedded.size(1) - 1):
            emb = embedded[:, index, :]
            hiddens, h_t, c_t = self.forward_factor(emb, h_t, c_t, mode=mode)
            outputs.append(self.linear(hiddens))

        return torch.stack(outputs, 1)

    def init_hidden_states(self, batch_size):
        """Create initial hidden and cell states filled by ``nn.init.uniform_``.

        The states are allocated with the same device and dtype as the
        model's weights, so the module works on CPU as well as on GPU.

        Args:
            batch_size: number of rows in the batch.

        Returns:
            Tuple ``(h0, c0)``, each of shape [batch_size, hidden_dim].
        """
        reference = self.linear.weight
        h0 = reference.new_zeros(batch_size, self.hidden_dim)
        c0 = reference.new_zeros(batch_size, self.hidden_dim)
        nn.init.uniform_(h0)
        nn.init.uniform_(c0)
        return h0, c0
I am not sure whether this part passes the correct inputs to the customized LSTM.
It is as follows:
# iterate for length of captions
# (one position per iteration; the last position of `embedded` is not fed in)
for index in range(embedded.size(1)-1):
# select position `index` along dimension 1 for every row
emb = embedded[:, index, :]
# advance the recurrent state by one step
hiddens, h_t, c_t = self. forward_factor(emb, h_t, c_t, mode=mode)
# apply the output layer to this step's hidden state
outs = self.linear(hiddens)
outputs.append(outs)
Can anyone give me some advice?
Thanks a lot !!!
0
Thanks for contributing an answer to Stack Overflow!
But avoid …
To learn more, see our tips on writing great answers.
Required, but never shown
Required, but never shown
By clicking "Post Your Answer", you agree to our terms of service, privacy policy and cookie policy