# Spaces: Running / Running  -- stray status lines from the file export,
# commented out so the module parses.
import torch.nn as nn | |
import torch | |
import random | |
""" | |
This class puts together the decoder and encoder and | |
receives Klingon and Engish data from the tokenization process | |
""" | |
class Seq2SeqModel(nn.Module):
    """Sequence-to-sequence model combining an encoder and a decoder.

    Consumes tokenized English (source) and Klingon (target) data
    produced by the tokenization process.

    Parameters
    ----------
    encoder : nn.Module
        Maps a source sequence to a hidden state; must expose
        ``hid_dim`` and ``n_layers`` attributes.
    decoder : nn.Module
        Predicts one target token per step; must expose ``hid_dim``,
        ``n_layers`` and ``output_dim`` attributes.
    device : torch.device
        Device on which the output buffer is allocated.
    """

    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        # The decoder is seeded directly with the encoder's hidden state,
        # so both must agree on hidden size and number of layers.
        assert encoder.hid_dim == decoder.hid_dim, "Hidden dimensions of encoder and decoder not equal"
        assert encoder.n_layers == decoder.n_layers, "Encoder and decoder layers not equal"

    def forward(self, input, trg, teacher_forcing_ratio=0.5):
        """Run one encode/decode pass over a batch.

        Parameters
        ----------
        input : Tensor
            Source token indices, shape (seq_len, batch_size) —
            the tokenized English data.
        trg : Tensor
            Target token indices, shape (seq_len, batch_size) —
            the tokenized Klingon data.
        teacher_forcing_ratio : float, optional
            Probability, per step, of feeding the ground-truth target
            token to the decoder instead of its own prediction
            (default 0.5; use 0.0 for pure inference behavior).

        Returns
        -------
        outputs : Tensor
            Decoder predictions, shape (seq_len, batch_size, output_dim).
            ``outputs[0]`` is never written and stays all zeros: position 0
            holds the start-of-sentence token, which is not predicted.
        """
        batch_size = trg.shape[1]
        trg_length = trg.shape[0]
        trg_size = self.decoder.output_dim
        # Buffer for the decoder outputs at every time step.
        outputs = torch.zeros(trg_length, batch_size, trg_size).to(self.device)
        # The encoder's final hidden state is the decoder's initial state
        # (the context vector).
        hidden = self.encoder(input)
        # First decoder input is the start-of-sentence token row, which
        # tells the decoder to begin making predictions.
        input = trg[0, :]
        for t in range(1, trg_length):
            # One decoder step; `hidden` carries state between steps.
            output, hidden = self.decoder(input, hidden)
            outputs[t] = output
            # Per step, decide at random whether to teacher-force.
            teacher_force = random.random() < teacher_forcing_ratio
            # Greedy pick of the most likely token from this step.
            highest = output.argmax(1)
            # Ground truth if teacher forcing, otherwise own prediction.
            input = trg[t] if teacher_force else highest
        return outputs