import random

import torch
import torch.nn as nn


class Seq2SeqModel(nn.Module):
    """Sequence-to-sequence wrapper joining an encoder and a decoder.

    Receives tokenized English (source) and Klingon (target) data from the
    tokenization process and runs the standard step-by-step decoding loop
    with optional teacher forcing.
    """

    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        # The encoder's final hidden state seeds the decoder, so both
        # must agree on hidden size and layer count. Raise (rather than
        # assert) so the check survives `python -O`.
        if encoder.hid_dim != decoder.hid_dim:
            raise ValueError("Hidden dimensions of encoder and decoder not equal")
        if encoder.n_layers != decoder.n_layers:
            raise ValueError("Encoder and decoder layers not equal")

    def forward(self, input, trg, teacher_forcing_ratio=0.5):
        """Encode the source sequence and decode the target one step at a time.

        Parameters
        ----------
        input : Tensor
            Source token indices, shape (seq_len, batch_size) — the
            tokenized English data.
        trg : Tensor
            Target token indices, shape (seq_len, batch_size) — the
            tokenized Klingon data.
        teacher_forcing_ratio : float, optional
            Probability, per decoding step, of feeding the ground-truth
            token (instead of the model's own prediction) into the next
            decoder step. Defaults to 0.5.

        Returns
        -------
        Tensor
            Decoder predictions, shape (trg_len, batch_size, output_dim).
            Row 0 remains all zeros because decoding starts from the
            start-of-sentence token at trg[0].
        """
        batch_size = trg.shape[1]
        trg_length = trg.shape[0]
        trg_size = self.decoder.output_dim

        # Buffer collecting every decoder step's output. Allocate directly
        # on the target device instead of allocating on CPU and copying
        # over with .to().
        outputs = torch.zeros(trg_length, batch_size, trg_size, device=self.device)

        # The encoder's hidden state is the context vector that seeds the
        # decoder.
        hidden = self.encoder(input)

        # Single out the first decoder input as the start-of-sentence
        # token, so the decoder knows when to start making predictions.
        input = trg[0, :]

        for t in range(1, trg_length):
            # Forward pass through the decoder; `hidden` carries the
            # (continually updated) context from step to step.
            output, hidden = self.decoder(input, hidden)

            # Store this step's predictions.
            outputs[t] = output

            # Leave the use of teacher forcing to chance, per step.
            teacher_force = random.random() < teacher_forcing_ratio

            # Greedy choice: the highest-scoring token from this step.
            highest = output.argmax(1)

            # Ground-truth next token if teacher forcing, else our own
            # prediction.
            input = trg[t] if teacher_force else highest

        return outputs