Spaces:

DiegoTheExplorar
/

KlingonHeads

Running

File size: 2,934 Bytes

a2570fa

import torch.nn as nn
import torch
import random
"""

    This class puts together the decoder and encoder and 

    receives Klingon and Engish data from the tokenization process



"""

class Seq2SeqModel(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        # cause encoder and decoder must have same no.of layers
        assert (encoder.hid_dim == decoder.hid_dim), "Hidden dimensions of encoder and decoder not equal"
        assert (encoder.n_layers == decoder.n_layers), "Encoder and decoder layers not equal"

    """

        Parameters:

        ----------

        input : Tensor

            Input tensor containing token indices (seq_len, batch_size)

            Tokenized English Data

        

        trg : Tensor

            Target tensor containing token indices (seq_len, batch_size)

            This is what our tokenized Klingon Data

        

        teacher_forcing_ratio: double

            the % of time I use ground-truths aka during training

        Returns:

        -------

        prediction : Tensor

            Predicted output tensor from the GRU (seq_len, batch_size, output_dim)

        

        hidden : Tensor

            Hidden state tensor from the GRU (n_layers, batch_size, hid_dim)

    """
    def forward(self,input, trg, teacher_forcing_ratio):
        batch_size = trg.shape[1]
        trg_length = trg.shape[0]
        trg_size = self.decoder.output_dim
        #storing decorder outputs
        outputs = torch.zeros(trg_length,batch_size,trg_size).to(self.device)
        #output of encoder used as input for decoder
        hidden = self.encoder(input)
        #print("Encoder hidden state shape:", hidden.shape)
        # basically we want to single out the first input into the decoder as a 
        #start of sentence token. This is to let the decoder know when to start making predictions
        input = trg[0, :]
        for t in range(1, trg_length):
           #forward pass through decoder. hidden here refers to context vector from
           #encoder. hidden keeps getting updated
            output, hidden = self.decoder(input, hidden)
            #print("Decoder output shape:", output.shape)
            #Here I am just storing all the predictions made
            outputs[t] = output
            
            #leaving usage of teacher forcing to chance
            teacher_force = random.random() < teacher_forcing_ratio
            #print("Output tensor shape in Seq to Seq:", output.shape)

            # Get the highest predicted token from our predictions
            highest = output.argmax(1)
            
            # If teacher forcing is used use next token else  use predicted token
            input = trg[t] if teacher_force else highest

        return outputs