import random

import torch
import torch.nn as nn


class Seq2SeqModel(nn.Module):
    """
    Combines the encoder and decoder into a single sequence-to-sequence model.

    Receives tokenized English (source) and Klingon (target) data from the
    tokenization process.
    """

    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        # The encoder and decoder must share hidden size and layer count,
        # since the encoder's final hidden state initializes the decoder
        assert encoder.hid_dim == decoder.hid_dim, \
            "Hidden dimensions of encoder and decoder must be equal"
        assert encoder.n_layers == decoder.n_layers, \
            "Encoder and decoder must have the same number of layers"

    """

        Parameters:

        ----------

        input : Tensor

            Input tensor containing token indices (seq_len, batch_size)

            Tokenized English Data

        

        trg : Tensor

            Target tensor containing token indices (seq_len, batch_size)

            This is what our tokenized Klingon Data

        

        teacher_forcing_ratio: double

            the % of time I use ground-truths aka during training

        Returns:

        -------

        prediction : Tensor

            Predicted output tensor from the GRU (seq_len, batch_size, output_dim)

        

        hidden : Tensor

            Hidden state tensor from the GRU (n_layers, batch_size, hid_dim)

    """
    def forward(self,input, trg, teacher_forcing_ratio):
        batch_size = trg.shape[1]
        trg_length = trg.shape[0]
        trg_size = self.decoder.output_dim

        # Tensor for storing the decoder outputs at every time step
        outputs = torch.zeros(trg_length, batch_size, trg_size).to(self.device)

        # The encoder's final hidden state is the context vector that
        # initializes the decoder
        hidden = self.encoder(input)

        # The first decoder input is the start-of-sentence token; it tells
        # the decoder when to start making predictions
        input = trg[0, :]
        for t in range(1, trg_length):
            # Forward pass through the decoder; `hidden` starts as the
            # context vector from the encoder and is updated at every step
            output, hidden = self.decoder(input, hidden)

            # Store the prediction made at this time step
            outputs[t] = output

            # Decide at random whether to apply teacher forcing at this step
            teacher_force = random.random() < teacher_forcing_ratio

            # Greedy choice: the highest-scoring token from the prediction
            highest = output.argmax(1)

            # With teacher forcing, feed the ground-truth token next;
            # otherwise, feed the model's own prediction
            input = trg[t] if teacher_force else highest

        return outputs
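
# ---------------------------------------------------------------------------
# Hedged usage sketch (illustrative; not from the original project): the
# Encoder and Decoder below are minimal GRU stand-ins matching the interface
# Seq2SeqModel assumes, namely encoder(src) -> hidden and
# decoder(input, hidden) -> (output, hidden), plus the hid_dim, n_layers,
# and output_dim attributes checked in __init__. The project's real modules
# may differ in naming and shape.
if __name__ == "__main__":

    class Encoder(nn.Module):
        def __init__(self, input_dim, emb_dim, hid_dim, n_layers):
            super().__init__()
            self.hid_dim = hid_dim
            self.n_layers = n_layers
            self.embedding = nn.Embedding(input_dim, emb_dim)
            self.rnn = nn.GRU(emb_dim, hid_dim, n_layers)

        def forward(self, src):
            # src: (seq_len, batch_size) -> hidden: (n_layers, batch_size, hid_dim)
            _, hidden = self.rnn(self.embedding(src))
            return hidden

    class Decoder(nn.Module):
        def __init__(self, output_dim, emb_dim, hid_dim, n_layers):
            super().__init__()
            self.output_dim = output_dim
            self.hid_dim = hid_dim
            self.n_layers = n_layers
            self.embedding = nn.Embedding(output_dim, emb_dim)
            self.rnn = nn.GRU(emb_dim, hid_dim, n_layers)
            self.fc_out = nn.Linear(hid_dim, output_dim)

        def forward(self, input, hidden):
            # input: (batch_size,) token indices for a single time step
            input = input.unsqueeze(0)                     # (1, batch_size)
            output, hidden = self.rnn(self.embedding(input), hidden)
            return self.fc_out(output.squeeze(0)), hidden  # (batch_size, output_dim)

    device = torch.device("cpu")
    model = Seq2SeqModel(Encoder(100, 32, 64, 2), Decoder(120, 32, 64, 2), device)

    # Toy batch: 10 source (English) and 12 target (Klingon) tokens, batch of 4
    src = torch.randint(0, 100, (10, 4))
    trg = torch.randint(0, 120, (12, 4))
    out = model(src, trg, teacher_forcing_ratio=0.5)
    print(out.shape)  # torch.Size([12, 4, 120])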