import random

import torch
import torch.nn as nn
"""
This class puts together the decoder and encoder and
receives Klingon and Engish data from the tokenization process
"""
class Seq2SeqModel(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        # The encoder and decoder must agree on hidden size and number of layers,
        # otherwise the encoder's final hidden state cannot initialise the decoder
        assert encoder.hid_dim == decoder.hid_dim, \
            "Hidden dimensions of encoder and decoder are not equal"
        assert encoder.n_layers == decoder.n_layers, \
            "Encoder and decoder do not have the same number of layers"
"""
Parameters:
----------
input : Tensor
Input tensor containing token indices (seq_len, batch_size)
Tokenized English Data
trg : Tensor
Target tensor containing token indices (seq_len, batch_size)
This is what our tokenized Klingon Data
teacher_forcing_ratio: double
the % of time I use ground-truths aka during training
Returns:
-------
prediction : Tensor
Predicted output tensor from the GRU (seq_len, batch_size, output_dim)
hidden : Tensor
Hidden state tensor from the GRU (n_layers, batch_size, hid_dim)
"""
def forward(self,input, trg, teacher_forcing_ratio):
batch_size = trg.shape[1]
trg_length = trg.shape[0]
trg_size = self.decoder.output_dim
        # Tensor for storing the decoder outputs at every target position
        outputs = torch.zeros(trg_length, batch_size, trg_vocab_size).to(self.device)
        # The encoder's final hidden state becomes the decoder's initial hidden state
        hidden = self.encoder(input)
        # print("Encoder hidden state shape:", hidden.shape)
        # The first input to the decoder is the start-of-sentence token from the target,
        # which tells the decoder when to start making predictions
        input = trg[0, :]
        for t in range(1, trg_length):
            # Forward pass through the decoder; `hidden` starts as the context
            # vector from the encoder and is updated at every step
            output, hidden = self.decoder(input, hidden)
            # print("Decoder output shape:", output.shape)
            # Store the prediction made for this time step
            outputs[t] = output
            # Decide at random whether to use teacher forcing for the next input
            teacher_force = random.random() < teacher_forcing_ratio
            # print("Output tensor shape in Seq to Seq:", output.shape)
            # Get the highest-scoring token from the predictions
            highest = output.argmax(1)
            # If teacher forcing is used, feed the ground-truth next token; otherwise feed the predicted token
            input = trg[t] if teacher_force else highest
        return outputs
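

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only): the real Encoder and Decoder are defined
# elsewhere in this project, so the minimal GRU modules below are assumptions
# that merely mimic the interface Seq2SeqModel expects, i.e.
#   encoder(src)           -> hidden of shape (n_layers, batch_size, hid_dim)
#   decoder(token, hidden) -> logits of shape (batch_size, output_dim), new hidden
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    class _ToyEncoder(nn.Module):
        def __init__(self, input_dim, emb_dim, hid_dim, n_layers):
            super().__init__()
            self.hid_dim, self.n_layers = hid_dim, n_layers
            self.embedding = nn.Embedding(input_dim, emb_dim)
            self.rnn = nn.GRU(emb_dim, hid_dim, n_layers)

        def forward(self, src):                        # src: (src_len, batch_size)
            _, hidden = self.rnn(self.embedding(src))
            return hidden                              # (n_layers, batch_size, hid_dim)

    class _ToyDecoder(nn.Module):
        def __init__(self, output_dim, emb_dim, hid_dim, n_layers):
            super().__init__()
            self.output_dim, self.hid_dim, self.n_layers = output_dim, hid_dim, n_layers
            self.embedding = nn.Embedding(output_dim, emb_dim)
            self.rnn = nn.GRU(emb_dim, hid_dim, n_layers)
            self.fc_out = nn.Linear(hid_dim, output_dim)

        def forward(self, token, hidden):              # token: (batch_size,)
            emb = self.embedding(token.unsqueeze(0))   # (1, batch_size, emb_dim)
            output, hidden = self.rnn(emb, hidden)
            return self.fc_out(output.squeeze(0)), hidden

    device = torch.device("cpu")
    model = Seq2SeqModel(_ToyEncoder(100, 32, 64, 2),
                         _ToyDecoder(120, 32, 64, 2),
                         device).to(device)
    src = torch.randint(0, 100, (7, 4))                # 7 English tokens, batch of 4
    trg = torch.randint(0, 120, (9, 4))                # 9 Klingon tokens, batch of 4
    out = model(src, trg, teacher_forcing_ratio=0.5)
    print(out.shape)                                   # torch.Size([9, 4, 120])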