DiegoTheExplorar's picture
Rename GradioKlingonToEnglish.py to app.py
4b51755 verified
import torch
import tensorflow as tf
import gradio as gr
from tokenizers import Tokenizer
from Seq2SeqModel import Seq2SeqModel
from KTEPreprocess import preprocess
from Decoder import Decoder
from Encoder import Encoder
# Network hyperparameters.
# NOTE(review): presumably these have to match the values used when the
# checkpoint loaded below was trained -- confirm against the training script.
n_layers = 2
emb_dim = 256
hid_dim = 512
# Dropout value handed to Encoder/Decoder; the model is put in eval mode below.
dropout = 0.5
device = torch.device('cpu')

# Unpack everything preprocess() returns. The code visible in this file only
# uses the two tokenizers, output_dim, input_dim and max_length_klingon.
(
    klingon_tokenizer,
    english_tokenizer,
    output_dim,
    input_dim,
    klingon_train_padded,
    english_train_input,
    english_train_target,
    klingon_test_padded,
    english_test_input,
    english_test_target,
    max_length_klingon,
    max_length_english,
) = preprocess()

# Build the Klingon -> English encoder/decoder pair.
# NOTE(review): the encoder is sized with output_dim and the decoder with
# input_dim; whether that naming is intentional depends on preprocess() -- confirm.
encoder = Encoder(output_dim, emb_dim, hid_dim, n_layers, dropout).to(device)
decoder = Decoder(input_dim, emb_dim, hid_dim, n_layers, dropout).to(device)

# Wrap both halves in the seq2seq model and restore the trained weights.
model = Seq2SeqModel(encoder, decoder, device).to(device)
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust
# (or pass weights_only=True if the installed torch version supports it).
model.load_state_dict(
    torch.load('BPE_Klingon_to_English_1.pth', map_location=device)
)
# Inference only from here on.
model.eval()
# Tokenize one sentence and pad it into a tensor of token ids
def preprocess_sentence(sentence, tokenizer, max_length):
    """Encode *sentence* and return it as a padded ``torch.long`` tensor.

    Args:
        sentence: Text handed to ``tokenizer.encode``.
        tokenizer: Object whose ``encode(sentence)`` result exposes ``.ids``.
        max_length: Passed to ``pad_sequences`` as ``maxlen``.

    Returns:
        A ``torch.long`` tensor on ``device`` holding a single padded row
        (a batch of one sequence is given to ``pad_sequences``).
    """
    token_ids = tokenizer.encode(sentence).ids
    # Padding goes at the end ('post'). Truncation is left at the Keras
    # default. NOTE(review): that default trims from the front of an
    # over-long sequence -- confirm this matches how training data was cut.
    padded_batch = tf.keras.preprocessing.sequence.pad_sequences(
        [token_ids],
        maxlen=max_length,
        padding='post',
    )
    id_tensor = torch.tensor(padded_batch, dtype=torch.long)
    return id_tensor.to(device)
# Translate Klingon to English
def translate_klingon_to_english(klingon_sentence):
    """Translate one Klingon sentence to English with the loaded seq2seq model.

    Each stage (input, token ids, raw model output, decoded text) is logged
    to stdout with ``print``.

    Args:
        klingon_sentence: Raw Klingon text, e.g. the value of the Gradio
            textbox. ``None``, empty and whitespace-only values are treated
            as "nothing to translate".

    Returns:
        The decoded English string, or ``""`` when there is nothing to
        translate.
    """
    print(f"Original Klingon sentence: {klingon_sentence}")

    # Nothing to translate: skip tokenization (which cannot take None) and
    # avoid running the model on a row made purely of padding.
    if not klingon_sentence or not klingon_sentence.strip():
        print("Warning: Input sentence is empty or out of vocabulary after tokenization.")
        return ""

    input_sentence = preprocess_sentence(klingon_sentence, klingon_tokenizer, max_length_klingon)
    print(f"Tokenized and padded input sentence: {input_sentence.tolist()}")

    # Sum on the tensor itself rather than via .numpy(), so the check does
    # not depend on the tensor living on the CPU.
    if input_sentence.sum().item() == 0:
        print("Warning: Input sentence is empty or out of vocabulary after tokenization.")

    # Drop the leading batch axis, then add a size-1 axis at position 1
    # (presumably giving the (seq_len, batch) layout the model expects).
    source = input_sentence.squeeze(0).unsqueeze(1)

    with torch.no_grad():
        # NOTE(review): the source tensor is also passed as the second
        # argument and 0 as the third (presumably target and teacher-forcing
        # ratio). If Seq2SeqModel seeds decoding from the first target token,
        # this starts from a Klingon id rather than an English start token --
        # confirm against Seq2SeqModel.forward.
        output = model(source, source, 0)
    print(f"Model raw output (logits): {output}")

    # flatten() always yields a 1-D tensor, so tolist() returns a list even
    # when only one position is predicted; a bare squeeze() would collapse
    # that case to a scalar and hand decode() an int.
    output_indices = torch.argmax(output, dim=-1).flatten().tolist()
    english_sentence = english_tokenizer.decode(output_indices, skip_special_tokens=True)
    print(f"Decoded English sentence: {english_sentence}")
    return english_sentence
# Gradio interface
# Sample inputs handed to gr.Interface through `examples=`; each one is wrapped
# in its own single-element list. Trailing comments are the author's glosses.
examples = [
    [phrase]
    for phrase in (
        "nuqneH! DaHjaj SuvwI'",  # Greetings! Today is a good day (to die)
        "tlhIngan Hol vIjatlh",  # I speak Klingon
        "SopwI' SoH",  # You are my dinner
    )
]

# One text box in, one text box out, wired to translate_klingon_to_english.
iface = gr.Interface(
    title="Klingon to English Translation",
    description="Enter text in Klingon and get its translation in English. This translator helps you understand the language of the Klingon species from the Star Trek universe.",
    fn=translate_klingon_to_english,
    inputs=gr.Textbox(
        label="Klingon Phrase",
        lines=2,
        placeholder="Enter Klingon text here...",
    ),
    outputs=gr.Textbox(
        label="English Translation",
        lines=2,
    ),
    examples=examples,
    theme="default",
)

# Launch the app as soon as this script runs.
iface.launch()