"""Gradio demo that translates Klingon text to English with a trained seq2seq model.

At import time the script builds the encoder/decoder, loads the trained
weights from ``CHECKPOINT_PATH`` onto the CPU, and launches a Gradio
interface wired to ``translate_klingon_to_english``.
"""

import torch
import tensorflow as tf
import gradio as gr
from tokenizers import Tokenizer

from Seq2SeqModel import Seq2SeqModel
from KTEPreprocess import preprocess
from Decoder import Decoder
from Encoder import Encoder

# Model hyperparameters. The checkpoint is loaded into a model built from
# these values, so they presumably have to match the training configuration.
n_layers = 2
emb_dim = 256
hid_dim = 512
dropout = 0.5  # Reduced dropout to allow more learning (less regularization)

device = torch.device('cpu')

# Trained weights for the Klingon -> English model.
CHECKPOINT_PATH = 'BPE_Klingon_to_English_1.pth'

# Load preprocessed data and model parameters.
(klingon_tokenizer, english_tokenizer, output_dim, input_dim,
 klingon_train_padded, english_train_input, english_train_target,
 klingon_test_padded, english_test_input, english_test_target,
 max_length_klingon, max_length_english) = preprocess()

# Initialize encoder and decoder for Klingon to English.
# NOTE(review): the encoder is sized with `output_dim` and the decoder with
# `input_dim`; confirm this matches the order in which preprocess() returns
# the two vocabulary sizes.
encoder = Encoder(output_dim, emb_dim, hid_dim, n_layers, dropout).to(device)
decoder = Decoder(input_dim, emb_dim, hid_dim, n_layers, dropout).to(device)

# Initialize and load the Seq2SeqModel.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source.
model = Seq2SeqModel(encoder, decoder, device).to(device)
model.load_state_dict(torch.load(CHECKPOINT_PATH, map_location=device))
model.eval()


def preprocess_sentence(sentence: str, tokenizer, max_length: int) -> torch.Tensor:
    """Tokenize *sentence* and pad it to *max_length* token ids.

    Args:
        sentence: Raw input text.
        tokenizer: Object whose ``encode(text)`` result exposes ``.ids``.
        max_length: Length the id sequence is padded to.

    Returns:
        A ``torch.long`` tensor on ``device`` holding one padded sequence
        (a single-row batch).

    Note:
        Padding is appended after the tokens (``padding='post'``).
        ``pad_sequences`` is called with its default truncation mode, so a
        sentence longer than *max_length* is shortened by Keras' default
        rule (documented as dropping leading tokens).
    """
    token_ids = tokenizer.encode(sentence).ids
    padded = tf.keras.preprocessing.sequence.pad_sequences(
        [token_ids], maxlen=max_length, padding='post'
    )
    return torch.tensor(padded, dtype=torch.long).to(device)


def translate_klingon_to_english(klingon_sentence: str) -> str:
    """Translate a Klingon sentence to English using the loaded model.

    Args:
        klingon_sentence: Klingon text entered by the user.

    Returns:
        The decoded English text, with special tokens skipped.
    """
    print(f"Original Klingon sentence: {klingon_sentence}")

    input_sentence = preprocess_sentence(
        klingon_sentence, klingon_tokenizer, max_length_klingon
    )
    print(f"Tokenized and padded input sentence: {input_sentence.tolist()}")

    # An all-zero row means no token survived tokenization (only padding is
    # left). Token ids are non-negative, so this matches a "sum == 0" test
    # without a NumPy round trip.
    if not (input_sentence != 0).any():
        print("Warning: Input sentence is empty or out of vocabulary after tokenization.")

    # Drop the batch axis and re-add it as the second axis, giving a column
    # of token ids; presumably the model expects (seq_len, batch) -- confirm
    # against Seq2SeqModel.forward.
    source = input_sentence.squeeze(0).unsqueeze(1)

    with torch.no_grad():
        # The source doubles as the target argument; the trailing 0 is
        # presumably the teacher-forcing ratio (verify in Seq2SeqModel).
        output = model(source, source, 0)
    print(f"Model raw output (logits): {output}")

    # Flatten with reshape(-1) rather than squeeze(): squeeze() collapses a
    # single-element result to a scalar, and .tolist() would then yield an
    # int instead of the list that decode() needs.
    output_indices = torch.argmax(output, dim=-1).reshape(-1).tolist()
    english_sentence = english_tokenizer.decode(output_indices, skip_special_tokens=True)
    print(f"Decoded English sentence: {english_sentence}")

    return english_sentence


# Gradio interface
examples = [
    ["nuqneH! DaHjaj SuvwI'"],  # Greetings! Today is a good day (to die)
    ["tlhIngan Hol vIjatlh"],  # I speak Klingon
    ["SopwI' SoH"],  # You are my dinner
]

iface = gr.Interface(
    fn=translate_klingon_to_english,
    inputs=gr.Textbox(label="Klingon Phrase", lines=2, placeholder="Enter Klingon text here..."),
    outputs=gr.Textbox(label="English Translation", lines=2),
    title="Klingon to English Translation",
    description=(
        "Enter text in Klingon and get its translation in English. "
        "This translator helps you understand the language of the Klingon species from the Star Trek universe."
    ),
    examples=examples,
    theme="default",
)

iface.launch()