import torch
import tensorflow as tf
import gradio as gr
from tokenizers import Tokenizer
from Seq2SeqModel import Seq2SeqModel 
from KTEPreprocess import preprocess 
from Decoder import Decoder
from Encoder import Encoder

# Network hyperparameters. load_state_dict() below is strict by default, so the
# modules built from these values have to line up with the checkpoint contents.
emb_dim, hid_dim, n_layers = 256, 512, 2
# Handed to both Encoder and Decoder; presumably drives dropout layers, which
# model.eval() switches off for inference -- TODO confirm in Encoder/Decoder.
dropout = 0.5
device = torch.device('cpu')

# preprocess() yields the two tokenizers, two vocabulary-size values, the
# padded train/test splits and the maximum sequence lengths, in this order.
(
    klingon_tokenizer,
    english_tokenizer,
    output_dim,
    input_dim,
    klingon_train_padded,
    english_train_input,
    english_train_target,
    klingon_test_padded,
    english_test_input,
    english_test_target,
    max_length_klingon,
    max_length_english,
) = preprocess()

# Klingon -> English networks.
# NOTE(review): the encoder is sized with `output_dim` and the decoder with
# `input_dim`; the names look swapped relative to the usual convention --
# confirm against what preprocess() actually returns in those two slots.
encoder = Encoder(output_dim, emb_dim, hid_dim, n_layers, dropout)
encoder = encoder.to(device)
decoder = Decoder(input_dim, emb_dim, hid_dim, n_layers, dropout)
decoder = decoder.to(device)

# Wrap both halves, restore the trained weights onto `device`, and switch the
# model to evaluation mode.
model = Seq2SeqModel(encoder, decoder, device).to(device)
model.load_state_dict(
    torch.load('BPE_Klingon_to_English_1.pth', map_location=device)
)
model.eval()

# Turn raw text into a fixed-length batch of token ids
def preprocess_sentence(sentence, tokenizer, max_length):
    """Encode ``sentence`` and return it as a one-row tensor of padded ids.

    Args:
        sentence: Raw text to encode.
        tokenizer: Object whose ``encode(sentence)`` result exposes ``.ids``.
        max_length: Forwarded as ``maxlen`` to Keras ``pad_sequences``.

    Returns:
        A ``torch.long`` tensor built from the padded batch, placed on the
        module-level ``device``.
    """
    token_ids = tokenizer.encode(sentence).ids
    # padding='post' appends the fill value after the ids. `truncating` is
    # left at the Keras default ('pre'), so an over-long input loses its
    # leading tokens rather than its trailing ones.
    batch = tf.keras.preprocessing.sequence.pad_sequences(
        [token_ids],
        maxlen=max_length,
        padding='post',
    )
    return torch.tensor(batch, dtype=torch.long, device=device)

# Translate Klingon to English
def translate_klingon_to_english(klingon_sentence):
    """Translate a Klingon sentence to English with the loaded model.

    Each stage (raw input, token ids, raw model output, decoded text) is
    printed to stdout for debugging.

    Args:
        klingon_sentence: Klingon text to translate, e.g. the Gradio textbox
            content.

    Returns:
        The decoded English string, or ``""`` when the input is ``None``,
        empty, or only whitespace.
    """
    print(f"Original Klingon sentence: {klingon_sentence}")

    # Nothing to translate: skip the model instead of decoding pure padding.
    if not klingon_sentence or not klingon_sentence.strip():
        print("Warning: Input sentence is empty or out of vocabulary after tokenization.")
        return ""

    input_sentence = preprocess_sentence(klingon_sentence, klingon_tokenizer, max_length_klingon)
    print(f"Tokenized and padded input sentence: {input_sentence.tolist()}")

    # Test the tensor directly; Tensor.numpy() only works for CPU tensors and
    # would raise if `device` were ever switched to an accelerator.
    if input_sentence.sum().item() == 0:
        print("Warning: Input sentence is empty or out of vocabulary after tokenization.")

    # Drop the leading batch axis added by preprocess_sentence, then re-add a
    # size-1 axis in second position for the model.
    source = input_sentence.squeeze(0).unsqueeze(1)

    with torch.no_grad():
        # NOTE(review): the Klingon source tensor is also passed as the second
        # argument, and the third argument is 0. If Seq2SeqModel.forward uses
        # the common (src, trg, teacher_forcing_ratio) signature, the decoder
        # is seeded from Klingon ids and emits one step per source position --
        # confirm against Seq2SeqModel.forward.
        output = model(source, source, 0)
        print(f"Model raw output (logits): {output}")

    # flatten() rather than squeeze(): squeeze() collapses a single-position
    # result to a 0-d tensor whose tolist() is a bare int, which decode()
    # cannot take. For longer results both give the same flat list.
    output_indices = torch.argmax(output, dim=-1).flatten().tolist()
    english_sentence = english_tokenizer.decode(output_indices, skip_special_tokens=True)
    print(f"Decoded English sentence: {english_sentence}")

    return english_sentence

# Gradio front end: one text box in, one text box out.
# The trailing glosses on the sample phrases are the original author's and
# have not been verified.
examples = [
    [phrase]
    for phrase in (
        "nuqneH! DaHjaj SuvwI'",  # author's gloss: Greetings! Today is a good day (to die)
        "tlhIngan Hol vIjatlh",  # author's gloss: I speak Klingon
        "SopwI' SoH",  # author's gloss: You are my dinner
    )
]

klingon_box = gr.Textbox(
    label="Klingon Phrase",
    lines=2,
    placeholder="Enter Klingon text here...",
)
english_box = gr.Textbox(label="English Translation", lines=2)

iface = gr.Interface(
    fn=translate_klingon_to_english,
    inputs=klingon_box,
    outputs=english_box,
    title="Klingon to English Translation",
    description=(
        "Enter text in Klingon and get its translation in English. "
        "This translator helps you understand the language of the Klingon "
        "species from the Star Trek universe."
    ),
    examples=examples,
    theme="default",
)

# Starts the web server as soon as this module is executed or imported.
iface.launch()