Spaces:

DiegoTheExplorar
/

KlingonHeads_Klingon_To_English

Sleeping

App Files Files Community

KlingonHeads_Klingon_To_English / app.py

DiegoTheExplorar

Rename GradioKlingonToEnglish.py to app.py

4b51755 verified 7 months ago

raw

history blame contribute delete

3.05 kB

	import torch
	import tensorflow as tf
	import gradio as gr
	from tokenizers import Tokenizer
	from Seq2SeqModel import Seq2SeqModel
	from KTEPreprocess import preprocess
	from Decoder import Decoder
	from Encoder import Encoder

	# Network hyperparameters handed to the Encoder/Decoder constructors below.
	# They presumably have to match the values used when the checkpoint was
	# trained -- TODO confirm against the training script.
	n_layers = 2
	emb_dim = 256
	hid_dim = 512
	dropout = 0.5  # Reduced dropout to allow more learning (less regularization)
	device = torch.device('cpu')

	# preprocess() returns the tokenizers, the two vocabulary-size values, the
	# train/test arrays and the maximum sequence lengths in one 12-tuple; only
	# the tokenizers, sizes and max_length_klingon are used further down in the
	# visible part of this file.
	(
	    klingon_tokenizer,
	    english_tokenizer,
	    output_dim,
	    input_dim,
	    klingon_train_padded,
	    english_train_input,
	    english_train_target,
	    klingon_test_padded,
	    english_test_input,
	    english_test_target,
	    max_length_klingon,
	    max_length_english,
	) = preprocess()

	# Encoder is sized with output_dim and Decoder with input_dim, exactly as
	# named by preprocess().
	# NOTE(review): the names read as swapped for a Klingon -> English model;
	# verify against KTEPreprocess before touching them.
	encoder = Encoder(output_dim, emb_dim, hid_dim, n_layers, dropout)
	encoder = encoder.to(device)
	decoder = Decoder(input_dim, emb_dim, hid_dim, n_layers, dropout)
	decoder = decoder.to(device)

	# Assemble the full model, restore the trained weights onto the CPU device
	# and switch to evaluation mode before serving requests.
	model = Seq2SeqModel(encoder, decoder, device).to(device)
	checkpoint_path = 'BPE_Klingon_to_English_1.pth'
	state_dict = torch.load(checkpoint_path, map_location=device)
	model.load_state_dict(state_dict)
	model.eval()

	# Preprocess the Klingon input
	def preprocess_sentence(sentence, tokenizer, max_length):
	    """Encode a sentence and pad it into a single-row LongTensor.

	    Args:
	        sentence: Raw text to encode.
	        tokenizer: Object whose ``encode(sentence).ids`` yields the token
	            ids for ``sentence``.
	        max_length: Target length handed to Keras ``pad_sequences`` as
	            ``maxlen``.

	    Returns:
	        A ``torch.long`` tensor holding one padded row, placed on the
	        module-level ``device``.
	    """
	    token_ids = tokenizer.encode(sentence).ids
	    # Zeros are appended after the tokens (padding='post'). Truncation is
	    # left at the Keras default ('pre'), so an over-long input loses its
	    # leading tokens rather than its trailing ones.
	    padded_batch = tf.keras.preprocessing.sequence.pad_sequences(
	        [token_ids],
	        maxlen=max_length,
	        padding='post',
	    )
	    return torch.tensor(padded_batch, dtype=torch.long, device=device)

	# Translate Klingon to English
	def translate_klingon_to_english(klingon_sentence):
	    """Translate one Klingon sentence to English with the loaded model.

	    The sentence is tokenized and padded with ``preprocess_sentence``, run
	    through the module-level ``model`` without gradient tracking, and the
	    highest-scoring token id at each output position is decoded with
	    ``english_tokenizer``. Intermediate values are printed for debugging.

	    Args:
	        klingon_sentence: Klingon text entered by the user.

	    Returns:
	        The decoded English string (special tokens skipped).
	    """
	    print(f"Original Klingon sentence: {klingon_sentence}")

	    input_sentence = preprocess_sentence(klingon_sentence, klingon_tokenizer, max_length_klingon)
	    print(f"Tokenized and padded input sentence: {input_sentence.tolist()}")

	    # An all-zero row means nothing but padding survived tokenization.
	    # This stays best-effort: warn and still run the model. The check is
	    # done in torch so it does not depend on a CPU-only .numpy() round trip.
	    if input_sentence.sum().item() == 0:
	        print("Warning: Input sentence is empty or out of vocabulary after tokenization.")

	    # Drop the leading batch axis, then add a trailing one, turning the
	    # single padded row into a column of token ids.
	    # assumes the model expects (seq_len, batch) input -- TODO confirm
	    model_input = input_sentence.squeeze(0).unsqueeze(1)

	    with torch.no_grad():
	        # NOTE(review): the source sequence is also passed as the second
	        # argument, and the trailing 0 is presumably a teacher-forcing
	        # ratio -- confirm against Seq2SeqModel.forward.
	        output = model(model_input, model_input, 0)
	    print(f"Model raw output (logits): {output}")

	    # Flatten explicitly instead of a bare squeeze(): squeeze() removes
	    # every size-1 dimension, so a length-1 output would collapse to a
	    # 0-dim tensor and tolist() would return a plain int, which decode()
	    # cannot consume. reshape(-1) always yields a flat list of ids.
	    output_indices = torch.argmax(output, dim=-1).reshape(-1).tolist()
	    english_sentence = english_tokenizer.decode(output_indices, skip_special_tokens=True)
	    print(f"Decoded English sentence: {english_sentence}")

	    return english_sentence

	# Gradio interface
	# Sample phrases offered as one-click examples in the UI. The trailing
	# comments are the original author's English glosses (not verified here).
	examples = [
	    ["nuqneH! DaHjaj SuvwI'"],  # Greetings! Today is a good day (to die)
	    ["tlhIngan Hol vIjatlh"],  # I speak Klingon
	    ["SopwI' SoH"],  # You are my dinner
	]

	# Input and output widgets are built first so the Interface call below
	# reads as plain wiring.
	klingon_input_box = gr.Textbox(
	    label="Klingon Phrase",
	    lines=2,
	    placeholder="Enter Klingon text here...",
	)
	english_output_box = gr.Textbox(label="English Translation", lines=2)

	iface = gr.Interface(
	    fn=translate_klingon_to_english,
	    inputs=klingon_input_box,
	    outputs=english_output_box,
	    title="Klingon to English Translation",
	    description="Enter text in Klingon and get its translation in English. This translator helps you understand the language of the Klingon species from the Star Trek universe.",
	    examples=examples,
	    theme="default",
	)

	# Start the web app as soon as the script is executed.
	iface.launch()