DiegoTheExplorar
committed on
Rename GradioKlingonToEnglish.py to app.py
Browse files
GradioKlingonToEnglish.py → app.py
RENAMED
@@ -1,75 +1,75 @@
|
|
1 |
-
import torch
|
2 |
-
import tensorflow as tf
|
3 |
-
import gradio as gr
|
4 |
-
from tokenizers import Tokenizer
|
5 |
-
from Seq2SeqModel import Seq2SeqModel
|
6 |
-
from KTEPreprocess import preprocess
|
7 |
-
from Decoder import Decoder
|
8 |
-
from Encoder import Encoder
|
9 |
-
|
10 |
-
# Network hyperparameters handed to both the encoder and the decoder below.
n_layers, emb_dim, hid_dim = 2, 256, 512
dropout = 0.5  # Reduced dropout to allow more learning (less regularization)
device = torch.device('cpu')  # every module and tensor in this script is placed here

# preprocess() returns one 12-item tuple. This script itself only reads the two
# tokenizers, output_dim/input_dim and max_length_klingon; the remaining names
# are unpacked to match the tuple's shape.
(
    klingon_tokenizer, english_tokenizer, output_dim, input_dim,
    klingon_train_padded, english_train_input, english_train_target,
    klingon_test_padded, english_test_input, english_test_target,
    max_length_klingon, max_length_english,
) = preprocess()

# Klingon -> English direction: the encoder is sized with output_dim and the
# decoder with input_dim; both share the remaining constructor arguments.
_rnn_args = (emb_dim, hid_dim, n_layers, dropout)
encoder = Encoder(output_dim, *_rnn_args).to(device)
decoder = Decoder(input_dim, *_rnn_args).to(device)

# Wrap encoder and decoder in the seq2seq model and place it on `device`.
model = Seq2SeqModel(encoder, decoder, device).to(device)
|
28 |
-
model.load_state_dict(torch.load(
|
29 |
-
model.eval()
|
30 |
-
|
31 |
-
# Preprocess the Klingon input
|
32 |
-
def preprocess_sentence(sentence, tokenizer, max_length):
    """Encode ``sentence`` and return it as a padded tensor of token ids.

    The ids produced by ``tokenizer.encode`` are wrapped in a one-element
    batch and padded at the end (``padding='post'``) up to ``max_length``
    with Keras ``pad_sequences``; any truncation follows that function's
    defaults. The result is converted to a ``torch.long`` tensor and moved
    to the module-level ``device``.
    """
    token_ids = tokenizer.encode(sentence).ids
    single_batch = [token_ids]
    padded = tf.keras.preprocessing.sequence.pad_sequences(
        single_batch,
        maxlen=max_length,
        padding='post',
    )
    as_tensor = torch.tensor(padded, dtype=torch.long)
    return as_tensor.to(device)
|
36 |
-
|
37 |
-
# Translate Klingon to English
|
38 |
-
def translate_klingon_to_english(klingon_sentence):
    """Translate one Klingon sentence into English with the loaded model.

    The sentence is tokenized and padded by ``preprocess_sentence``, run
    through the module-level ``model`` under ``torch.no_grad()``, and the
    highest-scoring token id at each output position is decoded with
    ``english_tokenizer``. Each stage is echoed to stdout.

    Args:
        klingon_sentence: The Klingon text to translate.

    Returns:
        The decoded English string, with special tokens skipped.
    """
    print(f"Original Klingon sentence: {klingon_sentence}")

    input_sentence = preprocess_sentence(klingon_sentence, klingon_tokenizer, max_length_klingon)
    print(f"Tokenized and padded input sentence: {input_sentence.tolist()}")

    # Summing on the tensor itself (instead of via .numpy()) keeps this check
    # working even if `device` is ever switched away from the CPU.
    if input_sentence.sum().item() == 0:
        # Best effort: warn, but still run the model on the all-padding input.
        print("Warning: Input sentence is empty or out of vocabulary after tokenization.")

    # preprocess_sentence pads a one-sentence batch, so drop that leading axis
    # and add a size-1 axis in second position instead.
    source = input_sentence.squeeze(0).unsqueeze(1)

    with torch.no_grad():
        # The source tensor is also passed as the second argument; the trailing
        # 0 is presumably a teacher-forcing ratio -- TODO confirm against
        # Seq2SeqModel.forward.
        output = model(source, source, 0)
        print(f"Model raw output (logits): {output}")

    # reshape(-1) always yields a flat list of ids; squeeze() would collapse a
    # single-position output to a bare int, which decode() cannot consume.
    output_indices = torch.argmax(output, dim=-1).reshape(-1).tolist()
    english_sentence = english_tokenizer.decode(output_indices, skip_special_tokens=True)
    print(f"Decoded English sentence: {english_sentence}")

    return english_sentence
|
57 |
-
|
58 |
-
# Gradio interface
|
59 |
-
# Sample inputs offered in the UI; the trailing glosses are the original
# author's and have not been verified.
examples = [
    ["nuqneH! DaHjaj SuvwI'"],  # Greetings! Today is a good day (to die)
    ["tlhIngan Hol vIjatlh"],  # I speak Klingon
    ["SopwI' SoH"],  # You are my dinner
]

# Build the two text boxes first so the Interface call stays short.
_klingon_box = gr.Textbox(
    label="Klingon Phrase",
    lines=2,
    placeholder="Enter Klingon text here...",
)
_english_box = gr.Textbox(label="English Translation", lines=2)

iface = gr.Interface(
    fn=translate_klingon_to_english,
    inputs=_klingon_box,
    outputs=_english_box,
    title="Klingon to English Translation",
    description="Enter text in Klingon and get its translation in English. This translator helps you understand the language of the Klingon species from the Star Trek universe.",
    examples=examples,
    theme="default",
)

# Launch the web UI as soon as the script is executed.
iface.launch()
|
|
|
1 |
+
import torch
|
2 |
+
import tensorflow as tf
|
3 |
+
import gradio as gr
|
4 |
+
from tokenizers import Tokenizer
|
5 |
+
from Seq2SeqModel import Seq2SeqModel
|
6 |
+
from KTEPreprocess import preprocess
|
7 |
+
from Decoder import Decoder
|
8 |
+
from Encoder import Encoder
|
9 |
+
|
10 |
+
# Network hyperparameters handed to both the encoder and the decoder below.
n_layers, emb_dim, hid_dim = 2, 256, 512
dropout = 0.5  # Reduced dropout to allow more learning (less regularization)
device = torch.device('cpu')  # every module and tensor in this script is placed here

# preprocess() returns one 12-item tuple. This script itself only reads the two
# tokenizers, output_dim/input_dim and max_length_klingon; the remaining names
# are unpacked to match the tuple's shape.
(
    klingon_tokenizer, english_tokenizer, output_dim, input_dim,
    klingon_train_padded, english_train_input, english_train_target,
    klingon_test_padded, english_test_input, english_test_target,
    max_length_klingon, max_length_english,
) = preprocess()

# Klingon -> English direction: the encoder is sized with output_dim and the
# decoder with input_dim; both share the remaining constructor arguments.
_rnn_args = (emb_dim, hid_dim, n_layers, dropout)
encoder = Encoder(output_dim, *_rnn_args).to(device)
decoder = Decoder(input_dim, *_rnn_args).to(device)

# Wrap encoder and decoder in the seq2seq model, restore the saved weights
# onto `device`, then switch the model to evaluation mode.
model = Seq2SeqModel(encoder, decoder, device).to(device)
_checkpoint_path = 'BPE_Klingon_to_English_1.pth'
model.load_state_dict(torch.load(_checkpoint_path, map_location=device))
model.eval()
|
30 |
+
|
31 |
+
# Preprocess the Klingon input
|
32 |
+
def preprocess_sentence(sentence, tokenizer, max_length):
    """Encode ``sentence`` and return it as a padded tensor of token ids.

    The ids produced by ``tokenizer.encode`` are wrapped in a one-element
    batch and padded at the end (``padding='post'``) up to ``max_length``
    with Keras ``pad_sequences``; any truncation follows that function's
    defaults. The result is converted to a ``torch.long`` tensor and moved
    to the module-level ``device``.
    """
    token_ids = tokenizer.encode(sentence).ids
    single_batch = [token_ids]
    padded = tf.keras.preprocessing.sequence.pad_sequences(
        single_batch,
        maxlen=max_length,
        padding='post',
    )
    as_tensor = torch.tensor(padded, dtype=torch.long)
    return as_tensor.to(device)
|
36 |
+
|
37 |
+
# Translate Klingon to English
|
38 |
+
def translate_klingon_to_english(klingon_sentence):
    """Translate one Klingon sentence into English with the loaded model.

    The sentence is tokenized and padded by ``preprocess_sentence``, run
    through the module-level ``model`` under ``torch.no_grad()``, and the
    highest-scoring token id at each output position is decoded with
    ``english_tokenizer``. Each stage is echoed to stdout.

    Args:
        klingon_sentence: The Klingon text to translate.

    Returns:
        The decoded English string, with special tokens skipped.
    """
    print(f"Original Klingon sentence: {klingon_sentence}")

    input_sentence = preprocess_sentence(klingon_sentence, klingon_tokenizer, max_length_klingon)
    print(f"Tokenized and padded input sentence: {input_sentence.tolist()}")

    # Summing on the tensor itself (instead of via .numpy()) keeps this check
    # working even if `device` is ever switched away from the CPU.
    if input_sentence.sum().item() == 0:
        # Best effort: warn, but still run the model on the all-padding input.
        print("Warning: Input sentence is empty or out of vocabulary after tokenization.")

    # preprocess_sentence pads a one-sentence batch, so drop that leading axis
    # and add a size-1 axis in second position instead.
    source = input_sentence.squeeze(0).unsqueeze(1)

    with torch.no_grad():
        # The source tensor is also passed as the second argument; the trailing
        # 0 is presumably a teacher-forcing ratio -- TODO confirm against
        # Seq2SeqModel.forward.
        output = model(source, source, 0)
        print(f"Model raw output (logits): {output}")

    # reshape(-1) always yields a flat list of ids; squeeze() would collapse a
    # single-position output to a bare int, which decode() cannot consume.
    output_indices = torch.argmax(output, dim=-1).reshape(-1).tolist()
    english_sentence = english_tokenizer.decode(output_indices, skip_special_tokens=True)
    print(f"Decoded English sentence: {english_sentence}")

    return english_sentence
|
57 |
+
|
58 |
+
# Gradio interface
|
59 |
+
# Sample inputs offered in the UI; the trailing glosses are the original
# author's and have not been verified.
examples = [
    ["nuqneH! DaHjaj SuvwI'"],  # Greetings! Today is a good day (to die)
    ["tlhIngan Hol vIjatlh"],  # I speak Klingon
    ["SopwI' SoH"],  # You are my dinner
]

# Build the two text boxes first so the Interface call stays short.
_klingon_box = gr.Textbox(
    label="Klingon Phrase",
    lines=2,
    placeholder="Enter Klingon text here...",
)
_english_box = gr.Textbox(label="English Translation", lines=2)

iface = gr.Interface(
    fn=translate_klingon_to_english,
    inputs=_klingon_box,
    outputs=_english_box,
    title="Klingon to English Translation",
    description="Enter text in Klingon and get its translation in English. This translator helps you understand the language of the Klingon species from the Star Trek universe.",
    examples=examples,
    theme="default",
)

# Launch the web UI as soon as the script is executed.
iface.launch()
|