musfiqdehan
committed on
Commit
·
03e97cd
1
Parent(s):
6b83453
Refactor alignment and translation process
Browse files
app.py
CHANGED
@@ -2,25 +2,33 @@ import gradio as gr
|
|
2 |
from gradio_rich_textbox import RichTextbox
|
3 |
|
4 |
from helper.text_preprocess import space_punc
|
5 |
-
from helper.alignment_mappers import select_model
|
6 |
-
from helper.
|
7 |
-
from helper.translators import select_translator
|
8 |
|
9 |
|
10 |
-
def
|
11 |
"""
|
12 |
Bangla PoS Tagger
|
13 |
"""
|
|
|
|
|
|
|
14 |
|
15 |
src = space_punc(src)
|
16 |
|
17 |
-
|
|
|
|
|
18 |
|
19 |
model_name = select_model(model_name)
|
20 |
|
21 |
-
|
|
|
|
|
|
|
|
|
22 |
|
23 |
-
return
|
24 |
|
25 |
|
26 |
with gr.Blocks(css="styles.css") as demo:
|
@@ -31,20 +39,121 @@ with gr.Blocks(css="styles.css") as demo:
|
|
31 |
with gr.Column():
|
32 |
inputs = [
|
33 |
gr.Textbox(
|
34 |
-
label="Enter
|
35 |
-
placeholder="বাংলা বাক্য লিখুন"
|
36 |
),
|
37 |
gr.Dropdown(
|
38 |
-
choices=
|
39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
),
|
41 |
gr.Dropdown(
|
42 |
choices=["Google-mBERT (Base-Multilingual)", "Neulab-AwesomeAlign (Bn-En-0.5M)", "BUET-BanglaBERT (Large)", "SagorSarker-BanglaBERT (Base)", "SentenceTransformers-LaBSE (Multilingual)"],
|
43 |
label="Select a Model"
|
44 |
-
),
|
45 |
-
gr.Dropdown(
|
46 |
-
choices=["spaCy", "NLTK", "Flair", "TextBlob"],
|
47 |
-
label="Select a PoS Tagger"
|
48 |
)
|
49 |
]
|
50 |
|
@@ -54,54 +163,27 @@ with gr.Blocks(css="styles.css") as demo:
|
|
54 |
with gr.Column():
|
55 |
outputs = [
|
56 |
gr.Textbox(label="English Translation"),
|
57 |
-
RichTextbox(label="
|
58 |
-
gr.Textbox(label="
|
59 |
]
|
60 |
|
61 |
-
btn.click(
|
62 |
|
63 |
gr.Examples([
|
64 |
[
|
65 |
"বাংলাদেশ দক্ষিণ এশিয়ার একটি সার্বভৌম রাষ্ট্র।",
|
66 |
-
"
|
67 |
-
"
|
68 |
-
"NLTK"
|
69 |
],
|
70 |
[
|
71 |
"বাংলাদেশের সংবিধানিক নাম কি?",
|
72 |
-
"
|
73 |
"Google-mBERT (Base-Multilingual)",
|
74 |
-
"spaCy"
|
75 |
],
|
76 |
[
|
77 |
"বাংলাদেশের সাংবিধানিক নাম গণপ্রজাতন্ত্রী বাংলাদেশ।",
|
78 |
-
"
|
79 |
"Google-mBERT (Base-Multilingual)",
|
80 |
-
"TextBlob"
|
81 |
-
],
|
82 |
-
[
|
83 |
-
"তিনজনের কেউই বাবার পথ ধরে প্রযুক্তি দুনিয়ায় হাঁটেননি।",
|
84 |
-
"Google",
|
85 |
-
"Neulab-AwesomeAlign (Bn-En-0.5M)",
|
86 |
-
"spaCy"
|
87 |
-
],
|
88 |
-
[
|
89 |
-
"তিনজনের কেউই বাবার পথ ধরে প্রযুক্তি দুনিয়ায় হাঁটেননি।",
|
90 |
-
"BanglaNMT",
|
91 |
-
"Google-mBERT (Base-Multilingual)",
|
92 |
-
"spaCy"
|
93 |
-
],
|
94 |
-
[
|
95 |
-
"তিনজনের কেউই বাবার পথ ধরে প্রযুক্তি দুনিয়ায় হাঁটেননি।",
|
96 |
-
"MyMemory",
|
97 |
-
"Google-mBERT (Base-Multilingual)",
|
98 |
-
"spaCy"
|
99 |
-
],
|
100 |
-
[
|
101 |
-
"বিশ্বের আরও একটি সেরা ক্লাব।",
|
102 |
-
"Google",
|
103 |
-
"Neulab-AwesomeAlign (Bn-En-0.5M)",
|
104 |
-
"Flair"
|
105 |
]
|
106 |
|
107 |
], inputs)
|
|
|
2 |
from gradio_rich_textbox import RichTextbox
|
3 |
|
4 |
from helper.text_preprocess import space_punc
|
5 |
+
from helper.alignment_mappers import select_model, get_alignments_table
|
6 |
+
from helper.translators import select_target_lang_code, google_translation
|
|
|
7 |
|
8 |
|
9 |
+
def process_alignments(src, language_name, model_name):
    """
    Translate a sentence and build its source-to-target alignment table.

    Args:
        src: Sentence to translate and align.
        language_name: Target language name; resolved to a language code
            through ``select_target_lang_code``.
        model_name: Alignment model label; resolved through ``select_model``.

    Returns:
        A ``(translation, html_table, alignment_accuracy)`` tuple: the result
        of ``google_translation`` followed by the two values returned by
        ``get_alignments_table``.
    """
    # NOTE(review): presumably separates punctuation from words with spaces
    # before translation/alignment -- confirm in helper.text_preprocess.
    src = space_punc(src)

    # Keep the language code and the translated text under separate names;
    # they are different kinds of values.
    target_lang_code = select_target_lang_code(language_name)
    translation = google_translation(src, target_lang_code)

    model_name = select_model(model_name)

    html_table, alignment_accuracy = get_alignments_table(
        source=src,
        target=translation,
        model_name=model_name,
    )

    return translation, html_table, alignment_accuracy
|
32 |
|
33 |
|
34 |
with gr.Blocks(css="styles.css") as demo:
|
|
|
39 |
with gr.Column():
|
40 |
inputs = [
|
41 |
gr.Textbox(
|
42 |
+
label="Enter a Sentence (Auto Detect Language)",
|
|
|
43 |
),
|
44 |
gr.Dropdown(
|
45 |
+
choices=
|
46 |
+
[
|
47 |
+
"Afrikaans",
|
48 |
+
"Albanian",
|
49 |
+
"Arabic",
|
50 |
+
"Aragonese",
|
51 |
+
"Armenian",
|
52 |
+
"Asturian",
|
53 |
+
"Azerbaijani",
|
54 |
+
"Bashkir",
|
55 |
+
"Basque",
|
56 |
+
"Bavarian",
|
57 |
+
"Belarusian",
|
58 |
+
"Bengali",
|
59 |
+
"Bishnupriya Manipuri",
|
60 |
+
"Bosnian",
|
61 |
+
"Breton",
|
62 |
+
"Bulgarian",
|
63 |
+
"Burmese",
|
64 |
+
"Catalan",
|
65 |
+
"Cebuano",
|
66 |
+
"Chechen",
|
67 |
+
"Chinese (Simplified)",
|
68 |
+
"Chinese (Traditional)",
|
69 |
+
"Chuvash",
|
70 |
+
"Croatian",
|
71 |
+
"Czech",
|
72 |
+
"Danish",
|
73 |
+
"Dutch",
|
74 |
+
"English",
|
75 |
+
"Estonian",
|
76 |
+
"Finnish",
|
77 |
+
"French",
|
78 |
+
"Galician",
|
79 |
+
"Georgian",
|
80 |
+
"German",
|
81 |
+
"Greek",
|
82 |
+
"Gujarati",
|
83 |
+
"Haitian",
|
84 |
+
"Hebrew",
|
85 |
+
"Hindi",
|
86 |
+
"Hungarian",
|
87 |
+
"Icelandic",
|
88 |
+
"Ido",
|
89 |
+
"Indonesian",
|
90 |
+
"Irish",
|
91 |
+
"Italian",
|
92 |
+
"Japanese",
|
93 |
+
"Javanese",
|
94 |
+
"Kannada",
|
95 |
+
"Kazakh",
|
96 |
+
"Kirghiz",
|
97 |
+
"Korean",
|
98 |
+
"Latin",
|
99 |
+
"Latvian",
|
100 |
+
"Lithuanian",
|
101 |
+
"Lombard",
|
102 |
+
"Low Saxon",
|
103 |
+
"Luxembourgish",
|
104 |
+
"Macedonian",
|
105 |
+
"Malagasy",
|
106 |
+
"Malay",
|
107 |
+
"Malayalam",
|
108 |
+
"Marathi",
|
109 |
+
"Minangkabau",
|
110 |
+
"Nepali",
|
111 |
+
"Newar",
|
112 |
+
"Norwegian (Bokmal)",
|
113 |
+
"Norwegian (Nynorsk)",
|
114 |
+
"Occitan",
|
115 |
+
"Persian (Farsi)",
|
116 |
+
"Piedmontese",
|
117 |
+
"Polish",
|
118 |
+
"Portuguese",
|
119 |
+
"Punjabi",
|
120 |
+
"Romanian",
|
121 |
+
"Russian",
|
122 |
+
"Scots",
|
123 |
+
"Serbian",
|
124 |
+
"Serbo-Croatian",
|
125 |
+
"Sicilian",
|
126 |
+
"Slovak",
|
127 |
+
"Slovenian",
|
128 |
+
"South Azerbaijani",
|
129 |
+
"Spanish",
|
130 |
+
"Sundanese",
|
131 |
+
"Swahili",
|
132 |
+
"Swedish",
|
133 |
+
"Tagalog",
|
134 |
+
"Tajik",
|
135 |
+
"Tamil",
|
136 |
+
"Tatar",
|
137 |
+
"Telugu",
|
138 |
+
"Turkish",
|
139 |
+
"Ukrainian",
|
140 |
+
"Urdu",
|
141 |
+
"Uzbek",
|
142 |
+
"Vietnamese",
|
143 |
+
"Volapük",
|
144 |
+
"Waray-Waray",
|
145 |
+
"Welsh",
|
146 |
+
"West Frisian",
|
147 |
+
"Western Punjabi",
|
148 |
+
"Yoruba",
|
149 |
+
"Thai",
|
150 |
+
"Mongolian"
|
151 |
+
],
|
152 |
+
label="Select Target Language"
|
153 |
),
|
154 |
gr.Dropdown(
|
155 |
choices=["Google-mBERT (Base-Multilingual)", "Neulab-AwesomeAlign (Bn-En-0.5M)", "BUET-BanglaBERT (Large)", "SagorSarker-BanglaBERT (Base)", "SentenceTransformers-LaBSE (Multilingual)"],
|
156 |
label="Select a Model"
|
|
|
|
|
|
|
|
|
157 |
)
|
158 |
]
|
159 |
|
|
|
163 |
with gr.Column():
|
164 |
outputs = [
|
165 |
gr.Textbox(label="English Translation"),
|
166 |
+
RichTextbox(label="Alignments Mapping (Source to Target)"),
|
167 |
+
gr.Textbox(label="Alignment Accuracy (Based on Unknown(UNK) Tags)")
|
168 |
]
|
169 |
|
170 |
+
btn.click(process_alignments, inputs, outputs)
|
171 |
|
172 |
gr.Examples([
|
173 |
[
|
174 |
"বাংলাদেশ দক্ষিণ এশিয়ার একটি সার্বভৌম রাষ্ট্র।",
|
175 |
+
"English",
|
176 |
+
"SentenceTransformers-LaBSE (Multilingual)",
|
|
|
177 |
],
|
178 |
[
|
179 |
"বাংলাদেশের সংবিধানিক নাম কি?",
|
180 |
+
"English",
|
181 |
"Google-mBERT (Base-Multilingual)",
|
|
|
182 |
],
|
183 |
[
|
184 |
"বাংলাদেশের সাংবিধানিক নাম গণপ্রজাতন্ত্রী বাংলাদেশ।",
|
185 |
+
"Hindi",
|
186 |
"Google-mBERT (Base-Multilingual)",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
187 |
]
|
188 |
|
189 |
], inputs)
|