p
committed on
Commit
·
c069c11
1
Parent(s):
4dffb82
max_new_tokens
Browse files
app.py
CHANGED
@@ -1,16 +1,17 @@
|
|
1 |
-
# Based on example code of https://huggingface.co/facebook/
|
2 |
|
3 |
|
4 |
import gradio as gr
|
5 |
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
|
6 |
|
7 |
-
model = M2M100ForConditionalGeneration.from_pretrained(
|
|
|
8 |
|
9 |
-
tokenizer = M2M100Tokenizer.from_pretrained("facebook/
|
10 |
|
11 |
|
12 |
this_description = '''
|
13 |
-
Using facebook/m2m100-12B-avg-5-ckpt pre-trained model.
|
14 |
|
15 |
Chinese(zh)
|
16 |
English(en)
|
@@ -20,6 +21,7 @@ Sinhalese(si)
|
|
20 |
Thai(th)
|
21 |
Vietnamese(vi)
|
22 |
...
|
|
|
23 |
'''
|
24 |
|
25 |
# From facebook/m2m100-12B-avg-5-ckpt
|
@@ -129,7 +131,6 @@ lang_codes = {
|
|
129 |
|
130 |
def m2m_translate(Input_Text, from_lang, to_lang):
|
131 |
tokenizer.src_lang = lang_codes[from_lang]
|
132 |
-
|
133 |
encoded_from_lang = tokenizer(Input_Text, return_tensors="pt")
|
134 |
|
135 |
generated_tokens = model.generate(
|
@@ -143,21 +144,6 @@ def m2m_translate(Input_Text, from_lang, to_lang):
|
|
143 |
return res[0]
|
144 |
|
145 |
|
146 |
-
def m2m_translate2(Input_Text, from_lang, to_lang):
|
147 |
-
tokenizer.src_lang = lang_codes[from_lang]
|
148 |
-
|
149 |
-
encoded_from_lang = tokenizer(Input_Text, return_tensors="pt")
|
150 |
-
|
151 |
-
generated_tokens = model.generate(
|
152 |
-
**encoded_from_lang, forced_bos_token_id=tokenizer.get_lang_id(lang_codes[to_lang]))
|
153 |
-
|
154 |
-
res = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
|
155 |
-
|
156 |
-
return res[0]
|
157 |
-
# if res:
|
158 |
-
# return '\n'.join(res)
|
159 |
-
|
160 |
-
|
161 |
iface = gr.Interface(
|
162 |
fn=m2m_translate,
|
163 |
|
|
|
1 |
+
# Based on example code of https://huggingface.co/facebook/m2m100-12B-avg-5-ckpt
|
2 |
|
3 |
|
4 |
import gradio as gr
|
5 |
from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
|
6 |
|
7 |
+
model = M2M100ForConditionalGeneration.from_pretrained(
|
8 |
+
"facebook/m2m100-12B-avg-5-ckpt")
|
9 |
|
10 |
+
tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100-12B-avg-5-ckpt")
|
11 |
|
12 |
|
13 |
this_description = '''
|
14 |
+
Using facebook/m2m100-12B-avg-5-ckpt pre-trained model.
|
15 |
|
16 |
Chinese(zh)
|
17 |
English(en)
|
|
|
21 |
Thai(th)
|
22 |
Vietnamese(vi)
|
23 |
...
|
24 |
+
|
25 |
'''
|
26 |
|
27 |
# From facebook/m2m100-12B-avg-5-ckpt
|
|
|
131 |
|
132 |
def m2m_translate(Input_Text, from_lang, to_lang):
|
133 |
tokenizer.src_lang = lang_codes[from_lang]
|
|
|
134 |
encoded_from_lang = tokenizer(Input_Text, return_tensors="pt")
|
135 |
|
136 |
generated_tokens = model.generate(
|
|
|
144 |
return res[0]
|
145 |
|
146 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
147 |
iface = gr.Interface(
|
148 |
fn=m2m_translate,
|
149 |
|