import functools

import gradio as gr
import nltk
import requests
from nltk import sent_tokenize
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
from transformers import pipeline


# Text2text-generation pipeline for the translation checkpoint, built eagerly
# when the module is imported.
pipe = pipeline(
    "text2text-generation",
    model="SnypzZz/Llama2-13b-Language-translate",
    use_fast=False,
)

# Module-level tokenizer configured with English ("en_XX") as source language.
tokenizer = MBart50TokenizerFast.from_pretrained(
    "SnypzZz/Llama2-13b-Language-translate", src_lang="en_XX"
)

# The MBart model itself is loaded lazily by load_model(); until then `model`
# stays None and `model_loaded` stays False.
model = None
model_loaded = False

# Token sent as the Bearer credential on Inference API requests.
from secrets_file import api_token_header


def load_model():
    """Load the MBart translation model into the module-level ``model``.

    The weights are fetched only when ``model`` is still ``None``; a later
    call returns the instance that is already in memory instead of loading
    the checkpoint again. ``model_loaded`` is set to ``True`` as a side
    effect.

    Returns:
        The ``MBartForConditionalGeneration`` instance stored in ``model``.
    """
    global model, model_loaded

    if model is None:
        model = MBartForConditionalGeneration.from_pretrained(
            "SnypzZz/Llama2-13b-Language-translate"
        )
    model_loaded = True
    return model


# Hosted Inference API endpoint of the dedicated English -> Bengali model.
_BANGLA_NMT_API_URL = (
    "https://api-inference.huggingface.co/models/csebuetnlp/banglat5_nmt_en_bn"
)

# Upper bound, in seconds, on one Inference API round trip so that a stalled
# request cannot block the caller indefinitely.
_INFERENCE_API_TIMEOUT = 60


@functools.lru_cache(maxsize=None)
def _tokenizer_for(src_lang_code):
    """Return the MBart-50 tokenizer configured for ``src_lang_code``.

    Cached per source language so the tokenizer is loaded once rather than
    on every translated sentence.
    """
    return MBart50TokenizerFast.from_pretrained(
        "SnypzZz/Llama2-13b-Language-translate", src_lang=src_lang_code
    )


def _translate_english_to_bengali(text):
    """Translate English ``text`` to Bengali through the Inference API."""
    headers = {"Authorization": f"Bearer {api_token_header}"}
    response = requests.post(
        _BANGLA_NMT_API_URL,
        headers=headers,
        json={"inputs": text},
        timeout=_INFERENCE_API_TIMEOUT,
    )
    # Fail with the HTTP error itself rather than with an indexing error on
    # an unexpected payload further down.
    response.raise_for_status()
    output = response.json()
    print(output)
    return output[0]['translation_text']


def translation(text, dest_lang, dest_lang_code, src_lang_code):
    """Translate one piece of text between two supported languages.

    English -> Bengali is delegated to a remote model through the Inference
    API; every other pair is translated locally with the MBart model, which
    is loaded on first use.

    Args:
        text: The text to translate.
        dest_lang: Display name of the destination language (e.g. "Bengali").
        dest_lang_code: MBart language code of the destination language.
        src_lang_code: MBart language code of the source language.

    Returns:
        The translated text, or a fixed hint message when source and
        destination codes are identical.

    Raises:
        requests.HTTPError: If the Inference API answers with an error status.
        requests.Timeout: If the Inference API does not answer in time.
    """
    global model

    if dest_lang_code == src_lang_code:
        return "Please select different languages to translate between."

    if dest_lang == "Bengali" and src_lang_code == "en_XX":
        return _translate_english_to_bengali(text)

    if model is None:
        model = load_model()

    tokenizer = _tokenizer_for(src_lang_code)
    model_inputs = tokenizer(text, return_tensors="pt")

    # Forcing the first generated token to the destination language code is
    # what selects the output language.
    generated_tokens = model.generate(
        **model_inputs,
        forced_bos_token_id=tokenizer.lang_code_to_id[dest_lang_code],
    )
    output = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
    print(output)
    return output[0]


def main_translation(text, dest_lang_code, src_lang_code):
    """Translate ``text`` sentence by sentence and return the joined result.

    Args:
        text: The text to translate; it is split into sentences first.
        dest_lang_code: Language code of the destination language.
        src_lang_code: Language code of the source language.

    Returns:
        A dict with a single ``"output"`` key holding the translated
        sentences joined by single spaces. Blank input yields an empty
        string; identical source and destination codes yield a hint message.

    Raises:
        KeyError: If either language code is not one of the supported codes.
    """
    codes = {"en_XX":"English","bn_IN":"Bengali", "en_GB":"English","gu_IN":"Gujarati","hi_IN":"Hindi","ta_IN":"Tamil","te_IN":"Telugu","mr_IN":"Marathi"}

    # Reject unsupported codes up front (destination first, then source).
    for code in (dest_lang_code, src_lang_code):
        if code not in codes:
            raise KeyError(code)
    dest_lang = codes[dest_lang_code]

    # Nothing to translate: skip tokenization and the model entirely.
    if not text or not text.strip():
        return {"output": ""}

    # Report the problem once instead of once per sentence.
    if dest_lang_code == src_lang_code:
        return {"output": "Please select different languages to translate between."}

    translated = [
        translation(sentence, dest_lang, dest_lang_code, src_lang_code)
        for sentence in sent_tokenize(text)
    ]
    # Sentence splitting drops the whitespace between sentences, so put a
    # separator back when re-assembling the text.
    return {"output": " ".join(translated)}


def test(text, src, dest):
    """Translate ``text`` from language code ``src`` to language code ``dest``.

    ``main_translation`` takes the destination code before the source code,
    so the two are passed in swapped order relative to this signature.

    Returns:
        The value stored under the ``"output"`` key of the translation result.
    """
    result = main_translation(text, dest, src)
    return result['output']

# Language options offered by both dropdowns; the second element of each pair
# is the language code handed to the callback.
_LANGUAGE_CHOICES = (
    ("English", "en_XX"),
    ("Hindi", "hi_IN"),
    ("Bengali", "bn_IN"),
    ("Gujarati", "gu_IN"),
    ("Tamil", "ta_IN"),
    ("Telugu", "te_IN"),
    ("Marathi", "mr_IN"),
)

# Interface inputs map positionally onto test(text, src, dest).
demo = gr.Interface(
    test,
    [
        "textbox",
        gr.Dropdown(
            list(_LANGUAGE_CHOICES),
            label="Source",
            info="Select the Source Language!",
        ),
        gr.Dropdown(
            list(_LANGUAGE_CHOICES),
            label="Destination",
            info="Select the Destination Language!",
        ),
    ],
    outputs=["textbox"],
)

demo.launch()