"""Gradio demo that translates text between English and several Indian languages.

English -> Bengali requests are sent to a hosted Hugging Face inference
endpoint; every other language pair is translated locally with an MBart-50
checkpoint that is loaded lazily on first use.
"""
import functools

import gradio as gr
import nltk
import requests
from nltk import sent_tokenize
from transformers import MBart50TokenizerFast, MBartForConditionalGeneration, pipeline

from secrets_file import api_token_header

MODEL_NAME = "SnypzZz/Llama2-13b-Language-translate"
BANGLA_API_URL = "https://api-inference.huggingface.co/models/csebuetnlp/banglat5_nmt_en_bn"
# Upper bound on how long a single hosted-inference request may block.
API_TIMEOUT_SECONDS = 60
SAME_LANGUAGE_MESSAGE = "Please select different languages to translate between."

# Language code -> display name for every code the app accepts.
LANGUAGE_NAMES = {
    "en_XX": "English",
    "bn_IN": "Bengali",
    "en_GB": "English",
    "gu_IN": "Gujarati",
    "hi_IN": "Hindi",
    "ta_IN": "Tamil",
    "te_IN": "Telugu",
    "mr_IN": "Marathi",
}

# (label, value) pairs shared by the source and destination dropdowns.
LANGUAGE_CHOICES = [
    ("English", "en_XX"),
    ("Hindi", "hi_IN"),
    ("Bengali", "bn_IN"),
    ("Gujarati", "gu_IN"),
    ("Tamil", "ta_IN"),
    ("Telugu", "te_IN"),
    ("Marathi", "mr_IN"),
]

# Use a pipeline as a high-level helper
# NOTE(review): `pipe` is not referenced anywhere else in this file; it is kept
# so the module's public names are unchanged. Confirm whether it can be dropped.
pipe = pipeline("text2text-generation", model=MODEL_NAME, use_fast=False)
tokenizer = MBart50TokenizerFast.from_pretrained(MODEL_NAME, src_lang="en_XX")
model = None
model_loaded = False


class TranslationError(RuntimeError):
    """Raised when the hosted translation endpoint cannot produce a result."""


def load_model():
    """Load the MBart checkpoint into the module-level ``model`` and return it.

    Also sets the module-level ``model_loaded`` flag to ``True``.
    """
    global model, model_loaded
    model = MBartForConditionalGeneration.from_pretrained(MODEL_NAME)
    model_loaded = True
    return model


@functools.lru_cache(maxsize=16)
def _get_tokenizer(src_lang_code):
    """Return a tokenizer configured for *src_lang_code*, created once per code."""
    return MBart50TokenizerFast.from_pretrained(MODEL_NAME, src_lang=src_lang_code)


def _query_bangla_api(text):
    """Translate English *text* to Bengali through the hosted inference endpoint.

    Raises:
        TranslationError: if the request fails, the response is not JSON, the
            service reports an error, or the payload has an unexpected shape.
    """
    headers = {"Authorization": f"Bearer {api_token_header}"}
    try:
        response = requests.post(
            BANGLA_API_URL,
            headers=headers,
            json={"inputs": text},
            timeout=API_TIMEOUT_SECONDS,
        )
        output = response.json()
    except (requests.RequestException, ValueError) as err:
        raise TranslationError(f"Bengali translation request failed: {err}") from err

    print(output)
    # A dict carrying an "error" key is treated as a failure report rather than
    # a list of translations, so indexing it with [0] would only hide the cause.
    if isinstance(output, dict) and "error" in output:
        raise TranslationError(f"Bengali translation API error: {output['error']}")
    try:
        return output[0]["translation_text"]
    except (KeyError, IndexError, TypeError) as err:
        raise TranslationError(
            f"Unexpected Bengali translation API response: {output!r}"
        ) from err


def translation(text, dest_lang, dest_lang_code, src_lang_code):
    """Translate one piece of text and return the translated string.

    Args:
        text: Text to translate.
        dest_lang: Display name of the destination language (e.g. "Bengali").
        dest_lang_code: Destination language code (e.g. "bn_IN").
        src_lang_code: Source language code (e.g. "en_XX").

    Returns:
        The translated text, or a prompt asking for different languages when
        the source and destination codes are equal.

    Raises:
        TranslationError: if the hosted English -> Bengali endpoint fails.
    """
    global model

    if dest_lang_code == src_lang_code:
        return SAME_LANGUAGE_MESSAGE

    # English -> Bengali is delegated to a dedicated hosted model.
    if dest_lang == "Bengali" and src_lang_code == "en_XX":
        return _query_bangla_api(text)

    if not model:
        model = load_model()

    # Reuse a cached tokenizer instead of re-loading it on every call.
    source_tokenizer = _get_tokenizer(src_lang_code)
    model_inputs = source_tokenizer(text, return_tensors="pt")
    generated_tokens = model.generate(
        **model_inputs,
        forced_bos_token_id=source_tokenizer.lang_code_to_id[dest_lang_code],
    )
    output = source_tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
    print(output)
    return output[0]


def main_translation(text, dest_lang_code, src_lang_code):
    """Translate *text* sentence by sentence.

    Args:
        text: Text to translate; it is split into sentences with NLTK.
        dest_lang_code: Destination language code; must be a known code.
        src_lang_code: Source language code; must be a known code.

    Returns:
        A dict ``{"output": <translated text>}`` where the translated sentences
        are joined with single spaces.

    Raises:
        KeyError: if either language code is unknown.
        TranslationError: if the hosted English -> Bengali endpoint fails.
    """
    dest_lang = LANGUAGE_NAMES[dest_lang_code]
    if src_lang_code not in LANGUAGE_NAMES:
        raise KeyError(src_lang_code)

    # Answer once instead of repeating the prompt for every sentence.
    if dest_lang_code == src_lang_code:
        return {"output": SAME_LANGUAGE_MESSAGE}

    translated = [
        translation(sentence, dest_lang, dest_lang_code, src_lang_code)
        for sentence in sent_tokenize(text)
    ]
    # Separate sentences with a space so they do not run together.
    return {"output": " ".join(translated)}


def test(text, src, dest):
    """Gradio handler: translate *text* from *src* to *dest* and return the text."""
    try:
        ans = main_translation(text, dest, src)
    except TranslationError as err:
        # gr.Error makes the failure reason visible in the UI.
        raise gr.Error(str(err)) from err
    return ans["output"]


demo = gr.Interface(
    test,
    [
        "textbox",
        gr.Dropdown(
            LANGUAGE_CHOICES,
            label="Source",
            info="Select the Source Language!",
        ),
        gr.Dropdown(
            LANGUAGE_CHOICES,
            label="Destination",
            info="Select the Destination Language!",
        ),
    ],
    outputs=["textbox"],
)
demo.launch()