import os import torch import gradio as gr import time from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline from flores200_codes import flores_codes def load_models(): # build model and tokenizer model_name_dict = { #'nllb-distilled-600M': 'facebook/nllb-200-distilled-600M', #'nllb-1.3B': 'facebook/nllb-200-1.3B', 'nllb-distilled-1.3B': 'facebook/nllb-200-distilled-1.3B', #'nllb-3.3B': 'facebook/nllb-200-3.3B', } model_dict = {} for call_name, real_name in model_name_dict.items(): print('\tLoading model: %s' % call_name) model = AutoModelForSeq2SeqLM.from_pretrained(real_name) tokenizer = AutoTokenizer.from_pretrained(real_name) model_dict[call_name+'_model'] = model model_dict[call_name+'_tokenizer'] = tokenizer return model_dict def translation(source, target, text): if len(model_dict) == 2: model_name = 'nllb-distilled-1.3B' start_time = time.time() source = flores_codes[source] target = flores_codes[target] model = model_dict[model_name + '_model'] tokenizer = model_dict[model_name + '_tokenizer'] translator = pipeline('translation', model=model, tokenizer=tokenizer, src_lang=source, tgt_lang=target) output = translator(text, max_length=400) end_time = time.time() full_output = output output = output[0]['translation_text'] # result = {'inference_time': end_time - start_time, # 'source': source, # 'target': target, # 'result': output, # 'full_output': full_output} result = output; return result if __name__ == '__main__': print('\tinit models') global model_dict model_dict = load_models() # define gradio demo lang_codes = list(flores_codes.keys()) #inputs = [gr.inputs.Radio(['nllb-distilled-600M', 'nllb-1.3B', 'nllb-distilled-1.3B'], label='NLLB Model'), inputs = [gr.Dropdown(label='Source | འདི་ནས།', choices=lang_codes, value='English'), gr.Dropdown( label='Target | འདི་ལ།', choices=lang_codes, value='Standard Tibetan'), gr.Textbox(lines=5, label="Input text | ཡིག་གེ་འབྲི་ཡུལ།"), ] # outputs = gr.outputs.JSON() outputs = gr.Textbox(lines=5, label="Translated Result | རྒྱུར་ཟིན་པ།") title = "Dhumra AI Translator" demo_status = "[སྐད་ཡིག་གཅིག་ཀྱང་མ་ལུས་པ།]" description = f"Details: https://github.com/facebookresearch/fairseq/tree/nllb. {demo_status}" examples = [ [ 'English', 'Standard Tibetan', 'What is this?'] ] gr.Interface(translation, inputs, outputs, title=title, description=description, examples=examples, examples_per_page=50, ).launch()