Spaces:

MohamedRashad
/

Arabic-Chatbot-Arena

Running on Zero

App Files Community

MohamedRashad committed on Aug 7, 2024

Commit

00ec273

1 Parent(s): 5925564

Refactor model ID handling in app.py

Browse files

Files changed (1) hide show

app.py +33 -33

app.py CHANGED Viewed

@@ -20,47 +20,47 @@ def load_model_a(model_id):
     global tokenizer_a, model_a
     tokenizer_a = AutoTokenizer.from_pretrained(model_id)
     print(f"model A: {tokenizer_a.eos_token}")
-    try:
-        model_a = AutoModelForCausalLM.from_pretrained(
-            model_id,
-            torch_dtype=torch.bfloat16,
-            device_map="auto",
-            attn_implementation="flash_attention_2",
-            trust_remote_code=True,
-        ).eval()
-    except:
-        print(f"Using default attention implementation in {model_id}")
-        model_a = AutoModelForCausalLM.from_pretrained(
-            model_id,
-            torch_dtype=torch.bfloat16,
-            device_map="auto",
-            trust_remote_code=True,
-        ).eval()
     return gr.update(label=model_id)
 def load_model_b(model_id):
     global tokenizer_b, model_b
     tokenizer_b = AutoTokenizer.from_pretrained(model_id)
     print(f"model B: {tokenizer_b.eos_token}")
-    try:
-        model_b = AutoModelForCausalLM.from_pretrained(
-            model_id,
-            torch_dtype=torch.bfloat16,
-            device_map="auto",
-            attn_implementation="flash_attention_2",
-            trust_remote_code=True,
-        ).eval()
-    except:
-        print(f"Using default attention implementation in {model_id}")
-        model_b = AutoModelForCausalLM.from_pretrained(
-            model_id,
-            torch_dtype=torch.bfloat16,
-            device_map="auto",
-            trust_remote_code=True,
-        ).eval()
     return gr.update(label=model_id)
-@spaces.GPU(duration=120)
 def generate_both(system_prompt, input_text, chatbot_a, chatbot_b, max_new_tokens=2048, temperature=0.2, top_p=0.9, repetition_penalty=1.1):
     text_streamer_a = TextIteratorStreamer(tokenizer_a, skip_prompt=True)
     text_streamer_b = TextIteratorStreamer(tokenizer_b, skip_prompt=True)

     global tokenizer_a, model_a
     tokenizer_a = AutoTokenizer.from_pretrained(model_id)
     print(f"model A: {tokenizer_a.eos_token}")
+    model_a = AutoModelForCausalLM.from_pretrained(
+        model_id,
+        torch_dtype=torch.bfloat16,
+        device_map="auto",
+        attn_implementation="flash_attention_2",
+        trust_remote_code=True,
+    ).eval()
+    # try:
+    # except:
+    #     print(f"Using default attention implementation in {model_id}")
+    #     model_a = AutoModelForCausalLM.from_pretrained(
+    #         model_id,
+    #         torch_dtype=torch.bfloat16,
+    #         device_map="auto",
+    #         trust_remote_code=True,
+    #     ).eval()
     return gr.update(label=model_id)
 def load_model_b(model_id):
     global tokenizer_b, model_b
     tokenizer_b = AutoTokenizer.from_pretrained(model_id)
     print(f"model B: {tokenizer_b.eos_token}")
+    model_b = AutoModelForCausalLM.from_pretrained(
+        model_id,
+        torch_dtype=torch.bfloat16,
+        device_map="auto",
+        attn_implementation="flash_attention_2",
+        trust_remote_code=True,
+    ).eval()
+    # try:
+    # except:
+    #     print(f"Using default attention implementation in {model_id}")
+    #     model_b = AutoModelForCausalLM.from_pretrained(
+    #         model_id,
+    #         torch_dtype=torch.bfloat16,
+    #         device_map="auto",
+    #         trust_remote_code=True,
+    #     ).eval()
     return gr.update(label=model_id)
+@spaces.GPU()
 def generate_both(system_prompt, input_text, chatbot_a, chatbot_b, max_new_tokens=2048, temperature=0.2, top_p=0.9, repetition_penalty=1.1):
     text_streamer_a = TextIteratorStreamer(tokenizer_a, skip_prompt=True)
     text_streamer_b = TextIteratorStreamer(tokenizer_b, skip_prompt=True)