phi-4

Running on Zero

eswardivi committed on Apr 23, 2024

Commit

51153f0

verified ·

1 Parent(s): 4828909

updated with Flashattention

Files changed (1) hide show

app.py CHANGED Viewed

@@ -16,6 +16,7 @@ token = os.environ["HF_TOKEN"]
 model = AutoModelForCausalLM.from_pretrained(
     "microsoft/Phi-3-mini-128k-instruct", token=token,trust_remote_code=True
 )
 tok = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-128k-instruct", token=token)

 model = AutoModelForCausalLM.from_pretrained(
     "microsoft/Phi-3-mini-128k-instruct", token=token,trust_remote_code=True,
+    # Model id stays first: Python rejects a positional argument after keyword arguments.
+    use_cache=False,attn_implementation="flash_attention_2",
 )
 tok = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-128k-instruct", token=token)