FM-1976 committed on
Commit
112cd69
·
verified ·
1 Parent(s): 2762745

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -3
app.py CHANGED
@@ -7,6 +7,7 @@ import random
7
  import string
8
  from time import sleep
9
  import tiktoken
 
10
 
11
  # for counting the tokens in the prompt and in the result
12
  #context_count = len(encoding.encode(yourtext))
@@ -30,6 +31,15 @@ if "messages" not in st.session_state:
30
  if "repeat" not in st.session_state:
31
  st.session_state.repeat = 1.35
32
 
 
 
 
 
 
 
 
 
 
33
  if "temperature" not in st.session_state:
34
  st.session_state.temperature = 0.1
35
 
@@ -64,9 +74,8 @@ def genRANstring(n):
64
  def create_chat():
65
  # Set HF API token and HF repo
66
  from llama_cpp import Llama
67
- client = Llama.from_pretrained(
68
- repo_id="bartowski/gemma-2-2b-it-GGUF",
69
- filename="gemma-2-2b-it-Q5_K_M.gguf",
70
  temperature=0.24,
71
  n_ctx=nCTX,
72
  max_tokens=600,
 
7
  import string
8
  from time import sleep
9
  import tiktoken
10
+ from huggingface_hub import hf_hub_download
11
 
12
  # for counting the tokens in the prompt and in the result
13
  #context_count = len(encoding.encode(yourtext))
 
31
  if "repeat" not in st.session_state:
32
  st.session_state.repeat = 1.35
33
 
34
+
35
+
36
+ if "modelfile" not in st.session_state:
37
+ modelfile = hf_hub_download(
38
+ repo_id=os.environ.get("REPO_ID", "bartowski/gemma-2-2b-it-GGUF"),
39
+ filename=os.environ.get("MODEL_FILE", "gemma-2-2b-it-Q5_K_M.gguf"),
40
+ )
41
+ st.session_state.modelfile = modelfile
42
+
43
  if "temperature" not in st.session_state:
44
  st.session_state.temperature = 0.1
45
 
 
74
  def create_chat():
75
  # Set HF API token and HF repo
76
  from llama_cpp import Llama
77
+ client = Llama(
78
+ model_path=st.session_state.modelfile,
 
79
  temperature=0.24,
80
  n_ctx=nCTX,
81
  max_tokens=600,