Qwen-2-llamacpp

Sleeping

TobDeBer committed on Aug 15, 2024

Commit

ac3b484

1 Parent(s): 3488d23

use CPU version of llama-cpp-python

Files changed (2) hide show

app.py CHANGED Viewed

@@ -1,4 +1,6 @@
-import spaces
 import os
 import json
 import subprocess
@@ -31,7 +33,6 @@ hf_hub_download(
 llm = None
 llm_model = None
-@spaces.GPU(duration=120)
 def respond(
     message,
     history: list[tuple[str, str]],

+# conda activate audio
+# pip install llama-cpp-python --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
+import llama_cpp
 import os
 import json
 import subprocess
 llm = None
 llm_model = None
 def respond(
     message,
     history: list[tuple[str, str]],

requirements.txt CHANGED Viewed

@@ -1,5 +1,5 @@
 spaces
 huggingface_hub
 scikit-build-core
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.81-cu124/llama_cpp_python-0.2.81-cp310-cp310-linux_x86_64.whl
 llama-cpp-agent>=0.2.25

 spaces
 huggingface_hub
 scikit-build-core
+llama-cpp-python --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu -C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
 llama-cpp-agent>=0.2.25