Caleb Fahlgren committed on
Commit
e735a4c
·
1 Parent(s): e8c1c43

use correct llama

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +9 -5
  3. requirements.txt +2 -1
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 📈
4
  colorFrom: indigo
5
  colorTo: green
6
  sdk: gradio
7
- sdk_version: 4.32.2
8
  app_file: app.py
9
  pinned: false
10
  ---
 
4
  colorFrom: indigo
5
  colorTo: green
6
  sdk: gradio
7
+ sdk_version: 4.31.3
8
  app_file: app.py
9
  pinned: false
10
  ---
app.py CHANGED
@@ -1,5 +1,6 @@
1
  from gradio_huggingfacehub_search import HuggingfaceHubSearch
2
  from llama_cpp.llama_speculative import LlamaPromptLookupDecoding
 
3
  from huggingface_hub import hf_hub_download
4
  from huggingface_hub import HfApi
5
  import matplotlib.pyplot as plt
@@ -8,7 +9,6 @@ import pandas as pd
8
  import gradio as gr
9
  import duckdb
10
  import requests
11
- import llama_cpp
12
  import instructor
13
  import spaces
14
  import enum
@@ -22,11 +22,11 @@ view_name = "dataset_view"
22
  hf_api = HfApi()
23
  conn = duckdb.connect()
24
 
25
- gpu_layers = int(os.environ.get("GPU_LAYERS", 81))
26
  draft_pred_tokens = int(os.environ.get("DRAFT_PRED_TOKENS", 2))
27
 
28
  repo_id = os.getenv("MODEL_REPO_ID", "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF")
29
- model_file_name = os.getenv("MODEL_FILE_NAME", "Hermes-2-Pro-Llama-3-8B-Q8_0.gguf")
30
 
31
  hf_hub_download(
32
  repo_id=repo_id,
@@ -88,7 +88,7 @@ CREATE TABLE {} (
88
 
89
  @spaces.GPU(duration=120)
90
  def generate_query(ddl: str, query: str) -> dict:
91
- llama = llama_cpp.Llama(
92
  model_path=f"models/{model_file_name}",
93
  n_gpu_layers=gpu_layers,
94
  chat_format="chatml",
@@ -200,4 +200,8 @@ with gr.Blocks() as demo:
200
 
201
 
202
  if __name__ == "__main__":
203
- demo.launch()
 
 
 
 
 
1
  from gradio_huggingfacehub_search import HuggingfaceHubSearch
2
  from llama_cpp.llama_speculative import LlamaPromptLookupDecoding
3
+ from llama_cpp_cuda_tensorcores import Llama
4
  from huggingface_hub import hf_hub_download
5
  from huggingface_hub import HfApi
6
  import matplotlib.pyplot as plt
 
9
  import gradio as gr
10
  import duckdb
11
  import requests
 
12
  import instructor
13
  import spaces
14
  import enum
 
22
  hf_api = HfApi()
23
  conn = duckdb.connect()
24
 
25
+ gpu_layers = int(os.environ.get("GPU_LAYERS", 0))
26
  draft_pred_tokens = int(os.environ.get("DRAFT_PRED_TOKENS", 2))
27
 
28
  repo_id = os.getenv("MODEL_REPO_ID", "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF")
29
+ model_file_name = os.getenv("MODEL_FILE_NAME", "Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf")
30
 
31
  hf_hub_download(
32
  repo_id=repo_id,
 
88
 
89
  @spaces.GPU(duration=120)
90
  def generate_query(ddl: str, query: str) -> dict:
91
+ llama = Llama(
92
  model_path=f"models/{model_file_name}",
93
  n_gpu_layers=gpu_layers,
94
  chat_format="chatml",
 
200
 
201
 
202
  if __name__ == "__main__":
203
+ demo.launch(
204
+ show_error=True,
205
+ quiet=False,
206
+ debug=True,
207
+ )
requirements.txt CHANGED
@@ -40,7 +40,6 @@ Jinja2==3.1.4
40
  jsonschema==4.22.0
41
  jsonschema-specifications==2023.12.1
42
  kiwisolver==1.4.5
43
- llama_cpp_python==0.2.77
44
  markdown-it-py==3.0.0
45
  MarkupSafe==2.1.5
46
  matplotlib==3.9.0
@@ -92,3 +91,5 @@ uvloop==0.19.0
92
  watchfiles==0.22.0
93
  websockets==11.0.3
94
  yarl==1.9.4
 
 
 
40
  jsonschema==4.22.0
41
  jsonschema-specifications==2023.12.1
42
  kiwisolver==1.4.5
 
43
  markdown-it-py==3.0.0
44
  MarkupSafe==2.1.5
45
  matplotlib==3.9.0
 
91
  watchfiles==0.22.0
92
  websockets==11.0.3
93
  yarl==1.9.4
94
+ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.69+cpuavx2-cp310-cp310-linux_x86_64.whl
95
+ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.69+cu121-cp310-cp310-linux_x86_64.whl