AFischer1985
committed on
Added history
Browse files
run.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
# Title: Gradio Interface to LLM-chatbot (for recommending AI) with RAG-functionality and ChromaDB on HF-Hub
|
3 |
# Author: Andreas Fischer
|
4 |
# Date: December 30th, 2023
|
5 |
-
# Last update:
|
6 |
##############################################################################################################
|
7 |
|
8 |
|
@@ -57,14 +57,53 @@ print(collection.count())
|
|
57 |
|
58 |
# Model
|
59 |
#-------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
|
69 |
|
70 |
# Gradio-GUI
|
@@ -72,19 +111,30 @@ client = InferenceClient(
|
|
72 |
|
73 |
import gradio as gr
|
74 |
import json
|
|
|
75 |
|
76 |
-
def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None,historylimit=4, removeHTML=
|
77 |
startOfString=""
|
78 |
if zeichenlimit is None: zeichenlimit=1000000000 # :-)
|
79 |
template0=" [INST]{system}\n [/INST] </s>"
|
80 |
template1=" [INST] {message} [/INST]"
|
81 |
template2=" {response}</s>"
|
|
|
|
|
|
|
|
|
|
|
82 |
if("Gemma-" in modelPath): # https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
|
83 |
template0="<start_of_turn>user{system}</end_of_turn>"
|
84 |
template1="<start_of_turn>user{message}</end_of_turn><start_of_turn>model"
|
85 |
-
template2="{response}</end_of_turn>"
|
86 |
-
if("Mixtral-
|
87 |
startOfString="<s>"
|
|
|
|
|
|
|
|
|
|
|
88 |
template0=" [INST]{system}\n [/INST] </s>"
|
89 |
template1=" [INST] {message} [/INST]"
|
90 |
template2=" {response}</s>"
|
@@ -100,7 +150,11 @@ def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=
|
|
100 |
if(("Discolm_german_7b" in modelPath) or ("SauerkrautLM-7b-HerO" in modelPath)): #https://huggingface.co/VAGOsolutions/SauerkrautLM-7b-HerO
|
101 |
template0="<|im_start|>system\n{system}<|im_end|>\n"
|
102 |
template1="<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
|
103 |
-
template2="{response}<|im_end|>\n"
|
|
|
|
|
|
|
|
|
104 |
if("WizardLM-13B-V1.2" in modelPath): #https://huggingface.co/WizardLM/WizardLM-13B-V1.2
|
105 |
template0="{system} " #<s>
|
106 |
template1="USER: {message} ASSISTANT: "
|
@@ -128,8 +182,9 @@ def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=
|
|
128 |
return startOfString+prompt
|
129 |
|
130 |
|
|
|
131 |
def response(
|
132 |
-
|
133 |
):
|
134 |
temperature = float(temperature)
|
135 |
if temperature < 1e-2: temperature = 1e-2
|
@@ -144,7 +199,7 @@ def response(
|
|
144 |
)
|
145 |
addon=""
|
146 |
results=collection.query(
|
147 |
-
query_texts=[
|
148 |
n_results=2,
|
149 |
#where={"source": "google-docs"}
|
150 |
#where_document={"$contains":"search_string"}
|
@@ -157,9 +212,18 @@ def response(
|
|
157 |
print(combination)
|
158 |
if(len(results)>1):
|
159 |
addon=" Bitte berücksichtige bei deiner Antwort ggf. folgende Auszüge aus unserer Datenbank, sofern sie für die Antwort erforderlich sind. Beantworte die Frage knapp und präzise. Ignoriere unpassende Datenbank-Auszüge OHNE sie zu kommentieren, zu erwähnen oder aufzulisten:\n"+"\n".join(results)
|
160 |
-
system="Du bist ein deutschsprachiges KI-basiertes Assistenzsystem, das zu jedem Anliegen möglichst geeignete KI-Tools empfiehlt."
|
161 |
#body={"prompt":system+"### Instruktion:\n"+message+"\n\n### Antwort:","max_tokens":500, "echo":"False","stream":"True"} #e.g. SauerkrautLM
|
162 |
-
formatted_prompt = extend_prompt(system+"\n"+prompt, None) #history)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
|
164 |
output = ""
|
165 |
for response in stream:
|
|
|
2 |
# Title: Gradio Interface to LLM-chatbot (for recommending AI) with RAG-functionality and ChromaDB on HF-Hub
|
3 |
# Author: Andreas Fischer
|
4 |
# Date: December 30th, 2023
|
5 |
+
# Last update: May 27th, 2024
|
6 |
##############################################################################################################
|
7 |
|
8 |
|
|
|
57 |
|
58 |
# Model
|
59 |
#-------
|
60 |
+
onPrem=False
|
61 |
+
myModel="mistralai/Mixtral-8x7B-Instruct-v0.1"
|
62 |
+
if(onPrem==False):
|
63 |
+
modelPath=myModel
|
64 |
+
from huggingface_hub import InferenceClient
|
65 |
+
import gradio as gr
|
66 |
+
client = InferenceClient(
|
67 |
+
model=modelPath,
|
68 |
+
#token="hf_..."
|
69 |
+
)
|
70 |
+
else:
|
71 |
+
import os
|
72 |
+
import requests
|
73 |
+
import subprocess
|
74 |
+
#modelPath="/home/af/gguf/models/c4ai-command-r-v01-Q4_0.gguf"
|
75 |
+
#modelPath="/home/af/gguf/models/Discolm_german_7b_v1.Q4_0.gguf"
|
76 |
+
modelPath="/home/af/gguf/models/Mixtral-8x7b-instruct-v0.1.Q4_0.gguf"
|
77 |
+
if(os.path.exists(modelPath)==False):
|
78 |
+
#url="https://huggingface.co/TheBloke/DiscoLM_German_7b_v1-GGUF/resolve/main/discolm_german_7b_v1.Q4_0.gguf?download=true"
|
79 |
+
url="https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q4_0.gguf?download=true"
|
80 |
+
response = requests.get(url)
|
81 |
+
with open("./Mixtral-8x7b-instruct.gguf", mode="wb") as file:
|
82 |
+
file.write(response.content)
|
83 |
+
print("Model downloaded")
|
84 |
+
modelPath="./Mixtral-8x7b-instruct.gguf"
|
85 |
+
print(modelPath)
|
86 |
+
n="20"
|
87 |
+
if("Mixtral-8x7b-instruct" in modelPath): n="0" # mixtral seems to cause problems here...
|
88 |
+
command = ["python3", "-m", "llama_cpp.server", "--model", modelPath, "--host", "0.0.0.0", "--port", "2600", "--n_threads", "8", "--n_gpu_layers", n]
|
89 |
+
subprocess.Popen(command)
|
90 |
+
print("Server ready!")
|
91 |
|
92 |
+
|
93 |
+
# Check template
|
94 |
+
#----------------
|
95 |
+
if(False):
|
96 |
+
from transformers import AutoTokenizer
|
97 |
+
#mod="mistralai/Mixtral-8x22B-Instruct-v0.1"
|
98 |
+
#mod="mistralai/Mixtral-8x7b-instruct-v0.1"
|
99 |
+
mod="VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct"
|
100 |
+
tok=AutoTokenizer.from_pretrained(mod) #,token="hf_...")
|
101 |
+
cha=[{"role":"system","content":"A"},{"role":"user","content":"B"},{"role":"assistant","content":"C"}]
|
102 |
+
res=tok.apply_chat_template(cha)
|
103 |
+
print(tok.decode(res))
|
104 |
+
cha=[{"role":"user","content":"U1"},{"role":"assistant","content":"A1"},{"role":"user","content":"U2"},{"role":"assistant","content":"A2"}]
|
105 |
+
res=tok.apply_chat_template(cha)
|
106 |
+
print(tok.decode(res))
|
107 |
|
108 |
|
109 |
# Gradio-GUI
|
|
|
111 |
|
112 |
import gradio as gr
|
113 |
import json
|
114 |
+
import re
|
115 |
|
116 |
+
def extend_prompt(message="", history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None,historylimit=4, removeHTML=True):
|
117 |
startOfString=""
|
118 |
if zeichenlimit is None: zeichenlimit=1000000000 # :-)
|
119 |
template0=" [INST]{system}\n [/INST] </s>"
|
120 |
template1=" [INST] {message} [/INST]"
|
121 |
template2=" {response}</s>"
|
122 |
+
if("command-r" in modelPath): #https://huggingface.co/CohereForAI/c4ai-command-r-v01
|
123 |
+
## <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
|
124 |
+
template0="<BOS_TOKEN><|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|> {system}<|END_OF_TURN_TOKEN|>"
|
125 |
+
template1="<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{message}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"
|
126 |
+
template2="{response}<|END_OF_TURN_TOKEN|>"
|
127 |
if("Gemma-" in modelPath): # https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
|
128 |
template0="<start_of_turn>user{system}</end_of_turn>"
|
129 |
template1="<start_of_turn>user{message}</end_of_turn><start_of_turn>model"
|
130 |
+
template2="{response}</end_of_turn>"
|
131 |
+
if("Mixtral-8x22B-Instruct" in modelPath): # AutoTokenizer: <s>[INST] U1[/INST] A1</s>[INST] U2[/INST] A2</s>
|
132 |
startOfString="<s>"
|
133 |
+
template0="[INST]{system}\n [/INST] </s>"
|
134 |
+
template1="[INST] {message}[/INST]"
|
135 |
+
template2=" {response}</s>"
|
136 |
+
if("Mixtral-8x7b-instruct" in modelPath): # https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1
|
137 |
+
startOfString="<s>" # AutoTokenizer: <s> [INST] U1 [/INST]A1</s> [INST] U2 [/INST]A2</s>
|
138 |
template0=" [INST]{system}\n [/INST] </s>"
|
139 |
template1=" [INST] {message} [/INST]"
|
140 |
template2=" {response}</s>"
|
|
|
150 |
if(("Discolm_german_7b" in modelPath) or ("SauerkrautLM-7b-HerO" in modelPath)): #https://huggingface.co/VAGOsolutions/SauerkrautLM-7b-HerO
|
151 |
template0="<|im_start|>system\n{system}<|im_end|>\n"
|
152 |
template1="<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
|
153 |
+
template2="{response}<|im_end|>\n"
|
154 |
+
if("Llama-3-SauerkrautLM-8b-Instruct" in modelPath): #https://huggingface.co/VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct
|
155 |
+
template0="<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system}<|eot_id|>"
|
156 |
+
template1="<|start_header_id|>user<|end_header_id|>\n\n{message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
|
157 |
+
template2="{response}<|eot_id|>\n"
|
158 |
if("WizardLM-13B-V1.2" in modelPath): #https://huggingface.co/WizardLM/WizardLM-13B-V1.2
|
159 |
template0="{system} " #<s>
|
160 |
template1="USER: {message} ASSISTANT: "
|
|
|
182 |
return startOfString+prompt
|
183 |
|
184 |
|
185 |
+
|
186 |
def response(
|
187 |
+
message, history, temperature=0.9, max_new_tokens=500, top_p=0.95, repetition_penalty=1.0,
|
188 |
):
|
189 |
temperature = float(temperature)
|
190 |
if temperature < 1e-2: temperature = 1e-2
|
|
|
199 |
)
|
200 |
addon=""
|
201 |
results=collection.query(
|
202 |
+
query_texts=[message],
|
203 |
n_results=2,
|
204 |
#where={"source": "google-docs"}
|
205 |
#where_document={"$contains":"search_string"}
|
|
|
212 |
print(combination)
|
213 |
if(len(results)>1):
|
214 |
addon=" Bitte berücksichtige bei deiner Antwort ggf. folgende Auszüge aus unserer Datenbank, sofern sie für die Antwort erforderlich sind. Beantworte die Frage knapp und präzise. Ignoriere unpassende Datenbank-Auszüge OHNE sie zu kommentieren, zu erwähnen oder aufzulisten:\n"+"\n".join(results)
|
215 |
+
system="Du bist ein deutschsprachiges KI-basiertes Assistenzsystem, das zu jedem Anliegen möglichst geeignete KI-Tools empfiehlt." #+addon #+"\n\nUser-Anliegen:"
|
216 |
#body={"prompt":system+"### Instruktion:\n"+message+"\n\n### Antwort:","max_tokens":500, "echo":"False","stream":"True"} #e.g. SauerkrautLM
|
217 |
+
#formatted_prompt = extend_prompt(system+"\n"+prompt, None) #history)
|
218 |
+
prompt=extend_prompt(
|
219 |
+
message, # current message of the user
|
220 |
+
history, # complete history
|
221 |
+
system, # system prompt
|
222 |
+
addon, # RAG-component added to the system prompt
|
223 |
+
None, # fictive first words of the AI (neither displayed nor stored)
|
224 |
+
historylimit=4, # number of past messages to consider for response to current message
|
225 |
+
removeHTML=True # remove HTML-components from History (to prevent bugs with Markdown)
|
226 |
+
)
|
227 |
stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
|
228 |
output = ""
|
229 |
for response in stream:
|