Tonic committed on
Commit
66854bf
·
1 Parent(s): a12fc17

improve retrieval and application logic

Browse files
Files changed (2) hide show
  1. app.py +71 -68
  2. globalvars.py +4 -0
app.py CHANGED
@@ -14,7 +14,7 @@ import gradio as gr
14
  from huggingface_hub import InferenceClient
15
  import openai
16
  from openai import OpenAI
17
- from globalvars import API_BASE, intention_prompt, tasks
18
  from dotenv import load_dotenv
19
  import re
20
  from utils import load_env_variables
@@ -30,26 +30,17 @@ os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
30
  os.environ['CUDA_CACHE_DISABLE'] = '1'
31
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
32
 
33
- ### Utils
34
-
35
  hf_token, yi_token = load_env_variables()
36
 
37
  def clear_cuda_cache():
38
  torch.cuda.empty_cache()
39
 
40
-
41
- ## 01ai Yi-large Clience
42
-
43
  client = OpenAI(
44
  api_key=yi_token,
45
  base_url=API_BASE
46
  )
47
 
48
 
49
- ## use instruct embeddings
50
-
51
- # Load the tokenizer and model
52
-
53
  class EmbeddingGenerator:
54
  def __init__(self, model_name: str, token: str, intention_client):
55
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -108,7 +99,6 @@ class MyEmbeddingFunction(EmbeddingFunction):
108
  embeddings = [item for sublist in embeddings for item in sublist]
109
  return embeddings
110
 
111
- ## add chroma vector store
112
  class DocumentLoader:
113
  def __init__(self, file_path: str, mode: str = "elements"):
114
  self.file_path = file_path
@@ -136,60 +126,73 @@ class ChromaManager:
136
  return result_docs
137
 
138
 
139
-
140
- # print(completion)
141
-
142
- def respond(
143
- message,
144
- history: list[tuple[str, str]],
145
- system_message,
146
- max_tokens,
147
- temperature,
148
- top_p,
149
- ):
150
- messages = [{"role": "system", "content": system_message}]
151
-
152
- for val in history:
153
- if val[0]:
154
- messages.append({"role": "user", "content": val[0]})
155
- if val[1]:
156
- messages.append({"role": "assistant", "content": val[1]})
157
-
158
- messages.append({"role": "user", "content": message})
159
-
160
- response = ""
161
-
162
- for message in client.chat_completion(
163
- messages,
164
- max_tokens=max_tokens,
165
- stream=True,
166
- temperature=temperature,
167
- top_p=top_p,
168
- ):
169
- token = message.choices[0].delta.content
170
-
171
- response += token
172
- yield response
173
-
174
- """
175
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
176
- """
177
- demo = gr.ChatInterface(
178
- respond,
179
- additional_inputs=[
180
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
181
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
182
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
183
- gr.Slider(
184
- minimum=0.1,
185
- maximum=1.0,
186
- value=0.95,
187
- step=0.05,
188
- label="Top-p (nucleus sampling)",
189
- ),
190
- ],
191
- )
192
-
193
-
194
- if __name__ == "__main__":
195
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  from huggingface_hub import InferenceClient
15
  import openai
16
  from openai import OpenAI
17
+ from globalvars import API_BASE, intention_prompt, tasks , system_message, model_name
18
  from dotenv import load_dotenv
19
  import re
20
  from utils import load_env_variables
 
30
  os.environ['CUDA_CACHE_DISABLE'] = '1'
31
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
32
 
 
 
33
  hf_token, yi_token = load_env_variables()
34
 
35
  def clear_cuda_cache():
36
  torch.cuda.empty_cache()
37
 
 
 
 
38
  client = OpenAI(
39
  api_key=yi_token,
40
  base_url=API_BASE
41
  )
42
 
43
 
 
 
 
 
44
  class EmbeddingGenerator:
45
  def __init__(self, model_name: str, token: str, intention_client):
46
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
99
  embeddings = [item for sublist in embeddings for item in sublist]
100
  return embeddings
101
 
 
102
  class DocumentLoader:
103
  def __init__(self, file_path: str, mode: str = "elements"):
104
  self.file_path = file_path
 
126
  return result_docs
127
 
128
 
129
# Build the retrieval pipeline once at import time. Each object is handed to
# the next: chat client -> embedding generator -> embedding function ->
# Chroma manager. The order of these statements therefore matters.
intention_client = OpenAI(
    api_key=yi_token,
    base_url=API_BASE,
)
embedding_generator = EmbeddingGenerator(
    model_name=model_name,
    token=hf_token,
    intention_client=intention_client,
)
embedding_function = MyEmbeddingFunction(
    embedding_generator=embedding_generator,
)
chroma_manager = ChromaManager(
    embedding_function=embedding_function,
)
134
+
135
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat answer to ``message`` with retrieved document text prepended.

    The user's message is first passed to ``query_documents``; the returned
    text is placed in front of the message in the final user turn. Earlier
    turns from ``history`` are replayed so the model sees the conversation.

    Args:
        message: The new user message.
        history: Previous ``(user, assistant)`` turns; empty entries are skipped.
        system_message: Content of the leading system turn.
        max_tokens: Forwarded to the completion request.
        temperature: Forwarded to the completion request.
        top_p: Forwarded to the completion request.

    Yields:
        The response text accumulated so far, once per received token.
    """
    # NOTE(review): assumed model id for the 01.AI endpoint — confirm against
    # the models available to this API key.
    chat_model = "yi-large"

    retrieved_text = query_documents(message)

    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    # Only prepend the context block when retrieval actually returned text,
    # so an empty store does not inject stray leading blank lines.
    if retrieved_text:
        user_content = f"{retrieved_text}\n\n{message}"
    else:
        user_content = message
    messages.append({"role": "user", "content": user_content})

    # `intention_client` is an `openai.OpenAI` client: the streaming entry
    # point is `chat.completions.create`, not `chat_completion` (which is the
    # huggingface_hub InferenceClient method and does not exist here).
    stream = intention_client.chat.completions.create(
        model=chat_model,
        messages=messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    )

    response = ""
    for chunk in stream:
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        # Role-only and final chunks carry no text (`content` is None).
        if token:
            response += token
            yield response
162
+
163
def upload_documents(files):
    """Load every uploaded file and add its documents to the Chroma store.

    Args:
        files: The value of the multi-file upload component. May be ``None``
            or empty when nothing was selected. Each entry is either a path
            string or an object exposing the path as ``.name``.

    Returns:
        A status message for display in the UI.
    """
    # Clicking the button without selecting files passes None; report that
    # instead of raising TypeError from iterating None.
    if not files:
        return "No documents were provided."

    for file in files:
        # Accept both tempfile-like wrappers (`.name`) and plain path strings.
        path = getattr(file, "name", file)
        loader = DocumentLoader(path)
        documents = loader.load_documents()
        chroma_manager.add_documents(documents)
    return "Documents uploaded and processed successfully!"
169
+
170
def query_documents(query):
    """Return the text of all hits for ``query``, separated by blank lines.

    The query is delegated to ``chroma_manager.query``; the ``content``
    attribute of each hit is collected and joined with ``"\\n\\n"``.

    NOTE(review): hits are assumed to expose ``.content`` — confirm against
    what ``ChromaManager.query`` actually returns.
    """
    hits = chroma_manager.query(query)
    passages = [hit.content for hit in hits]
    return "\n\n".join(passages)
173
+
174
# NOTE(review): this block was recovered from a flattened diff view, so the
# nesting of the `with` contexts below is reconstructed — confirm against the
# real file before relying on the exact layout.
with gr.Blocks() as demo:
    # Tab 1: choose several files and push them through `upload_documents`;
    # the returned status string is shown in a text component.
    with gr.Tab("Upload Documents"):
        with gr.Row():
            document_upload = gr.File(file_count="multiple", file_types=["document"])
            upload_button = gr.Button("Upload and Process")
            upload_status = gr.Text()
            upload_button.click(
                upload_documents,
                inputs=document_upload,
                outputs=upload_status,
            )

    # Tab 2: chat backed by `respond`, plus a plain retrieval box that shows
    # what `query_documents` returns for a query.
    with gr.Tab("Ask Questions"):
        with gr.Row():
            generation_controls = [
                gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
                gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
                gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
                gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
            ]
            chat_interface = gr.ChatInterface(
                respond,
                additional_inputs=generation_controls,
            )
        query_input = gr.Textbox(label="Query")
        query_button = gr.Button("Query")
        query_output = gr.Textbox()
        query_button.click(
            query_documents,
            inputs=query_input,
            outputs=query_output,
        )

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
globalvars.py CHANGED
@@ -3,6 +3,8 @@
3
  API_BASE = "https://api.01.ai/v1"
4
  API_KEY = "your key"
5
 
 
 
6
  title = """
7
  # 👋🏻Welcome to 🙋🏻‍♂️Tonic's 📽️Nvidia 🛌🏻Embed V-1 !"""
8
 
@@ -84,3 +86,5 @@ intention_prompt= """
84
  produce a complete json schema."
85
 
86
  you will recieve a text , classify the text according to the schema above. ONLY PROVIDE THE FINAL JSON , DO NOT PRODUCE ANY ADDITION INSTRUCTION :"""
 
 
 
3
  API_BASE = "https://api.01.ai/v1"
4
  API_KEY = "your key"
5
 
6
+ model_name = 'nvidia/NV-Embed-v1'
7
+
8
  title = """
9
  # 👋🏻Welcome to 🙋🏻‍♂️Tonic's 📽️Nvidia 🛌🏻Embed V-1 !"""
10
 
 
86
  produce a complete json schema."
87
 
88
  you will recieve a text , classify the text according to the schema above. ONLY PROVIDE THE FINAL JSON , DO NOT PRODUCE ANY ADDITION INSTRUCTION :"""
89
+
90
+ system_message = """ You are a helpful assistant named YiTonic . answer the question provided based on the context above. Produce a complete answer:"""