Tonic committed on
Commit
190f21f
·
1 Parent(s): 45cde94

add metadata prompt, fix imports, add chroma

Browse files
Files changed (3) hide show
  1. app.py +9 -6
  2. globalvars.py +2 -0
  3. requirements.txt +1 -0
app.py CHANGED
@@ -10,10 +10,11 @@ from torch import Tensor
10
  from transformers import AutoTokenizer, AutoModel
11
  from huggingface_hub import InferenceClient
12
  from openai import OpenAI
13
- from langchain_community.document_loaders import UnstructuredFileLoader
 
14
  from chromadb import Documents, EmbeddingFunction, Embeddings
15
  from chromadb.config import Settings
16
- from chromadb import HttpClient
17
  from utils import load_env_variables, parse_and_route
18
  from globalvars import API_BASE, intention_prompt, tasks, system_message, model_name
19
  from dotenv import load_dotenv
@@ -97,7 +98,7 @@ def load_documents(file_path: str, mode: str = "elements"):
97
  return [doc.page_content for doc in docs]
98
 
99
  def initialize_chroma(collection_name: str, embedding_function: MyEmbeddingFunction):
100
- client = HttpClient(host='localhost', port=8000, settings = Settings(allow_reset=True, anonymized_telemetry=False))
101
  client.reset() # resets the database
102
  collection = client.create_collection(collection_name)
103
  return client, collection
@@ -110,7 +111,9 @@ def query_chroma(client, collection_name: str, query_text: str, embedding_functi
110
  db = Chroma(client=client, collection_name=collection_name, embedding_function=embedding_function)
111
  result_docs = db.similarity_search(query_text)
112
  return result_docs
113
-
 
 
114
  # Initialize clients
115
  intention_client = OpenAI(api_key=yi_token, base_url=API_BASE)
116
  embedding_generator = EmbeddingGenerator(model_name=model_name, token=hf_token, intention_client=intention_client)
@@ -149,11 +152,11 @@ def upload_documents(files):
149
  for file in files:
150
  loader = UnstructuredFileLoader(file.name)
151
  documents = loader.load_documents()
152
- chroma_manager.add_documents(documents)
153
  return "Documents uploaded and processed successfully!"
154
 
155
  def query_documents(query):
156
- results = chroma_manager.query(query)
157
  return "\n\n".join([result.content for result in results])
158
 
159
  with gr.Blocks() as demo:
 
10
  from transformers import AutoTokenizer, AutoModel
11
  from huggingface_hub import InferenceClient
12
  from openai import OpenAI
13
+ from langchain_community.document_loaders import UnstructuredFileLoader
14
+ from langchain_chroma import Chroma
15
  from chromadb import Documents, EmbeddingFunction, Embeddings
16
  from chromadb.config import Settings
17
+ from chromadb import HttpClient
18
  from utils import load_env_variables, parse_and_route
19
  from globalvars import API_BASE, intention_prompt, tasks, system_message, model_name
20
  from dotenv import load_dotenv
 
98
  return [doc.page_content for doc in docs]
99
 
100
  def initialize_chroma(collection_name: str, embedding_function: MyEmbeddingFunction):
101
+ client = chromadb.HttpClient(host='localhost', port=8000, settings = Settings(allow_reset=True, anonymized_telemetry=False))
102
  client.reset() # resets the database
103
  collection = client.create_collection(collection_name)
104
  return client, collection
 
111
  db = Chroma(client=client, collection_name=collection_name, embedding_function=embedding_function)
112
  result_docs = db.similarity_search(query_text)
113
  return result_docs
114
+
115
+
116
+
117
  # Initialize clients
118
  intention_client = OpenAI(api_key=yi_token, base_url=API_BASE)
119
  embedding_generator = EmbeddingGenerator(model_name=model_name, token=hf_token, intention_client=intention_client)
 
152
  for file in files:
153
  loader = UnstructuredFileLoader(file.name)
154
  documents = loader.load_documents()
155
+ add_documents_to_chroma(documents)
156
  return "Documents uploaded and processed successfully!"
157
 
158
  def query_documents(query):
159
+ results = query_chroma(query)
160
  return "\n\n".join([result.content for result in results])
161
 
162
  with gr.Blocks() as demo:
globalvars.py CHANGED
@@ -87,4 +87,6 @@ produce a complete json schema."
87
 
88
  you will recieve a text , classify the text according to the schema above. ONLY PROVIDE THE FINAL JSON , DO NOT PRODUCE ANY ADDITION INSTRUCTION :"""
89
 
 
 
90
  system_message = """ You are a helpful assistant named YiTonic . answer the question provided based on the context above. Produce a complete answer:"""
 
87
 
88
  you will recieve a text , classify the text according to the schema above. ONLY PROVIDE THE FINAL JSON , DO NOT PRODUCE ANY ADDITION INSTRUCTION :"""
89
 
90
+ metadata_prompt = "you will recieve a text or a question, produce metadata operator pairs for the text . ONLY PROVIDE THE FINAL JSON , DO NOT PRODUCE ANY ADDITION INSTRUCTION , ONLY PRODUCE ONE METADATA STRING PER OPERATOR:"
91
+
92
  system_message = """ You are a helpful assistant named YiTonic . answer the question provided based on the context above. Produce a complete answer:"""
requirements.txt CHANGED
@@ -7,6 +7,7 @@ openai
7
  python-dotenv
8
  chromadb
9
  langchain-community
 
10
  unstructured[all-docs]
11
  libmagic
12
  # poppler
 
7
  python-dotenv
8
  chromadb
9
  langchain-community
10
+ langchain-chroma
11
  unstructured[all-docs]
12
  libmagic
13
  # poppler