zeyadahmedd committed on
Commit
0e1d9bb
·
1 Parent(s): b884c59

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -43
app.py CHANGED
@@ -1,5 +1,3 @@
1
- import time
2
-
3
  import chromadb
4
  from chromadb.utils import embedding_functions
5
  from test.new import connect_to_llama
@@ -10,7 +8,6 @@ import os
10
  from chunkipy.text_chunker import split_by_sentences
11
  import langid
12
  from translate import Translator
13
-
14
  chroma_client = chromadb.PersistentClient()
15
  from test.llama import llama_local
16
  working_dir = os.getcwd()
@@ -32,22 +29,25 @@ def detect_and_translate_query(query, context, dest_language='en'):
32
  translated_context = translator.translate(context)
33
  return translated_query, translated_context, input_language
34
 
 
35
  def translate_response(response, source_language, dest_language):
36
  translator = Translator(to_lang=source_language, from_lang=dest_language)
37
  translated_response = translator.translate(response)
38
- print("translate_response "+str(translate_response))
39
  return translated_response
40
- def create_multiple_db(path,collection,working_dir):
 
 
41
  filelist = os.listdir(path)
42
  print(filelist)
43
  data_pdfs = []
44
- metadata_buff=[]
45
  for file_n in filelist:
46
  with open(file_n, 'rb') as file:
47
  pdf_reader = PyPDF2.PdfReader(file)
48
- meta_data=dict(pdf_reader.metadata)
49
- print("De elmeta data before: ",meta_data)
50
- meta_data.update({"/Title":file_n})
51
  print("De elmeta data after: ", meta_data)
52
  metadata_buff.append(meta_data)
53
  data = ""
@@ -59,22 +59,23 @@ def create_multiple_db(path,collection,working_dir):
59
  data_pdfs.append(chunk)
60
  file.close()
61
  os.chdir(working_dir)
62
- print(metadata_buff,"\n",len(metadata_buff))
63
  sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2")
64
  i = 0
65
- md_i=0
66
  for data in data_pdfs:
67
  print(data)
68
  collection.add(
69
  documents=data,
70
  embeddings=sentence_transformer_ef(data),
71
  ids=['id' + str(x + i) for x in range(len(data))],
72
- metadatas=[metadata_buff[md_i]for i in range(len(data))]
73
  )
74
- md_i+=1
75
  i += len(data)
76
  return "done"
77
-
 
78
  def architecture_with_chroma(data):
79
  try:
80
  data_dict = eval(data)
@@ -87,20 +88,20 @@ def architecture_with_chroma(data):
87
  query = data_dict.get('query')
88
  if query is None or query == "":
89
  return "please enter a query to process"
90
- if(not os.path.exists(id)):
91
  return "sorry ,there is no directory for this client"
92
  collection = chroma_client.get_or_create_collection(name=id)
93
  results = collection.query(
94
  query_texts=[query],
95
- n_results=5
96
  )
97
- print(results," de elresults\n")
98
  context = results.get('documents')[0]
99
  results_metadata = list(results.get("metadatas")[0])
100
  results_documents = list(results.get("documents")[0])
101
- print(len(results_documents),"da el len bta3 elcontexts\n")
102
  print(results_documents)
103
- for i in range(5):
104
  results_documents[i] = f"In {results_metadata[i].get('/Title')}:" + results_documents[i]
105
  for data in results_documents:
106
  print(data)
@@ -108,54 +109,54 @@ def architecture_with_chroma(data):
108
  # generated_text = model(input_prompt.format(query+"? answer reasoning answers from the provided contexts only that is related and contains this information ", context), max_length=1024, do_sample=False)[0]['generated_text']
109
  # print(input_prompt)
110
  chroma_client.stop()
111
- translated_query, translated_context, input_language = detect_and_translate_query(query, context)
112
- print('translated_query '+str(translated_query))
113
- print('translated_context '+str(translated_context))
114
- results=connect_to_llama(query,results_documents)
115
  # results=llama_local(query,results_documents)
116
- translated_response = translate_response(results, input_language, dest_language='en')
117
- return translated_response
118
- # return results
119
  # return generated_text
 
 
120
  def create(data):
121
  print(data)
122
  print(type(data))
123
  try:
124
- dict=eval(data)
125
  except:
126
  return "please enter a valid json (dict) to process"
127
- id=dict.get('id')
128
- if id==None :
129
  return "please enter an id to process on the prompt"
130
- id="mate"+str(id)
131
- if(not os.path.exists(id)):
132
  return "sorry ,there is no directory for this client"
133
  else:
134
  collection = chroma_client.get_or_create_collection(name=id)
135
  print(os.chdir(id))
136
- return create_multiple_db(os.getcwd(),collection,working_dir)+" making data for client"
 
137
 
138
  def update(data):
139
  print(data)
140
  print(type(data))
141
  try:
142
- dict=eval(data)
143
  except:
144
  return "please enter a valid json (dict) to process"
145
- id=dict.get('id')
146
- if id==None :
147
  return "please enter an id to process on the prompt"
148
- id="mate"+str(dict.get('id'))
149
- if(not os.path.exists(id)):
150
  return "sorry ,there is no directory for this client"
151
  else:
152
- try:
153
- chroma_client.delete_collection(name=id)
154
- except error:
155
- pass
156
- collection=chroma_client.create_collection(name=id)
157
  print(os.chdir(id))
158
- return create_multiple_db(os.getcwd(),collection,working_dir)+"updating client embeddings"
 
159
 
160
  iface = gr.Blocks()
161
  with iface:
 
 
 
1
  import chromadb
2
  from chromadb.utils import embedding_functions
3
  from test.new import connect_to_llama
 
8
  from chunkipy.text_chunker import split_by_sentences
9
  import langid
10
  from translate import Translator
 
11
  chroma_client = chromadb.PersistentClient()
12
  from test.llama import llama_local
13
  working_dir = os.getcwd()
 
29
  translated_context = translator.translate(context)
30
  return translated_query, translated_context, input_language
31
 
32
+
33
  def translate_response(response, source_language, dest_language):
34
  translator = Translator(to_lang=source_language, from_lang=dest_language)
35
  translated_response = translator.translate(response)
36
+ print("translate_response " + str(translate_response))
37
  return translated_response
38
+
39
+
40
+ def create_multiple_db(path, collection, working_dir):
41
  filelist = os.listdir(path)
42
  print(filelist)
43
  data_pdfs = []
44
+ metadata_buff = []
45
  for file_n in filelist:
46
  with open(file_n, 'rb') as file:
47
  pdf_reader = PyPDF2.PdfReader(file)
48
+ meta_data = dict(pdf_reader.metadata)
49
+ print("De elmeta data before: ", meta_data)
50
+ meta_data.update({"/Title": file_n})
51
  print("De elmeta data after: ", meta_data)
52
  metadata_buff.append(meta_data)
53
  data = ""
 
59
  data_pdfs.append(chunk)
60
  file.close()
61
  os.chdir(working_dir)
62
+ print(metadata_buff, "\n", len(metadata_buff))
63
  sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2")
64
  i = 0
65
+ md_i = 0
66
  for data in data_pdfs:
67
  print(data)
68
  collection.add(
69
  documents=data,
70
  embeddings=sentence_transformer_ef(data),
71
  ids=['id' + str(x + i) for x in range(len(data))],
72
+ metadatas=[metadata_buff[md_i] for i in range(len(data))]
73
  )
74
+ md_i += 1
75
  i += len(data)
76
  return "done"
77
+
78
+
79
  def architecture_with_chroma(data):
80
  try:
81
  data_dict = eval(data)
 
88
  query = data_dict.get('query')
89
  if query is None or query == "":
90
  return "please enter a query to process"
91
+ if (not os.path.exists(id)):
92
  return "sorry ,there is no directory for this client"
93
  collection = chroma_client.get_or_create_collection(name=id)
94
  results = collection.query(
95
  query_texts=[query],
96
+ n_results=10
97
  )
98
+ print(results, " de elresults\n")
99
  context = results.get('documents')[0]
100
  results_metadata = list(results.get("metadatas")[0])
101
  results_documents = list(results.get("documents")[0])
102
+ print(len(results_documents), "da el len bta3 elcontexts\n")
103
  print(results_documents)
104
+ for i in range(10):
105
  results_documents[i] = f"In {results_metadata[i].get('/Title')}:" + results_documents[i]
106
  for data in results_documents:
107
  print(data)
 
109
  # generated_text = model(input_prompt.format(query+"? answer reasoning answers from the provided contexts only that is related and contains this information ", context), max_length=1024, do_sample=False)[0]['generated_text']
110
  # print(input_prompt)
111
  chroma_client.stop()
112
+ # translated_query, translated_context, input_language = detect_and_translate_query(query, context)
113
+ # print('translated_query ' + str(translated_query))
114
+ # print('translated_context ' + str(translated_context))
115
+ results = connect_to_llama(query, results_documents)
116
  # results=llama_local(query,results_documents)
117
+ # translated_response = translate_response(results, input_language, dest_language='en')
118
+ # return translated_response
119
+ return results
120
  # return generated_text
121
+
122
+
123
  def create(data):
124
  print(data)
125
  print(type(data))
126
  try:
127
+ dict = eval(data)
128
  except:
129
  return "please enter a valid json (dict) to process"
130
+ id = dict.get('id')
131
+ if id == None:
132
  return "please enter an id to process on the prompt"
133
+ id = "mate" + str(id)
134
+ if (not os.path.exists(id)):
135
  return "sorry ,there is no directory for this client"
136
  else:
137
  collection = chroma_client.get_or_create_collection(name=id)
138
  print(os.chdir(id))
139
+ return create_multiple_db(os.getcwd(), collection, working_dir) + " making data for client"
140
+
141
 
142
  def update(data):
143
  print(data)
144
  print(type(data))
145
  try:
146
+ dict = eval(data)
147
  except:
148
  return "please enter a valid json (dict) to process"
149
+ id = dict.get('id')
150
+ if id == None:
151
  return "please enter an id to process on the prompt"
152
+ id = "mate" + str(dict.get('id'))
153
+ if (not os.path.exists(id)):
154
  return "sorry ,there is no directory for this client"
155
  else:
156
+ collection = chroma_client.create_collection(name=id)
 
 
 
 
157
  print(os.chdir(id))
158
+ return create_multiple_db(os.getcwd(), collection, working_dir) + "updating client embeddings"
159
+
160
 
161
  iface = gr.Blocks()
162
  with iface: