dammy committed on
Commit
2a5a407
·
1 Parent(s): cc385c2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -30
app.py CHANGED
@@ -45,40 +45,33 @@ def local_query(query, context):
45
  def run_query(btn, history, query):
46
 
47
 
48
- global count
 
 
 
 
 
49
 
50
- count = 1
 
51
 
52
- if count ==1:
53
- print('inside')
54
- print(count)
55
- file_name = btn.name
56
-
57
- loader = PDFMinerLoader(file_name)
58
- doc = loader.load()
59
-
60
- text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
61
- texts = text_splitter.split_documents(doc)
62
-
63
- texts = [i.page_content for i in texts]
64
-
65
- doc_emb = st_model.encode(texts)
66
- doc_emb = doc_emb.tolist()
67
-
68
- ids = [str(uuid.uuid1()) for _ in doc_emb]
69
-
70
- client = chromadb.Client()
71
- collection = client.create_collection("test_db")
72
-
73
- collection.add(
74
- embeddings=doc_emb,
75
- documents=texts,
76
- ids=ids
77
- )
78
 
79
- count+=1
80
- print(count)
 
 
 
 
 
 
 
 
81
 
 
82
 
83
 
84
  context = get_context(query, collection)
 
45
  def run_query(btn, history, query):
46
 
47
 
48
+
49
+
50
+ # file_name = btn.name
51
+
52
+ # loader = PDFMinerLoader(file_name)
53
+ # doc = loader.load()
54
 
55
+ # text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
56
+ # texts = text_splitter.split_documents(doc)
57
 
58
+ # texts = [i.page_content for i in texts]
59
+
60
+ # doc_emb = st_model.encode(texts)
61
+ # doc_emb = doc_emb.tolist()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
+ # ids = [str(uuid.uuid1()) for _ in doc_emb]
64
+
65
+ # client = chromadb.Client()
66
+ # collection = client.create_collection("test_db")
67
+
68
+ # collection.add(
69
+ # embeddings=doc_emb,
70
+ # documents=texts,
71
+ # ids=ids
72
+ # )
73
 
74
+
75
 
76
 
77
  context = get_context(query, collection)