dammy committed on
Commit
725d485
·
1 Parent(s): dffeb2d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -7
app.py CHANGED
@@ -11,6 +11,8 @@ import uuid
11
  from sentence_transformers import SentenceTransformer
12
  import os
13
 
 
 
14
  model_name = 'google/flan-t5-base'
15
  model = T5ForConditionalGeneration.from_pretrained(model_name, device_map='auto', offload_folder="offload")
16
  tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -22,7 +24,7 @@ st_model = SentenceTransformer(ST_name)
22
  print('sentence read')
23
 
24
 
25
- def get_context(query_text):
26
  query_emb = st_model.encode(query_text)
27
  query_response = collection.query(query_embeddings=query_emb.tolist(), n_results=4)
28
  context = query_response['documents'][0][0]
@@ -42,8 +44,32 @@ def local_query(query, context):
42
  return tokenizer.batch_decode(outputs, skip_special_tokens=True)
43
 
44
  def run_query(history, query):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
- context = get_context(query)
 
 
 
 
 
 
 
47
  result = local_query(query, context)
48
 
49
  history = history.append(query)
@@ -52,6 +78,7 @@ def run_query(history, query):
52
 
53
  def load_document(pdf_filename):
54
 
 
55
  loader = PDFMinerLoader(pdf_filename)
56
  doc = loader.load()
57
 
@@ -84,12 +111,10 @@ def upload_pdf(file):
84
  # Check if the file is not None before accessing its attributes
85
  if file is not None:
86
  # Save the uploaded file
87
- file_name = file.name
88
-
89
- # file_name = os.path.basename(file_name)
90
 
91
- messsage = load_document(file_name)
92
- return messsage
93
  else:
94
  return "No file uploaded."
95
 
 
11
  from sentence_transformers import SentenceTransformer
12
  import os
13
 
14
+ globl file_name = ''
15
+
16
  model_name = 'google/flan-t5-base'
17
  model = T5ForConditionalGeneration.from_pretrained(model_name, device_map='auto', offload_folder="offload")
18
  tokenizer = AutoTokenizer.from_pretrained(model_name)
 
24
  print('sentence read')
25
 
26
 
27
+ def get_context(query_text, collection):
28
  query_emb = st_model.encode(query_text)
29
  query_response = collection.query(query_embeddings=query_emb.tolist(), n_results=4)
30
  context = query_response['documents'][0][0]
 
44
  return tokenizer.batch_decode(outputs, skip_special_tokens=True)
45
 
46
  def run_query(history, query):
47
+
48
+
49
+ loader = PDFMinerLoader(pdf_filename)
50
+ doc = loader.load()
51
+
52
+ text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
53
+ texts = text_splitter.split_documents(doc)
54
+
55
+ texts = [i.page_content for i in texts]
56
+
57
+ doc_emb = st_model.encode(texts)
58
+ doc_emb = doc_emb.tolist()
59
+
60
+ ids = [str(uuid.uuid1()) for _ in doc_emb]
61
+
62
+ client = chromadb.Client()
63
+ collection = client.create_collection("test_db")
64
 
65
+ collection.add(
66
+ embeddings=doc_emb,
67
+ documents=texts,
68
+ ids=ids
69
+ )
70
+
71
+
72
+ context = get_context(query, collection)
73
  result = local_query(query, context)
74
 
75
  history = history.append(query)
 
78
 
79
  def load_document(pdf_filename):
80
 
81
+
82
  loader = PDFMinerLoader(pdf_filename)
83
  doc = loader.load()
84
 
 
111
  # Check if the file is not None before accessing its attributes
112
  if file is not None:
113
  # Save the uploaded file
114
+ file_name = file.name
 
 
115
 
116
+ # messsage = load_document(file_name)
117
+ return 'Successfully uploaded!'
118
  else:
119
  return "No file uploaded."
120