freQuensy23 committed on
Commit
566eb82
·
1 Parent(s): 3e243df

[FIX] dependency hell

Browse files
Files changed (2) hide show
  1. main.py +11 -5
  2. requirements.txt +4 -4
main.py CHANGED
@@ -12,14 +12,17 @@ embeddings = SentenceTransformerEmbeddings(model_name=model)
12
  prev_files = None
13
  retriever = None
14
 
 
15
  def handle_files_and_query(query, files):
16
  results = ""
17
  global prev_files, retriever
 
18
  if files is not None and files != prev_files:
19
  documents = []
20
  prev_files = files
21
  for file in files:
22
- documents.extend(PyMuPDFLoader(file).load_and_split(SentenceTransformersTokenTextSplitter(model_name=model)))
 
23
  retriever = BM25Retriever.from_documents(documents, k=100)
24
  results += "Index created successfully!\n"
25
  print("Index created successfully!")
@@ -31,19 +34,22 @@ def handle_files_and_query(query, files):
31
  print(f"Query: {query}")
32
  if query:
33
  search_results = retriever.get_relevant_documents(query)
34
- pattern = r'[^\\/]+$' # pattern to get filename from filepath
35
- reranked_results = FAISS.from_documents(search_results, embeddings, distance_strategy=DistanceStrategy.COSINE).similarity_search(query, k=25)
 
 
36
  results = "\n".join([
37
  f"Source: {re.search(pattern, result.metadata['file_path']).group(0)}\nPage: {result.metadata['page']}\nContent:\n{result.page_content}\n"
38
  for result in reranked_results
39
  ])
40
  return results
41
 
 
42
  interface = gr.Interface(
43
  fn=handle_files_and_query,
44
  inputs=[
45
- gr.Textbox(lines = 1, label="Enter your search query here..."),
46
- gr.File(file_count="multiple", type="filepath", file_types=[".pdf"], label="Upload a file here.")
47
  ],
48
  outputs="text",
49
  title="Similarity Search for PDFs"
 
12
  prev_files = None
13
  retriever = None
14
 
15
+
16
  def handle_files_and_query(query, files):
17
  results = ""
18
  global prev_files, retriever
19
+ files = [f.name for f in files]
20
  if files is not None and files != prev_files:
21
  documents = []
22
  prev_files = files
23
  for file in files:
24
+ documents.extend(
25
+ PyMuPDFLoader(file).load_and_split(SentenceTransformersTokenTextSplitter(model_name=model)))
26
  retriever = BM25Retriever.from_documents(documents, k=100)
27
  results += "Index created successfully!\n"
28
  print("Index created successfully!")
 
34
  print(f"Query: {query}")
35
  if query:
36
  search_results = retriever.get_relevant_documents(query)
37
+ pattern = r'[^\\/]+$' # pattern to get filename from filepath
38
+ reranked_results = FAISS.from_documents(search_results, embeddings,
39
+ distance_strategy=DistanceStrategy.COSINE).similarity_search(query,
40
+ k=25)
41
  results = "\n".join([
42
  f"Source: {re.search(pattern, result.metadata['file_path']).group(0)}\nPage: {result.metadata['page']}\nContent:\n{result.page_content}\n"
43
  for result in reranked_results
44
  ])
45
  return results
46
 
47
+
48
  interface = gr.Interface(
49
  fn=handle_files_and_query,
50
  inputs=[
51
+ gr.Textbox(lines=1, label="Enter your search query here..."),
52
+ gr.File(file_count="multiple", type="file", file_types=[".pdf"], label="Upload a file here.")
53
  ],
54
  outputs="text",
55
  title="Similarity Search for PDFs"
requirements.txt CHANGED
@@ -26,7 +26,7 @@ emoji==2.8.0
26
  et-xmlfile==1.1.0
27
  exceptiongroup==1.1.1
28
  faiss-cpu==1.7.4
29
- fastapi==0.105.0
30
  ffmpy==0.3.0
31
  filelock==3.12.2
32
  filetype==1.2.0
@@ -34,7 +34,7 @@ flatbuffers==23.5.26
34
  fonttools==4.40.0
35
  frozenlist==1.3.3
36
  fsspec==2023.6.0
37
- gradio==4.10.0
38
  gradio_client==0.7.3
39
  h11==0.14.0
40
  httpcore==0.17.2
@@ -49,7 +49,7 @@ Jinja2==3.1.2
49
  joblib==1.3.2
50
  jsonschema==4.17.3
51
  kiwisolver==1.4.4
52
- langchain==0.0.205
53
  langchainplus-sdk==0.0.16
54
  langdetect==1.0.9
55
  layoutparser==0.3.4
@@ -90,7 +90,7 @@ portalocker==2.8.2
90
  protobuf==4.25.1
91
  pycocotools==2.0.7
92
  pycparser==2.21
93
- pydantic==2.5.2
94
  pydantic_core==2.14.5
95
  pydub==0.25.1
96
  Pygments==2.15.1
 
26
  et-xmlfile==1.1.0
27
  exceptiongroup==1.1.1
28
  faiss-cpu==1.7.4
29
+ fastapi
30
  ffmpy==0.3.0
31
  filelock==3.12.2
32
  filetype==1.2.0
 
34
  fonttools==4.40.0
35
  frozenlist==1.3.3
36
  fsspec==2023.6.0
37
+ gradio
38
  gradio_client==0.7.3
39
  h11==0.14.0
40
  httpcore==0.17.2
 
49
  joblib==1.3.2
50
  jsonschema==4.17.3
51
  kiwisolver==1.4.4
52
+ langchain
53
  langchainplus-sdk==0.0.16
54
  langdetect==1.0.9
55
  layoutparser==0.3.4
 
90
  protobuf==4.25.1
91
  pycocotools==2.0.7
92
  pycparser==2.21
93
+ pydantic
94
  pydantic_core==2.14.5
95
  pydub==0.25.1
96
  Pygments==2.15.1