Spaces:
Runtime error
Runtime error
freQuensy23
committed on
Commit
·
388ab15
1
Parent(s):
566eb82
[IMP] additional parameters
Browse files
main.py
CHANGED
@@ -13,7 +13,7 @@ prev_files = None
|
|
13 |
retriever = None
|
14 |
|
15 |
|
16 |
-
def handle_files_and_query(query, files):
|
17 |
results = ""
|
18 |
global prev_files, retriever
|
19 |
files = [f.name for f in files]
|
@@ -22,8 +22,11 @@ def handle_files_and_query(query, files):
|
|
22 |
prev_files = files
|
23 |
for file in files:
|
24 |
documents.extend(
|
25 |
-
PyMuPDFLoader(file).
|
26 |
-
|
|
|
|
|
|
|
27 |
results += "Index created successfully!\n"
|
28 |
print("Index created successfully!")
|
29 |
elif files is None:
|
@@ -49,10 +52,14 @@ interface = gr.Interface(
|
|
49 |
fn=handle_files_and_query,
|
50 |
inputs=[
|
51 |
gr.Textbox(lines=1, label="Enter your search query here..."),
|
52 |
-
gr.File(file_count="multiple", type="file", file_types=[".pdf"], label="Upload a file here.")
|
|
|
|
|
|
|
|
|
53 |
],
|
54 |
outputs="text",
|
55 |
-
title="Similarity Search for
|
56 |
)
|
57 |
|
58 |
interface.launch()
|
|
|
13 |
retriever = None
|
14 |
|
15 |
|
16 |
+
def handle_files_and_query(query, files, chunk_overlap=50, token_per_chunk=256, bm_25_answers=200):
|
17 |
results = ""
|
18 |
global prev_files, retriever
|
19 |
files = [f.name for f in files]
|
|
|
22 |
prev_files = files
|
23 |
for file in files:
|
24 |
documents.extend(
|
25 |
+
PyMuPDFLoader(file).
|
26 |
+
load_and_split(SentenceTransformersTokenTextSplitter(model_name=model,
|
27 |
+
chunk_overlap=chunk_overlap,
|
28 |
+
tokens_per_chunk=token_per_chunk)))
|
29 |
+
retriever = BM25Retriever.from_documents(documents, k=bm_25_answers)
|
30 |
results += "Index created successfully!\n"
|
31 |
print("Index created successfully!")
|
32 |
elif files is None:
|
|
|
52 |
fn=handle_files_and_query,
|
53 |
inputs=[
|
54 |
gr.Textbox(lines=1, label="Enter your search query here..."),
|
55 |
+
gr.File(file_count="multiple", type="file", file_types=[".pdf"], label="Upload a file here."),
|
56 |
+
gr.Slider(minimum=1, maximum=100, value=50, label="Chunk Overlap"),
|
57 |
+
gr.Slider(minimum=64, maximum=512, value=256, label="Tokens Per Chunk (чем больше - тем бОльшие куски книги "
|
58 |
+
"сможем находить)"),
|
59 |
+
gr.Slider(minimum=1, maximum=1000, value=200, label="BM25 Answers (чем больше - тем больше будем учитывать неявные смысловые сравнения слов)")
|
60 |
],
|
61 |
outputs="text",
|
62 |
+
title="Similarity Search for eksmo books"
|
63 |
)
|
64 |
|
65 |
interface.launch()
|