Update app.py
app.py CHANGED
@@ -4,7 +4,7 @@ import pandas as pd
 from haystack.schema import Answer
 from haystack.document_stores import InMemoryDocumentStore
 from haystack.pipelines import FAQPipeline, ExtractiveQAPipeline
-from haystack.nodes import EmbeddingRetriever, TfidfRetriever, FARMReader,
+from haystack.nodes import EmbeddingRetriever, TfidfRetriever, FARMReader, TextConverter, PreProcessor
 from haystack.utils import print_answers
 from haystack.utils import convert_files_to_docs
 import logging
@@ -16,7 +16,7 @@ def start_haystack():
     retriever = EmbeddingRetriever(document_store=document_store, embedding_model='sentence-transformers/all-MiniLM-L6-v2', use_gpu=True, top_k=1)
     load_data_to_store(document_store,retriever)
     pipeline = FAQPipeline(retriever=retriever)
-    return pipeline
+    return pipeline
 
 def load_data_to_store(document_store, retriever):
     df = pd.read_csv('monopoly_qa-v1.csv')
@@ -28,7 +28,7 @@ def load_data_to_store(document_store, retriever):
     dicts = df.to_dict(orient="records")
     document_store.write_documents(dicts)
 
-faq_pipeline
+faq_pipeline = start_haystack()
 
 def predict_faq(question):
     prediction = faq_pipeline.run(question)
@@ -38,12 +38,40 @@ def predict_faq(question):
 
 # Extractive QA functions
 
-
-
-
+## preprocess monopoly rules
+
+def preprocess_txt_doc(fpath):
+
+    converter = TextConverter(remove_numeric_tables=True, valid_languages=["en"])
+    doc_txt = converter.convert(file_path=fpath, meta=None)[0]
+    preprocessor = PreProcessor(
+        clean_empty_lines=True,
+        clean_whitespace=True,
+        clean_header_footer=False,
+        split_by="word",
+        split_length=100,
+        split_respect_sentence_boundary=True,)
+    docs = preprocessor.process([doc_txt])
+    return docs
+
+def start_ex_haystack(documents):
+    ex_document_store = InMemoryDocumentStore()
+    ex_document_store.write_documents(documents)
+    retriever = TfidfRetriever(document_store=ex_document_store)
+    reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2", use_gpu=False)
+    pipe = ExtractiveQAPipeline(reader, retriever)
+    return pipe
+
+docs = preprocess_txt_doc("monopoly_text_v1.txt")
+ex_pipeline = start_ex_haystack(docs)
 
+def predict_extract(question):
+    prediction = ex_pipeline.run(question)
+    possible_answers = ""
+    for i,a in enumerate(prediction["answers"]):
+        possible_answers = possible_answers +str(i) + ":" + a.answer + "\n"
+    return possible_answers
 
-
 # Gradio App section
 input_question =gr.inputs.Textbox(label="enter your monopoly question here")
 response = "text"
@@ -56,5 +84,12 @@ mon_faq = gr.Interface(
     examples=examples,
     title="Monopoly FAQ Semantic Search")
 
-#
-
+# extractive interface
+mon_ex = gr.Interface(
+    fn=predict_extract,
+    inputs=input_question,
+    outputs=response,
+    examples=examples,
+    title="Monopoly Extractive QA Search")
+
+gradio.TabbedInterface([mon_faq,mon_ex]).launch()
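For context, here is a minimal, hypothetical sketch of how the two pipelines assembled in this commit could be exercised directly, outside the Gradio UI. It assumes the Haystack 1.x API that app.py already imports and that the functions from this commit (start_haystack, preprocess_txt_doc, start_ex_haystack) are in scope; the sample question and the top_k values passed via params are illustrative only and are not part of the commit.

# Not part of the commit: a local smoke test for the two pipelines,
# assuming Haystack 1.x and the functions defined above in app.py.
from haystack.utils import print_answers

faq_pipeline = start_haystack()                      # FAQPipeline over monopoly_qa-v1.csv
docs = preprocess_txt_doc("monopoly_text_v1.txt")    # rulebook split into ~100-word passages
ex_pipeline = start_ex_haystack(docs)                # TfidfRetriever + FARMReader

question = "How much money does each player start with?"  # illustrative query

# FAQ semantic search: the EmbeddingRetriever was created with top_k=1,
# so a single matched question/answer pair is returned.
faq_result = faq_pipeline.run(query=question)
print_answers(faq_result, details="minimum")

# Extractive QA: per-node params can widen retrieval and reading if needed.
ex_result = ex_pipeline.run(
    query=question,
    params={"Retriever": {"top_k": 3}, "Reader": {"top_k": 3}},
)
for answer in ex_result["answers"]:
    print(answer.answer, answer.score)

The two tabs deliberately use separate document stores: the FAQ pipeline embeds the curated question/answer pairs from the CSV, while the extractive pipeline pulls answer spans out of the preprocessed rulebook text.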