Spaces:

Timjo88
/

monopoly-qa-semantic-search

Runtime error

App Files Files Community

Timjo88 committed on Sep 19, 2022

Commit

d8ad5ef

1 Parent(s): 20b7dfd

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -9

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ import pandas as pd
 from haystack.schema import Answer
 from haystack.document_stores import InMemoryDocumentStore
 from haystack.pipelines import FAQPipeline, ExtractiveQAPipeline
-from haystack.nodes import EmbeddingRetriever, TfidfRetriever, FARMReader, PDFToTextConverter
 from haystack.utils import print_answers
 from haystack.utils import convert_files_to_docs
 import logging
@@ -16,7 +16,7 @@ def start_haystack():
     retriever = EmbeddingRetriever(document_store=document_store, embedding_model='sentence-transformers/all-MiniLM-L6-v2', use_gpu=True, top_k=1)
     load_data_to_store(document_store,retriever)
     pipeline = FAQPipeline(retriever=retriever)
-    return pipeline, document_store
 def load_data_to_store(document_store, retriever):
     df = pd.read_csv('monopoly_qa-v1.csv')
@@ -28,7 +28,7 @@ def load_data_to_store(document_store, retriever):
     dicts = df.to_dict(orient="records")
     document_store.write_documents(dicts)
-faq_pipeline, doc_store = start_haystack()
 def predict_faq(question):
     prediction = faq_pipeline.run(question)
@@ -38,12 +38,40 @@ def predict_faq(question):
 # Extractive QA functions
-def_start_ex_haystack():
-    return true
 # Gradio App section
 input_question =gr.inputs.Textbox(label="enter your monopoly question here")
 response = "text"
@@ -56,5 +84,12 @@ mon_faq = gr.Interface(
             examples=examples,
             title="Monopoly FAQ Semantic Search")
-# def return_feedback(input_question,feedback_answer):

 from haystack.schema import Answer
 from haystack.document_stores import InMemoryDocumentStore
 from haystack.pipelines import FAQPipeline, ExtractiveQAPipeline
+from haystack.nodes import EmbeddingRetriever, TfidfRetriever, FARMReader, TextConverter, PreProcessor
 from haystack.utils import print_answers
 from haystack.utils import convert_files_to_docs
 import logging
     retriever = EmbeddingRetriever(document_store=document_store, embedding_model='sentence-transformers/all-MiniLM-L6-v2', use_gpu=True, top_k=1)
     load_data_to_store(document_store,retriever)
     pipeline = FAQPipeline(retriever=retriever)
+    return pipeline
 def load_data_to_store(document_store, retriever):
     df = pd.read_csv('monopoly_qa-v1.csv')
     dicts = df.to_dict(orient="records")
     document_store.write_documents(dicts)
+faq_pipeline = start_haystack()
 def predict_faq(question):
     prediction = faq_pipeline.run(question)
 # Extractive QA functions
+## preprocess monopoly rules
+def preprocess_txt_doc(fpath):
+    converter = TextConverter(remove_numeric_tables=True, valid_languages=["en"])
+    doc_txt = converter.convert(file_path=fpath, meta=None)[0]
+    preprocessor = PreProcessor(
+        clean_empty_lines=True,
+        clean_whitespace=True,
+        clean_header_footer=False,
+        split_by="word",
+        split_length=100,
+        split_respect_sentence_boundary=True,)
+    docs = preprocessor.process([doc_txt])
+    return docs
+def start_ex_haystack(documents):
+    ex_document_store = InMemoryDocumentStore()
+    ex_document_store.write_documents(documents)
+    retriever = TfidfRetriever(document_store=ex_document_store)
+    reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2", use_gpu=False)
+    pipe = ExtractiveQAPipeline(reader, retriever)
+    return pipe
+# Module-level setup: split the rules text into passages once, then build the
+# extractive pipeline that predict_extract() queries.
+docs = preprocess_txt_doc("monopoly_text_v1.txt")
+ex_pipeline = start_ex_haystack(docs)
+def predict_extract(question):
+    prediction = ex_pipeline.run(question)
+    possible_answers = ""
+    for i,a in enumerate(prediction["answers"]):
+        possible_answers =  possible_answers  +str(i) + ":" + a.answer + "\n"
+    return possible_answers
 # Gradio App section
 input_question =gr.inputs.Textbox(label="enter your monopoly question here")
 response = "text"
             examples=examples,
             title="Monopoly FAQ Semantic Search")
+# extractive interface
+mon_ex = gr.Interface(
+            fn=predict_extract,
+            inputs=input_question,
+            outputs=response,
+            examples=examples,
+            title="Monopoly Extractive QA Search")
+gradio.TabbedInterface([mon_faq,mon_ex]).launch()