Timjo88 committed on
Commit
d8ad5ef
·
1 Parent(s): 20b7dfd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -9
app.py CHANGED
@@ -4,7 +4,7 @@ import pandas as pd
4
  from haystack.schema import Answer
5
  from haystack.document_stores import InMemoryDocumentStore
6
  from haystack.pipelines import FAQPipeline, ExtractiveQAPipeline
7
- from haystack.nodes import EmbeddingRetriever, TfidfRetriever, FARMReader, PDFToTextConverter
8
  from haystack.utils import print_answers
9
  from haystack.utils import convert_files_to_docs
10
  import logging
@@ -16,7 +16,7 @@ def start_haystack():
16
  retriever = EmbeddingRetriever(document_store=document_store, embedding_model='sentence-transformers/all-MiniLM-L6-v2', use_gpu=True, top_k=1)
17
  load_data_to_store(document_store,retriever)
18
  pipeline = FAQPipeline(retriever=retriever)
19
- return pipeline, document_store
20
 
21
  def load_data_to_store(document_store, retriever):
22
  df = pd.read_csv('monopoly_qa-v1.csv')
@@ -28,7 +28,7 @@ def load_data_to_store(document_store, retriever):
28
  dicts = df.to_dict(orient="records")
29
  document_store.write_documents(dicts)
30
 
31
- faq_pipeline, doc_store = start_haystack()
32
 
33
  def predict_faq(question):
34
  prediction = faq_pipeline.run(question)
@@ -38,12 +38,40 @@ def predict_faq(question):
38
 
39
  # Extractive QA functions
40
 
41
- def_start_ex_haystack():
42
- return true
43
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
 
 
 
 
 
 
45
 
46
-
47
  # Gradio App section
48
  input_question =gr.inputs.Textbox(label="enter your monopoly question here")
49
  response = "text"
@@ -56,5 +84,12 @@ mon_faq = gr.Interface(
56
  examples=examples,
57
  title="Monopoly FAQ Semantic Search")
58
 
59
- # def return_feedback(input_question,feedback_answer):
60
-
 
 
 
 
 
 
 
 
4
  from haystack.schema import Answer
5
  from haystack.document_stores import InMemoryDocumentStore
6
  from haystack.pipelines import FAQPipeline, ExtractiveQAPipeline
7
+ from haystack.nodes import EmbeddingRetriever, TfidfRetriever, FARMReader, TextConverter, PreProcessor
8
  from haystack.utils import print_answers
9
  from haystack.utils import convert_files_to_docs
10
  import logging
 
16
  retriever = EmbeddingRetriever(document_store=document_store, embedding_model='sentence-transformers/all-MiniLM-L6-v2', use_gpu=True, top_k=1)
17
  load_data_to_store(document_store,retriever)
18
  pipeline = FAQPipeline(retriever=retriever)
19
+ return pipeline
20
 
21
  def load_data_to_store(document_store, retriever):
22
  df = pd.read_csv('monopoly_qa-v1.csv')
 
28
  dicts = df.to_dict(orient="records")
29
  document_store.write_documents(dicts)
30
 
31
+ faq_pipeline = start_haystack()
32
 
33
  def predict_faq(question):
34
  prediction = faq_pipeline.run(question)
 
38
 
39
  # Extractive QA functions
40
 
41
+ ## preprocess monopoly rules
42
+
43
def preprocess_txt_doc(fpath):
    """Convert a text file into cleaned, word-split documents.

    The file at ``fpath`` is converted with ``TextConverter`` (numeric
    tables removed, English as the valid language). The first converted
    document is then run through a ``PreProcessor`` that cleans empty lines
    and whitespace and splits by word into pieces of length 100 while
    respecting sentence boundaries.

    Args:
        fpath: Path of the text file to convert.

    Returns:
        The result of ``PreProcessor.process`` for the converted document.
    """
    text_converter = TextConverter(
        remove_numeric_tables=True,
        valid_languages=["en"],
    )
    converted = text_converter.convert(file_path=fpath, meta=None)
    # Only the first converted document is used.
    source_document = converted[0]

    splitter = PreProcessor(
        clean_empty_lines=True,
        clean_whitespace=True,
        clean_header_footer=False,
        split_by="word",
        split_length=100,
        split_respect_sentence_boundary=True,
    )
    return splitter.process([source_document])
56
+
57
def start_ex_haystack(documents):
    """Build an extractive QA pipeline over ``documents``.

    The documents are written into a fresh ``InMemoryDocumentStore``. A
    ``TfidfRetriever`` over that store and a ``FARMReader`` loading
    ``deepset/roberta-base-squad2`` (with ``use_gpu=False``) are then
    combined into an ``ExtractiveQAPipeline``.

    Args:
        documents: Documents to index in the in-memory store.

    Returns:
        The assembled ``ExtractiveQAPipeline``.
    """
    store = InMemoryDocumentStore()
    store.write_documents(documents)

    tfidf_retriever = TfidfRetriever(document_store=store)
    squad_reader = FARMReader(
        model_name_or_path="deepset/roberta-base-squad2",
        use_gpu=False,
    )
    return ExtractiveQAPipeline(squad_reader, tfidf_retriever)
64
+
65
# Build the extractive pipeline once at import time so that every call to
# predict_extract reuses the same pipeline object.
docs = preprocess_txt_doc("monopoly_text_v1.txt")
ex_pipeline = start_ex_haystack(docs)
67
 
68
def predict_extract(question):
    """Run the extractive pipeline on ``question`` and format its answers.

    Calls the module-level ``ex_pipeline`` and renders each entry of
    ``prediction["answers"]`` as ``<index>:<answer text>`` followed by a
    newline, with indices starting at 0.

    Args:
        question: The question text passed to ``ex_pipeline.run``.

    Returns:
        A single string with one line per answer, or an empty string when
        the pipeline returns no answers.
    """
    prediction = ex_pipeline.run(question)
    # Join once instead of growing the string with ``+`` on every iteration;
    # the f-string also tolerates an answer whose text is not a ``str``.
    return "".join(
        f"{index}:{candidate.answer}\n"
        for index, candidate in enumerate(prediction["answers"])
    )
74
 
 
75
  # Gradio App section
76
  input_question =gr.inputs.Textbox(label="enter your monopoly question here")
77
  response = "text"
 
84
  examples=examples,
85
  title="Monopoly FAQ Semantic Search")
86
 
87
# Extractive QA interface: same input box, output type and examples as the
# FAQ interface, but backed by predict_extract.
mon_ex = gr.Interface(
    fn=predict_extract,
    inputs=input_question,
    outputs=response,
    examples=examples,
    title="Monopoly Extractive QA Search")

# Serve both interfaces as tabs. The rest of this file refers to Gradio
# through the ``gr`` alias, so use it here too rather than the bare
# ``gradio`` name.
gr.TabbedInterface([mon_faq, mon_ex]).launch()