# --- Hugging Face Spaces page residue (not Python; converted to comments) ---
# Spaces status: Runtime error
# File size: 3,538 Bytes
# (Commit-hash row and line-number gutter from the scraped file viewer removed.)
import gradio as gr
import pandas as pd
from haystack.schema import Answer
from haystack.document_stores import InMemoryDocumentStore
from haystack.pipelines import FAQPipeline, ExtractiveQAPipeline
from haystack.nodes import EmbeddingRetriever, TfidfRetriever, FARMReader, TextConverter, PreProcessor
from haystack.utils import print_answers
from haystack.utils import convert_files_to_docs
import logging
# FAQ Haystack function calls
def start_haystack():
    """Build and return the FAQ semantic-search pipeline.

    Creates an in-memory document store sized for MiniLM embeddings
    (384-dim, cosine similarity), wires up an embedding retriever,
    loads the FAQ CSV into the store, and wraps both in a FAQPipeline.
    """
    store = InMemoryDocumentStore(index="document", embedding_field='embedding', embedding_dim=384, similarity='cosine')
    faq_retriever = EmbeddingRetriever(document_store=store, embedding_model='sentence-transformers/all-MiniLM-L6-v2', use_gpu=True, top_k=1)
    load_data_to_store(store, faq_retriever)
    return FAQPipeline(retriever=faq_retriever)
def load_data_to_store(document_store, retriever):
    """Load the monopoly FAQ CSV into *document_store* with query embeddings.

    Reads 'monopoly_qa-v1.csv', embeds the Question column with *retriever*,
    renames columns to the content/answer schema Haystack's FAQPipeline
    expects, drops the source-link bookkeeping column, and writes the rows
    as documents.
    """
    faq_df = pd.read_csv('monopoly_qa-v1.csv')
    # Embed the raw questions before any column renaming.
    faq_df['embedding'] = retriever.embed_queries(texts=list(faq_df.Question))
    faq_df = faq_df.rename(columns={"Question": "content", "Answer": "answer"})
    faq_df = faq_df.drop(columns='link to source (to prevent duplicate sources)')
    document_store.write_documents(faq_df.to_dict(orient="records"))
# Build the FAQ pipeline once at import time; reused by every predict_faq call.
faq_pipeline = start_haystack()
def predict_faq(question):
    """Answer *question* via FAQ semantic search.

    Returns a two-line string with the best-matching FAQ question and its
    stored answer, pulled from the top answer's metadata.
    """
    result = faq_pipeline.run(question)
    top_meta = result["answers"][0].meta
    return f"FAQ Question: {top_meta['query']}\nAnswer: {top_meta['answer']}"
# Extractive QA functions
## preprocess monopoly rules
def preprocess_txt_doc(fpath):
    """Convert the plain-text rules file at *fpath* into preprocessed docs.

    Runs Haystack's TextConverter (numeric tables stripped, English only),
    then splits the result into ~100-word chunks that respect sentence
    boundaries, with whitespace/empty-line cleanup.
    """
    converter = TextConverter(remove_numeric_tables=True, valid_languages=["en"])
    raw_doc = converter.convert(file_path=fpath, meta=None)[0]
    splitter = PreProcessor(
        clean_empty_lines=True,
        clean_whitespace=True,
        clean_header_footer=False,
        split_by="word",
        split_length=100,
        split_respect_sentence_boundary=True,
    )
    return splitter.process([raw_doc])
def start_ex_haystack(documents):
    """Build and return an extractive-QA pipeline over *documents*.

    Uses a fresh in-memory store, a TF-IDF retriever (no embeddings
    needed), and a CPU-only RoBERTa SQuAD2 reader.
    """
    store = InMemoryDocumentStore()
    store.write_documents(documents)
    tfidf_retriever = TfidfRetriever(document_store=store)
    squad_reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2", use_gpu=False)
    return ExtractiveQAPipeline(squad_reader, tfidf_retriever)
# Build the extractive-QA pipeline once at import time from the rules text;
# reused by every predict_extract call.
docs = preprocess_txt_doc("monopoly_text_v1.txt")
ex_pipeline = start_ex_haystack(docs)
def predict_extract(question):
    """Answer *question* with extractive QA over the rules text.

    Returns one "index:answer" line per extracted answer span,
    each terminated by a newline.
    """
    result = ex_pipeline.run(question)
    return "".join(
        f"{idx}:{ans.answer}\n" for idx, ans in enumerate(result["answers"])
    )
# Gradio App section
# Shared input widget and examples used by both tabs.
# NOTE(review): gr.inputs.* is the legacy Gradio 2.x namespace — works here,
# but was removed in Gradio 4; migrate to gr.Textbox if upgrading.
input_question = gr.inputs.Textbox(label="enter your monopoly question here")
response = "text"
examples = ["how much cash do we get to start with?", "at what point can I buy houses?", "what happens when I land on free parking?"]

# FAQ semantic-search tab.
mon_faq = gr.Interface(
    fn=predict_faq,
    inputs=input_question,
    outputs=response,
    examples=examples,
    title="Monopoly FAQ Semantic Search")

# extractive interface
mon_ex = gr.Interface(
    fn=predict_extract,
    inputs=input_question,
    outputs=response,
    examples=examples,
    title="Monopoly Extractive QA Search")

# Fix: the original line ended with a stray " |" (file-viewer table residue),
# which is a SyntaxError and would abort the app at import time.
gr.TabbedInterface([mon_faq, mon_ex], ["FAQ Search", "Extractive QA"]).launch()