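# Monopoly rules Q&A demo: an FAQ semantic-search pipeline and an extractive QA
# pipeline built with the Haystack v1 API, served through a tabbed Gradio interface.
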
import gradio as gr
import pandas as pd
from haystack.schema import Answer
from haystack.document_stores import InMemoryDocumentStore
from haystack.pipelines import FAQPipeline, ExtractiveQAPipeline
from haystack.nodes import EmbeddingRetriever, TfidfRetriever, FARMReader, TextConverter, PreProcessor
from haystack.utils import print_answers, convert_files_to_docs
import logging

# FAQ Haystack function calls
def start_haystack():
    # all-MiniLM-L6-v2 produces 384-dimensional embeddings, so embedding_dim must match
    document_store = InMemoryDocumentStore(index="document", embedding_field="embedding", embedding_dim=384, similarity="cosine")
    retriever = EmbeddingRetriever(document_store=document_store, embedding_model="sentence-transformers/all-MiniLM-L6-v2", use_gpu=True, top_k=1)
    load_data_to_store(document_store, retriever)
    pipeline = FAQPipeline(retriever=retriever)
    return pipeline

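# Load the FAQ CSV (expects "Question", "Answer", and a source-link column),
# embed each question, and write the records to the document store.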
def load_data_to_store(document_store, retriever):
    df = pd.read_csv("monopoly_qa-v1.csv")
    questions = list(df.Question)
    df["embedding"] = retriever.embed_queries(texts=questions)
    df = df.rename(columns={"Question": "content", "Answer": "answer"})
    df.drop("link to source (to prevent duplicate sources)", axis=1, inplace=True)
    dicts = df.to_dict(orient="records")
    document_store.write_documents(dicts)

faq_pipeline = start_haystack()

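# Gradio callback: retrieve the closest FAQ entry and format its stored question and answer.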
def predict_faq(question):
    prediction = faq_pipeline.run(question)
    answer = prediction["answers"][0].meta
    faq_response = "FAQ Question: " + answer["query"] + "\n" + "Answer: " + answer["answer"]
    return faq_response

# Extractive QA functions
## preprocess monopoly rules
def preprocess_txt_doc(fpath):
    converter = TextConverter(remove_numeric_tables=True, valid_languages=["en"])
    doc_txt = converter.convert(file_path=fpath, meta=None)[0]
    preprocessor = PreProcessor(
        clean_empty_lines=True,
        clean_whitespace=True,
        clean_header_footer=False,
        split_by="word",
        split_length=100,
        split_respect_sentence_boundary=True,
    )
    docs = preprocessor.process([doc_txt])
    return docs

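# Build the extractive pipeline: a TF-IDF retriever over the preprocessed rules,
# followed by a RoBERTa reader fine-tuned on SQuAD 2.0.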
def start_ex_haystack(documents):
    ex_document_store = InMemoryDocumentStore()
    ex_document_store.write_documents(documents)
    retriever = TfidfRetriever(document_store=ex_document_store)
    reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2", use_gpu=False)
    pipe = ExtractiveQAPipeline(reader, retriever)
    return pipe

docs = preprocess_txt_doc("monopoly_text_v1.txt")
ex_pipeline = start_ex_haystack(docs)

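# Gradio callback: run extractive QA and list each candidate answer span on its own line.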
def predict_extract(question):
    prediction = ex_pipeline.run(question)
    possible_answers = ""
    for i, a in enumerate(prediction["answers"]):
        possible_answers = possible_answers + str(i) + ":" + a.answer + "\n"
    return possible_answers

# Gradio App section
input_question = gr.inputs.Textbox(label="Enter your Monopoly question here")
response = "text"
examples = ["how much cash do we get to start with?", "at what point can I buy houses?", "what happens when I land on free parking?"]
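# Note: gr.inputs.Textbox and the "text" output shortcut follow the older Gradio 2.x/3.x
# Interface API; on Gradio 4.x the gr.inputs module is removed and gr.Textbox(label=...)
# is the equivalent input component.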
mon_faq = gr.Interface(
    fn=predict_faq,
    inputs=input_question,
    outputs=response,
    examples=examples,
    title="Monopoly FAQ Semantic Search")

# extractive interface
mon_ex = gr.Interface(
    fn=predict_extract,
    inputs=input_question,
    outputs=response,
    examples=examples,
    title="Monopoly Extractive QA Search")

gr.TabbedInterface([mon_faq, mon_ex], ["FAQ Search", "Extractive QA"]).launch()
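# launch() serves the tabbed UI; on Hugging Face Spaces this script is typically saved
# as app.py and started automatically when the Space builds.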