Ilona Kovaleva committed on
Commit
7a48124
·
1 Parent(s): a82939c

✨feature: Add first version

Browse files
app.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import dill
2
+ import streamlit as st
3
+ import os
4
+ from haystack.utils import fetch_archive_from_http, clean_wiki_text, convert_files_to_docs
5
+ from haystack.schema import Answer
6
+ from haystack.document_stores import InMemoryDocumentStore
7
+ from haystack.pipelines import ExtractiveQAPipeline
8
+ from haystack.nodes import FARMReader, BM25Retriever
9
+ import logging
10
+ from markdown import markdown
11
+ from annotated_text import annotation
12
+ from streamlit_lottie import st_lottie
13
+
14
# Page setup and environment tweaks.
st.set_page_config(page_title="QA-project", page_icon="📇")
# NOTE(review): presumably silences the Hugging Face tokenizers parallelism
# warning -- confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Locations of the bundled assets, relative to the working directory.
DATA_DIR = "./dataset"
DOCS_PATH = os.path.join(DATA_DIR, "all_docs_36838.pkl")
LOTTIE_PATH = "img/108423-search-for-documents.json"

# Title rendered in the page header.
PROG_TITLE = "QA project Demo"

# Question/answer pre-filled when the UI first loads; both can be overridden
# through environment variables of the same name.
DEFAULT_QUESTION_AT_STARTUP = os.environ.get(
    "DEFAULT_QUESTION_AT_STARTUP", "What's the capital of France?"
)
DEFAULT_ANSWER_AT_STARTUP = os.environ.get("DEFAULT_ANSWER_AT_STARTUP", "Paris")
23
+
24
def place_header_center(text, lottie_data):
    """Render the page header: a Lottie animation followed by a title.

    Args:
        text: Title string shown in the middle column.
        lottie_data: Animation payload handed to ``st_lottie``.
    """
    # Three columns with relative widths 3:3:1 -- animation, title, spacer.
    animation_col, title_col, spacer_col = st.columns([3, 3, 1])

    with animation_col:
        st_lottie(lottie_data, height=150)

    with title_col:
        st.title(text)

    # The last column only receives an empty write.
    with spacer_col:
        st.write("")
32
+
33
+
34
@st.experimental_memo
def get_lottie(path):
    """Load a Lottie animation from a JSON file.

    The result is memoized by Streamlit, so the file is parsed once per
    distinct ``path``.

    Args:
        path: Filesystem path of the Lottie JSON file.

    Returns:
        The parsed JSON content of the file.
    """
    # Imported locally because the module-level imports do not include
    # ``json``; without it the call below raises NameError.
    import json

    # JSON is UTF-8 by specification; do not depend on the platform default
    # encoding. Undecodable bytes are still ignored, as before.
    with open(path, 'r', encoding='utf-8', errors='ignore') as f:
        return json.load(f)
39
+
40
+
41
def load_and_write_data(document_store):
    """Deserialize the bundled documents and write them to ``document_store``.

    Args:
        document_store: Object exposing ``write_documents``; it receives the
            documents unpickled from ``DOCS_PATH``.
    """
    with open(DOCS_PATH, "rb") as pickled_docs:
        documents = dill.load(pickled_docs)
    document_store.write_documents(documents)
47
+
48
+
49
# Haystack Components
@st.experimental_singleton
def start_haystack():
    """Build the Haystack components once and share them across reruns.

    Streamlit re-executes the script on every user interaction. Without
    caching, the pickled corpus would be reloaded and the reader model
    re-instantiated each time a question is typed. The singleton cache keeps
    one set of components for the lifetime of the server process.

    Returns:
        A ``(document_store, retriever, reader, pipeline)`` tuple.
    """
    document_store = InMemoryDocumentStore(use_bm25=True)
    load_and_write_data(document_store)
    retriever = BM25Retriever(document_store=document_store)
    reader = FARMReader(
        model_name_or_path="mrm8488/RuPERTa-base-finetuned-squadv1",
        use_gpu=False,
        num_processes=1,
    )
    pipeline = ExtractiveQAPipeline(reader, retriever)
    return document_store, retriever, reader, pipeline


# Keep the original module-level names so the rest of the script is unchanged.
document_store, retriever, reader, pipeline = start_haystack()
59
+
60
+
61
def set_state_if_absent(key, value):
    """Store ``value`` under ``key`` in the session state unless already set.

    Args:
        key: Session-state key to initialise.
        value: Value assigned only when ``key`` is not present yet.
    """
    if key in st.session_state:
        return
    st.session_state[key] = value
64
+
65
# Seed the session state with its initial values (existing values are kept).
for state_key, initial_value in (
    ("question", DEFAULT_QUESTION_AT_STARTUP),
    ("answer", DEFAULT_ANSWER_AT_STARTUP),
    ("results", None),
):
    set_state_if_absent(state_key, initial_value)
68
+
69
+
70
def reset_results(*args):
    """Clear the stored results; used as the text input's ``on_change`` hook.

    Args:
        *args: Ignored; accepted so the function can be used as a callback.
    """
    st.session_state["results"] = None
72
+
73
# Streamlit App
# Header: animation plus title.
lottie_data = get_lottie(LOTTIE_PATH)
place_header_center(PROG_TITLE, lottie_data)

# Short introduction shown under the header.
st.markdown(
    """
This QA demo uses a [Haystack Extractive QA Pipeline](https://haystack.deepset.ai/components/ready-made-pipelines#extractiveqapipeline) with
an [InMemoryDocumentStore](https://haystack.deepset.ai/components/document-store) which contains documents about different program modules
Go ahead and ask questions about the program modules functionality!
""",
    unsafe_allow_html=True,
)

# Question box: pre-filled from the session state; editing it clears the
# previously stored results through ``reset_results``.
question = st.text_input(
    "",
    value=st.session_state.question,
    max_chars=100,
    on_change=reset_results,
)
84
+
85
+
86
def ask_question(question):
    """Run ``question`` through the QA pipeline and flatten the answers.

    Args:
        question: Query string passed to ``pipeline.run``.

    Returns:
        A list with one dict per predicted answer. Entries with a non-empty
        answer carry ``context``, ``answer``, ``relevance`` and
        ``offset_start_in_doc``; entries without one carry ``context`` and
        ``answer`` set to ``None`` plus ``relevance``.
    """
    run_params = {"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}}
    prediction = pipeline.run(query=question, params=run_params)

    def _to_result(raw_answer):
        fields = raw_answer.to_dict()
        # Score scaled by 100 and rounded to two decimals.
        relevance = round(fields["score"] * 100, 2)
        if not fields["answer"]:
            return {"context": None, "answer": None, "relevance": relevance}
        return {
            "context": "..." + fields["context"] + "...",
            "answer": fields["answer"],
            "relevance": relevance,
            "offset_start_in_doc": fields["offsets_in_document"][0]["start"],
        }

    return [_to_result(raw_answer) for raw_answer in prediction["answers"]]
109
+
110
+
111
# Run the query whenever the input box holds a question.
if question:
    with st.spinner("🕰️    Performing semantic search on program modules..."):
        try:
            # Lazy %-formatting: same log message, built only when emitted.
            logging.info("Asked %s", question)
            st.session_state.results = ask_question(question)
        except Exception as e:
            # Top-level UI boundary: keep the app alive and log the traceback,
            # but also tell the user instead of silently showing nothing.
            logging.exception(e)
            st.error(
                "An error occurred while answering the question. "
                "See the application logs for details."
            )
119
+
120
+
121
# Render the stored results, highlighting each answer inside its context.
if st.session_state.results:
    st.write('## Top Results')
    for result in st.session_state.results:
        if result["answer"]:
            answer, context = result["answer"], result["context"]
            start_idx = context.find(answer)
            if start_idx < 0:
                # ``str.find`` returns -1 when the answer is not in the
                # context; slicing with -1 would corrupt the output, so show
                # the context without a highlight instead.
                highlighted = context
            else:
                end_idx = start_idx + len(answer)
                highlighted = (
                    context[:start_idx]
                    + str(annotation(body=answer, label="ANSWER", background="#ff700f", color='#ffffff'))
                    + context[end_idx:]
                )
            st.write(markdown(highlighted), unsafe_allow_html=True)
            st.markdown(f"**Relevance:** {result['relevance']}")
        else:
            st.info(
                "🤔    Haystack is unsure whether any of the documents contain an answer to your question. Try to reformulate it!"
            )
dataset/all_docs_36838.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f030eec06564ac556d62585b179f3802c5afbcd0fc909bf66424e1cc94948463
3
+ size 110792826
eval_labels_example.csv ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "Question Text";"Answer"
2
+ "What is the capital of France?";"Paris"
3
+ "What's the tallest mountain in Africa?";"Mount Kilimanjaro"
4
+ "What's the climate of Beijing?";"monsoon-influenced humid continental"
5
+ "What's the longest river of Europe?";"The Volga"
6
+ "What's the deepest lake in the world?";"Lake Bajkal"
7
+ "How many people live in the capital of the US?";"689,545"
8
+ "Which Chinese city is the largest?";"Shanghai"
9
+ "What's the type of government of the UK?";"unitary parliamentary democracy and constitutional monarchy"
10
+ "What currency is used in Hungary?";"Hungarian forint"
11
+ "In which city is the Louvre?";"Paris"
12
+ "Who is the current king of Spain?";"Felipe VI"
13
+ "Which countries border with Mongolia?";"Russia and China"
14
+ "What's the current name of Swaziland?";"Eswatini"
img/108423-search-for-documents.json ADDED
The diff for this file is too large to render. See raw diff
 
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ poppler-utils
2
+ xpdf
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ dill
2
+ farm-haystack==1.4.0
3
+ validators==0.18.2
4
+ markdown
5
+ streamlit==1.17.0
6
+ st-annotated-text
7
+ streamlit-lottie