etweedy committed on
Commit
a11b5c0
·
1 Parent(s): ef183f2

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -54
app.py CHANGED
@@ -5,48 +5,37 @@ from torch.utils.data import DataLoader
5
  from transformers import (
6
  AutoTokenizer,
7
  AutoModelForQuestionAnswering,
8
- TrainingArguments,
9
- Trainer,
10
- default_data_collator,
11
  )
 
12
 
13
- # Load custom functions
14
- from lib.utils import preprocess_examples, make_predictions, get_examples
 
15
 
16
- # Set mps or cuda device if available
17
- if torch.backends.mps.is_available():
18
- device = "mps"
19
- elif torch.cuda.is_available():
20
- device = "cuda"
21
- else:
22
- device = "cpu"
23
-
24
- # Initialize session state variables
25
- if 'response' not in st.session_state:
26
- st.session_state['response'] = ''
27
- if 'context' not in st.session_state:
28
- st.session_state['context'] = ''
29
- if 'question' not in st.session_state:
30
- st.session_state['question'] = ''
31
-
32
  # Build trainer using model and tokenizer from Hugging Face repo
33
  @st.cache_resource(show_spinner=False)
34
- def get_model():
35
  """
36
  Load model and tokenizer from 🤗 repo
 
37
  Parameters: None
38
  -----------
39
  Returns:
40
  --------
41
- model : transformers.AutoModelForQuestionAnswering
42
- The fine-tuned Q&A model
43
- tokenizer : transformers.AutoTokenizer
44
- The model's pre-trained tokenizer
45
  """
46
  repo_id = 'etweedy/roberta-base-squad-v2'
47
  model = AutoModelForQuestionAnswering.from_pretrained(repo_id)
48
  tokenizer = AutoTokenizer.from_pretrained(repo_id)
49
- return model, tokenizer
 
 
 
 
 
 
50
 
51
  def fill_in_example(i):
52
  """
@@ -64,9 +53,52 @@ def clear_boxes():
64
  st.session_state['question'] = ''
65
  st.session_state['context'] = ''
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  # Retrieve stored model
68
  with st.spinner('Loading the model...'):
69
- model, tokenizer = get_model()
 
 
 
 
 
 
 
70
 
71
  # Intro text
72
  st.header('RoBERTa Q&A model')
@@ -111,14 +143,15 @@ Please type or paste a context paragraph and question you'd like to ask about it
111
  Alternatively, you can try an example by clicking one of the buttons below:
112
  ''')
113
 
114
- # Grab example question-context pairs from csv file
115
- ex_q, ex_c = get_examples()
116
-
117
  # Generate containers in order
118
  example_container = st.container()
119
  input_container = st.container()
120
  response_container = st.container()
121
 
 
 
 
 
122
  # Populate example button container
123
  with example_container:
124
  ex_cols = st.columns(len(ex_q)+1)
@@ -164,29 +197,18 @@ with input_container:
164
  st.session_state['question'] = question
165
  st.session_state['context'] = context
166
  with st.spinner('Generating response...'):
167
- # Generate dataset from input example
168
- data_raw = Dataset.from_dict(
169
- {
170
- 'id':[0],
171
- 'context':[st.session_state['context']],
172
- 'question':[st.session_state['question']],
173
- }
174
- )
175
- # Tokenize and preprocess dataset
176
- data_proc = data_raw.map(
177
- preprocess_examples,
178
- remove_columns = data_raw.column_names,
179
- batched = True,
180
- fn_kwargs = {
181
- 'tokenizer':tokenizer,
182
- }
183
- )
184
- # Make answer prediction with model
185
- predicted_answers = make_predictions(model, tokenizer,
186
- data_proc, data_raw,
187
- n_best = 20)
188
- answer = predicted_answers[0]['prediction_text']
189
- confidence = predicted_answers[0]['confidence']
190
  # Update response in session state
191
  st.session_state['response'] = f"""
192
  Answer: {answer}\n
 
5
  from transformers import (
6
  AutoTokenizer,
7
  AutoModelForQuestionAnswering,
8
+ pipeline,
 
 
9
  )
10
+ import pandas as pd
11
 
12
+ ########################
13
+ ### Helper functions ###
14
+ ########################
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  # Build question-answering pipeline using model and tokenizer from Hugging Face repo
17
  @st.cache_resource(show_spinner=False)
18
def get_pipeline():
    """
    Load model and tokenizer from 🤗 repo
    and build pipeline

    Parameters: None
    -----------
    Returns:
    --------
    qa_pipeline : transformers.QuestionAnsweringPipeline
        The question answering pipeline object
    """
    repo_id = 'etweedy/roberta-base-squad-v2'
    # Load the weights and tokenizer once and hand the loaded objects to
    # pipeline(); passing repo_id a second time would make pipeline()
    # load both of them again.
    model = AutoModelForQuestionAnswering.from_pretrained(repo_id)
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    qa_pipeline = pipeline(
        task='question-answering',
        model=model,
        tokenizer=tokenizer,
        # Let the pipeline return an empty answer when the context does
        # not contain one (the app checks for answer == '').
        handle_impossible_answer=True,
    )
    return qa_pipeline
39
 
40
  def fill_in_example(i):
41
  """
 
53
  st.session_state['question'] = ''
54
  st.session_state['context'] = ''
55
 
56
def get_examples(path='examples.csv'):
    """
    Retrieve pre-made examples from a .csv file

    Parameters:
    -----------
    path : str or path-like, optional
        Location of the csv file to read. The file must contain
        'question' and 'context' columns. Defaults to 'examples.csv',
        the file name the app has always used.
    Returns:
    --------
    questions, contexts : list, list
        Lists of examples of corresponding question-context pairs
        (row i of each list belongs to the same example)
    """
    examples = pd.read_csv(path)
    questions = list(examples['question'])
    contexts = list(examples['context'])
    return questions, contexts
71
+
72
+ #############
73
+ ### Setup ###
74
+ #############
75
+
76
# Choose the torch device: Apple MPS first, then CUDA, otherwise CPU
device = (
    "mps" if torch.backends.mps.is_available()
    else "cuda" if torch.cuda.is_available()
    else "cpu"
)

# Seed the session state slots the app reads, keeping any value that a
# previous rerun already stored
for _state_key in ('response', 'context', 'question'):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = ''

# Fetch the cached QA pipeline, showing a spinner while it loads
with st.spinner('Loading the model...'):
    qa_pipeline = get_pipeline()

# Example question-context pairs read from the csv file
ex_q, ex_c = get_examples()
98
+
99
+ ###################
100
+ ### App content ###
101
+ ###################
102
 
103
  # Intro text
104
  st.header('RoBERTa Q&A model')
 
143
  Alternatively, you can try an example by clicking one of the buttons below:
144
  ''')
145
 
 
 
 
146
  # Generate containers in order
147
  example_container = st.container()
148
  input_container = st.container()
149
  response_container = st.container()
150
 
151
+ ###########################
152
+ ### Populate containers ###
153
+ ###########################
154
+
155
  # Populate example button container
156
  with example_container:
157
  ex_cols = st.columns(len(ex_q)+1)
 
197
  st.session_state['question'] = question
198
  st.session_state['context'] = context
199
  with st.spinner('Generating response...'):
200
+ # Generate dictionary from inputs
201
+ query = {
202
+ 'context':st.session_state['context'],
203
+ 'question':st.session_state['question'],
204
+ }
205
+ # Pass to QA pipeline
206
+ response = qa_pipeline(**query)
207
+ answer = response['answer']
208
+ confidence = response['score']
209
+ # Reformat empty answer to message
210
+ if answer == '':
211
+ answer = "I don't have an answer based on the context provided."
 
 
 
 
 
 
 
 
 
 
 
212
  # Update response in session state
213
  st.session_state['response'] = f"""
214
  Answer: {answer}\n