etweedy commited on
Commit
e77a114
·
1 Parent(s): ff48e26

Upload 8 files

Browse files
app.py CHANGED
@@ -9,7 +9,7 @@ from transformers import (
9
  Trainer,
10
  default_data_collator,
11
  )
12
- from lib.utils import preprocess_examples, make_predictions
13
 
14
  if torch.backends.mps.is_available():
15
  device = "mps"
@@ -40,8 +40,33 @@ def get_model():
40
  tokenizer = AutoTokenizer.from_pretrained(repo_id)
41
  return model, tokenizer
42
 
 
 
 
 
 
 
 
 
 
 
43
  with st.spinner('Loading the model...'):
44
  model, tokenizer = get_model()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
  st.header('RoBERTa Q&A model')
47
 
@@ -51,8 +76,9 @@ This app demonstrates the answer-retrieval capabilities of a finetuned RoBERTa (
51
  Version 2 incorporates the 100,000 samples from Version 1.1, along with 50,000 'unanswerable' questions, i.e. samples in the question cannot be answered using the context given.
52
 
53
  Please type or paste a context paragraph and question you'd like to ask about it. The model will attempt to answer the question, or otherwise will report that it cannot.
54
- ''')
55
 
 
 
56
  input_container = st.container()
57
  st.divider()
58
  response_container = st.container()
@@ -62,27 +88,29 @@ with input_container:
62
  with st.form(key='input_form',clear_on_submit=False):
63
  context = st.text_area(
64
  label='Context',
65
- value='',
66
  key='context_field',
67
  label_visibility='hidden',
68
  placeholder='Enter your context paragraph here.',
69
  height=300,
70
  )
 
71
  question = st.text_input(
72
  label='Question',
73
- value='',
74
  key='question_field',
75
  label_visibility='hidden',
76
  placeholder='Enter your question here.',
77
  )
 
78
  query_submitted = st.form_submit_button("Submit")
79
  if query_submitted:
80
  with st.spinner('Generating response...'):
81
  data_raw = Dataset.from_dict(
82
  {
83
  'id':[0],
84
- 'context':[context],
85
- 'question':[question]
86
  }
87
  )
88
  data_proc = data_raw.map(
 
9
  Trainer,
10
  default_data_collator,
11
  )
12
+ from lib.utils import preprocess_examples, make_predictions, get_examples
13
 
14
  if torch.backends.mps.is_available():
15
  device = "mps"
 
40
  tokenizer = AutoTokenizer.from_pretrained(repo_id)
41
  return model, tokenizer
42
 
43
+ def fill_in_example(i):
44
+ st.session_state['response'] = ''
45
+ st.session_state['question'] = ex_q[i]
46
+ st.session_state['context'] = ex_c[i]
47
+
48
+ def clear_boxes():
49
+ st.session_state['response'] = ''
50
+ st.session_state['question'] = ''
51
+ st.session_state['context'] = ''
52
+
53
  with st.spinner('Loading the model...'):
54
  model, tokenizer = get_model()
55
+
56
+ ex_q, ex_c = get_examples()
57
+
58
+ for i in range(len(ex_q)):
59
+ st.sidebar.button(
60
+ label = f'Try example {i+1}',
61
+ key = f'ex_button_{i+1}',
62
+ on_click = fill_in_example,
63
+ args=(i,),
64
+ )
65
+ st.sidebar.button(
66
+ label = 'Clear boxes',
67
+ key = 'clear_button',
68
+ on_click = clear_boxes,
69
+ )
70
 
71
  st.header('RoBERTa Q&A model')
72
 
 
76
  Version 2 incorporates the 100,000 samples from Version 1.1, along with 50,000 'unanswerable' questions, i.e. samples in the question cannot be answered using the context given.
77
 
78
  Please type or paste a context paragraph and question you'd like to ask about it. The model will attempt to answer the question, or otherwise will report that it cannot.
 
79
 
80
+ Alternatively, you can try some of the examples provided on the sidebar to the left.
81
+ ''')
82
  input_container = st.container()
83
  st.divider()
84
  response_container = st.container()
 
88
  with st.form(key='input_form',clear_on_submit=False):
89
  context = st.text_area(
90
  label='Context',
91
+ value=st.session_state['context'],
92
  key='context_field',
93
  label_visibility='hidden',
94
  placeholder='Enter your context paragraph here.',
95
  height=300,
96
  )
97
+ st.session_state['context'] = context
98
  question = st.text_input(
99
  label='Question',
100
+ value=st.session_state['question'],
101
  key='question_field',
102
  label_visibility='hidden',
103
  placeholder='Enter your question here.',
104
  )
105
+ st.session_state['question'] = question
106
  query_submitted = st.form_submit_button("Submit")
107
  if query_submitted:
108
  with st.spinner('Generating response...'):
109
  data_raw = Dataset.from_dict(
110
  {
111
  'id':[0],
112
+ 'context':[st.session_state['context']],
113
+ 'question':[st.session_state['question']],
114
  }
115
  )
116
  data_proc = data_raw.map(
examples.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ question,context
2
+ What did Oppenheimer remark about the explosion?,"Oppenheimer attended Harvard University, where he earned a bachelor's degree in chemistry in 1925. He studied physics at the University of Cambridge and University of Göttingen, where he received his PhD in 1927. He held academic positions at the University of California, Berkeley, and the California Institute of Technology, and made significant contributions to theoretical physics, including in quantum mechanics and nuclear physics. During World War II, he was recruited to work on the Manhattan Project, and in 1943 was appointed as director of the Los Alamos Laboratory in New Mexico, tasked with developing the weapons. Oppenheimer's leadership and scientific expertise were instrumental in the success of the project. He was among those who observed the Trinity test on July 16, 1945, in which the first atomic bomb was successfully detonated. He later remarked that the explosion brought to his mind words from the Hindu scripture Bhagavad Gita: ""Now I am become Death, the destroyer of worlds."" In August 1945, the atomic bombs were used on the Japanese cities of Hiroshima and Nagasaki, the only use of nuclear weapons in war."
3
+ What was the phrase on the billboard which inspired the Twinkies name?,"Twinkies were invented on April 6, 1930, by Canadian-born baker James Alexander Dewar for the Continental Baking Company in Schiller Park, Illinois. Realizing that several machines used for making cream-filled strawberry shortcake sat idle when strawberries were out of season, Dewar conceived a snack cake filled with banana cream, which he dubbed the Twinkie. Ritchy Koph said he came up with the name when he saw a billboard in St. Louis for ""Twinkle Toe Shoes"". During World War II, bananas were rationed, and the company was forced to switch to vanilla cream. This change proved popular, and banana-cream Twinkies were not widely re-introduced. The original flavor was occasionally found in limited time only promotions, but the company used vanilla cream for most Twinkies. In 1988, Fruit and Cream Twinkies were introduced with a strawberry filling swirled into the cream. The product was soon dropped. Vanilla's dominance over banana flavoring was challenged in 2005, following a month-long promotion of the movie King Kong. Hostess saw its Twinkie sales rise 20 percent during the promotion, and in 2007 restored the banana-cream Twinkie to its snack lineup although they are now made with 2% banana purée."
4
+ What happened in November 2020?,"""Baby Shark"" is a children's song associated with a dance involving hand movements that originated as a campfire song dating back to at least the 20th century. In 2016, ""Baby Shark"" became very popular when Pinkfong, a South Korean entertainment company, released a version of the song with a YouTube music video that went viral across social media, online video, and radio. In January 2022, it became the first YouTube video to reach 10 billion views. In November 2020, Pinkfong's version became the most-viewed YouTube video of all time, with over 12 billion views as of April 2023. ""Baby Shark"" originated as a campfire song or chant. The original song dates back to at least the 20th century, potentially created by camp counselors inspired by the movie Jaws. In the chant, each member of a family of sharks is introduced, with campers using their hands to imitate the sharks' jaws. Different versions of the song have the sharks hunting fish, eating a sailor, or killing people who then go to heaven. Various entities have copyrighted original videos and sound recordings of the song, and some have trademarked merchandise based on their versions. However, according to The New York Times, the underlying song and characters are believed to be in the public domain."
lib/.DS_Store CHANGED
Binary files a/lib/.DS_Store and b/lib/.DS_Store differ
 
lib/.ipynb_checkpoints/utils-checkpoint.py CHANGED
@@ -4,6 +4,7 @@ import collections
4
  import torch
5
  from torch.utils.data import DataLoader
6
  from transformers import default_data_collator
 
7
 
8
  def preprocess_examples(examples, tokenizer , max_length = 384, stride = 128):
9
  """
@@ -112,7 +113,7 @@ def make_predictions(model,tokenizer,inputs,examples,
112
 
113
  if torch.backends.mps.is_available():
114
  device = "mps"
115
- elif torch.cuda.us_available():
116
  device = "cuda"
117
  else:
118
  device = "cpu"
@@ -185,4 +186,11 @@ def make_predictions(model,tokenizer,inputs,examples,
185
  for pred in predicted_answers:
186
  if pred['prediction_text'] == '':
187
  pred['prediction_text'] = "I don't have an answer based on the context provided."
188
- return predicted_answers
 
 
 
 
 
 
 
 
4
  import torch
5
  from torch.utils.data import DataLoader
6
  from transformers import default_data_collator
7
+ import pandas as pd
8
 
9
  def preprocess_examples(examples, tokenizer , max_length = 384, stride = 128):
10
  """
 
113
 
114
  if torch.backends.mps.is_available():
115
  device = "mps"
116
+ elif torch.cuda.is_available():
117
  device = "cuda"
118
  else:
119
  device = "cpu"
 
186
  for pred in predicted_answers:
187
  if pred['prediction_text'] == '':
188
  pred['prediction_text'] = "I don't have an answer based on the context provided."
189
+ return predicted_answers
190
+
191
+ def get_examples():
192
+ examples = pd.read_csv('examples.csv')
193
+ questions = list(examples['question'])
194
+ contexts = list(examples['context'])
195
+ return questions, contexts
196
+
lib/__pycache__/__init__.cpython-310.pyc CHANGED
Binary files a/lib/__pycache__/__init__.cpython-310.pyc and b/lib/__pycache__/__init__.cpython-310.pyc differ
 
lib/__pycache__/utils.cpython-310.pyc CHANGED
Binary files a/lib/__pycache__/utils.cpython-310.pyc and b/lib/__pycache__/utils.cpython-310.pyc differ
 
lib/utils.py CHANGED
@@ -4,6 +4,7 @@ import collections
4
  import torch
5
  from torch.utils.data import DataLoader
6
  from transformers import default_data_collator
 
7
 
8
  def preprocess_examples(examples, tokenizer , max_length = 384, stride = 128):
9
  """
@@ -185,4 +186,11 @@ def make_predictions(model,tokenizer,inputs,examples,
185
  for pred in predicted_answers:
186
  if pred['prediction_text'] == '':
187
  pred['prediction_text'] = "I don't have an answer based on the context provided."
188
- return predicted_answers
 
 
 
 
 
 
 
 
4
  import torch
5
  from torch.utils.data import DataLoader
6
  from transformers import default_data_collator
7
+ import pandas as pd
8
 
9
  def preprocess_examples(examples, tokenizer , max_length = 384, stride = 128):
10
  """
 
186
  for pred in predicted_answers:
187
  if pred['prediction_text'] == '':
188
  pred['prediction_text'] = "I don't have an answer based on the context provided."
189
+ return predicted_answers
190
+
191
+ def get_examples():
192
+ examples = pd.read_csv('examples.csv')
193
+ questions = list(examples['question'])
194
+ contexts = list(examples['context'])
195
+ return questions, contexts
196
+