Spaces:
Runtime error
Runtime error
Upload 8 files
Browse files- app.py +34 -6
- examples.csv +4 -0
- lib/.DS_Store +0 -0
- lib/.ipynb_checkpoints/utils-checkpoint.py +10 -2
- lib/__pycache__/__init__.cpython-310.pyc +0 -0
- lib/__pycache__/utils.cpython-310.pyc +0 -0
- lib/utils.py +9 -1
app.py
CHANGED
@@ -9,7 +9,7 @@ from transformers import (
|
|
9 |
Trainer,
|
10 |
default_data_collator,
|
11 |
)
|
12 |
-
from lib.utils import preprocess_examples, make_predictions
|
13 |
|
14 |
if torch.backends.mps.is_available():
|
15 |
device = "mps"
|
@@ -40,8 +40,33 @@ def get_model():
|
|
40 |
tokenizer = AutoTokenizer.from_pretrained(repo_id)
|
41 |
return model, tokenizer
|
42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
with st.spinner('Loading the model...'):
|
44 |
model, tokenizer = get_model()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
46 |
st.header('RoBERTa Q&A model')
|
47 |
|
@@ -51,8 +76,9 @@ This app demonstrates the answer-retrieval capabilities of a finetuned RoBERTa (
|
|
51 |
Version 2 incorporates the 100,000 samples from Version 1.1, along with 50,000 'unanswerable' questions, i.e. samples in the question cannot be answered using the context given.
|
52 |
|
53 |
Please type or paste a context paragraph and question you'd like to ask about it. The model will attempt to answer the question, or otherwise will report that it cannot.
|
54 |
-
''')
|
55 |
|
|
|
|
|
56 |
input_container = st.container()
|
57 |
st.divider()
|
58 |
response_container = st.container()
|
@@ -62,27 +88,29 @@ with input_container:
|
|
62 |
with st.form(key='input_form',clear_on_submit=False):
|
63 |
context = st.text_area(
|
64 |
label='Context',
|
65 |
-
value='',
|
66 |
key='context_field',
|
67 |
label_visibility='hidden',
|
68 |
placeholder='Enter your context paragraph here.',
|
69 |
height=300,
|
70 |
)
|
|
|
71 |
question = st.text_input(
|
72 |
label='Question',
|
73 |
-
value='',
|
74 |
key='question_field',
|
75 |
label_visibility='hidden',
|
76 |
placeholder='Enter your question here.',
|
77 |
)
|
|
|
78 |
query_submitted = st.form_submit_button("Submit")
|
79 |
if query_submitted:
|
80 |
with st.spinner('Generating response...'):
|
81 |
data_raw = Dataset.from_dict(
|
82 |
{
|
83 |
'id':[0],
|
84 |
-
'context':[context],
|
85 |
-
'question':[question]
|
86 |
}
|
87 |
)
|
88 |
data_proc = data_raw.map(
|
|
|
9 |
Trainer,
|
10 |
default_data_collator,
|
11 |
)
|
12 |
+
from lib.utils import preprocess_examples, make_predictions, get_examples
|
13 |
|
14 |
if torch.backends.mps.is_available():
|
15 |
device = "mps"
|
|
|
40 |
tokenizer = AutoTokenizer.from_pretrained(repo_id)
|
41 |
return model, tokenizer
|
42 |
|
43 |
+
def fill_in_example(i):
|
44 |
+
st.session_state['response'] = ''
|
45 |
+
st.session_state['question'] = ex_q[i]
|
46 |
+
st.session_state['context'] = ex_c[i]
|
47 |
+
|
48 |
+
def clear_boxes():
|
49 |
+
st.session_state['response'] = ''
|
50 |
+
st.session_state['question'] = ''
|
51 |
+
st.session_state['context'] = ''
|
52 |
+
|
53 |
with st.spinner('Loading the model...'):
|
54 |
model, tokenizer = get_model()
|
55 |
+
|
56 |
+
ex_q, ex_c = get_examples()
|
57 |
+
|
58 |
+
for i in range(len(ex_q)):
|
59 |
+
st.sidebar.button(
|
60 |
+
label = f'Try example {i+1}',
|
61 |
+
key = f'ex_button_{i+1}',
|
62 |
+
on_click = fill_in_example,
|
63 |
+
args=(i,),
|
64 |
+
)
|
65 |
+
st.sidebar.button(
|
66 |
+
label = 'Clear boxes',
|
67 |
+
key = 'clear_button',
|
68 |
+
on_click = clear_boxes,
|
69 |
+
)
|
70 |
|
71 |
st.header('RoBERTa Q&A model')
|
72 |
|
|
|
76 |
Version 2 incorporates the 100,000 samples from Version 1.1, along with 50,000 'unanswerable' questions, i.e. samples in the question cannot be answered using the context given.
|
77 |
|
78 |
Please type or paste a context paragraph and question you'd like to ask about it. The model will attempt to answer the question, or otherwise will report that it cannot.
|
|
|
79 |
|
80 |
+
Alternatively, you can try some of the examples provided on the sidebar to the left.
|
81 |
+
''')
|
82 |
input_container = st.container()
|
83 |
st.divider()
|
84 |
response_container = st.container()
|
|
|
88 |
with st.form(key='input_form',clear_on_submit=False):
|
89 |
context = st.text_area(
|
90 |
label='Context',
|
91 |
+
value=st.session_state['context'],
|
92 |
key='context_field',
|
93 |
label_visibility='hidden',
|
94 |
placeholder='Enter your context paragraph here.',
|
95 |
height=300,
|
96 |
)
|
97 |
+
st.session_state['context'] = context
|
98 |
question = st.text_input(
|
99 |
label='Question',
|
100 |
+
value=st.session_state['question'],
|
101 |
key='question_field',
|
102 |
label_visibility='hidden',
|
103 |
placeholder='Enter your question here.',
|
104 |
)
|
105 |
+
st.session_state['question'] = question
|
106 |
query_submitted = st.form_submit_button("Submit")
|
107 |
if query_submitted:
|
108 |
with st.spinner('Generating response...'):
|
109 |
data_raw = Dataset.from_dict(
|
110 |
{
|
111 |
'id':[0],
|
112 |
+
'context':[st.session_state['context']],
|
113 |
+
'question':[st.session_state['question']],
|
114 |
}
|
115 |
)
|
116 |
data_proc = data_raw.map(
|
examples.csv
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
question,context
|
2 |
+
What did Oppenheimer remark about the explosion?,"Oppenheimer attended Harvard University, where he earned a bachelor's degree in chemistry in 1925. He studied physics at the University of Cambridge and University of Göttingen, where he received his PhD in 1927. He held academic positions at the University of California, Berkeley, and the California Institute of Technology, and made significant contributions to theoretical physics, including in quantum mechanics and nuclear physics. During World War II, he was recruited to work on the Manhattan Project, and in 1943 was appointed as director of the Los Alamos Laboratory in New Mexico, tasked with developing the weapons. Oppenheimer's leadership and scientific expertise were instrumental in the success of the project. He was among those who observed the Trinity test on July 16, 1945, in which the first atomic bomb was successfully detonated. He later remarked that the explosion brought to his mind words from the Hindu scripture Bhagavad Gita: ""Now I am become Death, the destroyer of worlds."" In August 1945, the atomic bombs were used on the Japanese cities of Hiroshima and Nagasaki, the only use of nuclear weapons in war."
|
3 |
+
What was the phrase on the billboard which inspired the Twinkies name?,"Twinkies were invented on April 6, 1930, by Canadian-born baker James Alexander Dewar for the Continental Baking Company in Schiller Park, Illinois. Realizing that several machines used for making cream-filled strawberry shortcake sat idle when strawberries were out of season, Dewar conceived a snack cake filled with banana cream, which he dubbed the Twinkie. Ritchy Koph said he came up with the name when he saw a billboard in St. Louis for ""Twinkle Toe Shoes"". During World War II, bananas were rationed, and the company was forced to switch to vanilla cream. This change proved popular, and banana-cream Twinkies were not widely re-introduced. The original flavor was occasionally found in limited time only promotions, but the company used vanilla cream for most Twinkies. In 1988, Fruit and Cream Twinkies were introduced with a strawberry filling swirled into the cream. The product was soon dropped. Vanilla's dominance over banana flavoring was challenged in 2005, following a month-long promotion of the movie King Kong. Hostess saw its Twinkie sales rise 20 percent during the promotion, and in 2007 restored the banana-cream Twinkie to its snack lineup although they are now made with 2% banana purée."
|
4 |
+
What happened in November 2020?,"""Baby Shark"" is a children's song associated with a dance involving hand movements that originated as a campfire song dating back to at least the 20th century. In 2016, ""Baby Shark"" became very popular when Pinkfong, a South Korean entertainment company, released a version of the song with a YouTube music video that went viral across social media, online video, and radio. In January 2022, it became the first YouTube video to reach 10 billion views. In November 2020, Pinkfong's version became the most-viewed YouTube video of all time, with over 12 billion views as of April 2023. ""Baby Shark"" originated as a campfire song or chant. The original song dates back to at least the 20th century, potentially created by camp counselors inspired by the movie Jaws. In the chant, each member of a family of sharks is introduced, with campers using their hands to imitate the sharks' jaws. Different versions of the song have the sharks hunting fish, eating a sailor, or killing people who then go to heaven. Various entities have copyrighted original videos and sound recordings of the song, and some have trademarked merchandise based on their versions. However, according to The New York Times, the underlying song and characters are believed to be in the public domain."
|
lib/.DS_Store
CHANGED
Binary files a/lib/.DS_Store and b/lib/.DS_Store differ
|
|
lib/.ipynb_checkpoints/utils-checkpoint.py
CHANGED
@@ -4,6 +4,7 @@ import collections
|
|
4 |
import torch
|
5 |
from torch.utils.data import DataLoader
|
6 |
from transformers import default_data_collator
|
|
|
7 |
|
8 |
def preprocess_examples(examples, tokenizer , max_length = 384, stride = 128):
|
9 |
"""
|
@@ -112,7 +113,7 @@ def make_predictions(model,tokenizer,inputs,examples,
|
|
112 |
|
113 |
if torch.backends.mps.is_available():
|
114 |
device = "mps"
|
115 |
-
elif torch.cuda.
|
116 |
device = "cuda"
|
117 |
else:
|
118 |
device = "cpu"
|
@@ -185,4 +186,11 @@ def make_predictions(model,tokenizer,inputs,examples,
|
|
185 |
for pred in predicted_answers:
|
186 |
if pred['prediction_text'] == '':
|
187 |
pred['prediction_text'] = "I don't have an answer based on the context provided."
|
188 |
-
return predicted_answers
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
import torch
|
5 |
from torch.utils.data import DataLoader
|
6 |
from transformers import default_data_collator
|
7 |
+
import pandas as pd
|
8 |
|
9 |
def preprocess_examples(examples, tokenizer , max_length = 384, stride = 128):
|
10 |
"""
|
|
|
113 |
|
114 |
if torch.backends.mps.is_available():
|
115 |
device = "mps"
|
116 |
+
elif torch.cuda.is_available():
|
117 |
device = "cuda"
|
118 |
else:
|
119 |
device = "cpu"
|
|
|
186 |
for pred in predicted_answers:
|
187 |
if pred['prediction_text'] == '':
|
188 |
pred['prediction_text'] = "I don't have an answer based on the context provided."
|
189 |
+
return predicted_answers
|
190 |
+
|
191 |
+
def get_examples():
|
192 |
+
examples = pd.read_csv('examples.csv')
|
193 |
+
questions = list(examples['question'])
|
194 |
+
contexts = list(examples['context'])
|
195 |
+
return questions, contexts
|
196 |
+
|
lib/__pycache__/__init__.cpython-310.pyc
CHANGED
Binary files a/lib/__pycache__/__init__.cpython-310.pyc and b/lib/__pycache__/__init__.cpython-310.pyc differ
|
|
lib/__pycache__/utils.cpython-310.pyc
CHANGED
Binary files a/lib/__pycache__/utils.cpython-310.pyc and b/lib/__pycache__/utils.cpython-310.pyc differ
|
|
lib/utils.py
CHANGED
@@ -4,6 +4,7 @@ import collections
|
|
4 |
import torch
|
5 |
from torch.utils.data import DataLoader
|
6 |
from transformers import default_data_collator
|
|
|
7 |
|
8 |
def preprocess_examples(examples, tokenizer , max_length = 384, stride = 128):
|
9 |
"""
|
@@ -185,4 +186,11 @@ def make_predictions(model,tokenizer,inputs,examples,
|
|
185 |
for pred in predicted_answers:
|
186 |
if pred['prediction_text'] == '':
|
187 |
pred['prediction_text'] = "I don't have an answer based on the context provided."
|
188 |
-
return predicted_answers
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
import torch
|
5 |
from torch.utils.data import DataLoader
|
6 |
from transformers import default_data_collator
|
7 |
+
import pandas as pd
|
8 |
|
9 |
def preprocess_examples(examples, tokenizer , max_length = 384, stride = 128):
|
10 |
"""
|
|
|
186 |
for pred in predicted_answers:
|
187 |
if pred['prediction_text'] == '':
|
188 |
pred['prediction_text'] = "I don't have an answer based on the context provided."
|
189 |
+
return predicted_answers
|
190 |
+
|
191 |
+
def get_examples():
|
192 |
+
examples = pd.read_csv('examples.csv')
|
193 |
+
questions = list(examples['question'])
|
194 |
+
contexts = list(examples['context'])
|
195 |
+
return questions, contexts
|
196 |
+
|