etweedy commited on
Commit
e77a114
·
1 Parent(s): ff48e26

Upload 8 files

Browse files
app.py CHANGED
@@ -9,7 +9,7 @@ from transformers import (
9
  Trainer,
10
  default_data_collator,
11
  )
12
- from lib.utils import preprocess_examples, make_predictions
13
 
14
  if torch.backends.mps.is_available():
15
  device = "mps"
@@ -40,8 +40,33 @@ def get_model():
40
  tokenizer = AutoTokenizer.from_pretrained(repo_id)
41
  return model, tokenizer
42
 
 
 
 
 
 
 
 
 
 
 
43
  with st.spinner('Loading the model...'):
44
  model, tokenizer = get_model()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
  st.header('RoBERTa Q&A model')
47
 
@@ -51,8 +76,9 @@ This app demonstrates the answer-retrieval capabilities of a finetuned RoBERTa (
51
  Version 2 incorporates the 100,000 samples from Version 1.1, along with 50,000 'unanswerable' questions, i.e. samples in the question cannot be answered using the context given.
52
 
53
  Please type or paste a context paragraph and question you'd like to ask about it. The model will attempt to answer the question, or otherwise will report that it cannot.
54
- ''')
55
 
 
 
56
  input_container = st.container()
57
  st.divider()
58
  response_container = st.container()
@@ -62,27 +88,29 @@ with input_container:
62
  with st.form(key='input_form',clear_on_submit=False):
63
  context = st.text_area(
64
  label='Context',
65
- value='',
66
  key='context_field',
67
  label_visibility='hidden',
68
  placeholder='Enter your context paragraph here.',
69
  height=300,
70
  )
 
71
  question = st.text_input(
72
  label='Question',
73
- value='',
74
  key='question_field',
75
  label_visibility='hidden',
76
  placeholder='Enter your question here.',
77
  )
 
78
  query_submitted = st.form_submit_button("Submit")
79
  if query_submitted:
80
  with st.spinner('Generating response...'):
81
  data_raw = Dataset.from_dict(
82
  {
83
  'id':[0],
84
- 'context':[context],
85
- 'question':[question]
86
  }
87
  )
88
  data_proc = data_raw.map(
 
9
  Trainer,
10
  default_data_collator,
11
  )
12
+ from lib.utils import preprocess_examples, make_predictions, get_examples
13
 
14
  if torch.backends.mps.is_available():
15
  device = "mps"
 
40
  tokenizer = AutoTokenizer.from_pretrained(repo_id)
41
  return model, tokenizer
42
 
43
+ def fill_in_example(i):
44
+ st.session_state['response'] = ''
45
+ st.session_state['question'] = ex_q[i]
46
+ st.session_state['context'] = ex_c[i]
47
+
48
+ def clear_boxes():
49
+ st.session_state['response'] = ''
50
+ st.session_state['question'] = ''
51
+ st.session_state['context'] = ''
52
+
53
  with st.spinner('Loading the model...'):
54
  model, tokenizer = get_model()
55
+
56
+ ex_q, ex_c = get_examples()
57
+
58
+ for i in range(len(ex_q)):
59
+ st.sidebar.button(
60
+ label = f'Try example {i+1}',
61
+ key = f'ex_button_{i+1}',
62
+ on_click = fill_in_example,
63
+ args=(i,),
64
+ )
65
+ st.sidebar.button(
66
+ label = 'Clear boxes',
67
+ key = 'clear_button',
68
+ on_click = clear_boxes,
69
+ )
70
 
71
  st.header('RoBERTa Q&A model')
72
 
 
76
  Version 2 incorporates the 100,000 samples from Version 1.1, along with 50,000 'unanswerable' questions, i.e. samples in the question cannot be answered using the context given.
77
 
78
  Please type or paste a context paragraph and question you'd like to ask about it. The model will attempt to answer the question, or otherwise will report that it cannot.
 
79
 
80
+ Alternatively, you can try some of the examples provided on the sidebar to the left.
81
+ ''')
82
  input_container = st.container()
83
  st.divider()
84
  response_container = st.container()
 
88
  with st.form(key='input_form',clear_on_submit=False):
89
  context = st.text_area(
90
  label='Context',
91
+ value=st.session_state['context'],
92
  key='context_field',
93
  label_visibility='hidden',
94
  placeholder='Enter your context paragraph here.',
95
  height=300,
96
  )
97
+ st.session_state['context'] = context
98
  question = st.text_input(
99
  label='Question',
100
+ value=st.session_state['question'],
101
  key='question_field',
102
  label_visibility='hidden',
103
  placeholder='Enter your question here.',
104
  )
105
+ st.session_state['question'] = question
106
  query_submitted = st.form_submit_button("Submit")
107
  if query_submitted:
108
  with st.spinner('Generating response...'):
109
  data_raw = Dataset.from_dict(
110
  {
111
  'id':[0],
112
+ 'context':[st.session_state['context']],
113
+ 'question':[st.session_state['question']],
114
  }
115
  )
116
  data_proc = data_raw.map(
examples.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ question,context
2
+ What did Oppenheimer remark about the explosion?,"Oppenheimer attended Harvard University, where he earned a bachelor's degree in chemistry in 1925. He studied physics at the University of Cambridge and University of Göttingen, where he received his PhD in 1927. He held academic positions at the University of California, Berkeley, and the California Institute of Technology, and made significant contributions to theoretical physics, including in quantum mechanics and nuclear physics. During World War II, he was recruited to work on the Manhattan Project, and in 1943 was appointed as director of the Los Alamos Laboratory in New Mexico, tasked with developing the weapons. Oppenheimer's leadership and scientific expertise were instrumental in the success of the project. He was among those who observed the Trinity test on July 16, 1945, in which the first atomic bomb was successfully detonated. He later remarked that the explosion brought to his mind words from the Hindu scripture Bhagavad Gita: ""Now I am become Death, the destroyer of worlds."" In August 1945, the atomic bombs were used on the Japanese cities of Hiroshima and Nagasaki, the only use of nuclear weapons in war."
3
+ What was the phrase on the billboard which inspired the Twinkies name?,"Twinkies were invented on April 6, 1930, by Canadian-born baker James Alexander Dewar for the Continental Baking Company in Schiller Park, Illinois. Realizing that several machines used for making cream-filled strawberry shortcake sat idle when strawberries were out of season, Dewar conceived a snack cake filled with banana cream, which he dubbed the Twinkie. Ritchy Koph said he came up with the name when he saw a billboard in St. Louis for ""Twinkle Toe Shoes"". During World War II, bananas were rationed, and the company was forced to switch to vanilla cream. This change proved popular, and banana-cream Twinkies were not widely re-introduced. The original flavor was occasionally found in limited time only promotions, but the company used vanilla cream for most Twinkies. In 1988, Fruit and Cream Twinkies were introduced with a strawberry filling swirled into the cream. The product was soon dropped. Vanilla's dominance over banana flavoring was challenged in 2005, following a month-long promotion of the movie King Kong. Hostess saw its Twinkie sales rise 20 percent during the promotion, and in 2007 restored the banana-cream Twinkie to its snack lineup although they are now made with 2% banana purée."
4
+ What happened in November 2020?,"""Baby Shark"" is a children's song associated with a dance involving hand movements that originated as a campfire song dating back to at least the 20th century. In 2016, ""Baby Shark"" became very popular when Pinkfong, a South Korean entertainment company, released a version of the song with a YouTube music video that went viral across social media, online video, and radio. In January 2022, it became the first YouTube video to reach 10 billion views. In November 2020, Pinkfong's version became the most-viewed YouTube video of all time, with over 12 billion views as of April 2023. ""Baby Shark"" originated as a campfire song or chant. The original song dates back to at least the 20th century, potentially created by camp counselors inspired by the movie Jaws. In the chant, each member of a family of sharks is introduced, with campers using their hands to imitate the sharks' jaws. Different versions of the song have the sharks hunting fish, eating a sailor, or killing people who then go to heaven. Various entities have copyrighted original videos and sound recordings of the song, and some have trademarked merchandise based on their versions. However, according to The New York Times, the underlying song and characters are believed to be in the public domain."
lib/.DS_Store CHANGED
Binary files a/lib/.DS_Store and b/lib/.DS_Store differ
 
lib/.ipynb_checkpoints/utils-checkpoint.py CHANGED
@@ -4,6 +4,7 @@ import collections
4
  import torch
5
  from torch.utils.data import DataLoader
6
  from transformers import default_data_collator
 
7
 
8
  def preprocess_examples(examples, tokenizer , max_length = 384, stride = 128):
9
  """
@@ -112,7 +113,7 @@ def make_predictions(model,tokenizer,inputs,examples,
112
 
113
  if torch.backends.mps.is_available():
114
  device = "mps"
115
- elif torch.cuda.us_available():
116
  device = "cuda"
117
  else:
118
  device = "cpu"
@@ -185,4 +186,11 @@ def make_predictions(model,tokenizer,inputs,examples,
185
  for pred in predicted_answers:
186
  if pred['prediction_text'] == '':
187
  pred['prediction_text'] = "I don't have an answer based on the context provided."
188
- return predicted_answers
 
 
 
 
 
 
 
 
4
  import torch
5
  from torch.utils.data import DataLoader
6
  from transformers import default_data_collator
7
+ import pandas as pd
8
 
9
  def preprocess_examples(examples, tokenizer , max_length = 384, stride = 128):
10
  """
 
113
 
114
  if torch.backends.mps.is_available():
115
  device = "mps"
116
+ elif torch.cuda.is_available():
117
  device = "cuda"
118
  else:
119
  device = "cpu"
 
186
  for pred in predicted_answers:
187
  if pred['prediction_text'] == '':
188
  pred['prediction_text'] = "I don't have an answer based on the context provided."
189
+ return predicted_answers
190
+
191
+ def get_examples():
192
+ examples = pd.read_csv('examples.csv')
193
+ questions = list(examples['question'])
194
+ contexts = list(examples['context'])
195
+ return questions, contexts
196
+
lib/__pycache__/__init__.cpython-310.pyc CHANGED
Binary files a/lib/__pycache__/__init__.cpython-310.pyc and b/lib/__pycache__/__init__.cpython-310.pyc differ
 
lib/__pycache__/utils.cpython-310.pyc CHANGED
Binary files a/lib/__pycache__/utils.cpython-310.pyc and b/lib/__pycache__/utils.cpython-310.pyc differ
 
lib/utils.py CHANGED
@@ -4,6 +4,7 @@ import collections
4
  import torch
5
  from torch.utils.data import DataLoader
6
  from transformers import default_data_collator
 
7
 
8
  def preprocess_examples(examples, tokenizer , max_length = 384, stride = 128):
9
  """
@@ -185,4 +186,11 @@ def make_predictions(model,tokenizer,inputs,examples,
185
  for pred in predicted_answers:
186
  if pred['prediction_text'] == '':
187
  pred['prediction_text'] = "I don't have an answer based on the context provided."
188
- return predicted_answers
 
 
 
 
 
 
 
 
4
  import torch
5
  from torch.utils.data import DataLoader
6
  from transformers import default_data_collator
7
+ import pandas as pd
8
 
9
  def preprocess_examples(examples, tokenizer , max_length = 384, stride = 128):
10
  """
 
186
  for pred in predicted_answers:
187
  if pred['prediction_text'] == '':
188
  pred['prediction_text'] = "I don't have an answer based on the context provided."
189
+ return predicted_answers
190
+
191
+ def get_examples():
192
+ examples = pd.read_csv('examples.csv')
193
+ questions = list(examples['question'])
194
+ contexts = list(examples['context'])
195
+ return questions, contexts
196
+