Spaces:

ml6team
/

distilbart-tos-summarizer-tosdr

Build error

App Files Community

sdhanabal1 committed on Jan 28, 2022

Commit

ce42613

1 Parent(s): 91466d8

Address PR review comments

Browse files

Files changed (3) hide show

Summarizer.py +1 -0
app.py +88 -87
requirements.txt +3 -3

Summarizer.py CHANGED Viewed

@@ -12,6 +12,7 @@ from transformers import Pipeline
 class Summarizer:
     DEFAULT_LANGUAGE = "english"
     def __init__(self, pipeline: Pipeline):
         self.pipeline = pipeline

 class Summarizer:
     DEFAULT_LANGUAGE = "english"
+    DEFAULT_EXTRACTED_ARTICLE_SENTENCES_LENGTH = 10
     def __init__(self, pipeline: Pipeline):
         self.pipeline = pipeline

app.py CHANGED Viewed

@@ -6,99 +6,100 @@ from validators import ValidationFailure
 from Summarizer import Summarizer
-nltk.download('punkt')
-DEFAULT_EXTRACTED_ARTICLE_SENTENCES_LENGTH = 10
-st.markdown('# Terms & conditions summarization :pencil:')
-st.write('Do you also take the time out of your day to thoroughly read every word of the Terms & Conditions before signing up for a new app? :thinking_face: \nNo?'
-         'Well have we got a demo for you!'
-         'Just copy-paste the lengthy Terms & Conditions text or provide a URL to the text and let our fancy NLP algorithm do the rest!'
-         'You will see both an extractive summary (the most important sentences will be highlighted) and an abstractive summary (an actual summary)'
-         'The abstractive summarization is preceded by LSA (Latent Semantic Analysis) extractive summarization')
-st.write('Want to find out more?'
-         'For information about the extractive summarization :point_right: https://en.wikipedia.org/wiki/Latent_semantic_analysis'
-         'For information about the abstractive summarization :point_right: https://huggingface.co/ml6team/distilbart-tos-summarizer-tosdr')
-st.markdown("""
-To use this:
-- Number of sentences to be extracted is configurable
-- Specify an URL to extract contents OR copy terms & conditions content and hit 'Summarize'
-""")
-@st.cache(allow_output_mutation=True,
-          suppress_st_warning=True,
-          show_spinner=False)
-def create_pipeline():
-    with st.spinner('Please wait for the model to load...'):
-        terms_and_conditions_pipeline = pipeline(
-            task='summarization',
-            model='ml6team/distilbart-tos-summarizer-tosdr',
-            tokenizer='ml6team/distilbart-tos-summarizer-tosdr'
-        )
-    return terms_and_conditions_pipeline
-def display_abstractive_summary(summary) -> None:
-    st.subheader("Abstractive Summary")
-    st.markdown('#####')
-    st.markdown(summary)
-def display_extractive_summary(terms_and_conditions_sentences: list, summary_sentences: list) -> None:
-    st.subheader("Extractive Summary")
-    st.markdown('#####')
-    terms_and_conditions = " ".join(sentence for sentence in terms_and_conditions_sentences)
-    replaced_text = terms_and_conditions
-    for sentence in summary_sentences:
-        replaced_text = replaced_text.replace(sentence, f"<span style='background-color: #FFFF00'>{sentence}</span>")
-    st.write(replaced_text, unsafe_allow_html=True)
-def is_valid_url(url: str) -> bool:
-    result = validators.url(url)
-    if isinstance(result, ValidationFailure):
-        return False
-    return True
-summarizer: Summarizer = Summarizer(create_pipeline())
-if 'tc_text' not in st.session_state:
-    st.session_state['tc_text'] = ''
-if 'sentences_length' not in st.session_state:
-    st.session_state['sentences_length'] = DEFAULT_EXTRACTED_ARTICLE_SENTENCES_LENGTH
-st.write('<style>div.row-widget.stRadio > div{flex-direction:row;}</style>', unsafe_allow_html=True)
-st.header("Input")
-with st.form(key='terms-and-conditions'):
-    sentences_length_input = st.number_input(
-        label='Number of sentences to be extracted:',
-        min_value=1,
-        value=st.session_state.sentences_length
-    )
-    tc_text_input = st.text_area(
-        value=st.session_state.tc_text,
-        label='Terms & conditions content or specify an URL:',
-        height=240
-    )
-    submit_button = st.form_submit_button(label='Summarize')
-if submit_button:
-    if is_valid_url(tc_text_input):
-        (all_sentences, extract_summary_sentences) = summarizer.extractive_summary_from_url(tc_text_input,
-                                                                                            sentences_length_input)
-    else:
-        (all_sentences, extract_summary_sentences) = summarizer.extractive_summary_from_text(tc_text_input,
-                                                                                             sentences_length_input)
-    extract_summary = " ".join([sentence for sentence in extract_summary_sentences])
-    abstract_summary = summarizer.abstractive_summary(extract_summary)
-    display_extractive_summary(all_sentences, extract_summary_sentences)
-    display_abstractive_summary(abstract_summary)

 from Summarizer import Summarizer
+def main() -> None:
+    nltk.download('punkt')
+    st.markdown('# Terms & conditions summarization :pencil:')
+    st.write('Do you also take the time out of your day to thoroughly read every word of the Terms & Conditions before signing up for a new app? :thinking_face: \nNo?'
+             'Well have we got a demo for you!'
+             'Just copy-paste the lengthy Terms & Conditions text or provide a URL to the text and let our fancy NLP algorithm do the rest!'
+             'You will see both an extractive summary (the most important sentences will be highlighted) and an abstractive summary (an actual summary)'
+             'The abstractive summarization is preceded by LSA (Latent Semantic Analysis) extractive summarization')
+    st.write('Want to find out more?'
+             'For information about the extractive summarization :point_right: https://en.wikipedia.org/wiki/Latent_semantic_analysis'
+             'For information about the abstractive summarization :point_right: https://huggingface.co/ml6team/distilbart-tos-summarizer-tosdr')
+    st.markdown("""
+    To use this:
+    - Number of sentences to be extracted is configurable
+    - Specify an URL to extract contents OR copy terms & conditions content and hit 'Summarize'
+    """)
+    @st.cache(allow_output_mutation=True,
+              suppress_st_warning=True,
+              show_spinner=False)
+    def create_pipeline():
+        with st.spinner('Please wait for the model to load...'):
+            terms_and_conditions_pipeline = pipeline(
+                task='summarization',
+                model='ml6team/distilbart-tos-summarizer-tosdr',
+                tokenizer='ml6team/distilbart-tos-summarizer-tosdr'
+            )
+        return terms_and_conditions_pipeline
+    def display_abstractive_summary(summary) -> None:
+        st.subheader("Abstractive Summary")
+        st.markdown('#####')
+        st.markdown(summary)
+    def display_extractive_summary(terms_and_conditions_sentences: list, summary_sentences: list) -> None:
+        st.subheader("Extractive Summary")
+        st.markdown('#####')
+        terms_and_conditions = " ".join(sentence for sentence in terms_and_conditions_sentences)
+        replaced_text = terms_and_conditions
+        for sentence in summary_sentences:
+            replaced_text = replaced_text.replace(sentence,
+                                                  f"<span style='background-color: #FFFF00'>{sentence}</span>")
+        st.write(replaced_text, unsafe_allow_html=True)
+    def is_valid_url(url: str) -> bool:
+        result = validators.url(url)
+        if isinstance(result, ValidationFailure):
+            return False
+        return True
+    summarizer: Summarizer = Summarizer(create_pipeline())
+    if 'tc_text' not in st.session_state:
+        st.session_state['tc_text'] = ''
+    if 'sentences_length' not in st.session_state:
+        st.session_state['sentences_length'] = Summarizer.DEFAULT_EXTRACTED_ARTICLE_SENTENCES_LENGTH
+    st.write('<style>div.row-widget.stRadio > div{flex-direction:row;}</style>', unsafe_allow_html=True)
+    st.header("Input")
+    with st.form(key='terms-and-conditions'):
+        sentences_length_input = st.number_input(
+            label='Number of sentences to be extracted:',
+            min_value=1,
+            value=st.session_state.sentences_length
+        )
+        tc_text_input = st.text_area(
+            value=st.session_state.tc_text,
+            label='Terms & conditions content or specify an URL:',
+            height=240
+        )
+        submit_button = st.form_submit_button(label='Summarize')
+    if submit_button:
+        if is_valid_url(tc_text_input):
+            (all_sentences, extract_summary_sentences) = summarizer.extractive_summary_from_url(tc_text_input,
+                                                                                                sentences_length_input)
+        else:
+            (all_sentences, extract_summary_sentences) = summarizer.extractive_summary_from_text(tc_text_input,
+                                                                                                 sentences_length_input)
+        extract_summary = " ".join([sentence for sentence in extract_summary_sentences])
+        abstract_summary = summarizer.abstractive_summary(extract_summary)
+        display_extractive_summary(all_sentences, extract_summary_sentences)
+        display_abstractive_summary(abstract_summary)
+if __name__ == "__main__":
+    main()

requirements.txt CHANGED Viewed

@@ -2,7 +2,7 @@ nlpaug==1.1.7
 streamlit
 torch==1.9.1
 torchvision==0.10.1
-transformers
 sumy==0.9.0
-nltk
-validators

 streamlit
 torch==1.9.1
 torchvision==0.10.1
+transformers==4.10.3
 sumy==0.9.0
+nltk==3.6.7
+validators==0.18.2