from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.luhn import LuhnSummarizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words
import gradio as gr
import nltk
import time


def luhn_summarizer(text_corpus):
    start_time = time.time()

    # Parse the raw text and configure the Luhn summarizer with English stemming and stop words.
    parser = PlaintextParser.from_string(text_corpus, Tokenizer("english"))
    stemmer = Stemmer("english")
    summarizer = LuhnSummarizer(stemmer)
    summarizer.stop_words = get_stop_words("english")

    # Extract the 25 highest-scoring sentences and join them with spaces so they don't run together.
    sentences = summarizer(parser.document, 25)
    summary = ""
    for sentence in sentences:
        summary += str(sentence) + " "

    end_time = time.time()
    print(f"Time taken: {end_time - start_time:.2f} seconds")
    return summary


def clear_everything(text_corpus, summary):
    # Reset both the input and the output text areas.
    return None, None


theme = gr.themes.Soft(
    primary_hue="purple",
    secondary_hue="cyan",
    neutral_hue="slate",
    font=[
        gr.themes.GoogleFont('Syne'),
        gr.themes.GoogleFont('Poppins'),
        gr.themes.GoogleFont('Poppins'),
        gr.themes.GoogleFont('Poppins')
    ],
)

with gr.Blocks(theme=theme, title="Luhn Summarizer", fill_height=True) as app:
    gr.HTML(
        value='''
        <p>This app uses the Luhn approach to summarize PDF documents and runs on CPU.</p>
        <p>The summarization process can take some time depending on the size of the text corpus and the complexity of the content.</p>
    ''')

    with gr.Row():
        with gr.Column():
            text_corpus = gr.TextArea(label="Text Corpus", placeholder="Paste the text corpus here", lines=5)
            with gr.Row():
                clear_btn = gr.Button(value="Clear", variant='stop')
                summarize_btn = gr.Button(value="Summarize", variant='primary')
            summary = gr.TextArea(label="Raw Data", placeholder="The generated raw data will be displayed here", lines=7, interactive=False, show_copy_button=True)

    # Wire up the buttons: "Summarize" runs the Luhn summarizer and is exposed as an API endpoint,
    # "Clear" resets both text areas.
    summarize_btn.click(
        luhn_summarizer,
        inputs=[text_corpus],
        outputs=[summary],
        concurrency_limit=25,
        scroll_to_output=True,
        show_api=True,
        api_name="luhn_summarizer",
        show_progress="full",
    )
    clear_btn.click(clear_everything, inputs=[text_corpus, summary], outputs=[text_corpus, summary], show_api=False)

# Download the NLTK tokenizer data that sumy's Tokenizer relies on.
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)

app.queue(default_concurrency_limit=25).launch(show_api=True, max_threads=500, ssr_mode=False)
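
# A minimal sketch of calling the exposed "luhn_summarizer" endpoint from another process,
# assuming the app is running locally on Gradio's default port (7860) and that the
# gradio_client package is installed. This is illustrative only; adjust the URL to match
# your deployment.
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860/")
#   result = client.predict(
#       "Paste a long text corpus here...",  # text_corpus
#       api_name="/luhn_summarizer",
#   )
#   print(result)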