from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.luhn import LuhnSummarizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words
import gradio as gr
import nltk
import time


def luhn_summarizer(text_corpus):
    start_time = time.time()

    # Parse the raw text and configure the Luhn summarizer with English stemming and stop words.
    parser = PlaintextParser.from_string(text_corpus, Tokenizer("english"))
    stemmer = Stemmer("english")
    summarizer = LuhnSummarizer(stemmer)
    summarizer.stop_words = get_stop_words("english")

    # Extract the 25 highest-scoring sentences and join them with spaces so they don't run together.
    sentences = summarizer(parser.document, 25)
    summary = ""
    for sentence in sentences:
        summary += str(sentence) + " "

    end_time = time.time()
    print(f"Time taken: {end_time - start_time:.2f} seconds")
    return summary


def clear_everything(text_corpus, summary):
    # Reset both the input and the output text areas.
    return None, None


theme = gr.themes.Soft(
    primary_hue="purple",
    secondary_hue="cyan",
    neutral_hue="slate",
    font=[
        gr.themes.GoogleFont('Syne'),
        gr.themes.GoogleFont('Poppins'),
        gr.themes.GoogleFont('Poppins'),
        gr.themes.GoogleFont('Poppins')
    ],
)

with gr.Blocks(theme=theme, title="Luhn Summarizer", fill_height=True) as app:
    gr.HTML(
        value='''
        <p>This app uses the Luhn approach to summarize PDF documents and runs on CPU.</p>
        <p>The summarization process can take some time depending on the size of the text corpus and the complexity of the content.</p>
    ''')

    with gr.Row():
        with gr.Column():
            text_corpus = gr.TextArea(label="Text Corpus", placeholder="Paste the text corpus here", lines=5)
            with gr.Row():
                clear_btn = gr.Button(value="Clear", variant='stop')
                summarize_btn = gr.Button(value="Summarize", variant='primary')
            summary = gr.TextArea(label="Raw Data", placeholder="The generated raw data will be displayed here", lines=7, interactive=False, show_copy_button=True)

    # Wire up the buttons: "Summarize" runs the Luhn summarizer and is exposed as an API endpoint,
    # "Clear" resets both text areas.
    summarize_btn.click(
        luhn_summarizer,
        inputs=[text_corpus],
        outputs=[summary],
        concurrency_limit=25,
        scroll_to_output=True,
        show_api=True,
        api_name="luhn_summarizer",
        show_progress="full",
    )
    clear_btn.click(clear_everything, inputs=[text_corpus, summary], outputs=[text_corpus, summary], show_api=False)

# Download the NLTK tokenizer data that sumy's Tokenizer relies on.
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)

app.queue(default_concurrency_limit=25).launch(show_api=True, max_threads=500, ssr_mode=False)
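
# A minimal sketch of calling the exposed "luhn_summarizer" endpoint from another process,
# assuming the app is running locally on Gradio's default port (7860) and that the
# gradio_client package is installed. This is illustrative only; adjust the URL to match
# your deployment.
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860/")
#   result = client.predict(
#       "Paste a long text corpus here...",  # text_corpus
#       api_name="/luhn_summarizer",
#   )
#   print(result)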