adarshjha01 committed on
Commit
04e4202
·
verified ·
1 Parent(s): c852311

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -0
app.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from youtube_transcript_api import YouTubeTranscriptApi
3
+ from youtube_transcript_api.formatters import TextFormatter
4
+ import re
5
+ import torch
6
+ import gradio as gr
7
+ from transformers import pipeline
8
+
9
# Initialize summarization pipeline
# Created at module level, so the model is loaded once on import and the
# same pipeline object is reused by every call to summarize_text().
text_summary = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-12-6",
    torch_dtype=torch.bfloat16,
)
12
+
13
def summarize_text(input_text):
    """Summarize a transcript with the module-level ``text_summary`` pipeline.

    Args:
        input_text: Transcript text to condense. ``None``, empty, and
            whitespace-only values are treated as "no transcript".

    Returns:
        The generated summary string, or a fixed notice when there is
        nothing to summarize.
    """
    if not input_text or not input_text.strip():
        return "No transcript available to summarize."
    # Video transcripts are frequently longer than the model's maximum input
    # length; truncation=True asks the pipeline to clip the tokenized input
    # rather than fail on an over-long sequence.
    output = text_summary(
        input_text,
        max_length=200,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return output[0]['summary_text']
19
+
20
def get_video_id(url):
    """Extract the 11-character video ID from a YouTube URL.

    Recognized forms are ``watch?v=<id>`` (with ``v`` anywhere in the query
    string), ``youtu.be/<id>``, and the ``/embed/<id>``, ``/shorts/<id>``,
    ``/live/<id>`` and ``/v/<id>`` paths.

    Args:
        url: URL string entered by the user.

    Returns:
        The video ID as a string.

    Raises:
        ValueError: If ``url`` is not a string or holds no video ID in one of
            the recognized positions.
    """
    if not isinstance(url, str):
        raise ValueError("Invalid YouTube URL")
    # Anchor on known URL markers instead of "any slash": otherwise the first
    # 11 characters of an unrelated path segment (e.g. a channel ID) would be
    # returned as a video ID. The trailing lookahead rejects tokens longer
    # than 11 characters instead of silently truncating them.
    match = re.search(
        r"(?:[?&]v=|youtu\.be/|/(?:embed|shorts|live|v)/)"
        r"([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])",
        url,
    )
    if not match:
        raise ValueError("Invalid YouTube URL")
    return match.group(1)
26
+
27
def fetch_transcript(video_id):
    """
    Fetch the transcript of a YouTube video in English or, failing that, Hindi.

    Args:
        video_id: YouTube video ID to look up.

    Returns:
        A ``(text, language_code)`` tuple: the transcript rendered as plain
        text by ``TextFormatter`` and the language code (``"en"`` or ``"hi"``)
        that succeeded.

    Raises:
        ValueError: If neither language yields a transcript. The last
            underlying error is attached as ``__cause__`` so the real reason
            (no captions, network failure, ...) is not lost.
    """
    last_error = None
    for language_code in ("en", "hi"):
        try:
            transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=[language_code])
            return TextFormatter().format_transcript(transcript), language_code
        except Exception as error:
            # Deliberate best-effort fallback: remember why this language
            # failed and move on to the next one.
            last_error = error
    raise ValueError("Transcript not available in English or Hindi.") from last_error
41
+
42
def process_youtube_url(url):
    """Turn a YouTube URL into the text shown in the UI.

    Resolves the video ID, fetches the transcript, summarizes it, and returns
    a string with the transcript language followed by the summary. Errors are
    never raised to the caller: a ``ValueError`` is returned as its message,
    any other exception as an "unexpected error" message.
    """
    try:
        transcript_text, lang_code = fetch_transcript(get_video_id(url))
        if lang_code == "en":
            language_name = "English"
        else:
            language_name = "Hindi"
        summary = summarize_text(transcript_text)
        return f"Transcript Language: {language_name}\n\nSummary:\n{summary}"
    except ValueError as known_error:
        return str(known_error)
    except Exception as unexpected_error:
        return f"An unexpected error occurred: {unexpected_error}"
54
+
55
# Gradio Interface
# One textbox in (the URL), one textbox out (the summary or an error message
# returned by process_youtube_url).
demo = gr.Interface(
    fn=process_youtube_url,
    inputs=[gr.Textbox(label="Enter YouTube URL", placeholder="https://www.youtube.com/watch?v=example")],
    outputs=[gr.Textbox(label="Summary")],
    title="YouTube Video Transcript Summarizer",
    # User-facing text: possessive "its", not the contraction "it's".
    description="Enter a YouTube video URL to generate its summary. The system will first attempt to fetch the transcript in English, then Hindi if English is unavailable."
)

# Start the web UI only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()