File size: 2,532 Bytes
04e4202 67b26c6 04e4202 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter
import re
import torch
import gradio as gr
from transformers import pipeline
# Build the shared summarization pipeline once, at import time, so every
# request reuses the same loaded model. device=-1 selects the CPU in the
# transformers pipeline API; the weights are requested as bfloat16.
text_summary = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
    torch_dtype=torch.bfloat16,
    device=-1,
)
def summarize_text(input_text):
    """Summarize a transcript with the module-level ``text_summary`` pipeline.

    Args:
        input_text: Transcript text to condense. ``None``, empty, or
            whitespace-only input is treated as "no transcript".

    Returns:
        The generated summary string, or the fixed message
        ``"No transcript available to summarize."`` when there is nothing
        to summarize.
    """
    # Guard first so missing or blank input never reaches the model.
    if not input_text or not input_text.strip():
        return "No transcript available to summarize."

    # Whole-video transcripts are often longer than the model's input window;
    # truncation=True asks the tokenizer to clip the text instead of letting
    # the pipeline fail on an over-long sequence.
    output = text_summary(
        input_text,
        max_length=200,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return output[0]["summary_text"]
def get_video_id(url):
    """Extract the 11-character video ID from a YouTube URL.

    The ID is the first run of exactly 11 ID characters (letters, digits,
    ``_`` or ``-``) that directly follows ``v=`` or a ``/``.

    Args:
        url: URL string such as ``https://www.youtube.com/watch?v=<id>`` or
            ``https://youtu.be/<id>``.

    Returns:
        The 11-character video ID.

    Raises:
        ValueError: If ``url`` is not a string or contains no such ID.
    """
    # Non-string input (e.g. None from an empty form field) is reported the
    # same way as any other unusable URL instead of surfacing a TypeError.
    if not isinstance(url, str):
        raise ValueError("Invalid YouTube URL")

    # The negative lookahead requires the ID to end after 11 characters.
    # Without it, the first 11 characters of any longer token (for example a
    # channel ID in the path) were silently accepted as a video ID.
    match = re.search(r"(?:v=|/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])", url)
    if match is None:
        raise ValueError("Invalid YouTube URL")
    return match.group(1)
def fetch_transcript(video_id):
    """Fetch a video's transcript, trying English first and then Hindi.

    Args:
        video_id: YouTube video ID passed to ``YouTubeTranscriptApi``.

    Returns:
        A ``(text, language_code)`` tuple: the transcript rendered by
        ``TextFormatter`` and the code (``"en"`` or ``"hi"``) of the
        language that produced it.

    Raises:
        ValueError: If neither language yields a transcript. The last
            underlying error is attached as ``__cause__`` so the real
            failure is not lost.
    """
    formatter = TextFormatter()
    last_error = None
    for language_code in ("en", "hi"):
        try:
            transcript = YouTubeTranscriptApi.get_transcript(
                video_id, languages=[language_code]
            )
            return formatter.format_transcript(transcript), language_code
        except Exception as error:
            # Deliberate best-effort fallback: any failure for this language
            # moves on to the next one, but the reason is kept for chaining.
            last_error = error
    raise ValueError("Transcript not available in English or Hindi.") from last_error
def process_youtube_url(url):
    """Produce a summary report for the video behind a YouTube URL.

    Resolves the video ID, fetches the transcript, and summarizes it.

    Args:
        url: The YouTube URL entered by the user.

    Returns:
        On success, a string naming the transcript language followed by the
        summary. A ``ValueError`` raised along the way is returned as its
        message; any other exception is returned with an
        "An unexpected error occurred" prefix. Nothing is re-raised.
    """
    try:
        transcript, language = fetch_transcript(get_video_id(url))
        # Only "en" and "hi" are attempted, so anything but "en" is Hindi.
        if language == "en":
            language_name = "English"
        else:
            language_name = "Hindi"
        summary = summarize_text(transcript)
        return f"Transcript Language: {language_name}\n\nSummary:\n{summary}"
    except ValueError as known_error:
        return str(known_error)
    except Exception as unexpected_error:
        return f"An unexpected error occurred: {unexpected_error}"
# Gradio interface: one URL text box wired to process_youtube_url, with the
# returned report shown in a "Summary" text box.
demo = gr.Interface(
    fn=process_youtube_url,
    inputs=[gr.Textbox(label="Enter YouTube URL", placeholder="https://www.youtube.com/watch?v=example")],
    outputs=[gr.Textbox(label="Summary")],
    title="YouTube Video Transcript Summarizer",
    # User-facing text; "it's summary" corrected to the possessive "its".
    description=(
        "Enter a YouTube video URL to generate its summary. "
        "The system will first attempt to fetch the transcript in English, "
        "then Hindi if English is unavailable."
    ),
)
# Launch the interface only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()