import re import requests import gradio as gr import json def retrieve_transcript(video_url): # Extract video ID from the URL video_id_match = re.search(r"v=([a-zA-Z0-9_-]+)", video_url) if not video_id_match: return "Invalid YouTube URL. Please ensure it contains a valid video ID." video_id = video_id_match.group(1) # Regular expression to extract ytInitialPlayerResponse YT_INITIAL_PLAYER_RESPONSE_RE = re.compile( r"ytInitialPlayerResponse\s*=\s*({.+?})\s*;\s*(?:var\s+(?:meta|head)| Non-English, Non-ASR > ASR if lang_code1 == "en" and lang_code2 != "en": return -1 elif lang_code1 != "en" and lang_code2 == "en": return 1 elif track1.get("kind") != "asr" and track2.get("kind") == "asr": return -1 elif track1.get("kind") == "asr" and track2.get("kind") != "asr": return 1 return 0 # Preserve order if both have same priority # Gradio Interface def gradio_interface(video_url): return retrieve_transcript(video_url) # Create Gradio UI interface = gr.Interface( fn=gradio_interface, inputs="text", outputs="text", title="YouTube Transcript Extractor", description="Enter a YouTube video URL to extract the transcript. The video must have captions available.", ) # Launch the app if __name__ == "__main__": interface.launch()