"""Gradio app that transcribes the audio track of a YouTube video with Whisper."""

import gradio as gr
import whisper
from pytube import YouTube

# The model is loaded once at import time and swapped by ``change_model``.
loaded_model = whisper.load_model("medium")
current_size = 'medium'


def inference(link: str) -> str:
    """Download the audio of the YouTube video at *link* and transcribe it.

    Args:
        link: URL of the YouTube video.

    Returns:
        The text produced by the currently loaded Whisper model.

    Raises:
        ValueError: If the video exposes no audio-only mp4 stream.
    """
    yt = YouTube(link)
    # Kept as a module-level name because the original code published it
    # that way; nothing in this file reads it back.
    global audio_stream
    audio_stream = yt.streams.filter(only_audio=True, file_extension='mp4').first()
    if audio_stream is None:
        # ``first()`` yields None when the filter matches nothing; fail with
        # a readable message instead of an AttributeError on ``.download``.
        raise ValueError(f"No audio-only mp4 stream available for {link!r}")
    path = audio_stream.download()
    # NOTE(review): the original built
    # ``whisper.DecodingOptions(language='Spanish', without_timestamps=True)``
    # here but never passed it to ``transcribe``, so it had no effect.
    # If forcing Spanish was intended, pass the options to ``transcribe``
    # explicitly - confirm with the author before changing behaviour.
    results = loaded_model.transcribe(path)
    return results['text']


def change_model(size: str) -> None:
    """Load the Whisper model named *size* unless it is already active.

    Args:
        size: Model name selected in the dropdown (e.g. ``'tiny'``).
    """
    # Both names are rebound below, so they must be declared global;
    # otherwise the comparison raises UnboundLocalError and the module-level
    # model used by ``inference`` is never replaced.
    global loaded_model, current_size
    if size == current_size:
        return
    loaded_model = whisper.load_model(size)
    current_size = size


def populate_metadata(link: str):
    """Return ``(thumbnail_url, title)`` for the YouTube video at *link*.

    Returns ``(None, None)`` when *link* is empty or blank, which happens
    whenever the textbox is cleared and its change event fires.
    """
    if not link or not link.strip():
        return None, None
    yt = YouTube(link)
    return yt.thumbnail_url, yt.title


# Placeholders from the original script; ``title`` is rebound to a UI
# component further down and ``description`` is not referenced in this file.
title = ""
description = ""

# The footer-hiding CSS used to be passed to a stray ``gr.Interface(...)``
# call after ``launch`` that lacked its required arguments and could only
# raise; it is applied to the Blocks app instead.
block = gr.Blocks(css="footer {visibility: hidden}")
with block:
    gr.HTML(
        """
"""
    )
    with gr.Group():
        with gr.Group():
            sz = gr.Dropdown(
                label="Model Size",
                choices=['tiny', 'base', 'small', 'medium', 'large'],
                value='medium',
            )
            link = gr.Textbox(label="YouTube Link")
            gr.Markdown("Ejemplo: https://www.youtube.com/watch?v=bnvgcQB01mQ")
            with gr.Row():
                title = gr.Label(label="Video Title")
                img = gr.Image(label="Thumbnail")
            text = gr.Textbox(
                label="Transcription",
                placeholder="Transcription Output",
                lines=5,
            )
            with gr.Row():
                btn = gr.Button("Transcribe")

        # Events
        btn.click(inference, inputs=[link], outputs=[text])
        link.change(populate_metadata, inputs=[link], outputs=[img, title])
        sz.change(change_model, inputs=[sz], outputs=[])

block.launch(debug=True)