import gradio as gr
from faster_whisper import WhisperModel


# Load and initialize the Whisper model
def load_model(model_size):
    model = WhisperModel(model_size, device="cpu", compute_type="int8")
    return model


# Streaming transcription function
def transcribe_audio(model_size, audio_file):
    # Initialize the model with the given size
    model = load_model(model_size)

    # Stream the transcription of the audio file
    transcribed_text = ""
    segments, info = model.transcribe(audio_file, beam_size=5)

    # Yield the detected language information first
    language_info = f"Detected language: {info.language} (probability: {info.language_probability:.2f})"
    yield language_info, transcribed_text.strip()

    # Then yield the running transcription as each segment is processed.
    # The language string is re-yielded on every update so its textbox keeps
    # its value; yielding an empty string here would clear it.
    for segment in segments:
        transcribed_text += segment.text + " "
        yield language_info, transcribed_text.strip()


# Define the Gradio interface
interface = gr.Interface(
    fn=transcribe_audio,  # Generator function that streams the transcription
    inputs=[
        gr.Textbox(label="Model Size (e.g., 'large-v3', 'medium', 'small')", value="large-v3"),  # Model size
        gr.Audio(type="filepath"),  # Uploaded audio file, passed as a file path
    ],
    outputs=[
        gr.Textbox(label="Detected Language"),
        gr.Textbox(label="Transcription"),
    ],
    title="Whisper Transcription App",
    description="Upload an audio file and specify the model size to transcribe it using WhisperModel.",
)

# Launch the app
if __name__ == "__main__":
    interface.launch()
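
# --- Optional: GPU-backed model loading (sketch) ---
# A minimal variant of load_model, assuming an NVIDIA GPU with working
# CUDA/cuDNN libraries; "float16" is the compute type commonly paired with
# faster-whisper on GPU. The broad exception fallback is an assumption about
# how the CTranslate2 backend reports an unusable GPU, not a documented API.
def load_model_gpu(model_size):
    try:
        return WhisperModel(model_size, device="cuda", compute_type="float16")
    except Exception:
        # Fall back to the CPU/int8 configuration used by load_model above
        return WhisperModel(model_size, device="cpu", compute_type="int8")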