"""Gradio app that transcribes microphone/uploaded audio with OpenAI Whisper (small)."""

import numpy as np
import gradio as gr
from transformers import WhisperProcessor, WhisperForConditionalGeneration

# Whisper models are trained on 16 kHz mono audio; the feature extractor
# rejects other sampling rates, so everything is resampled to this.
TARGET_SAMPLE_RATE = 16000

# Load model and processor once at startup (they are large; do not reload per call).
processor = WhisperProcessor.from_pretrained("openai/whisper-small")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")


def _prepare_audio(sample_rate, data):
    """Convert raw Gradio audio into 16 kHz mono float32 for Whisper.

    Parameters
    ----------
    sample_rate : int
        Sampling rate reported by the Gradio Audio component.
    data : np.ndarray
        Samples as int PCM (e.g. int16) or float, mono ``(n,)`` or
        multi-channel ``(n, channels)``.

    Returns
    -------
    np.ndarray
        1-D float32 waveform at ``TARGET_SAMPLE_RATE``.
    """
    raw = np.asarray(data)
    audio = raw.astype(np.float32)
    # Integer PCM -> floats in [-1.0, 1.0].
    if np.issubdtype(raw.dtype, np.integer):
        audio = audio / np.iinfo(raw.dtype).max
    # Stereo/multi-channel -> mono by averaging channels.
    if audio.ndim > 1:
        audio = audio.mean(axis=1)
    # Linear-interpolation resample to the rate Whisper expects.
    if sample_rate != TARGET_SAMPLE_RATE and audio.size:
        duration = audio.shape[0] / float(sample_rate)
        n_target = max(1, int(round(duration * TARGET_SAMPLE_RATE)))
        old_times = np.linspace(0.0, duration, num=audio.shape[0], endpoint=False)
        new_times = np.linspace(0.0, duration, num=n_target, endpoint=False)
        audio = np.interp(new_times, old_times, audio).astype(np.float32)
    return audio


def transcribe_audio(audio):
    """Transcribe a recording captured by the Gradio Audio component.

    Parameters
    ----------
    audio : tuple[int, np.ndarray] | None
        ``(sample_rate, samples)`` from ``gr.Audio(type="numpy")``; ``None``
        when the user submits without recording or uploading anything.

    Returns
    -------
    str
        The transcribed text, or an empty string for missing/empty audio.
    """
    if audio is None:
        return ""
    sample_rate, data = audio
    waveform = _prepare_audio(sample_rate, data)
    if waveform.size == 0:
        return ""
    # Extract log-mel input features for the model.
    input_features = processor(
        waveform, sampling_rate=TARGET_SAMPLE_RATE, return_tensors="pt"
    ).input_features
    # Generate token ids and decode them to text.
    predicted_ids = model.generate(input_features)
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
    return transcription[0]


# Set up the Gradio interface. type="numpy" hands the function a
# (sample_rate, np.ndarray) tuple. The original code called
# processor.load_audio(audio_file.name), but WhisperProcessor has no
# load_audio method and Gradio does not pass a file object.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(type="numpy"),
    outputs="text",
)

# Only start the server when run as a script, not on import.
if __name__ == "__main__":
    iface.launch()