import librosa
from transformers import WhisperProcessor, WhisperForConditionalGeneration
import gradio as gr

# Load model and processor
processor = WhisperProcessor.from_pretrained("openai/whisper-small")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")

def transcribe_audio(audio_path):
    # Load the recording and resample to 16 kHz, the rate Whisper expects
    audio_input, sampling_rate = librosa.load(audio_path, sr=16000)
    input_features = processor(audio_input, sampling_rate=sampling_rate, return_tensors="pt").input_features

    # Generate token ids and decode them to text
    predicted_ids = model.generate(input_features)
    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)

    return transcription[0]

# Set up Gradio interface; type="filepath" passes the recording to the
# function as a path on disk, matching what librosa.load expects
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
)

iface.launch()