Antoniskaraolis committed on
Commit
ee82ddd
·
1 Parent(s): c541d91

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -11
app.py CHANGED
@@ -1,19 +1,22 @@
1
- import whisper
2
  import gradio as gr
3
 
4
- def transcribe_audio(file_info):
5
- model = whisper.load_model("base") # Choose the appropriate model size
6
- audio = whisper.load_audio(file_info.name)
7
- audio = whisper.pad_or_trim(audio)
8
- mel = whisper.log_mel_spectrogram(audio).to(model.device)
9
 
10
- _, probs = model.detect_language(mel)
11
- language = max(probs, key=probs.get)
12
- print(f"Detected language: {language}")
 
13
 
14
- result = model.transcribe(mel)
15
- return result["text"]
 
16
 
 
 
 
17
  iface = gr.Interface(
18
  fn=transcribe_audio,
19
  inputs="audio",
 
1
+ from transformers import WhisperProcessor, WhisperForConditionalGeneration
2
  import gradio as gr
3
 
4
+ # Load model and processor
5
+ processor = WhisperProcessor.from_pretrained("openai/whisper-small")
6
+ model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
 
 
7
 
8
+ def transcribe_audio(audio_file):
9
+ # Load and process the audio file
10
+ audio_input, sampling_rate = processor.load_audio(audio_file.name)
11
+ input_features = processor(audio_input, sampling_rate=sampling_rate, return_tensors="pt").input_features
12
 
13
+ # Generate token ids and decode them to text
14
+ predicted_ids = model.generate(input_features)
15
+ transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
16
 
17
+ return transcription[0]
18
+
19
+ # Set up Gradio interface
20
  iface = gr.Interface(
21
  fn=transcribe_audio,
22
  inputs="audio",