srd4 committed on
Commit
e59837f
·
verified ·
1 Parent(s): bdcdb1f

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +18 -18
handler.py CHANGED
@@ -1,26 +1,26 @@
1
- from typing import Dict
2
  from faster_whisper import WhisperModel
3
  import torch
4
- import io
5
 
6
class EndpointHandler:
    """Inference-endpoint wrapper around faster-whisper for CPU transcription.

    Loads a Whisper model once at startup and transcribes audio payloads
    passed as raw bytes, entirely in memory (no temp files).
    """

    def __init__(self, path="/repository"):
        """Load the Whisper model for CPU inference.

        Args:
            path: Directory holding (or used to cache) the model files.
        """
        model_size = "large-v2"
        # CPU-only deployment (e.g. a Xeon host) — no GPU assumed.
        device = "cpu"
        # NOTE(review): WhisperModel has no `model_dir` keyword — the
        # original call raised TypeError. The model-directory parameter
        # is `download_root`.
        self.model = WhisperModel(model_size, device=device, download_root=path)

    def __call__(self, data: Dict[str, bytes]) -> Dict[str, str]:
        """Transcribe the audio bytes supplied under ``data["inputs"]``.

        Args:
            data: Request payload; ``data["inputs"]`` holds the raw audio bytes.

        Returns:
            ``{"text": <transcript>, "language": <detected language code>}``.

        Raises:
            ValueError: If the payload has no ``"inputs"`` key.
        """
        audio_bytes = data.get("inputs")
        if audio_bytes is None:
            # Fail fast with a clear message instead of an opaque error
            # from BytesIO(None) deeper in the stack.
            raise ValueError('Request payload must contain "inputs" with audio bytes.')

        # transcribe() accepts a file-like object, so the audio never
        # touches disk.
        audio_buffer = io.BytesIO(audio_bytes)
        segments, info = self.model.transcribe(audio_buffer)

        # `segments` is a lazy generator; joining consumes it fully.
        text = " ".join(segment.text for segment in segments)

        return {"text": text, "language": info.language}
 
 
1
  from faster_whisper import WhisperModel
2
  import torch
 
3
 
4
class EndpointHandler:
    """Inference-endpoint wrapper around faster-whisper for CPU transcription.

    Loads the Whisper model once at startup and transcribes audio payloads
    passed as raw bytes, entirely in memory (no temp files).
    """

    def __init__(self):
        """Load the Whisper model for CPU inference."""
        model_size = "large-v2"  # Update model size if different
        device = "cpu"  # Use CPU for Azure deployment
        self.model = WhisperModel(model_size, device=device)

    def __call__(self, data):
        """Transcribe the audio bytes supplied under ``data["inputs"]``.

        Args:
            data: Request payload; ``data["inputs"]`` holds the raw audio bytes.

        Returns:
            ``{"text": <transcript>, "language": <detected language code>}``.

        Raises:
            ValueError: If the payload has no ``"inputs"`` key.
        """
        # Local import: the module header no longer imports io.
        import io

        # Extract audio bytes from the request data.
        audio_bytes = data.get("inputs")
        if audio_bytes is None:
            raise ValueError('Request payload must contain "inputs" with audio bytes.')

        # transcribe() accepts a path or file-like object, not raw bytes —
        # wrap the payload in BytesIO so nothing is written to disk.
        segments, info = self.model.transcribe(io.BytesIO(audio_bytes))

        # Combine the text from all segments (`segments` is a lazy generator).
        text = " ".join(segment.text for segment in segments)

        # TranscriptionInfo exposes `language`, not `language_code` —
        # the latter raises AttributeError at request time.
        return {"text": text, "language": info.language}