Update handler.py
Browse files- handler.py +20 -21
handler.py
CHANGED
@@ -1,26 +1,25 @@
|
|
1 |
-
import
|
2 |
-
from
|
3 |
-
import
|
4 |
-
import
|
5 |
-
import mimetypes
|
6 |
|
7 |
-
|
8 |
-
|
|
|
|
|
|
|
|
|
9 |
|
10 |
-
def
|
11 |
-
|
12 |
-
|
13 |
-
b = i.read()
|
14 |
-
# get mimetype
|
15 |
-
content_type= mimetypes.guess_type(path_to_audio)[0]
|
16 |
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
}
|
21 |
-
response = r.post(ENDPOINT_URL, headers=headers, data=b)
|
22 |
-
return response.json()
|
23 |
|
24 |
-
|
|
|
25 |
|
26 |
-
|
|
|
|
1 |
+
from typing import Dict
|
2 |
+
from faster_whisper import WhisperModel
|
3 |
+
import torch
|
4 |
+
import io
|
|
|
5 |
|
class EndpointHandler:
    """Inference Endpoints handler that transcribes audio with faster-whisper.

    Loads a CPU-only ``WhisperModel`` once at startup and serves
    transcription requests whose payload carries raw audio bytes.
    """

    def __init__(self, path=""):
        # Initialize WhisperModel for a Xeon processor on CPU.
        # NOTE(review): `path` is part of the Inference Endpoints handler
        # contract but is unused here — the model size is hard-coded.
        model_size = "large-v2"
        device = "cpu"
        self.model = WhisperModel(model_size, device=device)

    def __call__(self, data: Dict[str, bytes]) -> Dict[str, str]:
        """Transcribe the audio supplied under ``data["inputs"]``.

        Args:
            data: Request payload; ``"inputs"`` must hold the raw bytes of
                an audio file (any format ffmpeg/faster-whisper can decode).

        Returns:
            A dict with ``"text"`` (the space-joined transcription of all
            segments) and ``"language"`` (the detected language code).

        Raises:
            ValueError: If ``"inputs"`` is missing or empty.
        """
        # Get the audio file bytes from the request data.
        audio_bytes = data.get("inputs")
        if not audio_bytes:
            # Fail fast with a clear message instead of letting
            # io.BytesIO(None) raise an opaque TypeError.
            raise ValueError(
                'Request payload must contain audio bytes under the "inputs" key.'
            )

        # Wrap the bytes in a file-like object; WhisperModel.transcribe
        # accepts a binary stream and handles audio decoding internally.
        audio_buffer = io.BytesIO(audio_bytes)
        segments, info = self.model.transcribe(audio_buffer)

        # Compile the transcribed text segments into a single string.
        text = " ".join(segment.text for segment in segments)

        # Return the transcribed text and detected language.
        return {"text": text, "language": info.language}
|