Update handler.py
Browse files- handler.py +20 -21
handler.py
CHANGED
@@ -1,26 +1,25 @@
|
|
1 |
-
import
|
2 |
-
from
|
3 |
-
import
|
4 |
-
import
|
5 |
-
import mimetypes
|
6 |
|
7 |
-
|
8 |
-
|
|
|
|
|
|
|
|
|
9 |
|
10 |
-
def
|
11 |
-
|
12 |
-
|
13 |
-
b = i.read()
|
14 |
-
# get mimetype
|
15 |
-
content_type= mimetypes.guess_type(path_to_audio)[0]
|
16 |
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
}
|
21 |
-
response = r.post(ENDPOINT_URL, headers=headers, data=b)
|
22 |
-
return response.json()
|
23 |
|
24 |
-
|
|
|
25 |
|
26 |
-
|
|
|
|
1 |
+
from typing import Dict
|
2 |
+
from faster_whisper import WhisperModel
|
3 |
+
import torch
|
4 |
+
import io
|
|
|
5 |
|
class EndpointHandler:
    """Inference Endpoints handler that transcribes audio with faster-whisper.

    Loads a CPU-only ``WhisperModel`` once at startup and serves
    transcription requests whose payload carries raw audio bytes.
    """

    def __init__(self, path=""):
        # Initialize WhisperModel for a Xeon processor on CPU.
        # NOTE(review): `path` is part of the Inference Endpoints handler
        # contract but is unused here — the model size is hard-coded.
        model_size = "large-v2"
        device = "cpu"
        self.model = WhisperModel(model_size, device=device)

    def __call__(self, data: Dict[str, bytes]) -> Dict[str, str]:
        """Transcribe the audio supplied under ``data["inputs"]``.

        Args:
            data: Request payload; ``"inputs"`` must hold the raw bytes of
                an audio file (any format ffmpeg/faster-whisper can decode).

        Returns:
            A dict with ``"text"`` (the space-joined transcription of all
            segments) and ``"language"`` (the detected language code).

        Raises:
            ValueError: If ``"inputs"`` is missing or empty.
        """
        # Get the audio file bytes from the request data.
        audio_bytes = data.get("inputs")
        if not audio_bytes:
            # Fail fast with a clear message instead of letting
            # io.BytesIO(None) raise an opaque TypeError.
            raise ValueError(
                'Request payload must contain audio bytes under the "inputs" key.'
            )

        # Wrap the bytes in a file-like object; WhisperModel.transcribe
        # accepts a binary stream and handles audio decoding internally.
        audio_buffer = io.BytesIO(audio_bytes)
        segments, info = self.model.transcribe(audio_buffer)

        # Compile the transcribed text segments into a single string.
        text = " ".join(segment.text for segment in segments)

        # Return the transcribed text and detected language.
        return {"text": text, "language": info.language}
|