srd4 committed on
Commit
e59837f
·
verified ·
1 Parent(s): bdcdb1f

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +18 -18
handler.py CHANGED
@@ -1,26 +1,26 @@
1
- from typing import Dict
2
  from faster_whisper import WhisperModel
3
  import torch
4
- import io
5
 
6
class EndpointHandler:
    """Inference-endpoint wrapper around faster-whisper for CPU transcription.

    Loads a Whisper model once at startup and transcribes audio payloads
    passed as raw bytes, entirely in memory (no temp files).
    """

    def __init__(self, path="/repository"):
        """Load the Whisper model for CPU inference.

        Args:
            path: Directory holding (or used to cache) the model files.
        """
        model_size = "large-v2"
        # CPU-only deployment (e.g. a Xeon host) — no GPU assumed.
        device = "cpu"
        # NOTE(review): WhisperModel has no `model_dir` keyword — the
        # original call raised TypeError. The model-directory parameter
        # is `download_root`.
        self.model = WhisperModel(model_size, device=device, download_root=path)

    def __call__(self, data: Dict[str, bytes]) -> Dict[str, str]:
        """Transcribe the audio bytes supplied under ``data["inputs"]``.

        Args:
            data: Request payload; ``data["inputs"]`` holds the raw audio bytes.

        Returns:
            ``{"text": <transcript>, "language": <detected language code>}``.

        Raises:
            ValueError: If the payload has no ``"inputs"`` key.
        """
        audio_bytes = data.get("inputs")
        if audio_bytes is None:
            # Fail fast with a clear message instead of an opaque error
            # from BytesIO(None) deeper in the stack.
            raise ValueError('Request payload must contain "inputs" with audio bytes.')

        # transcribe() accepts a file-like object, so the audio never
        # touches disk.
        audio_buffer = io.BytesIO(audio_bytes)
        segments, info = self.model.transcribe(audio_buffer)

        # `segments` is a lazy generator; joining consumes it fully.
        text = " ".join(segment.text for segment in segments)

        return {"text": text, "language": info.language}
 
 
1
  from faster_whisper import WhisperModel
2
  import torch
 
3
 
4
class EndpointHandler:
    """Inference-endpoint wrapper around faster-whisper for CPU transcription.

    Loads the Whisper model once at startup and transcribes audio payloads
    passed as raw bytes, entirely in memory (no temp files).
    """

    def __init__(self):
        """Load the Whisper model for CPU inference."""
        model_size = "large-v2"  # Update model size if different
        device = "cpu"  # Use CPU for Azure deployment
        self.model = WhisperModel(model_size, device=device)

    def __call__(self, data):
        """Transcribe the audio bytes supplied under ``data["inputs"]``.

        Args:
            data: Request payload; ``data["inputs"]`` holds the raw audio bytes.

        Returns:
            ``{"text": <transcript>, "language": <detected language code>}``.

        Raises:
            ValueError: If the payload has no ``"inputs"`` key.
        """
        # Local import: the module header no longer imports io.
        import io

        # Extract audio bytes from the request data.
        audio_bytes = data.get("inputs")
        if audio_bytes is None:
            raise ValueError('Request payload must contain "inputs" with audio bytes.')

        # transcribe() accepts a path or file-like object, not raw bytes —
        # wrap the payload in BytesIO so nothing is written to disk.
        segments, info = self.model.transcribe(io.BytesIO(audio_bytes))

        # Combine the text from all segments (`segments` is a lazy generator).
        text = " ".join(segment.text for segment in segments)

        # TranscriptionInfo exposes `language`, not `language_code` —
        # the latter raises AttributeError at request time.
        return {"text": text, "language": info.language}