def transcribe_audio_by_whisper(audio_path):
Browse files- app.py +20 -1
- requirements.txt +2 -1
app.py
CHANGED
@@ -12,6 +12,8 @@ from openai import OpenAI
|
|
12 |
import random
|
13 |
import string
|
14 |
|
|
|
|
|
15 |
|
16 |
# Initialize OpenAI API client
|
17 |
OPEN_AI_KEY = os.getenv("OPEN_AI_KEY")
|
@@ -97,9 +99,26 @@ def download_youtube_audio(youtube_url):
|
|
97 |
def transcribe_audio_by_whisper(audio_path):
|
98 |
# Whisper模型的轉錄實現
|
99 |
start_time = time.time()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
end_time = time.time() # 函数结束执行的时间
|
101 |
processing_time = int(end_time - start_time)
|
102 |
-
|
|
|
103 |
|
104 |
def transcribe_audio_by_open_ai(audio_path):
|
105 |
# OpenAI語音識別的轉錄實現
|
|
|
12 |
import random
|
13 |
import string
|
14 |
|
15 |
+
from faster_whisper import WhisperModel
|
16 |
+
|
17 |
|
18 |
# Initialize OpenAI API client
|
19 |
OPEN_AI_KEY = os.getenv("OPEN_AI_KEY")
|
|
|
99 |
def transcribe_audio_by_whisper(audio_path):
    """Transcribe an audio file with a faster-whisper ``WhisperModel``.

    Builds a ``large-v3`` model on the CPU with INT8 computation, runs
    transcription with a beam size of 5, and prints the detected language
    and every segment to stdout while collecting the transcript.

    Args:
        audio_path: Audio input passed unchanged to ``WhisperModel.transcribe``.

    Returns:
        A ``(transcription, processing_time)`` tuple. ``transcription`` holds
        one ``[start -> end] text`` line per segment; ``processing_time`` is
        the elapsed time measured with ``time.time()``, truncated to an int.
    """
    started_at = time.time()

    # GPU alternatives: device="cuda" with compute_type="float16" (FP16)
    # or compute_type="int8_float16" (INT8). The CPU/INT8 setup is used here.
    whisper = WhisperModel("large-v3", device="cpu", compute_type="int8")
    segments, info = whisper.transcribe(audio_path, beam_size=5)
    print("Detected language '%s' with probability %f" % (info.language, info.language_probability))

    # NOTE(review): faster-whisper documents `segments` as a generator, so the
    # decoding work presumably happens while this loop runs — keep the loop
    # inside the timed region.
    lines = []
    for seg in segments:
        print("[%.2fs -> %.2fs] %s" % (seg.start, seg.end, seg.text))
        lines.append(f"[{seg.start:.3f} -> {seg.end:.3f}] {seg.text}\n")
    transcription = "".join(lines)

    # Elapsed time for model construction plus transcription.
    processing_time = int(time.time() - started_at)

    return transcription, processing_time
|
122 |
|
123 |
def transcribe_audio_by_open_ai(audio_path):
|
124 |
# OpenAI語音識別的轉錄實現
|
requirements.txt
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
yt-dlp
|
2 |
gradio
|
3 |
pydub
|
4 |
-
openai >= 1.0.0
|
|
|
|
1 |
yt-dlp
|
2 |
gradio
|
3 |
pydub
|
4 |
+
openai >= 1.0.0
|
5 |
+
faster-whisper
|