youngtsai committed on
Commit
43a7a45
1 Parent(s): a46b138

def transcribe_audio_by_whisper(audio_path):

Browse files
Files changed (2) hide show
  1. app.py +20 -1
  2. requirements.txt +2 -1
app.py CHANGED
@@ -12,6 +12,8 @@ from openai import OpenAI
12
  import random
13
  import string
14
 
 
 
15
 
16
  # Initialize OpenAI API client
17
  OPEN_AI_KEY = os.getenv("OPEN_AI_KEY")
@@ -97,9 +99,26 @@ def download_youtube_audio(youtube_url):
97
  def transcribe_audio_by_whisper(audio_path):
98
  # Whisper模型的轉錄實現
99
  start_time = time.time()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  end_time = time.time() # 函数结束执行的时间
101
  processing_time = int(end_time - start_time)
102
- return "transcribe_audio_by_whisper", processing_time
 
103
 
104
  def transcribe_audio_by_open_ai(audio_path):
105
  # OpenAI語音識別的轉錄實現
 
12
  import random
13
  import string
14
 
15
+ from faster_whisper import WhisperModel
16
+
17
 
18
  # Initialize OpenAI API client
19
  OPEN_AI_KEY = os.getenv("OPEN_AI_KEY")
 
99
  def transcribe_audio_by_whisper(audio_path):
100
  # Whisper模型的轉錄實現
101
  start_time = time.time()
102
+
103
+ model_size = "large-v3"
104
+ # Run on GPU with FP16
105
+ # model = WhisperModel(model_size, device="cuda", compute_type="float16")
106
+ # or run on GPU with INT8
107
+ # model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")
108
+ # or run on CPU with INT8
109
+ model = WhisperModel(model_size, device="cpu", compute_type="int8")
110
+ segments, info = model.transcribe(audio_path, beam_size=5)
111
+ print("Detected language '%s' with probability %f" % (info.language, info.language_probability))
112
+
113
+ transcription = ""
114
+ for segment in segments:
115
+ print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
116
+ transcription += f"[{segment.start:.3f} -> {segment.end:.3f}] {segment.text}\n"
117
+
118
  end_time = time.time() # 函数结束执行的时间
119
  processing_time = int(end_time - start_time)
120
+
121
+ return transcription, processing_time
122
 
123
  def transcribe_audio_by_open_ai(audio_path):
124
  # OpenAI語音識別的轉錄實現
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  yt-dlp
2
  gradio
3
  pydub
4
- openai >= 1.0.0
 
 
1
  yt-dlp
2
  gradio
3
  pydub
4
+ openai >= 1.0.0
5
+ faster-whisper