ashhadahsan committed on
Commit
4751966
·
1 Parent(s): eeb50b0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -28
app.py CHANGED
@@ -9,7 +9,6 @@ from utils import (
9
  read,
10
  get_key,
11
  )
12
- import subprocess
13
  import whisperx as whisper
14
  import json
15
  import pandas as pd
@@ -164,33 +163,22 @@ with output:
164
  if audio_uploaded is not None:
165
  if audio_uploaded.name.endswith(".wav"):
166
  temp = AudioSegment.from_wav(audio_uploaded)
167
- input=f"{name}.wav"
168
- temp.export(input)
169
  if audio_uploaded.name.endswith(".mp3"):
170
- input=f"{name}.mp3"
171
 
172
-
173
- with open(input, "wb") as f:
174
 
175
- f.write(audio_uploaded.getbuffer())
176
-
177
-
178
-
179
- # subprocess.call(['ffmpeg', '-i', audio_uploaded.name,
180
- # f'{name}.wav'])
181
- # try:
182
 
183
- # temp = AudioSegment.from_file(audio_uploaded, format="mp3")
184
- # temp.export(f"{name}.wav")
185
- # except:
186
-
187
 
188
- # temp = AudioSegment.from_file(audio_uploaded, format="mp4")
189
- # temp.export(f"{name}.wav")
190
  if language == "":
191
  model = whisper.load_model(model_name)
192
  with st.spinner("Detecting language..."):
193
- detection = detect_language(input, model)
194
  language = detection.get("detected_language")
195
  del model
196
  if len(language) > 2:
@@ -204,7 +192,7 @@ with output:
204
  with st.container():
205
  with st.spinner(f"Running with {model_name} model"):
206
  result = model.transcribe(
207
- input,
208
  language=language,
209
  patience=patience,
210
  initial_prompt=initial_prompt,
@@ -228,15 +216,15 @@ with output:
228
  result["segments"],
229
  model_a,
230
  metadata,
231
- input,
232
  device=device,
233
  )
234
  write(
235
- input,
236
  dtype=transcription,
237
  result_aligned=result_aligned,
238
  )
239
- trans_text = read(input, transcription)
240
  trans.text_area(
241
  "transcription", trans_text, height=None, max_chars=None, key=None
242
  )
@@ -319,16 +307,16 @@ with output:
319
  cont,
320
  model_a,
321
  metadata,
322
- input,
323
  device=device,
324
  )
325
  words_segments = result_aligned["word_segments"]
326
  write(
327
- input,
328
  dtype=transcription,
329
  result_aligned=result_aligned,
330
  )
331
- trans_text = read(input, transcription)
332
  char_segments = []
333
  word_segments = []
334
 
@@ -387,4 +375,4 @@ with output:
387
  "detected language", language_dict.get(language), disabled=True
388
  )
389
  os.remove(f"{name}.wav")
390
- os.remove(f"{json_filname}.json")
 
9
  read,
10
  get_key,
11
  )
 
12
  import whisperx as whisper
13
  import json
14
  import pandas as pd
 
163
  if audio_uploaded is not None:
164
  if audio_uploaded.name.endswith(".wav"):
165
  temp = AudioSegment.from_wav(audio_uploaded)
166
+ temp.export(f"{name}.wav")
 
167
  if audio_uploaded.name.endswith(".mp3"):
 
168
 
169
+ try:
 
170
 
 
 
 
 
 
 
 
171
 
172
+ temp = AudioSegment.from_file(audio_uploaded, format="mp3")
173
+ temp.export(f"{name}.wav")
174
+ except:
 
175
 
176
+ temp = AudioSegment.from_file(audio_uploaded, format="mp4")
177
+ temp.export(f"{name}.wav")
178
  if language == "":
179
  model = whisper.load_model(model_name)
180
  with st.spinner("Detecting language..."):
181
+ detection = detect_language(f"{name}.wav", model)
182
  language = detection.get("detected_language")
183
  del model
184
  if len(language) > 2:
 
192
  with st.container():
193
  with st.spinner(f"Running with {model_name} model"):
194
  result = model.transcribe(
195
+ f"{name}.wav",
196
  language=language,
197
  patience=patience,
198
  initial_prompt=initial_prompt,
 
216
  result["segments"],
217
  model_a,
218
  metadata,
219
+ f"{name}.wav",
220
  device=device,
221
  )
222
  write(
223
+ f"{name}.wav",
224
  dtype=transcription,
225
  result_aligned=result_aligned,
226
  )
227
+ trans_text = read(f"{name}.wav", transcription)
228
  trans.text_area(
229
  "transcription", trans_text, height=None, max_chars=None, key=None
230
  )
 
307
  cont,
308
  model_a,
309
  metadata,
310
+ f"{name}.wav",
311
  device=device,
312
  )
313
  words_segments = result_aligned["word_segments"]
314
  write(
315
+ f"{name}.wav",
316
  dtype=transcription,
317
  result_aligned=result_aligned,
318
  )
319
+ trans_text = read(f"{name}.wav", transcription)
320
  char_segments = []
321
  word_segments = []
322
 
 
375
  "detected language", language_dict.get(language), disabled=True
376
  )
377
  os.remove(f"{name}.wav")
378
+ os.remove(f"{json_filname}.json")