fabianzeiher committed on
Commit
eb21f26
·
1 Parent(s): a8e533c

Add descriptive output

Browse files
Files changed (1) hide show
  1. app.py +14 -5
app.py CHANGED
@@ -1,5 +1,6 @@
1
  from transformers import pipeline
2
  import gradio as gr
 
3
 
4
  pipe_fine = pipeline(model="zeihers-mart/whisper-small-swedish-basic", device_map="auto")
5
  pipe_raw = pipeline(model="openai/whisper-small", device_map="auto")
@@ -19,24 +20,32 @@ pipe_raw.model.config.forced_decoder_ids = (
19
  )
20
 
21
  def transcribe(audio):
 
22
  text_sv = pipe_fine(audio)["text"]
23
- print(f"Audio transcribed: {text_sv}")
 
 
 
24
  text_raw= pipe_raw(audio)["text"]
25
- print(f"Text translated: {text_raw}")
 
26
  sentiment= sa(text_sv)
27
  print(f"Sentiment result: {sentiment}")
28
  sentiment= sentiment[0]["label"]
29
  path = "https://upload.wikimedia.org/wikipedia/commons/thumb/e/e0/SNice.svg/1200px-SNice.svg.png"
30
  if sentiment == "NEGATIVE":
31
- path = "https://upload.wikimedia.org/wikipedia/commons/thumb/4/42/Sad_smiley_yellow_simple.svg/2048px-Sad_smiley_yellow_simple.svg.png"
32
- return text_sv, text_raw, path
 
 
33
 
34
  iface = gr.Interface(
35
  fn=transcribe,
36
  inputs=gr.Audio(sources=["microphone"], type="filepath"),
37
  outputs=[gr.Textbox(label="Fine-tuned transcription"),
38
  gr.Textbox(label="Whisper transcription"),
39
- gr.Image(label="Sentiment from Fine-tuned transcription", width=100, height=100)],
 
40
  title="Finetuned Whisper Swedish Small",
41
  description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model.",
42
  )
 
1
  from transformers import pipeline
2
  import gradio as gr
3
+ import time
4
 
5
  pipe_fine = pipeline(model="zeihers-mart/whisper-small-swedish-basic", device_map="auto")
6
  pipe_raw = pipeline(model="openai/whisper-small", device_map="auto")
 
20
  )
21
 
22
  def transcribe(audio):
23
+ start = time.time()
24
  text_sv = pipe_fine(audio)["text"]
25
+ time_fine = time.time() - start
26
+ print(f"Fine-tuned: audio transcribed in {time_fine} seconds: {text_sv}")
27
+
28
+ start = time.time()
29
  text_raw= pipe_raw(audio)["text"]
30
+ time_raw = time.time() - start
31
+ print(f"Raw: audio transcribed in {time_raw} seconds: {text_raw}")
32
  sentiment= sa(text_sv)
33
  print(f"Sentiment result: {sentiment}")
34
  sentiment= sentiment[0]["label"]
35
  path = "https://upload.wikimedia.org/wikipedia/commons/thumb/e/e0/SNice.svg/1200px-SNice.svg.png"
36
  if sentiment == "NEGATIVE":
37
+ path = "https://upload.wikimedia.org/wikipedia/commons/thumb/4/42/Sad_smiley_yellow_simple.svg/240px-Sad_smiley_yellow_simple.svg.png"
38
+
39
+ description = f"The fine-tuned model took {time_fine} seconds while the original Whisper model took {raw_time} seconds.\nThe sentiment was evaluated form the fine-tuned model transcription as {lower(sentiment)}."
40
+ return text_sv, text_raw, path, description
41
 
42
  iface = gr.Interface(
43
  fn=transcribe,
44
  inputs=gr.Audio(sources=["microphone"], type="filepath"),
45
  outputs=[gr.Textbox(label="Fine-tuned transcription"),
46
  gr.Textbox(label="Whisper transcription"),
47
+ gr.Image(label="Sentiment from Fine-tuned transcription", width=250, height=250),
48
+ gr.Textbox(label="Description")],
49
  title="Finetuned Whisper Swedish Small",
50
  description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model.",
51
  )