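# Gradio demo app: transcribes Swedish speech with a fine-tuned Whisper model,
# compares it against the original Whisper small model, and shows the sentiment
# of the fine-tuned transcription as a smiley image.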
from transformers import pipeline
import gradio as gr
import time
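# Load the fine-tuned Swedish Whisper model, the original Whisper small model,
# and a Swedish BERT sentiment classifier.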
pipe_fine = pipeline(model="zeihers-mart/whisper-small-swedish-basic", device_map="auto")
pipe_raw = pipeline(model="openai/whisper-small", device_map="auto")
sa = pipeline('sentiment-analysis', model='marma/bert-base-swedish-cased-sentiment')
# Force Swedish transcription so Whisper does not try to auto-detect the language
pipe_fine.model.config.forced_decoder_ids = (
    pipe_fine.tokenizer.get_decoder_prompt_ids(
        language="sv", task="transcribe"
    )
)
pipe_raw.model.config.forced_decoder_ids = (
    pipe_raw.tokenizer.get_decoder_prompt_ids(
        language="sv", task="transcribe"
    )
)
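# Gradio callback: takes the recorded audio file path and returns both
# transcriptions, the sentiment image, and a timing summary.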
def transcribe(audio):
    # Transcribe with the fine-tuned model and time it
    start = time.time()
    text_sv = pipe_fine(audio)["text"]
    time_fine = time.time() - start
    print(f"Fine-tuned: audio transcribed in {time_fine} seconds: {text_sv}")

    # Transcribe with the original Whisper model and time it
    start = time.time()
    text_raw = pipe_raw(audio)["text"]
    time_raw = time.time() - start
    print(f"Raw: audio transcribed in {time_raw} seconds: {text_raw}")

    # Run sentiment analysis on the fine-tuned transcription
    sentiment = sa(text_sv)
    print(f"Sentiment result: {sentiment}")
    sentiment = sentiment[0]["label"]

    # Pick a happy or sad smiley depending on the predicted sentiment
    happy_path = "https://upload.wikimedia.org/wikipedia/commons/thumb/e/e0/SNice.svg/1200px-SNice.svg.png"
    sad_path = "https://upload.wikimedia.org/wikipedia/commons/thumb/4/42/Sad_smiley_yellow_simple.svg/1024px-Sad_smiley_yellow_simple.svg.png"
    path = happy_path if sentiment == "POSITIVE" else sad_path

    description = f"The fine-tuned model took {time_fine} seconds while the original Whisper model took {time_raw} seconds.\nThe sentiment was evaluated from the fine-tuned model transcription as {sentiment.lower()}."
    return text_sv, text_raw, path, description
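# Build the interface: microphone input, two transcription boxes,
# the sentiment smiley, and a description of the timing and sentiment.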
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs=[
        gr.Textbox(label="Fine-tuned transcription"),
        gr.Textbox(label="Whisper transcription"),
        gr.Image(label="Sentiment from Fine-tuned transcription", width=250, height=250),
        gr.Textbox(label="Description"),
    ],
    title="Finetuned Whisper Swedish Small",
    description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model.",
)
iface.launch()