Spaces:
Runtime error
Runtime error
from transformers import pipeline | |
import gradio as gr | |
import time | |
pipe_fine = pipeline(model="zeihers-mart/whisper-small-swedish-basic", device_map="auto") | |
pipe_raw = pipeline(model="openai/whisper-small", device_map="auto") | |
sa = pipeline('sentiment-analysis', model='marma/bert-base-swedish-cased-sentiment') | |
# force swedish | |
pipe_fine.model.config.forced_decoder_ids = ( | |
pipe_fine.tokenizer.get_decoder_prompt_ids( | |
language="sv", task="transcribe" | |
) | |
) | |
pipe_raw.model.config.forced_decoder_ids = ( | |
pipe_raw.tokenizer.get_decoder_prompt_ids( | |
language="sv", task="transcribe" | |
) | |
) | |
def transcribe(audio): | |
start = time.time() | |
text_sv = pipe_fine(audio)["text"] | |
time_fine = time.time() - start | |
print(f"Fine-tuned: audio transcribed in {time_fine} seconds: {text_sv}") | |
start = time.time() | |
text_raw= pipe_raw(audio)["text"] | |
time_raw = time.time() - start | |
print(f"Raw: audio transcribed in {time_raw} seconds: {text_raw}") | |
sentiment= sa(text_sv) | |
print(f"Sentiment result: {sentiment}") | |
sentiment= sentiment[0]["label"] | |
happy_path = "https://upload.wikimedia.org/wikipedia/commons/thumb/e/e0/SNice.svg/1200px-SNice.svg.png" | |
sad_path = "https://upload.wikimedia.org/wikipedia/commons/thumb/0/06/Face-sad.svg/480px-Face-sad.svg.png" | |
path = happy_path if sentiment == "POSITIVE" else sad_path | |
description = f"The fine-tuned model took {time_fine} seconds while the original Whisper model took {time_raw} seconds.\nThe sentiment was evaluated from the fine-tuned model transcription as {sentiment.lower()}." | |
return text_sv, text_raw, path, description | |
iface = gr.Interface( | |
fn=transcribe, | |
inputs=gr.Audio(sources=["microphone"], type="filepath"), | |
outputs=[gr.Textbox(label="Fine-tuned transcription"), | |
gr.Textbox(label="Whisper transcription"), | |
gr.Image(label="Sentiment from Fine-tuned transcription", width=250, height=250), | |
gr.Textbox(label="Description")], | |
title="Finetuned Whisper Swedish Small", | |
description="Realtime demo for Swedish speech recognition using a fine-tuned Whisper small model.", | |
) | |
iface.launch() |