# Hugging Face Space: meeting transcription and summarization demo
import gradio as gr
from transformers import pipeline
import numpy as np

# Load the pre-trained models once at startup (loading per request would be
# prohibitively slow): wav2vec2 for English speech-to-text, DistilBART for
# abstractive summarization.
asr_model = pipeline("automatic-speech-recognition", model="jonatasgrosman/wav2vec2-large-xlsr-53-english")
summarization_model = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
# Function to transcribe and summarize
def transcribe_and_summarize(audio):
    """Transcribe an audio input and summarize the resulting text.

    Args:
        audio: Either a filepath string (file upload), a
            ``(sample_rate, np.ndarray)`` tuple (mic recording with
            ``type="numpy"``), or ``None`` when nothing was provided.

    Returns:
        A ``(transcription, summary)`` tuple of strings; on missing input,
        an error message and ``None``.
    """
    if audio is None:
        return "Error: No audio file provided.", None
    # Mic recordings arrive as (sample_rate, data). The ASR pipeline needs
    # the sampling rate to resample correctly, so pass a dict with both
    # instead of the bare array (the original dropped the rate entirely).
    if isinstance(audio, tuple):
        sample_rate, data = audio
        data = np.asarray(data, dtype=np.float32)
        # Browser mic data is int16 PCM; scale to the [-1, 1] float range
        # the model expects. (Heuristic — already-normalized input passes
        # through unchanged.)
        if data.size and np.abs(data).max() > 1.0:
            data = data / 32768.0
        audio = {"sampling_rate": sample_rate, "raw": data}
    transcription = asr_model(audio)["text"]
    summary = summarization_model(
        transcription, max_length=130, min_length=30, do_sample=False
    )[0]["summary_text"]
    return transcription, summary
# Create a Gradio interface. type="filepath" hands the callback a path
# string for uploads; the tuple branch in the callback covers numpy-style
# mic input should the component type be switched.
interface = gr.Interface(
    fn=transcribe_and_summarize,
    inputs=gr.Audio(type="filepath"),
    outputs=["text", "text"],  # transcription, then summary
    title="Meeting Transcription and Summarization",
    description="Upload an audio file or record using the mic to get a transcription and summary.",
)

# Launch the interface
interface.launch()