import gradio as gr
from transformers import pipeline
import numpy as np
# Load the pre-trained models for transcription and summarization
asr_model = pipeline("automatic-speech-recognition", model="jonatasgrosman/wav2vec2-large-xlsr-53-english")
summarization_model = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
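
# Both pipelines run on CPU by default (device=-1). On GPU hardware, passing
# device=0 to the pipeline() calls above would move inference to the first
# CUDA device; a sketch, assuming a CUDA-enabled torch install:
#   asr_model = pipeline("automatic-speech-recognition",
#                        model="jonatasgrosman/wav2vec2-large-xlsr-53-english",
#                        device=0)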
# Transcribe the audio, then summarize the resulting text
def transcribe_and_summarize(audio):
    if audio is None:
        return "Error: No audio file provided.", ""

    # With type="filepath" (see the Audio component below), both uploads and
    # mic recordings arrive as a path string, which the ASR pipeline accepts
    # directly. If the component is switched to type="numpy", mic recordings
    # arrive as a (sample_rate, int16 data) tuple instead.
    if isinstance(audio, tuple):
        sample_rate, data = audio
        # Normalize int16 PCM to float32 in [-1, 1] and keep the sample rate
        # so the pipeline can resample to the model's expected rate.
        audio = {"sampling_rate": sample_rate, "raw": data.astype(np.float32) / 32768.0}

    transcription = asr_model(audio)["text"]
    summary = summarization_model(
        transcription, max_length=130, min_length=30, do_sample=False
    )[0]["summary_text"]
    return transcription, summary
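
# Long meetings can exceed what a single forward pass handles comfortably.
# A minimal sketch of chunked inference, assuming the wav2vec2 CTC model
# loaded above; chunk_length_s and stride_length_s are transformers pipeline
# arguments that split the audio into overlapping windows:
def transcribe_long(audio_path):
    # 30 s windows with 4 s / 2 s strides so words at chunk edges are kept
    return asr_model(audio_path, chunk_length_s=30, stride_length_s=(4, 2))["text"]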

# Create a Gradio interface
interface = gr.Interface(
    fn=transcribe_and_summarize,
    # Allow both file upload and mic recording, matching the description below
    inputs=gr.Audio(type="filepath", sources=["upload", "microphone"], label="Meeting Audio"),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="Summary"),
    ],
    title="Meeting Transcription and Summarization",
    description="Upload an audio file or record using the mic to get a transcription and summary.",
)
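
# Transcribing long files can outlast default HTTP timeouts; Gradio's built-in
# queue serializes requests (queue() is a standard Interface method, shown
# here as an optional step).
interface.queue()
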
# Launch the interface
interface.launch()