"""Gradio demo that transcribes speech with a fine-tuned Whisper checkpoint.

The UI exposes two tabs backed by the same transcription function: one
records from the microphone, the other accepts an uploaded audio file.
"""

from typing import Optional

import gradio as gr
import numpy as np
from transformers import pipeline

MODEL_ID = "Thaihoa/whisper-small-vi"

# Built once at module load so every request reuses the same pipeline object.
pipe = pipeline("automatic-speech-recognition", model=MODEL_ID)


def transcribe_speech(filepath: Optional[str]) -> str:
    """Transcribe the audio file at *filepath* and return the recognized text.

    Args:
        filepath: Path to the audio file, as produced by a Gradio ``Audio``
            component configured with ``type="filepath"``. Gradio passes
            ``None`` when the form is submitted without any audio.

    Returns:
        The ``"text"`` entry of the pipeline output.

    Raises:
        gr.Error: If no audio was provided, so the UI shows a readable
            message instead of an opaque pipeline failure.
    """
    if not filepath:
        raise gr.Error(
            "No audio provided. Please record or upload audio before submitting."
        )

    output = pipe(
        filepath,
        generate_kwargs={
            "task": "transcribe",
            # Update with the language you've fine-tuned on.
            "language": "vietnamese",
            # Passed here rather than as a top-level pipeline argument, which
            # recent transformers releases deprecate.
            "max_new_tokens": 256,
        },
        # Long recordings are split into 30-second chunks and batched.
        chunk_length_s=30,
        batch_size=8,
    )
    return output["text"]


demo = gr.Blocks()

# Tab 1: record audio from the microphone.
mic_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(sources="microphone", type="filepath"),
    outputs=gr.components.Textbox(),
)

# Tab 2: upload an existing audio file.
file_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(sources="upload", type="filepath"),
    outputs=gr.components.Textbox(),
)

with demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Transcribe Microphone", "Transcribe Audio File"],
    )

# Launch only when run as a script so importing this module (e.g. to reuse
# `transcribe_speech` or `demo`) does not start a web server.
if __name__ == "__main__":
    demo.launch(debug=True)