from transformers import pipeline
import gradio as gr

# Build the speech-recognition pipeline once at import time so that every
# request reuses it. No model is named, so the library chooses its default
# checkpoint for this task.
p = pipeline(task="automatic-speech-recognition")

def transcribe(audio, state=""):
    """Transcribe one audio clip and append it to the running transcript.

    Args:
        audio: Audio input forwarded unchanged to the module-level ASR
            pipeline ``p``. ``None`` means there is nothing to transcribe.
        state: Transcript accumulated by earlier calls. ``None`` is treated
            as an empty transcript.

    Returns:
        A ``(transcript, transcript)`` pair: the same updated string twice,
        one copy for the displayed output and one for the carried-over state.
    """
    if state is None:
        state = ""
    if audio is None:
        # Nothing to recognise: leave the transcript untouched instead of
        # passing None to the pipeline, which would raise.
        return state, state

    text = p(audio)["text"]
    # A trailing space separates this clip's text from the next one.
    state += text + " "
    return state, state
  
# Audio widget: takes an uploaded file and hands transcribe() its file path.
audio_input = gr.inputs.Audio(
    source="upload",
    type="filepath",
    label="Record/ Drop audio",
)

# "state" appears in both inputs and outputs so the transcript returned by one
# call is fed back into the next. live=True re-runs transcribe whenever an
# input changes instead of waiting for a submit click.
demo = gr.Interface(
    fn=transcribe,
    inputs=[audio_input, "state"],
    outputs=["textbox", "state"],
    title="Automatic Speech Recognition test",
    description="Enable the recognition spoken language into text by computers.",
    theme="huggingface",
    live=True,
)
demo.launch()