GiorgiSekhniashvili's picture
Update app.py
7f125dd
raw
history blame contribute delete
784 Bytes
import gradio as gr
import numpy as np
from transformers import WhisperForConditionalGeneration, AutoProcessor
# Hugging Face Hub id of the Whisper checkpoint this demo serves.
model_name = "GiorgiSekhniashvili/whisper-tiny-ka-01"

# Both objects are created once, at import time, from the same checkpoint id;
# `predict` reuses them on every request instead of reloading.
model = WhisperForConditionalGeneration.from_pretrained(
    pretrained_model_name_or_path=model_name,
)
processor = AutoProcessor.from_pretrained(
    pretrained_model_name_or_path=model_name,
)
# Sample rate the audio is resampled to before it is handed to the processor.
_TARGET_SAMPLE_RATE = 16_000


def _to_mono_float(sample_rate, samples, target_rate=_TARGET_SAMPLE_RATE):
    """Return *samples* as a 1-D float32 array resampled to *target_rate*."""
    audio = np.asarray(samples)
    if np.issubdtype(audio.dtype, np.integer):
        # Integer PCM is scaled into [-1, 1] using the dtype's own range.
        audio = audio.astype(np.float32) / np.iinfo(audio.dtype).max
    else:
        audio = audio.astype(np.float32)
    if audio.ndim > 1:
        # assumes multi-channel audio is shaped (samples, channels) — TODO confirm
        audio = audio.mean(axis=1)
    if sample_rate != target_rate and audio.size:
        # Linear interpolation keeps this dependency-free; it applies no
        # anti-aliasing filter, which is a deliberate simplicity trade-off.
        n_out = max(1, int(round(audio.shape[0] * target_rate / sample_rate)))
        src_t = np.arange(audio.shape[0]) / sample_rate
        dst_t = np.arange(n_out) / target_rate
        audio = np.interp(dst_t, src_t, audio).astype(np.float32)
    return audio


def predict(audio):
    """Transcribe one recording with the module-level Whisper model.

    Args:
        audio: ``(sample_rate, samples)`` tuple as produced by a Gradio audio
            input with ``type="numpy"``, or ``None`` when nothing was recorded.

    Returns:
        The list of decoded strings from ``processor.batch_decode``.

    Raises:
        gr.Error: If *audio* is ``None``.
    """
    if audio is None:
        # Previously this fell through to a TypeError on tuple unpacking.
        raise gr.Error("No audio received. Please record something first.")

    sr, waveform = audio
    # The recording's real rate was previously ignored and the data was
    # labelled as 16 kHz regardless; resample so the label is true.
    samples = _to_mono_float(sr, waveform)
    input_values = processor(
        samples,
        sampling_rate=_TARGET_SAMPLE_RATE,
        return_tensors="pt",
    )

    # `forced_decoder_ids` was referenced but never defined (NameError on
    # every call).
    # NOTE(review): language is assumed to be Georgian from the "ka" in the
    # checkpoint name — confirm.
    forced_decoder_ids = processor.get_decoder_prompt_ids(
        language="georgian",
        task="transcribe",
    )
    res = model.generate(
        input_values["input_features"],
        forced_decoder_ids=forced_decoder_ids,
        max_new_tokens=448,
    )
    return processor.batch_decode(res, skip_special_tokens=True)
def _transcribe_to_text(audio):
    """Adapt `predict` for a text output by joining its decoded strings."""
    return " ".join(predict(audio))


# Microphone input delivered to the callback as a (sample_rate, ndarray) tuple.
mic = gr.Audio(source="microphone", type="numpy", label="Speak here...")

# The callback produces transcribed text, so the output component must be
# "text"; the previous "audio" output could not render the decoded strings.
demo = gr.Interface(_transcribe_to_text, mic, "text")

if __name__ == "__main__":
    demo.launch()