|
import gradio as gr
import numpy as np
from transformers import WhisperForConditionalGeneration, AutoProcessor
|
# Hugging Face Hub identifier of the checkpoint; the same id is used for both
# the processor and the model so they always come from one repository.
model_name = "GiorgiSekhniashvili/whisper-tiny-ka-01"

# Loaded once at import time and shared by every call to predict().
processor = AutoProcessor.from_pretrained(model_name)
model = WhisperForConditionalGeneration.from_pretrained(model_name)
|
|
|
def predict(audio):
    """Transcribe one recording delivered by the Gradio audio component.

    Args:
        audio: ``(sample_rate, samples)`` tuple, or ``None`` when the user
            submitted without recording anything.

    Returns:
        The list of strings produced by ``processor.batch_decode`` (one entry
        per generated sequence). An empty list is returned when there is no
        audio to transcribe.
    """
    if audio is None:
        return []

    sr, waveform = audio
    samples = np.asarray(waveform)
    if samples.size == 0:
        return []

    # Integer PCM must be scaled to floats in roughly [-1, 1] before feature
    # extraction; float input is only cast to float32.
    if np.issubdtype(samples.dtype, np.integer):
        scale = float(np.iinfo(samples.dtype).max)
        samples = samples.astype(np.float32) / scale
    else:
        samples = samples.astype(np.float32)

    # Mix multi-channel recordings down to mono.
    # NOTE(review): assumes a (num_samples, num_channels) layout - confirm
    # against the Gradio version in use.
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    # The processor is told the audio is 16 kHz, so the signal really has to
    # be at that rate. Linear interpolation keeps this dependency-free; it
    # has no anti-aliasing filter, so swap in a polyphase resampler if
    # transcription quality on high-rate input matters.
    target_sr = 16_000
    if sr != target_sr:
        n_out = int(round(samples.shape[0] * target_sr / sr))
        if n_out == 0:
            return []
        src_times = np.arange(samples.shape[0]) / sr
        dst_times = np.arange(n_out) / target_sr
        samples = np.interp(dst_times, src_times, samples).astype(np.float32)

    input_values = processor(samples, sampling_rate=target_sr, return_tensors="pt")

    # This name was previously undefined, so every call raised NameError.
    # NOTE(review): the checkpoint name suggests a Georgian ("ka") fine-tune,
    # hence language="georgian" - confirm this is the intended language.
    forced_decoder_ids = processor.get_decoder_prompt_ids(
        language="georgian", task="transcribe"
    )

    res = model.generate(
        input_values["input_features"],
        forced_decoder_ids=forced_decoder_ids,
        max_new_tokens=448,
    )
    return processor.batch_decode(res, skip_special_tokens=True)
|
|
|
def _transcribe_for_ui(audio):
    """Join the list of strings returned by predict() into one display string."""
    return " ".join(predict(audio))


# Microphone input handed to the callback as a numpy-based value.
mic = gr.Audio(source="microphone", type="numpy", label="Speak here...")

# predict() produces decoded text, so the output component must be "text";
# the previous "audio" output could not render the transcription.
demo = gr.Interface(_transcribe_for_ui, mic, "text")

if __name__ == "__main__":
    demo.launch()
|
|
|
|
|
|