Spaces:
Runtime error
Runtime error
import gradio as gr | |
from transformers import pipeline, Wav2Vec2ProcessorWithLM | |
from pyannote.audio import Pipeline | |
from librosa import load, resample | |
from rpunct import RestorePuncts | |
# --- Model setup: everything below is loaded once, at import time. ---

# Checkpoint identifier shared by the processor and the ASR pipeline.
asr_model = 'patrickvonplaten/wav2vec2-base-100h-with-lm'

# The processor supplies the tokenizer, feature extractor and decoder that
# are handed to the ASR pipeline explicitly.
processor = Wav2Vec2ProcessorWithLM.from_pretrained(asr_model)
asr = pipeline(
    'automatic-speech-recognition',
    model=asr_model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    decoder=processor.decoder,
)

# Speaker segmentation pipeline; its output is iterated per speaker turn.
speaker_segmentation = Pipeline.from_pretrained("pyannote/speaker-segmentation")

# Punctuation restorer applied to the text of each speaker turn.
rpunct = RestorePuncts()
# Sampling rate (Hz) the recording is decoded to before speech recognition.
_TARGET_SAMPLE_RATE = 16000


def transcribe(filepath):
    """Transcribe a recording and attribute the words to speaker turns.

    Args:
        filepath: Path of the audio file to process.

    Returns:
        A ``(diarized_output, full_text)`` tuple of strings.
        ``diarized_output`` holds one line per speaker turn that received at
        least one word, formatted as
        ``<speaker>: ''<punctuated text>'' from <start>-<end>``.
        ``full_text`` is the lower-cased transcript of the whole recording.
    """
    # Decode directly at the target rate. This avoids decoding at librosa's
    # default rate and resampling a second time, and avoids calling
    # ``resample`` with positional rates (keyword-only in recent librosa).
    speech, _native_rate = load(filepath, sr=_TARGET_SAMPLE_RATE)

    # pyannote pipelines take an audio file (a path, or a mapping with the
    # waveform and its sample rate), not a bare sample array, so the path is
    # passed through unchanged.
    speaker_output = speaker_segmentation(filepath)

    text = asr(speech, return_timestamps="word")
    full_text = text['text'].lower()
    chunks = text['chunks']

    lines = []
    i = 0  # index of the next word chunk not yet assigned to a turn
    for turn, _, speaker in speaker_output.itertracks(yield_label=True):
        # Take every remaining word whose end timestamp falls within this
        # turn; ``i`` only moves forward, so each word is used at most once.
        words = []
        while i < len(chunks) and chunks[i]['timestamp'][1] <= turn.end:
            words.append(chunks[i]['text'].lower())
            i += 1
        if not words:
            continue
        # The trailing space keeps the text handed to the punctuation model
        # identical to the word-by-word concatenation used previously.
        diarized = rpunct.punctuate(" ".join(words) + " ")
        lines.append(
            "{}: ''{}'' from {:.3f}-{:.3f}\n".format(
                speaker, diarized, turn.start, turn.end
            )
        )
    # NOTE(review): words ending after the last turn's end are not assigned
    # to any speaker and appear only in ``full_text``.
    diarized_output = "".join(lines)
    return diarized_output, full_text
# --- Gradio front end ---

# Required microphone input; the recording is passed to the callback as a
# file path.
mic = gr.inputs.Audio(
    label='Speech input',
    source='microphone',
    type='filepath',
    optional=False,
)

# One text box per value returned by ``transcribe``, in the same order.
diarized_transcript = gr.outputs.Textbox(label='Diarized Output', type='auto')
full_transcript = gr.outputs.Textbox(label='Full Transcript', type='auto')

iface = gr.Interface(
    fn=transcribe,
    inputs=[mic],
    outputs=[diarized_transcript, full_transcript],
    description='Testing transcription',
    theme='huggingface',
)

# Start serving the interface.
iface.launch()