Update README.md
Browse files
README.md
CHANGED
@@ -65,35 +65,3 @@ The following hyperparameters were used during training:
|
|
65 |
|
66 |
|
67 |
|
68 |
-
import gradio as gr
|
69 |
-
import torch
|
70 |
-
from transformers import WhisperForConditionalGeneration, WhisperTokenizer
|
71 |
-
|
72 |
-
torch.backends.cudnn.enabled = True
|
73 |
-
|
74 |
-
# Load the speech-to-text model from Hugging Face
|
75 |
-
model_name = "Ranjit/Whisper_v2.0"
|
76 |
-
task = "transcribe"
|
77 |
-
tokenizer = WhisperTokenizer.from_pretrained(model_name, task=task)
|
78 |
-
model = WhisperForConditionalGeneration.from_pretrained(model_name).to("cuda")
|
79 |
-
|
80 |
-
# Define a function to transcribe speech to text
|
81 |
-
def transcribe_audio(audio):
|
82 |
-
input_values = tokenizer(audio, return_tensors="pt").input_values.to("cuda")
|
83 |
-
logits = model(input_values).logits
|
84 |
-
predicted_ids = torch.argmax(logits, dim=-1)
|
85 |
-
transcription = tokenizer.batch_decode(predicted_ids)[0]
|
86 |
-
return transcription
|
87 |
-
|
88 |
-
# Create the Gradio interface
|
89 |
-
gradio_interface = gr.Interface(
|
90 |
-
fn=transcribe_audio,
|
91 |
-
inputs="microphone",
|
92 |
-
outputs="text",
|
93 |
-
capture_session=True, # Leverage GPU acceleration
|
94 |
-
title="Speech-to-Text",
|
95 |
-
description="Transcribe speech to text using a Wav2Vec2 model.",
|
96 |
-
theme="default",
|
97 |
-
)
|
98 |
-
|
99 |
-
gradio_interface.launch(share=True)
|
|
|
65 |
|
66 |
|
67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|