techysanoj committed on
Commit
9412793
·
1 Parent(s): a68a51b

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +31 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import tempfile

import gradio as gr
import soundfile as sf
from huggingsound import SpeechRecognitionModel
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
4
+
5
# Load the pre-trained speech recognition model once at import time so every
# request reuses the same instance.
# The checkpoint name is kept in a single place; the model object is the only
# thing the transcription function needs, so no separate tokenizer is loaded.
model_name = "jonatasgrosman/wav2vec2-large-xlsr-53-english"
model = SpeechRecognitionModel(model_name)
9
+
10
+ # Define the speech recognition function
11
def transcribe_audio(audio):
    """Transcribe one recording captured by the Gradio audio input.

    Args:
        audio: ``(sample_rate, samples)`` pair as delivered by a Gradio audio
            input of type ``"numpy"``, or ``None`` when nothing was recorded.

    Returns:
        The recognized text, or an empty string when there is no audio.
    """
    if audio is None:
        return ""

    # The "numpy" audio format is a (sample_rate, samples) pair; write the
    # samples at their real rate instead of assuming 16 kHz.
    sample_rate, samples = audio

    # A per-call temporary directory keeps concurrent requests from
    # overwriting each other's recording and removes the file afterwards.
    with tempfile.TemporaryDirectory() as tmp_dir:
        audio_path = os.path.join(tmp_dir, "audio.wav")
        sf.write(audio_path, samples, samplerate=sample_rate)
        # transcribe() takes a list of paths and returns one result dict
        # per path, in the same order.
        transcriptions = model.transcribe([audio_path])

    return transcriptions[0]["transcription"]
16
+
17
# Gradio components: audio comes from the microphone in "numpy" form and the
# result is shown in a plain text box.
audio_input = gr.inputs.Audio(type="numpy", source="microphone")
text_output = gr.outputs.Textbox()

# Wire the transcription function to the components defined above.
interface = gr.Interface(
    transcribe_audio,
    audio_input,
    text_output,
    server_port=8000,
    title="Speech Recognition",
    description="Transcribe speech in real-time.",
)

# Start the web UI only when this file is run as a script, not on import.
if __name__ == "__main__":
    interface.launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio==2.3.0
2
+ torch
3
+ transformers
4
+ torchaudio
5
+ huggingsound
6
+ soundfile