techysanoj committed on
Commit
4d14671
·
1 Parent(s): 6ab3f9b

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +6 -8
  2. app.py +16 -36
  3. run.py +17 -0
README.md CHANGED
@@ -1,13 +1,11 @@
 
1
  ---
2
- title: Avishkaar Check
3
- emoji: 🏃
4
- colorFrom: yellow
5
- colorTo: red
6
  sdk: gradio
7
  sdk_version: 3.35.2
8
- app_file: app.py
9
  pinned: false
10
- license: openrail
11
  ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+
2
  ---
3
+ title: automatic-speech-recognition
4
+ emoji: 🔥
5
+ colorFrom: indigo
6
+ colorTo: indigo
7
  sdk: gradio
8
  sdk_version: 3.35.2
9
+ app_file: run.py
10
  pinned: false
 
11
  ---
 
 
app.py CHANGED
@@ -1,37 +1,17 @@
1
- import torch
2
- import torchaudio
3
- from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
4
  import gradio as gr
5
-
6
- # Load pre-trained model and tokenizer
7
- model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h")
8
- tokenizer = Wav2Vec2Tokenizer.from_pretrained("facebook/wav2vec2-large-960h")
9
-
10
- def transcribe_speech(audio_file):
11
- # Load and convert audio file to waveform
12
- waveform, _ = torchaudio.load(audio_file)
13
-
14
- # Preprocess waveform
15
- input_values = tokenizer(waveform, return_tensors="pt").input_values
16
-
17
- # Perform inference
18
- with torch.no_grad():
19
- logits = model(input_values).logits
20
-
21
- # Get predicted transcription
22
- predicted_ids = torch.argmax(logits, dim=-1)
23
- transcription = tokenizer.batch_decode(predicted_ids)[0]
24
-
25
- return transcription
26
-
27
- # Define Gradio interface
28
- def speech_recognition(audio_file):
29
- transcription = transcribe_speech(audio_file)
30
- return transcription
31
-
32
- inputs = gr.inputs.Audio(type="file", label="Upload Audio File")
33
- outputs = gr.outputs.Textbox(label="Transcription")
34
- interface = gr.Interface(fn=speech_recognition, inputs=inputs, outputs=outputs)
35
-
36
- # Run the Gradio interface
37
- interface.launch()
 
 
 
 
import gradio as gr
import os

# Save your HF API token from https://hf.co/settings/tokens as an env
# variable to avoid rate limiting.
auth_token = os.getenv("auth_token")

# Automatically load the interface from a HF model.
# You can remove the hf_token argument if you don't care about rate limiting.
# Fix: use gr.load(...) with hf_token= — gr.Interface.load and its api_key=
# kwarg are the deprecated forms; this matches run.py in the same commit.
demo = gr.load(
    "huggingface/facebook/wav2vec2-base-960h",
    title="Speech-to-text",
    inputs="mic",
    description="Let me try to guess what you're saying!",
    hf_token=auth_token
)

demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
run.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os

import gradio as gr

# Store an HF API token from https://hf.co/settings/tokens in the
# "auth_token" environment variable to avoid rate limiting.
hf_api_token = os.getenv("auth_token")

# Build the demo directly from a model hosted on the Hugging Face Hub.
# The hf_token argument may be dropped if rate limiting is not a concern.
demo = gr.load(
    "huggingface/facebook/wav2vec2-base-960h",
    title="Speech-to-text",
    inputs="mic",
    description="Let me try to guess what you're saying!",
    hf_token=hf_api_token,
)

demo.launch()