Arnab Das committed on
Commit
85bca98
·
1 Parent(s): c23ea28

Added transcription interface

Browse files
Files changed (2) hide show
  1. app.py +35 -0
  2. requirements.txt +2 -1
app.py CHANGED
@@ -2,6 +2,7 @@ import torch
2
  import gradio as gr
3
  import models as MOD
4
  import process_data as PD
 
5
 
6
  model_master = {
7
  "SSL-AASIST (Trained on ASV-Spoof5)": {"eer_threshold": 3.3330237865448,
@@ -60,6 +61,40 @@ file_proc = gr.Interface(
60
  cache_examples=True,
61
  allow_flagging="never",
62
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  with demo:
65
  gr.TabbedInterface([file_proc], ["Analyze Audio File"])
 
2
  import gradio as gr
3
  import models as MOD
4
  import process_data as PD
5
+ from transformers import pipeline
6
 
7
  model_master = {
8
  "SSL-AASIST (Trained on ASV-Spoof5)": {"eer_threshold": 3.3330237865448,
 
61
  cache_examples=True,
62
  allow_flagging="never",
63
  )
64
# ---------------------------------------------------------------------------
# ASR interface
#
# Module-level speech-recognition pipeline shared by every transcription
# request. It is built once at import time so the Whisper checkpoint is not
# reloaded per call. Audio is processed in 30-second chunks on the CPU.
# ---------------------------------------------------------------------------
pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-large-v3",
    chunk_length_s=30,
    device="cpu",
)
72
+
73
def transcribe(inputs):
    """Transcribe an audio file with the module-level Whisper pipeline.

    Args:
        inputs: Value delivered by the Gradio audio component (a file path,
            since the component is declared with ``type="filepath"``), or
            ``None`` when nothing was uploaded or recorded.

    Returns:
        A ``(language, text)`` tuple: the language reported by the pipeline
        (``"unknown"`` when none is reported) and the transcribed text.

    Raises:
        gr.Error: If no audio was submitted.
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    op = pipe(
        inputs,
        batch_size=1,
        generate_kwargs={"task": "transcribe"},
        return_timestamps=True,
        # Ask the pipeline to attach the detected language to each chunk.
        return_language=True,
    )

    # The pipeline result is {"text": ..., "chunks": [...]}; it carries no
    # top-level "language" entry for a plain file input, so indexing
    # op["language"] raises KeyError. Prefer a top-level value if one is ever
    # present, otherwise take the first language reported on a chunk.
    lang = op.get("language")
    if not lang:
        lang = next(
            (
                chunk["language"]
                for chunk in (op.get("chunks") or [])
                if chunk.get("language")
            ),
            "unknown",
        )

    return lang, op["text"]
82
+
83
# Gradio interface wrapping `transcribe`: one audio input (microphone or
# upload, delivered to the callback as a file path) and two text outputs
# (detected language, transcription). Flagging is disabled.
#
# NOTE(review): `sources=` and `max_length=` are Gradio 4 keyword names for
# gr.Audio; confirm the deployed Gradio version accepts them (Gradio 3 used
# `source=`).
transcribe_proc = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(
            type="filepath",
            label="Speech file (<30s)",
            max_length=30,
            sources=["microphone", "upload"],
        ),
    ],
    outputs=[
        gr.Text(label="Predicted Language", info="Language identification is performed automatically."),
        gr.Text(label="Predicted transcription", info="Best hypothesis."),
    ],
    title="Transcribe Anything.",
    description=(
        # Typo fix: the original text read "Automatactic".
        "Automatic language identification and transcription service by Whisper Large V3. Upload a .wav or .flac file."
    ),
    allow_flagging="never",
)
98
 
99
  with demo:
100
  gr.TabbedInterface([file_proc], ["Analyze Audio File"])
requirements.txt CHANGED
@@ -2,4 +2,5 @@ gradio==3.50.2
2
  torch==2.3.0
3
  fairseq @ git+https://github.com/facebookresearch/fairseq.git
4
  librosa==0.10.1
5
- numpy==1.24.4
 
 
2
  torch==2.3.0
3
  fairseq @ git+https://github.com/facebookresearch/fairseq.git
4
  librosa==0.10.1
5
+ numpy==1.24.4
6
+ git+https://github.com/huggingface/transformers