Spaces:

arnabdas8901
/

Find_The_Fake

Sleeping

App Files Files Community

Arnab Das committed on Sep 18, 2024

Commit

85bca98

1 Parent(s): c23ea28

Added transcription interface

Browse files

Files changed (2) hide show

app.py +35 -0
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import torch
 import gradio as gr
 import models as MOD
 import process_data as PD
 model_master = {
     "SSL-AASIST (Trained on ASV-Spoof5)": {"eer_threshold": 3.3330237865448,
@@ -60,6 +61,40 @@ file_proc = gr.Interface(
     cache_examples=True,
     allow_flagging="never",
 )
 with demo:
     gr.TabbedInterface([file_proc], ["Analyze Audio File"])

 import gradio as gr
 import models as MOD
 import process_data as PD
+from transformers import pipeline
 model_master = {
     "SSL-AASIST (Trained on ASV-Spoof5)": {"eer_threshold": 3.3330237865448,
     cache_examples=True,
     allow_flagging="never",
 )
+# -----------------------------------------------------------------------------
+# ASR interface: a Whisper large-v3 speech-recognition pipeline, run on CPU and
+# fed audio in 30-second chunks. Built once at import time and shared by every
+# transcription request.
+pipe = pipeline(
+    "automatic-speech-recognition",
+    model="openai/whisper-large-v3",
+    device="cpu",
+    chunk_length_s=30,
+)
+def transcribe(inputs):
+    """Transcribe an audio file with the module-level Whisper ``pipe``.
+
+    Args:
+        inputs: Path of the audio file handed over by the Gradio ``Audio``
+            component (``type="filepath"``), or ``None`` when nothing was
+            submitted.
+
+    Returns:
+        A ``(language, text)`` tuple. ``language`` is ``"unknown"`` when the
+        pipeline output carries no language tag.
+
+    Raises:
+        gr.Error: If no audio was submitted.
+    """
+    if inputs is None:
+        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
+    op = pipe(
+        inputs,
+        batch_size=1,
+        generate_kwargs={"task": "transcribe"},
+        return_timestamps=True,
+        # Ask Whisper to tag the detected language; without this flag the
+        # pipeline output contains no language information at all.
+        return_language=True,
+    )
+    # The language tag is attached to the entries of op["chunks"], not to the
+    # top-level dict, so indexing op["language"] directly raises KeyError.
+    # Prefer a top-level value if one is present, otherwise take the first
+    # chunk that carries a tag.
+    lang = op.get("language")
+    if not lang:
+        lang = next(
+            (chunk["language"] for chunk in op.get("chunks") or [] if chunk.get("language")),
+            "unknown",
+        )
+    text = op["text"]
+    return lang, text
+# Gradio interface for the transcription tab: one audio input (microphone or
+# upload, capped at 30 seconds) mapped through transcribe() to two text outputs,
+# the detected language and the transcription.
+transcribe_proc = gr.Interface(
+    fn=transcribe,
+    inputs=[
+        gr.Audio(type="filepath", label="Speech file (<30s)", max_length=30, sources=["microphone", "upload"])
+    ],
+    outputs=[
+        gr.Text(label="Predicted Language", info="Language identification is performed automatically."),
+        gr.Text(label="Predicted transcription", info="Best hypothesis."),
+    ],
+    title="Transcribe Anything.",
+    description=(
+        "Automatic language identification and transcription service by Whisper Large V3. Upload a .wav or .flac file."
+    ),
+    allow_flagging="never"
+)
 with demo:
     gr.TabbedInterface([file_proc], ["Analyze Audio File"])

requirements.txt CHANGED Viewed

@@ -2,4 +2,5 @@ gradio==3.50.2
 torch==2.3.0
 fairseq @ git+https://github.com/facebookresearch/fairseq.git
 librosa==0.10.1
-numpy==1.24.4

 torch==2.3.0
 fairseq @ git+https://github.com/facebookresearch/fairseq.git
 librosa==0.10.1
+numpy==1.24.4
+git+https://github.com/huggingface/transformers