Spaces:

sagar007
/

shuka_audio

Sleeping

App Files Files Community

sagar007 committed on Aug 23, 2024

Commit

649867e

verified ·

1 Parent(s): 3104f70

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -24

app.py CHANGED Viewed

@@ -1,54 +1,52 @@
-import transformers
 import librosa
-from transformers import pipeline
 from gtts import gTTS
-import os
 import gradio as gr
-import torch
 import spaces
 # Function to safely load pipeline
 def load_pipeline(model_name, **kwargs):
     try:
-        return transformers.pipeline(model=model_name, **kwargs)
     except Exception as e:
         print(f"Error loading {model_name} pipeline: {e}")
         return None
-# Load Shuka v1 for speech recognition
 @spaces.GPU
-def load_shuka():
     try:
-        return load_pipeline('sarvamai/shuka_v1', trust_remote_code=True, torch_dtype=torch.float32)
     except Exception as e:
-        print(f"Error loading Shuka v1: {e}")
-        return None
 # Load sarvam-2b for text generation
 @spaces.GPU
 def load_sarvam():
-    try:
-        return load_pipeline('sarvamai/sarvam-2b-v0.5')
-    except Exception as e:
-        print(f"Error loading sarvam-2b: {e}")
-        return None
 # Attempt to load models
-shuka_pipe = load_shuka()
 sarvam_pipe = load_sarvam()
 def process_audio_input(audio):
-    if shuka_pipe is None:
-        return "Error: Shuka v1 model is not available. Please type your message instead."
     try:
         audio, sr = librosa.load(audio, sr=16000)
-        turns = [
-            {'role': 'system', 'content': 'Respond naturally and informatively.'},
-            {'role': 'user', 'content': '<|audio|>'}
-        ]
-        result = shuka_pipe({'audio': audio, 'turns': turns, 'sampling_rate': sr}, max_new_tokens=512)
-        return result[0]['generated_text']
     except Exception as e:
         return f"Error processing audio: {str(e)}. Please type your message instead."
@@ -83,6 +81,7 @@ def detect_language(text):
             return 'hi'  # Default to Hindi for simplicity
     return 'en'  # Default to English if no Indic script is detected
 def indic_language_assistant(input_type, audio_input, text_input):
     try:
         if input_type == "audio" and audio_input is not None:

+import torch
 import librosa
+from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
 from gtts import gTTS
 import gradio as gr
 import spaces
+# Check for GPU availability
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"Using device: {device}")
 # Function to safely load pipeline
 def load_pipeline(model_name, **kwargs):
     try:
+        return pipeline(model=model_name, device=device, **kwargs)
     except Exception as e:
         print(f"Error loading {model_name} pipeline: {e}")
         return None
+# Load Whisper model for speech recognition
 @spaces.GPU
+def load_whisper():
     try:
+        processor = WhisperProcessor.from_pretrained("openai/whisper-small")
+        model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small").to(device)
+        return processor, model
     except Exception as e:
+        print(f"Error loading Whisper model: {e}")
+        return None, None
 # Load sarvam-2b for text generation
 @spaces.GPU
 def load_sarvam():
+    return load_pipeline('sarvamai/sarvam-2b-v0.5')
 # Attempt to load models
+whisper_processor, whisper_model = load_whisper()
 sarvam_pipe = load_sarvam()
 def process_audio_input(audio):
+    if whisper_processor is None or whisper_model is None:
+        return "Error: Speech recognition model is not available. Please type your message instead."
     try:
         audio, sr = librosa.load(audio, sr=16000)
+        input_features = whisper_processor(audio, sampling_rate=sr, return_tensors="pt").input_features.to(device)
+        predicted_ids = whisper_model.generate(input_features)
+        transcription = whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
+        return transcription
     except Exception as e:
         return f"Error processing audio: {str(e)}. Please type your message instead."
             return 'hi'  # Default to Hindi for simplicity
     return 'en'  # Default to English if no Indic script is detected
+@spaces.GPU
 def indic_language_assistant(input_type, audio_input, text_input):
     try:
         if input_type == "audio" and audio_input is not None: