Update app.py
app.py
CHANGED
@@ -4,14 +4,49 @@ from transformers import pipeline
 from gtts import gTTS
 import os
 import gradio as gr
+import torch
+import spaces
+
+# Function to safely load pipeline
+def load_pipeline(model_name, **kwargs):
+    try:
+        return transformers.pipeline(model=model_name, **kwargs)
+    except Exception as e:
+        print(f"Error loading {model_name} pipeline: {e}")
+        return None
 
 # Load Shuka v1 for speech recognition
-
+@spaces.GPU
+def load_shuka():
+    return load_pipeline('sarvamai/shuka_v1', trust_remote_code=True, torch_dtype=torch.float32)
 
 # Load sarvam-2b for text generation
-
+@spaces.GPU
+def load_sarvam():
+    return load_pipeline('sarvamai/sarvam-2b-v0.5')
+
+try:
+    shuka_pipe = load_shuka()
+    sarvam_pipe = load_sarvam()
+    print("Models loaded successfully on GPU.")
+except RuntimeError as e:
+    if "CUDA out of memory" in str(e):
+        print("GPU out of memory. Falling back to CPU.")
+        shuka_pipe = load_pipeline('sarvamai/shuka_v1', trust_remote_code=True, torch_dtype=torch.float32, device="cpu")
+        sarvam_pipe = load_pipeline('sarvamai/sarvam-2b-v0.5', device="cpu")
+    elif "CUDA initialization" in str(e):
+        print("No GPU available. Using CPU.")
+        shuka_pipe = load_pipeline('sarvamai/shuka_v1', trust_remote_code=True, torch_dtype=torch.float32, device="cpu")
+        sarvam_pipe = load_pipeline('sarvamai/sarvam-2b-v0.5', device="cpu")
+    else:
+        print(f"An unexpected error occurred: {e}")
+        shuka_pipe = None
+        sarvam_pipe = None
 
 def process_audio_input(audio):
+    if shuka_pipe is None:
+        return "Error: Shuka v1 model is not available."
+
     audio, sr = librosa.load(audio, sr=16000)
     turns = [
         {'role': 'system', 'content': 'Respond naturally and informatively.'},
@@ -21,6 +56,9 @@ def process_audio_input(audio):
     return result[0]['generated_text']
 
 def generate_response(text_input):
+    if sarvam_pipe is None:
+        return "Error: sarvam-2b model is not available."
+
     response = sarvam_pipe(text_input, max_new_tokens=100, temperature=0.7, repetition_penalty=1.2)[0]['generated_text']
     return response
 
@@ -30,8 +68,6 @@ def text_to_speech(text, lang='hi'):
     return "response.mp3"
 
 def detect_language(text):
-    # This is a simplified language detection.
-    # You might want to use a more robust method in a production environment.
     lang_codes = {
         'bn': 'Bengali', 'gu': 'Gujarati', 'hi': 'Hindi', 'kn': 'Kannada',
         'ml': 'Malayalam', 'mr': 'Marathi', 'or': 'Oriya', 'pa': 'Punjabi',
@@ -43,20 +79,25 @@ def detect_language(text):
         return 'hi' # Default to Hindi for simplicity
     return 'en' # Default to English if no Indic script is detected
 
+@spaces.GPU
 def indic_language_assistant(audio):
-
-
-
-
-
-
-
-
-
-
-
-
-
+    try:
+        # Transcribe audio input
+        transcription = process_audio_input(audio)
+
+        # Generate response
+        response = generate_response(transcription)
+
+        # Detect language
+        lang = detect_language(response)
+
+        # Convert response to speech
+        audio_response = text_to_speech(response, lang)
+
+        return transcription, response, audio_response
+    except Exception as e:
+        error_message = f"An error occurred: {str(e)}"
+        return error_message, error_message, None
 
 # Create Gradio interface
 iface = gr.Interface(
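The diff view cuts off at `iface = gr.Interface(`, so the Space's actual interface arguments are not shown above. As a purely illustrative sketch (not the committed code), an interface consistent with `indic_language_assistant` returning a transcription string, a response string, and an mp3 path might be wired up as follows; the component choices, labels, and Gradio 4.x `sources`/`type` parameters are assumptions.

# Illustrative sketch only -- the real gr.Interface arguments are truncated in the diff above.
# Assumes Gradio 4.x and the three return values added in this commit:
# (transcription, response, audio_response), where audio_response is a path to an mp3 file.
import gradio as gr

def indic_language_assistant(audio):
    # Stand-in for the function defined in app.py above.
    return "transcription", "response", None

iface = gr.Interface(
    fn=indic_language_assistant,
    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="Response"),
        gr.Audio(label="Audio Response"),
    ],
    title="Indic Language Assistant",
)

if __name__ == "__main__":
    iface.launch()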