Spaces:

sagar007
/

shuka_audio

Sleeping

App Files Files Community

sagar007 committed on Aug 24, 2024

Commit

879dfdc

verified ·

1 Parent(s): 5d4bcc0

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -197

app.py CHANGED Viewed

@@ -4,12 +4,6 @@ from transformers import pipeline, WhisperProcessor, WhisperForConditionalGenera
 from gtts import gTTS
 import gradio as gr
 import spaces
-import logging
-# Set up logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
 print("Using GPU for operations when available")
@@ -18,10 +12,9 @@ print("Using GPU for operations when available")
 def load_pipeline(model_name, **kwargs):
     try:
         device = 0 if torch.cuda.is_available() else "cpu"
-        logger.info(f"Loading {model_name} on device: {device}")
         return pipeline(model=model_name, device=device, **kwargs)
     except Exception as e:
-        logger.error(f"Error loading {model_name} pipeline: {e}")
         return None
 # Load Whisper model for speech recognition within a GPU-decorated function
@@ -29,30 +22,18 @@ def load_pipeline(model_name, **kwargs):
 def load_whisper():
     try:
         device = 0 if torch.cuda.is_available() else "cpu"
-        logger.info(f"Loading Whisper model on device: {device}")
         processor = WhisperProcessor.from_pretrained("openai/whisper-small")
         model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small").to(device)
         return processor, model
     except Exception as e:
-        logger.error(f"Error loading Whisper model: {e}")
         return None, None
 # Load sarvam-2b for text generation within a GPU-decorated function
 @spaces.GPU
 def load_sarvam():
-    logger.info("Loading sarvam-2b model")
     return load_pipeline('sarvamai/sarvam-2b-v0.5')
-# Global variables for models
-whisper_processor, whisper_model = load_whisper()
-sarvam_pipe = load_sarvam()
-# Check if models are loaded
-if whisper_processor is None or whisper_model is None:
-    logger.error("Whisper model failed to load")
-if sarvam_pipe is None:
-    logger.error("Sarvam model failed to load")
 # Process audio input within a GPU-decorated function
 @spaces.GPU
 def process_audio_input(audio, whisper_processor, whisper_model):
@@ -70,29 +51,15 @@ def process_audio_input(audio, whisper_processor, whisper_model):
 # Generate response within a GPU-decorated function
 @spaces.GPU
-def generate_response(transcription, sarvam_pipe):
-    if sarvam_pipe is None:
-        return "Error: Text generation model is not available."
-    try:
-        # Prepare the prompt
-        prompt = f"Human: {transcription}\n\nAssistant:"
-        # Generate response using the sarvam-2b model
-        response = sarvam_pipe(prompt, max_length=200, num_return_sequences=1, do_sample=True, temperature=0.7)[0]['generated_text']
-        # Extract the assistant's response
-        assistant_response = response.split("Assistant:")[-1].strip()
-        return assistant_response
-    except Exception as e:
-        return f"Error generating response: {str(e)}"
-# Text-to-speech function
 def text_to_speech(text, lang='hi'):
     try:
         # Use a better TTS engine for Indic languages
         if lang in ['hi', 'bn', 'gu', 'kn', 'ml', 'mr', 'or', 'pa', 'ta', 'te']:
             tts = gTTS(text=text, lang=lang, tld='co.in')  # Use Indian TLD
         else:
             tts = gTTS(text=text, lang=lang)
@@ -103,7 +70,7 @@ def text_to_speech(text, lang='hi'):
         print(f"Error in text-to-speech: {str(e)}")
         return None
-# Language detection function
 def detect_language(text):
     lang_codes = {
         'bn': 'Bengali', 'gu': 'Gujarati', 'hi': 'Hindi', 'kn': 'Kannada',
@@ -120,21 +87,31 @@ def detect_language(text):
             if any(ord(char) >= 0x0900 and ord(char) <= 0x097F for char in text):  # Devanagari script
                 return 'hi'
         return 'en'  # Default to English if no Indic script is detected
 @spaces.GPU
 def indic_language_assistant(input_type, audio_input, text_input):
     try:
         if input_type == "audio" and audio_input is not None:
-            if whisper_processor is None or whisper_model is None:
-                return "Error: Speech recognition model is not available.", "", None
             transcription = process_audio_input(audio_input, whisper_processor, whisper_model)
         elif input_type == "text" and text_input:
             transcription = text_input
         else:
-            return "Please provide either audio or text input.", "", None
-        if sarvam_pipe is None:
-            return transcription, "Error: Text generation model is not available.", None
         response = generate_response(transcription, sarvam_pipe)
         lang = detect_language(response)
@@ -142,157 +119,24 @@ def indic_language_assistant(input_type, audio_input, text_input):
         return transcription, response, audio_response
     except Exception as e:
-        logger.error(f"An error occurred in indic_language_assistant: {str(e)}")
-        return str(e), "An error occurred while processing your request.", None
-# Updated Custom CSS
-custom_css = """
-body {
-    background-color: #0b0f19;
-    color: #e2e8f0;
-    font-family: 'Arial', sans-serif;
-}
-#custom-header {
-    text-align: center;
-    padding: 20px 0;
-    background-color: #1a202c;
-    margin-bottom: 20px;
-    border-radius: 10px;
-}
-#custom-header h1 {
-    font-size: 2.5rem;
-    margin-bottom: 0.5rem;
-}
-#custom-header h1 .blue {
-    color: #60a5fa;
-}
-#custom-header h1 .pink {
-    color: #f472b6;
-}
-#custom-header h2 {
-    font-size: 1.5rem;
-    color: #94a3b8;
-}
-.suggestions {
-    display: flex;
-    justify-content: center;
-    flex-wrap: wrap;
-    gap: 1rem;
-    margin: 20px 0;
-}
-.suggestion {
-    background-color: #1e293b;
-    border-radius: 0.5rem;
-    padding: 1rem;
-    display: flex;
-    align-items: center;
-    transition: transform 0.3s ease;
-    width: 200px;
-}
-.suggestion:hover {
-    transform: translateY(-5px);
-}
-.suggestion-icon {
-    font-size: 1.5rem;
-    margin-right: 1rem;
-    background-color: #2d3748;
-    padding: 0.5rem;
-    border-radius: 50%;
-}
-.gradio-container {
-    max-width: 100% !important;
-}
-#component-0, #component-1, #component-2 {
-    max-width: 100% !important;
-}
-footer {
-    text-align: center;
-    margin-top: 2rem;
-    color: #64748b;
-}
-"""
-# Custom HTML for the header
-custom_header = """
-<div id="custom-header">
-    <h1>
-        <span class="blue">Hello,</span>
-        <span class="pink">User</span>
-    </h1>
-    <h2>How can I help you today?</h2>
-</div>
-"""
-# Custom HTML for suggestions
-custom_suggestions = """
-<div class="suggestions">
-    <div class="suggestion">
-        <span class="suggestion-icon">🎤</span>
-        <p>Speak in any Indic language</p>
-    </div>
-    <div class="suggestion">
-        <span class="suggestion-icon">⌨️</span>
-        <p>Type in any Indic language</p>
-    </div>
-    <div class="suggestion">
-        <span class="suggestion-icon">🤖</span>
-        <p>Get AI-generated responses</p>
-    </div>
-    <div class="suggestion">
-        <span class="suggestion-icon">🔊</span>
-        <p>Listen to audio responses</p>
-    </div>
-</div>
-"""
 # Create Gradio interface
-with gr.Blocks(css=custom_css, theme=gr.themes.Base().set(
-    body_background_fill="#0b0f19",
-    body_text_color="#e2e8f0",
-    button_primary_background_fill="#3b82f6",
-    button_primary_background_fill_hover="#2563eb",
-    button_primary_text_color="white",
-    block_title_text_color="#94a3b8",
-    block_label_text_color="#94a3b8",
-)) as iface:
-    gr.HTML(custom_header)
-    gr.HTML(custom_suggestions)
-    with gr.Row():
-        with gr.Column(scale=1):
-            gr.Markdown("### Indic Assistant")
-        with gr.Column(scale=1, min_width=100):
-            gr.Button("Try Advanced Features", size="sm")
-    input_type = gr.Radio(["audio", "text"], label="Input Type", value="audio")
-    audio_input = gr.Audio(type="filepath", label="Speak (if audio input selected)")
-    text_input = gr.Textbox(label="Type your message (if text input selected)")
-    submit_btn = gr.Button("Submit")
-    output_transcription = gr.Textbox(label="Transcription/Input")
-    output_response = gr.Textbox(label="Generated Response")
-    output_audio = gr.Audio(label="Audio Response")
-    submit_btn.click(
-        fn=indic_language_assistant,
-        inputs=[input_type, audio_input, text_input],
-        outputs=[output_transcription, output_response, output_audio]
-    )
-    gr.HTML("<footer>Powered by Indic Language AI</footer>")
 # Launch the app
 iface.launch()

 from gtts import gTTS
 import gradio as gr
 import spaces
 print("Using GPU for operations when available")
 def load_pipeline(model_name, **kwargs):
     """Build a transformers pipeline for ``model_name``.

     Args:
         model_name: Value passed to ``pipeline`` as its ``model`` argument.
         **kwargs: Extra keyword arguments forwarded to ``pipeline``. A
             caller-supplied ``device`` takes precedence over the automatic
             choice made here.

     Returns:
         The constructed pipeline, or ``None`` if loading raised (the error
         is printed rather than propagated).
     """
     try:
         # CUDA device index 0 when a GPU is visible, otherwise the CPU.
         # setdefault avoids passing ``device`` twice when the caller already
         # put one in kwargs (a duplicate keyword would raise TypeError and
         # be silently turned into ``None`` by the handler below).
         kwargs.setdefault("device", 0 if torch.cuda.is_available() else "cpu")
         return pipeline(model=model_name, **kwargs)
     except Exception as e:
         # Best-effort load: report the failure and let the caller deal
         # with a missing pipeline.
         print(f"Error loading {model_name} pipeline: {e}")
         return None
 # Load Whisper model for speech recognition within a GPU-decorated function
 def load_whisper():
     """Load the ``openai/whisper-small`` processor and model.

     The model is moved to CUDA device 0 when ``torch.cuda.is_available()``
     is true, otherwise to the CPU.

     Returns:
         ``(processor, model)`` on success, or ``(None, None)`` if any step
         raised; the error is printed.
     """
     checkpoint = "openai/whisper-small"
     try:
         target = 0 if torch.cuda.is_available() else "cpu"
         proc = WhisperProcessor.from_pretrained(checkpoint)
         net = WhisperForConditionalGeneration.from_pretrained(checkpoint)
         loaded = (proc, net.to(target))
     except Exception as err:
         print(f"Error loading Whisper model: {err}")
         loaded = (None, None)
     return loaded
 # Load sarvam-2b for text generation within a GPU-decorated function
 @spaces.GPU
 def load_sarvam():
     """Load the sarvam-2b pipeline via ``load_pipeline``.

     Returns:
         Whatever ``load_pipeline`` returns for ``'sarvamai/sarvam-2b-v0.5'``.
     """
     sarvam_model_id = 'sarvamai/sarvam-2b-v0.5'
     return load_pipeline(sarvam_model_id)
 # Process audio input within a GPU-decorated function
 @spaces.GPU
 def process_audio_input(audio, whisper_processor, whisper_model):
 # Text-to-speech function (the @spaces.GPU decorator below was left behind when generate_response moved)
 @spaces.GPU
 def text_to_speech(text, lang='hi'):
     try:
         # Use a better TTS engine for Indic languages
         if lang in ['hi', 'bn', 'gu', 'kn', 'ml', 'mr', 'or', 'pa', 'ta', 'te']:
+            # You might want to use a different TTS library here
+            # For example, you could use the Google Cloud Text-to-Speech API
+            # or a specialized Indic language TTS library
+            # This is a placeholder for a better Indic TTS solution
             tts = gTTS(text=text, lang=lang, tld='co.in')  # Use Indian TLD
         else:
             tts = gTTS(text=text, lang=lang)
         print(f"Error in text-to-speech: {str(e)}")
         return None
+# Language detection function
 def detect_language(text):
     lang_codes = {
         'bn': 'Bengali', 'gu': 'Gujarati', 'hi': 'Hindi', 'kn': 'Kannada',
             if any(ord(char) >= 0x0900 and ord(char) <= 0x097F for char in text):  # Devanagari script
                 return 'hi'
         return 'en'  # Default to English if no Indic script is detected
@spaces.GPU
def generate_response(transcription, sarvam_pipe):
    """Generate a reply to ``transcription`` with the given pipeline.

    Args:
        transcription: Prompt text (transcribed speech or typed input).
        sarvam_pipe: Text-generation pipeline, or ``None`` if it failed
            to load.

    Returns:
        The generated continuation with surrounding whitespace removed, or
        a human-readable error string when the pipeline is unavailable, the
        prompt is empty, or generation raises.
    """
    if sarvam_pipe is None:
        return "Error: Text generation model is not available."
    if not transcription or not str(transcription).strip():
        # Nothing to condition on; skip the generation call entirely.
        return "Error: No input text to respond to."
    try:
        outputs = sarvam_pipe(
            transcription,
            # Budget the reply itself: max_length would also count the
            # prompt's tokens and leave long prompts no room for a reply.
            max_new_tokens=100,
            num_return_sequences=1,
            # Return only the continuation, not the prompt echoed back, so
            # the response box and the spoken audio hold just the reply.
            return_full_text=False,
        )
        return outputs[0]['generated_text'].strip()
    except Exception as e:
        return f"Error generating response: {str(e)}"
 @spaces.GPU
 def indic_language_assistant(input_type, audio_input, text_input):
     try:
+        # Load models within the GPU-decorated function
+        whisper_processor, whisper_model = load_whisper()
+        sarvam_pipe = load_sarvam()
         if input_type == "audio" and audio_input is not None:
             transcription = process_audio_input(audio_input, whisper_processor, whisper_model)
         elif input_type == "text" and text_input:
             transcription = text_input
         else:
+            return "Please provide either audio or text input.", "No input provided.", None
         response = generate_response(transcription, sarvam_pipe)
         lang = detect_language(response)
         return transcription, response, audio_response
     except Exception as e:
+        error_message = f"An error occurred: {str(e)}"
+        return error_message, error_message, None
 # Create Gradio interface
# Three inputs (mode selector, audio clip path, free text) are passed to
# indic_language_assistant; its three return values fill the three outputs.
iface = gr.Interface(
    title="Indic Language Virtual Assistant",
    description="Speak or type in any supported Indic language or English. The assistant will respond in text and audio.",
    fn=indic_language_assistant,
    inputs=[
        # Selects which of the two inputs below is used.
        gr.Radio(["audio", "text"], label="Input Type", value="audio"),
        gr.Audio(type="filepath", label="Speak (if audio input selected)"),
        gr.Textbox(label="Type your message (if text input selected)"),
    ],
    outputs=[
        gr.Textbox(label="Transcription/Input"),
        gr.Textbox(label="Generated Response"),
        gr.Audio(label="Audio Response"),
    ],
)

# Start serving the interface.
iface.launch()