Update app.py
app.py
CHANGED
@@ -4,14 +4,49 @@ from transformers import pipeline
 from gtts import gTTS
 import os
 import gradio as gr
+import torch
+import spaces
+
+# Function to safely load pipeline
+def load_pipeline(model_name, **kwargs):
+    try:
+        return transformers.pipeline(model=model_name, **kwargs)
+    except Exception as e:
+        print(f"Error loading {model_name} pipeline: {e}")
+        return None
 
 # Load Shuka v1 for speech recognition
-
+@spaces.GPU
+def load_shuka():
+    return load_pipeline('sarvamai/shuka_v1', trust_remote_code=True, torch_dtype=torch.float32)
 
 # Load sarvam-2b for text generation
-
+@spaces.GPU
+def load_sarvam():
+    return load_pipeline('sarvamai/sarvam-2b-v0.5')
+
+try:
+    shuka_pipe = load_shuka()
+    sarvam_pipe = load_sarvam()
+    print("Models loaded successfully on GPU.")
+except RuntimeError as e:
+    if "CUDA out of memory" in str(e):
+        print("GPU out of memory. Falling back to CPU.")
+        shuka_pipe = load_pipeline('sarvamai/shuka_v1', trust_remote_code=True, torch_dtype=torch.float32, device="cpu")
+        sarvam_pipe = load_pipeline('sarvamai/sarvam-2b-v0.5', device="cpu")
+    elif "CUDA initialization" in str(e):
+        print("No GPU available. Using CPU.")
+        shuka_pipe = load_pipeline('sarvamai/shuka_v1', trust_remote_code=True, torch_dtype=torch.float32, device="cpu")
+        sarvam_pipe = load_pipeline('sarvamai/sarvam-2b-v0.5', device="cpu")
+    else:
+        print(f"An unexpected error occurred: {e}")
+        shuka_pipe = None
+        sarvam_pipe = None
 
 def process_audio_input(audio):
+    if shuka_pipe is None:
+        return "Error: Shuka v1 model is not available."
+
     audio, sr = librosa.load(audio, sr=16000)
     turns = [
         {'role': 'system', 'content': 'Respond naturally and informatively.'},
@@ -21,6 +56,9 @@ def process_audio_input(audio):
     return result[0]['generated_text']
 
 def generate_response(text_input):
+    if sarvam_pipe is None:
+        return "Error: sarvam-2b model is not available."
+
     response = sarvam_pipe(text_input, max_new_tokens=100, temperature=0.7, repetition_penalty=1.2)[0]['generated_text']
     return response
 
@@ -30,8 +68,6 @@ def text_to_speech(text, lang='hi'):
     return "response.mp3"
 
 def detect_language(text):
-    # This is a simplified language detection.
-    # You might want to use a more robust method in a production environment.
     lang_codes = {
         'bn': 'Bengali', 'gu': 'Gujarati', 'hi': 'Hindi', 'kn': 'Kannada',
         'ml': 'Malayalam', 'mr': 'Marathi', 'or': 'Oriya', 'pa': 'Punjabi',
@@ -43,20 +79,25 @@ def detect_language(text):
         return 'hi' # Default to Hindi for simplicity
     return 'en' # Default to English if no Indic script is detected
 
+@spaces.GPU
 def indic_language_assistant(audio):
-
-
-
-
-
-
-
-
-
-
-
-
-
+    try:
+        # Transcribe audio input
+        transcription = process_audio_input(audio)
+
+        # Generate response
+        response = generate_response(transcription)
+
+        # Detect language
+        lang = detect_language(response)
+
+        # Convert response to speech
+        audio_response = text_to_speech(response, lang)
+
+        return transcription, response, audio_response
+    except Exception as e:
+        error_message = f"An error occurred: {str(e)}"
+        return error_message, error_message, None
 
 # Create Gradio interface
 iface = gr.Interface(
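The diff view cuts off at `iface = gr.Interface(`, so the Space's actual interface arguments are not shown above. As a purely illustrative sketch (not the committed code), an interface consistent with `indic_language_assistant` returning a transcription string, a response string, and an mp3 path might be wired up as follows; the component choices, labels, and Gradio 4.x `sources`/`type` parameters are assumptions.

# Illustrative sketch only -- the real gr.Interface arguments are truncated in the diff above.
# Assumes Gradio 4.x and the three return values added in this commit:
# (transcription, response, audio_response), where audio_response is a path to an mp3 file.
import gradio as gr

def indic_language_assistant(audio):
    # Stand-in for the function defined in app.py above.
    return "transcription", "response", None

iface = gr.Interface(
    fn=indic_language_assistant,
    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="Response"),
        gr.Audio(label="Audio Response"),
    ],
    title="Indic Language Assistant",
)

if __name__ == "__main__":
    iface.launch()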