sagar007 committed
Commit 68b4319 · verified · 1 parent: e0324df

Update app.py

Files changed (1)
  1. app.py +58 -17
app.py CHANGED
@@ -4,14 +4,49 @@ from transformers import pipeline
 from gtts import gTTS
 import os
 import gradio as gr
+import torch
+import spaces
+
+# Function to safely load pipeline
+def load_pipeline(model_name, **kwargs):
+    try:
+        return transformers.pipeline(model=model_name, **kwargs)
+    except Exception as e:
+        print(f"Error loading {model_name} pipeline: {e}")
+        return None
 
 # Load Shuka v1 for speech recognition
-shuka_pipe = transformers.pipeline(model='sarvamai/shuka_v1', trust_remote_code=True, device=0, torch_dtype='bfloat16')
+@spaces.GPU
+def load_shuka():
+    return load_pipeline('sarvamai/shuka_v1', trust_remote_code=True, torch_dtype=torch.float32)
 
 # Load sarvam-2b for text generation
-sarvam_pipe = pipeline(model='sarvamai/sarvam-2b-v0.5', device=0)
+@spaces.GPU
+def load_sarvam():
+    return load_pipeline('sarvamai/sarvam-2b-v0.5')
+
+try:
+    shuka_pipe = load_shuka()
+    sarvam_pipe = load_sarvam()
+    print("Models loaded successfully on GPU.")
+except RuntimeError as e:
+    if "CUDA out of memory" in str(e):
+        print("GPU out of memory. Falling back to CPU.")
+        shuka_pipe = load_pipeline('sarvamai/shuka_v1', trust_remote_code=True, torch_dtype=torch.float32, device="cpu")
+        sarvam_pipe = load_pipeline('sarvamai/sarvam-2b-v0.5', device="cpu")
+    elif "CUDA initialization" in str(e):
+        print("No GPU available. Using CPU.")
+        shuka_pipe = load_pipeline('sarvamai/shuka_v1', trust_remote_code=True, torch_dtype=torch.float32, device="cpu")
+        sarvam_pipe = load_pipeline('sarvamai/sarvam-2b-v0.5', device="cpu")
+    else:
+        print(f"An unexpected error occurred: {e}")
+        shuka_pipe = None
+        sarvam_pipe = None
 
 def process_audio_input(audio):
+    if shuka_pipe is None:
+        return "Error: Shuka v1 model is not available."
+
     audio, sr = librosa.load(audio, sr=16000)
     turns = [
         {'role': 'system', 'content': 'Respond naturally and informatively.'},
@@ -21,6 +56,9 @@ def process_audio_input(audio):
     return result[0]['generated_text']
 
 def generate_response(text_input):
+    if sarvam_pipe is None:
+        return "Error: sarvam-2b model is not available."
+
     response = sarvam_pipe(text_input, max_new_tokens=100, temperature=0.7, repetition_penalty=1.2)[0]['generated_text']
     return response
 
@@ -30,8 +68,6 @@ def text_to_speech(text, lang='hi'):
     return "response.mp3"
 
 def detect_language(text):
-    # This is a simplified language detection.
-    # You might want to use a more robust method in a production environment.
     lang_codes = {
         'bn': 'Bengali', 'gu': 'Gujarati', 'hi': 'Hindi', 'kn': 'Kannada',
         'ml': 'Malayalam', 'mr': 'Marathi', 'or': 'Oriya', 'pa': 'Punjabi',
@@ -43,20 +79,25 @@ def detect_language(text):
         return 'hi'  # Default to Hindi for simplicity
     return 'en'  # Default to English if no Indic script is detected
 
+@spaces.GPU
 def indic_language_assistant(audio):
-    # Transcribe audio input
-    transcription = process_audio_input(audio)
-
-    # Generate response
-    response = generate_response(transcription)
-
-    # Detect language
-    lang = detect_language(response)
-
-    # Convert response to speech
-    audio_response = text_to_speech(response, lang)
-
-    return transcription, response, audio_response
+    try:
+        # Transcribe audio input
+        transcription = process_audio_input(audio)
+
+        # Generate response
+        response = generate_response(transcription)
+
+        # Detect language
+        lang = detect_language(response)
+
+        # Convert response to speech
+        audio_response = text_to_speech(response, lang)
+
+        return transcription, response, audio_response
+    except Exception as e:
+        error_message = f"An error occurred: {str(e)}"
+        return error_message, error_message, None
 
 # Create Gradio interface
 iface = gr.Interface(
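The hunks above skip over the middle of process_audio_input, so the actual shuka_pipe call that produces result never appears in this diff. For orientation only, here is a minimal self-contained sketch of what that elided step presumably looks like, modeled on the usage example from the sarvamai/shuka_v1 model card; the function name transcribe_and_answer, the user turn, the input dictionary keys, and the max_new_tokens value are assumptions, not code copied from app.py.

import librosa
import transformers

# Sketch only: reconstructed from the sarvamai/shuka_v1 model card, not from this commit.
shuka_pipe = transformers.pipeline(model='sarvamai/shuka_v1', trust_remote_code=True)

def transcribe_and_answer(audio_path: str) -> str:
    # Load the clip at the 16 kHz sampling rate the model expects.
    audio, sr = librosa.load(audio_path, sr=16000)
    turns = [
        {'role': 'system', 'content': 'Respond naturally and informatively.'},
        {'role': 'user', 'content': '<|audio|>'},  # placeholder token for the audio clip
    ]
    # The pipeline takes the raw waveform, the chat turns, and the sampling rate together.
    result = shuka_pipe(
        {'audio': audio, 'turns': turns, 'sampling_rate': sr},
        max_new_tokens=512,  # assumed budget; app.py's real value falls outside the hunks
    )
    return result[0]['generated_text']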
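The last hunk ends mid-statement at iface = gr.Interface(, so the interface wiring is also outside the diff. Since indic_language_assistant returns a transcription string, a response string, and the path to the gTTS-generated MP3, and process_audio_input passes its argument straight to librosa.load, one plausible completion is sketched below; the component choices, labels, and title are hypothetical, not taken from this commit.

# Hypothetical wiring, consistent with the three return values of indic_language_assistant.
iface = gr.Interface(
    fn=indic_language_assistant,
    inputs=gr.Audio(type="filepath"),       # a file path works with librosa.load above
    outputs=[
        gr.Textbox(label="Transcription"),  # first return value
        gr.Textbox(label="Response"),       # second return value
        gr.Audio(label="Audio Response"),   # path to the generated response.mp3
    ],
    title="Indic Language Assistant",       # title text is a guess
)

iface.launch()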