sagar007 committed on
Commit
879dfdc
·
verified ·
1 Parent(s): 5d4bcc0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -197
app.py CHANGED
@@ -4,12 +4,6 @@ from transformers import pipeline, WhisperProcessor, WhisperForConditionalGenera
4
  from gtts import gTTS
5
  import gradio as gr
6
  import spaces
7
- import logging
8
-
9
- # Set up logging
10
- logging.basicConfig(level=logging.INFO)
11
- logger = logging.getLogger(__name__)
12
-
13
 
14
  print("Using GPU for operations when available")
15
 
@@ -18,10 +12,9 @@ print("Using GPU for operations when available")
18
  def load_pipeline(model_name, **kwargs):
19
  try:
20
  device = 0 if torch.cuda.is_available() else "cpu"
21
- logger.info(f"Loading {model_name} on device: {device}")
22
  return pipeline(model=model_name, device=device, **kwargs)
23
  except Exception as e:
24
- logger.error(f"Error loading {model_name} pipeline: {e}")
25
  return None
26
 
27
  # Load Whisper model for speech recognition within a GPU-decorated function
@@ -29,30 +22,18 @@ def load_pipeline(model_name, **kwargs):
29
  def load_whisper():
30
  try:
31
  device = 0 if torch.cuda.is_available() else "cpu"
32
- logger.info(f"Loading Whisper model on device: {device}")
33
  processor = WhisperProcessor.from_pretrained("openai/whisper-small")
34
  model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small").to(device)
35
  return processor, model
36
  except Exception as e:
37
- logger.error(f"Error loading Whisper model: {e}")
38
  return None, None
39
 
40
  # Load sarvam-2b for text generation within a GPU-decorated function
41
  @spaces.GPU
42
  def load_sarvam():
43
- logger.info("Loading sarvam-2b model")
44
  return load_pipeline('sarvamai/sarvam-2b-v0.5')
45
 
46
- # Global variables for models
47
- whisper_processor, whisper_model = load_whisper()
48
- sarvam_pipe = load_sarvam()
49
-
50
- # Check if models are loaded
51
- if whisper_processor is None or whisper_model is None:
52
- logger.error("Whisper model failed to load")
53
- if sarvam_pipe is None:
54
- logger.error("Sarvam model failed to load")
55
-
56
  # Process audio input within a GPU-decorated function
57
  @spaces.GPU
58
  def process_audio_input(audio, whisper_processor, whisper_model):
@@ -70,29 +51,15 @@ def process_audio_input(audio, whisper_processor, whisper_model):
70
 
71
  # Generate response within a GPU-decorated function
72
  @spaces.GPU
73
- def generate_response(transcription, sarvam_pipe):
74
- if sarvam_pipe is None:
75
- return "Error: Text generation model is not available."
76
-
77
- try:
78
- # Prepare the prompt
79
- prompt = f"Human: {transcription}\n\nAssistant:"
80
-
81
- # Generate response using the sarvam-2b model
82
- response = sarvam_pipe(prompt, max_length=200, num_return_sequences=1, do_sample=True, temperature=0.7)[0]['generated_text']
83
-
84
- # Extract the assistant's response
85
- assistant_response = response.split("Assistant:")[-1].strip()
86
-
87
- return assistant_response
88
- except Exception as e:
89
- return f"Error generating response: {str(e)}"
90
-
91
- # Text-to-speech function
92
  def text_to_speech(text, lang='hi'):
93
  try:
94
  # Use a better TTS engine for Indic languages
95
  if lang in ['hi', 'bn', 'gu', 'kn', 'ml', 'mr', 'or', 'pa', 'ta', 'te']:
 
 
 
 
 
96
  tts = gTTS(text=text, lang=lang, tld='co.in') # Use Indian TLD
97
  else:
98
  tts = gTTS(text=text, lang=lang)
@@ -103,7 +70,7 @@ def text_to_speech(text, lang='hi'):
103
  print(f"Error in text-to-speech: {str(e)}")
104
  return None
105
 
106
- # Language detection function
107
  def detect_language(text):
108
  lang_codes = {
109
  'bn': 'Bengali', 'gu': 'Gujarati', 'hi': 'Hindi', 'kn': 'Kannada',
@@ -120,21 +87,31 @@ def detect_language(text):
120
  if any(ord(char) >= 0x0900 and ord(char) <= 0x097F for char in text): # Devanagari script
121
  return 'hi'
122
  return 'en' # Default to English if no Indic script is detected
 
 
 
 
 
 
 
 
 
 
 
123
 
124
  @spaces.GPU
125
  def indic_language_assistant(input_type, audio_input, text_input):
126
  try:
 
 
 
 
127
  if input_type == "audio" and audio_input is not None:
128
- if whisper_processor is None or whisper_model is None:
129
- return "Error: Speech recognition model is not available.", "", None
130
  transcription = process_audio_input(audio_input, whisper_processor, whisper_model)
131
  elif input_type == "text" and text_input:
132
  transcription = text_input
133
  else:
134
- return "Please provide either audio or text input.", "", None
135
-
136
- if sarvam_pipe is None:
137
- return transcription, "Error: Text generation model is not available.", None
138
 
139
  response = generate_response(transcription, sarvam_pipe)
140
  lang = detect_language(response)
@@ -142,157 +119,24 @@ def indic_language_assistant(input_type, audio_input, text_input):
142
 
143
  return transcription, response, audio_response
144
  except Exception as e:
145
- logger.error(f"An error occurred in indic_language_assistant: {str(e)}")
146
- return str(e), "An error occurred while processing your request.", None
147
-
148
- # Updated Custom CSS
149
- custom_css = """
150
- body {
151
- background-color: #0b0f19;
152
- color: #e2e8f0;
153
- font-family: 'Arial', sans-serif;
154
- }
155
-
156
- #custom-header {
157
- text-align: center;
158
- padding: 20px 0;
159
- background-color: #1a202c;
160
- margin-bottom: 20px;
161
- border-radius: 10px;
162
- }
163
-
164
- #custom-header h1 {
165
- font-size: 2.5rem;
166
- margin-bottom: 0.5rem;
167
- }
168
-
169
- #custom-header h1 .blue {
170
- color: #60a5fa;
171
- }
172
-
173
- #custom-header h1 .pink {
174
- color: #f472b6;
175
- }
176
-
177
- #custom-header h2 {
178
- font-size: 1.5rem;
179
- color: #94a3b8;
180
- }
181
-
182
- .suggestions {
183
- display: flex;
184
- justify-content: center;
185
- flex-wrap: wrap;
186
- gap: 1rem;
187
- margin: 20px 0;
188
- }
189
-
190
- .suggestion {
191
- background-color: #1e293b;
192
- border-radius: 0.5rem;
193
- padding: 1rem;
194
- display: flex;
195
- align-items: center;
196
- transition: transform 0.3s ease;
197
- width: 200px;
198
- }
199
-
200
- .suggestion:hover {
201
- transform: translateY(-5px);
202
- }
203
-
204
- .suggestion-icon {
205
- font-size: 1.5rem;
206
- margin-right: 1rem;
207
- background-color: #2d3748;
208
- padding: 0.5rem;
209
- border-radius: 50%;
210
- }
211
-
212
- .gradio-container {
213
- max-width: 100% !important;
214
- }
215
-
216
- #component-0, #component-1, #component-2 {
217
- max-width: 100% !important;
218
- }
219
-
220
- footer {
221
- text-align: center;
222
- margin-top: 2rem;
223
- color: #64748b;
224
- }
225
- """
226
-
227
- # Custom HTML for the header
228
- custom_header = """
229
- <div id="custom-header">
230
- <h1>
231
- <span class="blue">Hello,</span>
232
- <span class="pink">User</span>
233
- </h1>
234
- <h2>How can I help you today?</h2>
235
- </div>
236
- """
237
-
238
- # Custom HTML for suggestions
239
- custom_suggestions = """
240
- <div class="suggestions">
241
- <div class="suggestion">
242
- <span class="suggestion-icon">🎤</span>
243
- <p>Speak in any Indic language</p>
244
- </div>
245
- <div class="suggestion">
246
- <span class="suggestion-icon">⌨️</span>
247
- <p>Type in any Indic language</p>
248
- </div>
249
- <div class="suggestion">
250
- <span class="suggestion-icon">🤖</span>
251
- <p>Get AI-generated responses</p>
252
- </div>
253
- <div class="suggestion">
254
- <span class="suggestion-icon">🔊</span>
255
- <p>Listen to audio responses</p>
256
- </div>
257
- </div>
258
- """
259
-
260
  # Create Gradio interface
261
- with gr.Blocks(css=custom_css, theme=gr.themes.Base().set(
262
- body_background_fill="#0b0f19",
263
- body_text_color="#e2e8f0",
264
- button_primary_background_fill="#3b82f6",
265
- button_primary_background_fill_hover="#2563eb",
266
- button_primary_text_color="white",
267
- block_title_text_color="#94a3b8",
268
- block_label_text_color="#94a3b8",
269
- )) as iface:
270
- gr.HTML(custom_header)
271
- gr.HTML(custom_suggestions)
272
-
273
- with gr.Row():
274
- with gr.Column(scale=1):
275
- gr.Markdown("### Indic Assistant")
276
- with gr.Column(scale=1, min_width=100):
277
- gr.Button("Try Advanced Features", size="sm")
278
-
279
- input_type = gr.Radio(["audio", "text"], label="Input Type", value="audio")
280
- audio_input = gr.Audio(type="filepath", label="Speak (if audio input selected)")
281
- text_input = gr.Textbox(label="Type your message (if text input selected)")
282
-
283
- submit_btn = gr.Button("Submit")
284
-
285
- output_transcription = gr.Textbox(label="Transcription/Input")
286
- output_response = gr.Textbox(label="Generated Response")
287
- output_audio = gr.Audio(label="Audio Response")
288
-
289
- submit_btn.click(
290
- fn=indic_language_assistant,
291
- inputs=[input_type, audio_input, text_input],
292
- outputs=[output_transcription, output_response, output_audio]
293
- )
294
-
295
- gr.HTML("<footer>Powered by Indic Language AI</footer>")
296
 
297
  # Launch the app
298
  iface.launch()
 
4
  from gtts import gTTS
5
  import gradio as gr
6
  import spaces
 
 
 
 
 
 
7
 
8
  print("Using GPU for operations when available")
9
 
 
12
  def load_pipeline(model_name, **kwargs):
13
  try:
14
  device = 0 if torch.cuda.is_available() else "cpu"
 
15
  return pipeline(model=model_name, device=device, **kwargs)
16
  except Exception as e:
17
+ print(f"Error loading {model_name} pipeline: {e}")
18
  return None
19
 
20
  # Load Whisper model for speech recognition within a GPU-decorated function
 
22
  def load_whisper():
23
  try:
24
  device = 0 if torch.cuda.is_available() else "cpu"
 
25
  processor = WhisperProcessor.from_pretrained("openai/whisper-small")
26
  model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small").to(device)
27
  return processor, model
28
  except Exception as e:
29
+ print(f"Error loading Whisper model: {e}")
30
  return None, None
31
 
32
  # Load sarvam-2b for text generation within a GPU-decorated function
33
  @spaces.GPU
34
  def load_sarvam():
 
35
  return load_pipeline('sarvamai/sarvam-2b-v0.5')
36
 
 
 
 
 
 
 
 
 
 
 
37
  # Process audio input within a GPU-decorated function
38
  @spaces.GPU
39
  def process_audio_input(audio, whisper_processor, whisper_model):
 
51
 
52
  # Generate response within a GPU-decorated function
53
  @spaces.GPU
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  def text_to_speech(text, lang='hi'):
55
  try:
56
  # Use a better TTS engine for Indic languages
57
  if lang in ['hi', 'bn', 'gu', 'kn', 'ml', 'mr', 'or', 'pa', 'ta', 'te']:
58
+ # You might want to use a different TTS library here
59
+ # For example, you could use the Google Cloud Text-to-Speech API
60
+ # or a specialized Indic language TTS library
61
+
62
+ # This is a placeholder for a better Indic TTS solution
63
  tts = gTTS(text=text, lang=lang, tld='co.in') # Use Indian TLD
64
  else:
65
  tts = gTTS(text=text, lang=lang)
 
70
  print(f"Error in text-to-speech: {str(e)}")
71
  return None
72
 
73
+ # Replace the existing detect_language function with this improved version
74
  def detect_language(text):
75
  lang_codes = {
76
  'bn': 'Bengali', 'gu': 'Gujarati', 'hi': 'Hindi', 'kn': 'Kannada',
 
87
  if any(ord(char) >= 0x0900 and ord(char) <= 0x097F for char in text): # Devanagari script
88
  return 'hi'
89
  return 'en' # Default to English if no Indic script is detected
90
+ @spaces.GPU
91
+ def generate_response(transcription, sarvam_pipe):
92
+ if sarvam_pipe is None:
93
+ return "Error: Text generation model is not available."
94
+
95
+ try:
96
+ # Generate response using the sarvam-2b model
97
+ response = sarvam_pipe(transcription, max_length=100, num_return_sequences=1)[0]['generated_text']
98
+ return response
99
+ except Exception as e:
100
+ return f"Error generating response: {str(e)}"
101
 
102
  @spaces.GPU
103
  def indic_language_assistant(input_type, audio_input, text_input):
104
  try:
105
+ # Load models within the GPU-decorated function
106
+ whisper_processor, whisper_model = load_whisper()
107
+ sarvam_pipe = load_sarvam()
108
+
109
  if input_type == "audio" and audio_input is not None:
 
 
110
  transcription = process_audio_input(audio_input, whisper_processor, whisper_model)
111
  elif input_type == "text" and text_input:
112
  transcription = text_input
113
  else:
114
+ return "Please provide either audio or text input.", "No input provided.", None
 
 
 
115
 
116
  response = generate_response(transcription, sarvam_pipe)
117
  lang = detect_language(response)
 
119
 
120
  return transcription, response, audio_response
121
  except Exception as e:
122
+ error_message = f"An error occurred: {str(e)}"
123
+ return error_message, error_message, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  # Create Gradio interface
125
+ iface = gr.Interface(
126
+ fn=indic_language_assistant,
127
+ inputs=[
128
+ gr.Radio(["audio", "text"], label="Input Type", value="audio"),
129
+ gr.Audio(type="filepath", label="Speak (if audio input selected)"),
130
+ gr.Textbox(label="Type your message (if text input selected)")
131
+ ],
132
+ outputs=[
133
+ gr.Textbox(label="Transcription/Input"),
134
+ gr.Textbox(label="Generated Response"),
135
+ gr.Audio(label="Audio Response")
136
+ ],
137
+ title="Indic Language Virtual Assistant",
138
+ description="Speak or type in any supported Indic language or English. The assistant will respond in text and audio."
139
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
 
141
  # Launch the app
142
  iface.launch()