sagar007 committed on
Commit
649867e
·
verified ·
1 Parent(s): 3104f70

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -24
app.py CHANGED
@@ -1,54 +1,52 @@
1
- import transformers
2
  import librosa
3
- from transformers import pipeline
4
  from gtts import gTTS
5
- import os
6
  import gradio as gr
7
- import torch
8
  import spaces
9
 
 
 
 
 
# Function to safely load pipeline
def load_pipeline(model_name, **kwargs):
    """Build a transformers pipeline for ``model_name``; return None on failure.

    Extra keyword arguments are forwarded unchanged to
    ``transformers.pipeline``. Any exception raised while building the
    pipeline is printed and swallowed, so callers must check for None.
    """
    try:
        loaded = transformers.pipeline(model=model_name, **kwargs)
    except Exception as err:
        print(f"Error loading {model_name} pipeline: {err}")
        return None
    return loaded
17
 
# Load Shuka v1 for speech recognition
@spaces.GPU
def load_shuka():
    """Load the 'sarvamai/shuka_v1' pipeline through ``load_pipeline``.

    Returns the value produced by ``load_pipeline``; if an exception
    escapes that call, it is printed and None is returned instead.
    """
    try:
        # The options are built inside the try so that any failure while
        # resolving them is reported the same way as a loading failure.
        shuka_options = {
            "trust_remote_code": True,
            "torch_dtype": torch.float32,
        }
        shuka = load_pipeline('sarvamai/shuka_v1', **shuka_options)
    except Exception as err:
        print(f"Error loading Shuka v1: {err}")
        shuka = None
    return shuka
26
 
# Load sarvam-2b for text generation
@spaces.GPU
def load_sarvam():
    """Load the 'sarvamai/sarvam-2b-v0.5' pipeline through ``load_pipeline``.

    Returns the value produced by ``load_pipeline``; if an exception
    escapes that call, it is printed and None is returned instead.
    """
    try:
        sarvam = load_pipeline('sarvamai/sarvam-2b-v0.5')
    except Exception as err:
        print(f"Error loading sarvam-2b: {err}")
        sarvam = None
    return sarvam
35
 
# Load both models once, at import time. The loaders return None when
# loading fails, so code using these globals has to check for None first.
shuka_pipe = load_shuka()
sarvam_pipe = load_sarvam()
39
 
def process_audio_input(audio):
    """Run the Shuka pipeline on an audio file and return its generated text.

    ``audio`` is passed to ``librosa.load`` and resampled to 16 kHz. The
    function never raises: when the Shuka pipeline is unavailable, or when
    loading or inference fails, it returns an error message string instead.
    """
    if shuka_pipe is None:
        return "Error: Shuka v1 model is not available. Please type your message instead."

    try:
        waveform, sample_rate = librosa.load(audio, sr=16000)
        conversation = [
            {'role': 'system', 'content': 'Respond naturally and informatively.'},
            {'role': 'user', 'content': '<|audio|>'},
        ]
        request = {
            'audio': waveform,
            'turns': conversation,
            'sampling_rate': sample_rate,
        }
        outputs = shuka_pipe(request, max_new_tokens=512)
        return outputs[0]['generated_text']
    except Exception as err:
        return f"Error processing audio: {err!s}. Please type your message instead."
54
 
@@ -83,6 +81,7 @@ def detect_language(text):
83
  return 'hi' # Default to Hindi for simplicity
84
  return 'en' # Default to English if no Indic script is detected
85
 
 
86
  def indic_language_assistant(input_type, audio_input, text_input):
87
  try:
88
  if input_type == "audio" and audio_input is not None:
 
1
+ import torch
2
  import librosa
3
+ from transformers import pipeline, WhisperProcessor, WhisperForConditionalGeneration
4
  from gtts import gTTS
 
5
  import gradio as gr
 
6
  import spaces
7
 
# Use CUDA when torch reports it as available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")
11
+
# Function to safely load pipeline
def load_pipeline(model_name, **kwargs):
    """Build a transformers pipeline for ``model_name``; return None on failure.

    Args:
        model_name: Model identifier passed to ``pipeline`` as ``model``.
        **kwargs: Extra keyword arguments forwarded to ``pipeline``. When
            ``device`` is not among them, the module-level ``device`` is used.

    Returns:
        The constructed pipeline, or None if construction raised; the
        error is printed rather than propagated.
    """
    # Treat the detected device as a default only. Passing it as a separate
    # keyword next to **kwargs made any caller-supplied ``device`` fail with
    # a duplicate-keyword TypeError that was then reported as a load error.
    kwargs.setdefault("device", device)
    try:
        return pipeline(model=model_name, **kwargs)
    except Exception as e:
        print(f"Error loading {model_name} pipeline: {e}")
        return None
19
 
# Load Whisper model for speech recognition
@spaces.GPU
def load_whisper():
    """Load the "openai/whisper-small" processor and model.

    The model is moved to the module-level ``device``. Returns a
    ``(processor, model)`` pair, or ``(None, None)`` after printing the
    error if anything fails while loading.
    """
    try:
        whisper_proc = WhisperProcessor.from_pretrained("openai/whisper-small")
        whisper_net = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
        whisper_net = whisper_net.to(device)
    except Exception as err:
        print(f"Error loading Whisper model: {err}")
        return None, None
    return whisper_proc, whisper_net
30
 
# Load sarvam-2b for text generation
@spaces.GPU
def load_sarvam():
    """Load the 'sarvamai/sarvam-2b-v0.5' pipeline through ``load_pipeline``.

    The result of ``load_pipeline`` is returned unchanged.
    """
    sarvam = load_pipeline('sarvamai/sarvam-2b-v0.5')
    return sarvam
 
 
 
 
35
 
# Load the speech-recognition and text-generation models once, at import
# time. load_whisper() yields (None, None) when loading fails, so code
# using these globals has to check for None before calling them.
whisper_processor, whisper_model = load_whisper()
sarvam_pipe = load_sarvam()
39
 
def process_audio_input(audio):
    """Transcribe an audio file with the loaded Whisper model.

    ``audio`` is passed to ``librosa.load`` and resampled to 16 kHz. The
    function never raises: when the Whisper processor or model is missing,
    or when loading or transcription fails, it returns an error message
    string instead of a transcription.
    """
    whisper_ready = whisper_processor is not None and whisper_model is not None
    if not whisper_ready:
        return "Error: Speech recognition model is not available. Please type your message instead."

    try:
        waveform, sample_rate = librosa.load(audio, sr=16000)
        encoded = whisper_processor(waveform, sampling_rate=sample_rate, return_tensors="pt")
        features = encoded.input_features.to(device)
        token_ids = whisper_model.generate(features)
        decoded = whisper_processor.batch_decode(token_ids, skip_special_tokens=True)
        # Only one clip is processed per call, so the first decoded entry
        # is the transcription.
        return decoded[0]
    except Exception as err:
        return f"Error processing audio: {err!s}. Please type your message instead."
52
 
 
81
  return 'hi' # Default to Hindi for simplicity
82
  return 'en' # Default to English if no Indic script is detected
83
 
84
+ @spaces.GPU
85
  def indic_language_assistant(input_type, audio_input, text_input):
86
  try:
87
  if input_type == "audio" and audio_input is not None: