sagar007 committed on
Commit
c14fb36
·
verified ·
1 Parent(s): 3f52776

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -6
app.py CHANGED
@@ -4,12 +4,13 @@ from transformers import pipeline, WhisperProcessor, WhisperForConditionalGenera
4
  from gtts import gTTS
5
  import gradio as gr
6
 
7
- print("Using CPU for all operations")
8
 
9
  # Function to safely load pipeline
10
  def load_pipeline(model_name, **kwargs):
11
  try:
12
- return pipeline(model=model_name, device="cpu", **kwargs)
 
13
  except Exception as e:
14
  print(f"Error loading {model_name} pipeline: {e}")
15
  return None
@@ -17,8 +18,9 @@ def load_pipeline(model_name, **kwargs):
17
  # Load Whisper model for speech recognition
18
  def load_whisper():
19
  try:
 
20
  processor = WhisperProcessor.from_pretrained("openai/whisper-small")
21
- model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
22
  return processor, model
23
  except Exception as e:
24
  print(f"Error loading Whisper model: {e}")
@@ -38,7 +40,7 @@ def process_audio_input(audio):
38
 
39
  try:
40
  audio, sr = librosa.load(audio, sr=16000)
41
- input_features = whisper_processor(audio, sampling_rate=sr, return_tensors="pt").input_features
42
  predicted_ids = whisper_model.generate(input_features)
43
  transcription = whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
44
  return transcription
@@ -99,7 +101,7 @@ iface = gr.Interface(
99
  fn=indic_language_assistant,
100
  inputs=[
101
  gr.Radio(["audio", "text"], label="Input Type", value="audio"),
102
- gr.Audio(source="microphone", type="filepath", label="Speak (if audio input selected)"),
103
  gr.Textbox(label="Type your message (if text input selected)")
104
  ],
105
  outputs=[
@@ -112,4 +114,4 @@ iface = gr.Interface(
112
  )
113
 
114
  # Launch the app
115
- iface.launch()
 
4
  from gtts import gTTS
5
  import gradio as gr
6
 
7
+ print("Using GPU for all operations")
8
 
9
  # Function to safely load pipeline
10
  def load_pipeline(model_name, **kwargs):
11
  try:
12
+ device = 0 if torch.cuda.is_available() else "cpu"
13
+ return pipeline(model=model_name, device=device, **kwargs)
14
  except Exception as e:
15
  print(f"Error loading {model_name} pipeline: {e}")
16
  return None
 
18
  # Load Whisper model for speech recognition
19
  def load_whisper():
20
  try:
21
+ device = 0 if torch.cuda.is_available() else "cpu"
22
  processor = WhisperProcessor.from_pretrained("openai/whisper-small")
23
+ model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small").to(device)
24
  return processor, model
25
  except Exception as e:
26
  print(f"Error loading Whisper model: {e}")
 
40
 
41
  try:
42
  audio, sr = librosa.load(audio, sr=16000)
43
+ input_features = whisper_processor(audio, sampling_rate=sr, return_tensors="pt").input_features.to(whisper_model.device)
44
  predicted_ids = whisper_model.generate(input_features)
45
  transcription = whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
46
  return transcription
 
101
  fn=indic_language_assistant,
102
  inputs=[
103
  gr.Radio(["audio", "text"], label="Input Type", value="audio"),
104
+ gr.Audio(type="filepath", label="Speak (if audio input selected)"),
105
  gr.Textbox(label="Type your message (if text input selected)")
106
  ],
107
  outputs=[
 
114
  )
115
 
116
  # Launch the app
117
+ iface.launch()