sagar007 committed on
Commit
c14fb36
·
verified ·
1 Parent(s): 3f52776

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -6
app.py CHANGED
@@ -4,12 +4,13 @@ from transformers import pipeline, WhisperProcessor, WhisperForConditionalGenera
4
  from gtts import gTTS
5
  import gradio as gr
6
 
7
- print("Using CPU for all operations")
8
 
9
  # Function to safely load pipeline
10
  def load_pipeline(model_name, **kwargs):
11
  try:
12
- return pipeline(model=model_name, device="cpu", **kwargs)
 
13
  except Exception as e:
14
  print(f"Error loading {model_name} pipeline: {e}")
15
  return None
@@ -17,8 +18,9 @@ def load_pipeline(model_name, **kwargs):
17
  # Load Whisper model for speech recognition
18
  def load_whisper():
19
  try:
 
20
  processor = WhisperProcessor.from_pretrained("openai/whisper-small")
21
- model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
22
  return processor, model
23
  except Exception as e:
24
  print(f"Error loading Whisper model: {e}")
@@ -38,7 +40,7 @@ def process_audio_input(audio):
38
 
39
  try:
40
  audio, sr = librosa.load(audio, sr=16000)
41
- input_features = whisper_processor(audio, sampling_rate=sr, return_tensors="pt").input_features
42
  predicted_ids = whisper_model.generate(input_features)
43
  transcription = whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
44
  return transcription
@@ -99,7 +101,7 @@ iface = gr.Interface(
99
  fn=indic_language_assistant,
100
  inputs=[
101
  gr.Radio(["audio", "text"], label="Input Type", value="audio"),
102
- gr.Audio(source="microphone", type="filepath", label="Speak (if audio input selected)"),
103
  gr.Textbox(label="Type your message (if text input selected)")
104
  ],
105
  outputs=[
@@ -112,4 +114,4 @@ iface = gr.Interface(
112
  )
113
 
114
  # Launch the app
115
- iface.launch()
 
4
  from gtts import gTTS
5
  import gradio as gr
6
 
7
+ print("Using GPU for all operations")
8
 
9
  # Function to safely load pipeline
10
  def load_pipeline(model_name, **kwargs):
11
  try:
12
+ device = 0 if torch.cuda.is_available() else "cpu"
13
+ return pipeline(model=model_name, device=device, **kwargs)
14
  except Exception as e:
15
  print(f"Error loading {model_name} pipeline: {e}")
16
  return None
 
18
  # Load Whisper model for speech recognition
19
  def load_whisper():
20
  try:
21
+ device = 0 if torch.cuda.is_available() else "cpu"
22
  processor = WhisperProcessor.from_pretrained("openai/whisper-small")
23
+ model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small").to(device)
24
  return processor, model
25
  except Exception as e:
26
  print(f"Error loading Whisper model: {e}")
 
40
 
41
  try:
42
  audio, sr = librosa.load(audio, sr=16000)
43
+ input_features = whisper_processor(audio, sampling_rate=sr, return_tensors="pt").input_features.to(whisper_model.device)
44
  predicted_ids = whisper_model.generate(input_features)
45
  transcription = whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
46
  return transcription
 
101
  fn=indic_language_assistant,
102
  inputs=[
103
  gr.Radio(["audio", "text"], label="Input Type", value="audio"),
104
+ gr.Audio(type="filepath", label="Speak (if audio input selected)"),
105
  gr.Textbox(label="Type your message (if text input selected)")
106
  ],
107
  outputs=[
 
114
  )
115
 
116
  # Launch the app
117
+ iface.launch()