Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,12 +4,13 @@ from transformers import pipeline, WhisperProcessor, WhisperForConditionalGenera
|
|
4 |
from gtts import gTTS
|
5 |
import gradio as gr
|
6 |
|
7 |
-
print("Using
|
8 |
|
9 |
# Function to safely load pipeline
|
10 |
def load_pipeline(model_name, **kwargs):
|
11 |
try:
|
12 |
-
|
|
|
13 |
except Exception as e:
|
14 |
print(f"Error loading {model_name} pipeline: {e}")
|
15 |
return None
|
@@ -17,8 +18,9 @@ def load_pipeline(model_name, **kwargs):
|
|
17 |
# Load Whisper model for speech recognition
|
18 |
def load_whisper():
|
19 |
try:
|
|
|
20 |
processor = WhisperProcessor.from_pretrained("openai/whisper-small")
|
21 |
-
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
|
22 |
return processor, model
|
23 |
except Exception as e:
|
24 |
print(f"Error loading Whisper model: {e}")
|
@@ -38,7 +40,7 @@ def process_audio_input(audio):
|
|
38 |
|
39 |
try:
|
40 |
audio, sr = librosa.load(audio, sr=16000)
|
41 |
-
input_features = whisper_processor(audio, sampling_rate=sr, return_tensors="pt").input_features
|
42 |
predicted_ids = whisper_model.generate(input_features)
|
43 |
transcription = whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
|
44 |
return transcription
|
@@ -99,7 +101,7 @@ iface = gr.Interface(
|
|
99 |
fn=indic_language_assistant,
|
100 |
inputs=[
|
101 |
gr.Radio(["audio", "text"], label="Input Type", value="audio"),
|
102 |
-
gr.Audio(
|
103 |
gr.Textbox(label="Type your message (if text input selected)")
|
104 |
],
|
105 |
outputs=[
|
@@ -112,4 +114,4 @@ iface = gr.Interface(
|
|
112 |
)
|
113 |
|
114 |
# Launch the app
|
115 |
-
iface.launch()
|
|
|
4 |
from gtts import gTTS
|
5 |
import gradio as gr
|
6 |
|
7 |
+
# Report the device that will actually be used: the loaders below fall back to
# the CPU when CUDA is unavailable, so an unconditional "GPU" message would be
# wrong on CPU-only hosts.
print(
    "Using GPU for all operations"
    if torch.cuda.is_available()
    else "Using CPU for all operations"
)
|
8 |
|
9 |
# Function to safely load pipeline
|
10 |
def load_pipeline(model_name, **kwargs):
    """Build a transformers pipeline for ``model_name``, or return None on failure.

    The pipeline is placed on CUDA device 0 when ``torch.cuda.is_available()``
    is true and on ``"cpu"`` otherwise. A caller may override that choice by
    passing ``device=...`` in ``kwargs``.

    Args:
        model_name: Model identifier passed to ``pipeline`` as ``model``.
        **kwargs: Extra keyword arguments forwarded to ``pipeline``.

    Returns:
        The constructed pipeline, or None if construction raised. The error is
        printed rather than propagated.
    """
    try:
        # Use setdefault rather than a second explicit keyword: supplying
        # device= both explicitly and through **kwargs raises TypeError, which
        # the handler below would swallow and turn into a None result.
        kwargs.setdefault("device", 0 if torch.cuda.is_available() else "cpu")
        return pipeline(model=model_name, **kwargs)
    except Exception as e:
        print(f"Error loading {model_name} pipeline: {e}")
        return None
|
|
|
18 |
# Load Whisper model for speech recognition
|
19 |
def load_whisper():
|
20 |
try:
|
21 |
+
device = 0 if torch.cuda.is_available() else "cpu"
|
22 |
processor = WhisperProcessor.from_pretrained("openai/whisper-small")
|
23 |
+
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small").to(device)
|
24 |
return processor, model
|
25 |
except Exception as e:
|
26 |
print(f"Error loading Whisper model: {e}")
|
|
|
40 |
|
41 |
try:
|
42 |
audio, sr = librosa.load(audio, sr=16000)
|
43 |
+
input_features = whisper_processor(audio, sampling_rate=sr, return_tensors="pt").input_features.to(whisper_model.device)
|
44 |
predicted_ids = whisper_model.generate(input_features)
|
45 |
transcription = whisper_processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
|
46 |
return transcription
|
|
|
101 |
fn=indic_language_assistant,
|
102 |
inputs=[
|
103 |
gr.Radio(["audio", "text"], label="Input Type", value="audio"),
|
104 |
+
gr.Audio(type="filepath", label="Speak (if audio input selected)"),
|
105 |
gr.Textbox(label="Type your message (if text input selected)")
|
106 |
],
|
107 |
outputs=[
|
|
|
114 |
)
|
115 |
|
116 |
# Launch the app
|
117 |
+
iface.launch()
|