archit11 committed on
Commit
ab07d9e
·
verified ·
1 Parent(s): b37983d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -10
app.py CHANGED
@@ -3,6 +3,7 @@ import librosa
3
  import gradio as gr
4
  import spaces
5
 
 
6
  pipe = transformers.pipeline(
7
  model='sarvamai/shuka_v1',
8
  trust_remote_code=True,
@@ -12,17 +13,28 @@ pipe = transformers.pipeline(
12
 
13
  @spaces.GPU(duration=120)
14
  def transcribe_and_respond(audio_file):
15
- audio, sr = librosa.load(audio_file, sr=16000)
16
-
17
- turns = [
18
- {'role': 'system', 'content': 'Respond naturally and informatively.'},
19
- {'role': 'user', 'content': ''}
20
- ]
21
-
22
- response = pipe({'audio': audio, 'turns': turns, 'sampling_rate': sr}, max_new_tokens=512)
23
-
24
- return response
 
 
 
 
 
 
 
 
 
 
25
 
 
26
  iface = gr.Interface(
27
  fn=transcribe_and_respond,
28
  inputs=gr.Audio(sources="microphone", type="filepath"), # Use the microphone for audio input
@@ -31,4 +43,5 @@ iface = gr.Interface(
31
  description="Record your voice, and the model will respond naturally and informatively."
32
  )
33
 
 
34
  iface.launch()
 
3
  import gradio as gr
4
  import spaces
5
 
6
+ # Load the model pipeline on GPU:0
7
  pipe = transformers.pipeline(
8
  model='sarvamai/shuka_v1',
9
  trust_remote_code=True,
 
13
 
14
  @spaces.GPU(duration=120)
15
  def transcribe_and_respond(audio_file):
16
+ try:
17
+ # Reject a missing recording (None) or a non-string value; file existence is not checked here
18
+ if audio_file is None or not isinstance(audio_file, str):
19
+ raise ValueError("Invalid audio file input.")
20
+
21
+ # Load the audio using librosa
22
+ audio, sr = librosa.load(audio_file, sr=16000)
23
+
24
+ # Prepare the conversation turns
25
+ turns = [
26
+ {'role': 'system', 'content': 'Respond naturally and informatively.'},
27
+ {'role': 'user', 'content': ''}
28
+ ]
29
+
30
+ # Run inference with the pipeline
31
+ response = pipe({'audio': audio, 'turns': turns, 'sampling_rate': sr}, max_new_tokens=512)
32
+
33
+ return response
34
+ except Exception as e:
35
+ return f"Error processing audio: {str(e)}"
36
 
37
+ # Create the Gradio interface with microphone input
38
  iface = gr.Interface(
39
  fn=transcribe_and_respond,
40
  inputs=gr.Audio(sources="microphone", type="filepath"), # Use the microphone for audio input
 
43
  description="Record your voice, and the model will respond naturally and informatively."
44
  )
45
 
46
+ # Launch the Gradio app
47
  iface.launch()