archit11 committed on
Commit
d649fba
·
verified ·
1 Parent(s): 9008874

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -11
app.py CHANGED
@@ -7,28 +7,42 @@ import spaces
7
  @spaces.GPU(duration=120)
8
  def transcribe_and_respond(audio_file):
9
  try:
 
10
  pipe = transformers.pipeline(
11
- model='sarvamai/shuka_v1',
12
- trust_remote_code=True,
13
- device=0,
14
- torch_dtype=torch.bfloat16
15
- )
16
-
 
17
  audio, sr = librosa.load(audio_file, sr=16000)
18
-
 
 
 
 
19
  turns = [
20
  {'role': 'system', 'content': 'Respond naturally and informatively.'},
21
  {'role': 'user', 'content': '<|audio|>'}
22
  ]
23
-
 
 
 
 
24
  output = pipe({'audio': audio, 'turns': turns, 'sampling_rate': sr}, max_new_tokens=512)
25
- print(output)
26
-
 
 
 
27
  return output
28
 
29
  except Exception as e:
30
  return f"Error: {str(e)}"
31
 
 
32
  iface = gr.Interface(
33
  fn=transcribe_and_respond,
34
  inputs=gr.Audio(sources="microphone", type="filepath"), # Accept audio input from microphone
@@ -38,5 +52,6 @@ iface = gr.Interface(
38
  live=True # Enable live processing
39
  )
40
 
 
41
  if __name__ == "__main__":
42
- iface.launch()
 
7
  @spaces.GPU(duration=120)
8
  def transcribe_and_respond(audio_file):
9
  try:
10
+ # Load the model pipeline
11
  pipe = transformers.pipeline(
12
+ model='sarvamai/shuka_v1',
13
+ trust_remote_code=True,
14
+ device=0,
15
+ torch_dtype=torch.bfloat16
16
+ )
17
+
18
+ # Load the audio file
19
  audio, sr = librosa.load(audio_file, sr=16000)
20
+
21
+ # Print the path of the audio file
22
+ print(f"Audio file path: {audio_file}")
23
+
24
+ # Prepare turns with a placeholder for the audio
25
  turns = [
26
  {'role': 'system', 'content': 'Respond naturally and informatively.'},
27
  {'role': 'user', 'content': '<|audio|>'}
28
  ]
29
+
30
+ # Print the constructed prompt
31
+ print(f"Constructed prompt: {turns}")
32
+
33
+ # Run the pipeline with the audio and constructed prompt
34
  output = pipe({'audio': audio, 'turns': turns, 'sampling_rate': sr}, max_new_tokens=512)
35
+
36
+ # Print the output from the model
37
+ print(f"Model output: {output}")
38
+
39
+ # Return the output for the Gradio interface
40
  return output
41
 
42
  except Exception as e:
43
  return f"Error: {str(e)}"
44
 
45
+ # Set up the Gradio interface
46
  iface = gr.Interface(
47
  fn=transcribe_and_respond,
48
  inputs=gr.Audio(sources="microphone", type="filepath"), # Accept audio input from microphone
 
52
  live=True # Enable live processing
53
  )
54
 
55
+ # Launch the interface
56
  if __name__ == "__main__":
57
+ iface.launch()