Spaces:

archit11
/

shuka_demo

Running on Zero

Meetnote-Support committed on Oct 31, 2024

Commit

b7c7005

verified ·

1 Parent(s): 47c53f6

run

Files changed (1) hide show

app.py CHANGED Viewed

@@ -22,7 +22,7 @@ def transcribe_and_respond(audio_file):
         print(f"Audio dtype: {audio.dtype}, Audio shape: {audio.shape}, Sample rate: {sr}")
         turns = [
-            {'role': 'system', 'content': 'Respond naturally and informatively.'},
             {'role': 'user', 'content': '<|audio|>'}
         ]
@@ -30,7 +30,7 @@ def transcribe_and_respond(audio_file):
         print(f"Initial turns: {turns}")
         # Call the model with the audio and prompt
-        output = pipe({'audio': audio, 'turns': turns, 'sampling_rate': sr}, max_new_tokens=512)
         # Debug: Print the final output from the model
         print(f"Model output: {output}")

         print(f"Audio dtype: {audio.dtype}, Audio shape: {audio.shape}, Sample rate: {sr}")
         turns = [
+            {'role': 'system', 'content': 'You are a transcription agent. What ever the speaker has spoken should be outputted as text. Nothing less, nothing more.'},
             {'role': 'user', 'content': '<|audio|>'}
         ]
         print(f"Initial turns: {turns}")
         # Call the model with the audio and prompt
+        output = pipe({'audio': audio, 'turns': turns, 'sampling_rate': sr}, max_new_tokens=1024)
         # Debug: Print the final output from the model
         print(f"Model output: {output}")