asahi417 committed on
Commit
1222a68
·
1 Parent(s): 807995a
Files changed (1) hide show
  1. app.py +6 -2
app.py CHANGED
@@ -50,6 +50,7 @@ def transcribe(inputs: str,
50
  num_speakers: float,
51
  min_speakers: float,
52
  max_speakers: float,
 
53
  add_silence_end: float,
54
  add_silence_start: float):
55
  if inputs is None:
@@ -63,6 +64,7 @@ def transcribe(inputs: str,
63
  num_speakers=int(num_speakers) if num_speakers != 0 else None,
64
  min_speakers=int(min_speakers) if min_speakers != 0 else None,
65
  max_speakers=int(max_speakers) if max_speakers != 0 else None,
 
66
  add_silence_end=add_silence_end if add_silence_end != 0 else None,
67
  add_silence_start=add_silence_start if add_silence_start != 0 else None
68
  )
@@ -77,8 +79,8 @@ description = (f"Transcribe and diarize long-form microphone or audio inputs wit
77
  f"Kotoba-Whisper [{model_name}](https://huggingface.co/{model_name}).")
78
  title = f"Audio Transcription and Diarization with {os.path.basename(model_name)}"
79
  shared_config = {"fn": transcribe, "title": title, "description": description, "allow_flagging": "never", "examples": [
80
- [example_file, True, 0, 0, 0, 0.5, 0.5],
81
- [example_file, True, 4, 0, 0, 0.5, 0.5]
82
  ]}
83
  o_upload = gr.Markdown()
84
  o_mic = gr.Markdown()
@@ -92,6 +94,7 @@ i_upload = gr.Interface(
92
  gr.Slider(0, 10, label="num speakers (set 0 for auto-detect mode)", value=0, step=1),
93
  gr.Slider(0, 10, label="min speakers (set 0 for auto-detect mode)", value=0, step=1),
94
  gr.Slider(0, 10, label="max speakers (set 0 for auto-detect mode)", value=0, step=1),
 
95
  gr.Slider(0, 0.5, label="silence at the end", value=0.5, step=0.05),
96
  gr.Slider(0, 0.5, label="silence at the start", value=0.5, step=0.05),
97
  ],
@@ -105,6 +108,7 @@ i_mic = gr.Interface(
105
  gr.Slider(0, 10, label="num speakers (set 0 for auto-detect mode)", value=0, step=1),
106
  gr.Slider(0, 10, label="min speakers (set 0 for auto-detect mode)", value=0, step=1),
107
  gr.Slider(0, 10, label="max speakers (set 0 for auto-detect mode)", value=0, step=1),
 
108
  gr.Slider(0, 0.5, label="silence at the end", value=0.5, step=0.05),
109
  gr.Slider(0, 0.5, label="silence at the start", value=0.5, step=0.05),
110
  ],
 
50
  num_speakers: float,
51
  min_speakers: float,
52
  max_speakers: float,
53
+ chunk_length_s: float,
54
  add_silence_end: float,
55
  add_silence_start: float):
56
  if inputs is None:
 
64
  num_speakers=int(num_speakers) if num_speakers != 0 else None,
65
  min_speakers=int(min_speakers) if min_speakers != 0 else None,
66
  max_speakers=int(max_speakers) if max_speakers != 0 else None,
67
+ chunk_length_s=int(chunk_length_s) if chunk_length_s != 30 else None,
68
  add_silence_end=add_silence_end if add_silence_end != 0 else None,
69
  add_silence_start=add_silence_start if add_silence_start != 0 else None
70
  )
 
79
  f"Kotoba-Whisper [{model_name}](https://huggingface.co/{model_name}).")
80
  title = f"Audio Transcription and Diarization with {os.path.basename(model_name)}"
81
  shared_config = {"fn": transcribe, "title": title, "description": description, "allow_flagging": "never", "examples": [
82
+ [example_file, True, 0, 0, 0, 30, 0.5, 0.5],
83
+ [example_file, True, 4, 0, 0, 30, 0.5, 0.5]
84
  ]}
85
  o_upload = gr.Markdown()
86
  o_mic = gr.Markdown()
 
94
  gr.Slider(0, 10, label="num speakers (set 0 for auto-detect mode)", value=0, step=1),
95
  gr.Slider(0, 10, label="min speakers (set 0 for auto-detect mode)", value=0, step=1),
96
  gr.Slider(0, 10, label="max speakers (set 0 for auto-detect mode)", value=0, step=1),
97
+ gr.Slider(5, 30, label="chunk length for ASR", value=30, step=1),
98
  gr.Slider(0, 0.5, label="silence at the end", value=0.5, step=0.05),
99
  gr.Slider(0, 0.5, label="silence at the start", value=0.5, step=0.05),
100
  ],
 
108
  gr.Slider(0, 10, label="num speakers (set 0 for auto-detect mode)", value=0, step=1),
109
  gr.Slider(0, 10, label="min speakers (set 0 for auto-detect mode)", value=0, step=1),
110
  gr.Slider(0, 10, label="max speakers (set 0 for auto-detect mode)", value=0, step=1),
111
+ gr.Slider(5, 30, label="chunk length for ASR", value=30, step=1),
112
  gr.Slider(0, 0.5, label="silence at the end", value=0.5, step=0.05),
113
  gr.Slider(0, 0.5, label="silence at the start", value=0.5, step=0.05),
114
  ],