add MaskGCT voice cloning option
Browse files
webgui.py
CHANGED
@@ -212,6 +212,22 @@ def process_video(uploaded_img, uploaded_audio, width, height, length, seed, fac
|
|
212 |
video_clip.write_videofile(str(final_output_path), codec="libx264", audio_codec="aac")
|
213 |
|
214 |
return final_output_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
215 |
|
216 |
with gr.Blocks() as demo:
|
217 |
gr.Markdown('# EchoMimic')
|
@@ -228,6 +244,20 @@ with gr.Blocks() as demo:
|
|
228 |
with gr.Column():
|
229 |
uploaded_img = gr.Image(type="filepath", label="Reference Image")
|
230 |
uploaded_audio = gr.Audio(type="filepath", label="Input Audio")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
231 |
with gr.Accordion(label=advanced_settings_label, open=False):
|
232 |
with gr.Row():
|
233 |
width = gr.Slider(label="Width", minimum=128, maximum=1024, value=default_values["width"], interactive=available_property)
|
@@ -297,6 +327,14 @@ with gr.Blocks() as demo:
|
|
297 |
output_video= final_output_path
|
298 |
return final_output_path
|
299 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
300 |
generate_button.click(
|
301 |
generate_video,
|
302 |
inputs=[
|
|
|
212 |
video_clip.write_videofile(str(final_output_path), codec="libx264", audio_codec="aac")
|
213 |
|
214 |
return final_output_path
|
215 |
+
|
216 |
+
def get_maskGCT_TTS(prompt_audio_maskGCT, audio_to_clone):
    """Generate speech for a text in a cloned voice via the amphion/maskgct Space.

    Args:
        prompt_audio_maskGCT: Text to synthesize; sent to the remote endpoint
            as ``target_text``.
        audio_to_clone: Reference voice sample, passed through ``handle_file``
            and sent as ``prompt_wav``.

    Returns:
        A 2-tuple ``(result, update)``: the raw value returned by the remote
        ``/predict`` call, and a ``gr.update`` that sets the same value on the
        second output component and makes it visible.
        NOTE(review): ``result`` is presumably a path to the generated audio
        file -- confirm against the amphion/maskgct API.

    Raises:
        gr.Error: If no voice sample was provided, or if the client for the
            remote Space cannot be created.
    """
    # Fail early with a readable message: without a sample, handle_file()
    # would raise an opaque error about a missing file.
    if not audio_to_clone:
        raise gr.Error("Please provide a voice sample to clone.")

    try:
        client = Client("amphion/maskgct")
    except Exception as err:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt/SystemExit still propagate; chain the cause so the
        # underlying failure remains visible in the server logs.
        raise gr.Error(
            "amphion/maskgct space's api might not be ready, please wait, or upload an audio instead."
        ) from err

    result = client.predict(
        prompt_wav=handle_file(audio_to_clone),
        target_text=prompt_audio_maskGCT,
        target_len=-1,
        n_timesteps=25,
        api_name="/predict",
    )
    print(result)
    return result, gr.update(value=result, visible=True)
|
231 |
|
232 |
with gr.Blocks() as demo:
|
233 |
gr.Markdown('# EchoMimic')
|
|
|
244 |
with gr.Column():
|
245 |
uploaded_img = gr.Image(type="filepath", label="Reference Image")
|
246 |
uploaded_audio = gr.Audio(type="filepath", label="Input Audio")
|
247 |
+
preprocess_audio_file = gr.File(visible=False)
|
248 |
+
with gr.Accordion(label="Voice cloning with MaskGCT", open=False):
|
249 |
+
prompt_audio_maskGCT = gr.Textbox(
|
250 |
+
label = "Text to synthetize",
|
251 |
+
lines = 2,
|
252 |
+
max_lines = 2,
|
253 |
+
elem_id = "text-synth-maskGCT"
|
254 |
+
)
|
255 |
+
audio_to_clone_maskGCT = gr.Audio(
|
256 |
+
label = "Voice to clone",
|
257 |
+
type = "filepath",
|
258 |
+
elem_id = "audio-clone-elm-maskGCT"
|
259 |
+
)
|
260 |
+
gen_maskGCT_voice_btn = gr.Button("Generate voice clone (optional)")
|
261 |
with gr.Accordion(label=advanced_settings_label, open=False):
|
262 |
with gr.Row():
|
263 |
width = gr.Slider(label="Width", minimum=128, maximum=1024, value=default_values["width"], interactive=available_property)
|
|
|
327 |
output_video= final_output_path
|
328 |
return final_output_path
|
329 |
|
330 |
+
gen_maskGCT_voice_btn.click(
|
331 |
+
fn = get_maskGCT_TTS,
|
332 |
+
inputs = [prompt_audio_maskGCT, audio_to_clone_maskGCT],
|
333 |
+
outputs = [voice, preprocess_audio_file],
|
334 |
+
queue = False,
|
335 |
+
show_api = False
|
336 |
+
)
|
337 |
+
|
338 |
generate_button.click(
|
339 |
generate_video,
|
340 |
inputs=[
|