Update app.py
Browse files
app.py
CHANGED
@@ -580,42 +580,36 @@ combined_examples = [
|
|
580 |
|
581 |
def make_demo():
|
582 |
with gr.Blocks(analytics_enabled=False) as Interface:
|
583 |
-
# First row: Audio upload and Audio examples with adjusted ratio
|
584 |
gr.Markdown(
|
585 |
-
|
586 |
-
|
587 |
-
|
588 |
-
|
589 |
-
|
590 |
-
|
591 |
-
|
592 |
-
|
593 |
-
|
594 |
-
|
595 |
-
|
596 |
-
|
597 |
-
|
598 |
-
|
599 |
-
|
600 |
-
|
601 |
-
|
602 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
603 |
)
|
604 |
-
|
605 |
-
# gr.Markdown("""
|
606 |
-
# <h4 style="text-align: left;">
|
607 |
-
# This demo is part of an open-source project supported by Hugging Face's free, zero-GPU runtime. Due to runtime cost considerations, it operates in low-quality mode. Some high-quality videos are shown below.
|
608 |
-
|
609 |
-
# Details of the low-quality mode:
|
610 |
-
# 1. Lower resolution.
|
611 |
-
# 2. More discontinuous frames (causing noticeable "frame jumps").
|
612 |
-
# 3. Utilizes open-source tools like SMPLerX-s-model, Wav2Lip, and FiLM for faster processing.
|
613 |
-
# 4. Accepts audio input of up to 8 seconds. If your input exceeds 8 seconds, only the first 8 seconds will be used.
|
614 |
-
# 5. You can provide a custom background video for your character, but it is limited to 20 seconds.
|
615 |
-
|
616 |
-
# Feel free to open an issue on GitHub or contact the authors if this does not meet your needs.
|
617 |
-
# </h4>
|
618 |
-
# """)
|
619 |
|
620 |
# Create a gallery with 5 videos
|
621 |
with gr.Row():
|
@@ -630,7 +624,15 @@ def make_demo():
|
|
630 |
video3 = gr.Video(value="./datasets/cached_audio/demo7.mp4", label="Demo 7")
|
631 |
video4 = gr.Video(value="./datasets/cached_audio/demo8.mp4", label="Demo 8")
|
632 |
video5 = gr.Video(value="./datasets/cached_audio/demo9.mp4", label="Demo 9")
|
633 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
634 |
|
635 |
with gr.Row():
|
636 |
with gr.Column(scale=4):
|
@@ -650,20 +652,20 @@ def make_demo():
|
|
650 |
file_output_2 = gr.File(label="Download 3D Motion and Visualize in Blender")
|
651 |
gr.Markdown("""
|
652 |
<h4 style="text-align: left;">
|
653 |
-
|
654 |
<br>
|
655 |
-
|
656 |
<br>
|
657 |
-
|
658 |
<br>
|
659 |
-
|
660 |
<br>
|
661 |
-
|
662 |
<br>
|
663 |
-
|
664 |
<br>
|
665 |
<br>
|
666 |
-
|
667 |
</h4>
|
668 |
""")
|
669 |
|
@@ -701,17 +703,17 @@ def make_demo():
|
|
701 |
outputs=[video_output_1, video_output_2, file_output_1, file_output_2]
|
702 |
)
|
703 |
|
704 |
-
with gr.Row():
|
705 |
-
|
706 |
-
|
707 |
-
|
708 |
-
|
709 |
-
|
710 |
-
|
711 |
-
|
712 |
-
|
713 |
-
|
714 |
-
|
715 |
|
716 |
return Interface
|
717 |
|
|
|
580 |
|
581 |
def make_demo():
|
582 |
with gr.Blocks(analytics_enabled=False) as Interface:
|
|
|
583 |
gr.Markdown(
|
584 |
+
"""
|
585 |
+
<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
|
586 |
+
<div>
|
587 |
+
<h1>TANGO</h1>
|
588 |
+
<span>Generating full-body talking videos from audio and reference video</span>
|
589 |
+
<h2 style='font-weight: 450; font-size: 1rem; margin: 0rem'>\
|
590 |
+
<a href='https://h-liu1997.github.io/'>Haiyang Liu</a>, \
|
591 |
+
<a href='https://yangxingchao.github.io/'>Xingchao Yang</a>, \
|
592 |
+
<a href=''>Tomoya Akiyama</a>, \
|
593 |
+
<a href='https://sky24h.github.io/'>Yuantian Huang</a>, \
|
594 |
+
<a href=''>Qiaoge Li</a>, \
|
595 |
+
<a href='https://www.tut.ac.jp/english/university/faculty/cs/164.html'>Shigeru Kuriyama</a>, \
|
596 |
+
<a href='https://taketomitakafumi.sakura.ne.jp/web/en/'>Takafumi Taketomi</a>\
|
597 |
+
</h2>
|
598 |
+
<br>
|
599 |
+
<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
|
600 |
+
<a href="https://arxiv.org/abs/2410.04221"><img src="https://img.shields.io/badge/arXiv-2410.04221-blue"></a>
|
601 |
+
|
602 |
+
<a href="https://pantomatrix.github.io/TANGO/"><img src="https://img.shields.io/badge/Project_Page-TANGO-orange" alt="Project Page"></a>
|
603 |
+
|
604 |
+
<a href="https://github.com/CyberAgentAILab/TANGO"><img src="https://img.shields.io/badge/Github-Code-green"></a>
|
605 |
+
|
606 |
+
<a href="https://github.com/CyberAgentAILab/TANGO"><img src="https://img.shields.io/github/stars/CyberAgentAILab/TANGO"></a>
|
607 |
+
|
608 |
+
</div>
|
609 |
+
</div>
|
610 |
+
</div>
|
611 |
+
"""
|
612 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
613 |
|
614 |
# Create a gallery with 5 videos
|
615 |
with gr.Row():
|
|
|
624 |
video3 = gr.Video(value="./datasets/cached_audio/demo7.mp4", label="Demo 7")
|
625 |
video4 = gr.Video(value="./datasets/cached_audio/demo8.mp4", label="Demo 8")
|
626 |
video5 = gr.Video(value="./datasets/cached_audio/demo9.mp4", label="Demo 9")
|
627 |
+
|
628 |
+
with gr.Row():
|
629 |
+
gr.Markdown(
|
630 |
+
"""
|
631 |
+
<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
|
632 |
+
This is an open-source project supported by Hugging Face's free ZeroGPU. Runtime is limited to 300s, so it operates in low-quality mode. Some generated results from high-quality mode are shown above.
|
633 |
+
</div>
|
634 |
+
"""
|
635 |
+
)
|
636 |
|
637 |
with gr.Row():
|
638 |
with gr.Column(scale=4):
|
|
|
652 |
file_output_2 = gr.File(label="Download 3D Motion and Visualize in Blender")
|
653 |
gr.Markdown("""
|
654 |
<h4 style="text-align: left;">
|
655 |
+
Details of the low-quality mode:
|
656 |
<br>
|
657 |
+
1. Lower resolution.
|
658 |
<br>
|
659 |
+
2. More discontinuous graph nodes (causing noticeable "frame jumps").
|
660 |
<br>
|
661 |
+
3. Utilizes open-source tools like SMPLerX-s-model, Wav2Lip, and FiLM for faster processing.
|
662 |
<br>
|
663 |
+
4. Only the first 8 seconds of your input audio are used.
|
664 |
<br>
|
665 |
+
5. Custom characters are supported, using a video of up to 10 seconds.
|
666 |
<br>
|
667 |
<br>
|
668 |
+
Feel free to open an issue on GitHub or contact the authors if this does not meet your needs.
|
669 |
</h4>
|
670 |
""")
|
671 |
|
|
|
703 |
outputs=[video_output_1, video_output_2, file_output_1, file_output_2]
|
704 |
)
|
705 |
|
706 |
+
# with gr.Row():
|
707 |
+
# with gr.Column(scale=4):
|
708 |
+
# print(combined_examples)
|
709 |
+
# gr.Examples(
|
710 |
+
# examples=combined_examples,
|
711 |
+
# inputs=[audio_input, video_input, seed_input], # Both audio and video as inputs
|
712 |
+
# outputs=[video_output_1, video_output_2, file_output_1, file_output_2],
|
713 |
+
# fn=tango, # Function that processes both audio and video inputs
|
714 |
+
# label="Select Combined Audio and Video Examples (Cached)",
|
715 |
+
# cache_examples=True
|
716 |
+
# )
|
717 |
|
718 |
return Interface
|
719 |
|