Spaces: Running on L40S
Update hf_gradio_app.py
hf_gradio_app.py (+26 -4)
@@ -1,4 +1,5 @@
 import os, random, time
+import uuid
 from huggingface_hub import snapshot_download
 
 # Download models
@@ -70,7 +71,7 @@ with torch.inference_mode():
     pipeline.to(device=device, dtype=weight_dtype)
 
 @torch.inference_mode()
-def generate(input_video, input_audio, seed):
+def generate(input_video, input_audio, seed, progress=gr.Progress(track_tqdm=True)):
     resolution = 512
     num_generated_frames_per_clip = 16
     fps = 30
@@ -86,7 +87,7 @@ def generate(input_video, input_audio, seed):
     generator = torch.manual_seed(seed)
     img_size = (resolution, resolution)
     pixel_values, face_emb = preprocess_image(face_analysis_model="./checkpoints/misc/face_analysis", image_path=input_video, image_size=resolution)
-
+
     output_dir = "./outputs"
     os.makedirs(output_dir, exist_ok=True)
     cache_dir = os.path.join(output_dir, "audio_preprocess")
@@ -149,7 +150,9 @@ def generate(input_video, input_audio, seed):
     video_frames = video_frames.squeeze(0)
     video_frames = video_frames[:, :audio_length]
 
-
+    # Save the output video
+    unique_id = str(uuid.uuid4())
+    video_path = os.path.join(output_dir, f"memo-{seed}_{unique_id}.mp4")
     tensor_to_video(video_frames, video_path, input_audio, fps=fps)
 
     return video_path
@@ -158,7 +161,26 @@ import gradio as gr
 
 with gr.Blocks(analytics_enabled=False) as demo:
     with gr.Column():
-        gr.Markdown("# MEMO")
+        gr.Markdown("# MEMO: Memory-Guided Diffusion for Expressive Talking Video Generation")
+        gr.HTML("""
+        <div style="display:flex;column-gap:4px;">
+            <a href="https://github.com/memoavatar/memo">
+                <img src='https://img.shields.io/badge/GitHub-Repo-blue'>
+            </a>
+            <a href="https://memoavatar.github.io/">
+                <img src='https://img.shields.io/badge/Project-Page-green'>
+            </a>
+            <a href="https://arxiv.org/abs/2412.04448">
+                <img src='https://img.shields.io/badge/ArXiv-Paper-red'>
+            </a>
+            <a href="https://huggingface.co/spaces/fffiloni/MEMO?duplicate=true">
+                <img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-sm.svg" alt="Duplicate this Space">
+            </a>
+            <a href="https://huggingface.co/fffiloni">
+                <img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/follow-me-on-HF-sm-dark.svg" alt="Follow me on HF">
+            </a>
+        </div>
+        """)
 
         with gr.Row():
             with gr.Column():
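
Why the new progress=gr.Progress(track_tqdm=True) parameter matters: Gradio injects a progress tracker into any event handler that declares a gr.Progress default, and track_tqdm=True mirrors every tqdm bar created during the call (such as the diffusion sampling loop) into the Space's UI. A minimal self-contained sketch of the same pattern, with a hypothetical slow_task standing in for the MEMO pipeline:

import time

import gradio as gr
from tqdm import tqdm

def slow_task(steps, progress=gr.Progress(track_tqdm=True)):
    # track_tqdm=True mirrors this tqdm bar into the Gradio UI;
    # no manual progress(...) calls are needed.
    for _ in tqdm(range(int(steps)), desc="Sampling"):
        time.sleep(0.05)  # stand-in for one diffusion step
    return f"Done after {int(steps)} steps"

demo = gr.Interface(fn=slow_task, inputs=gr.Number(value=20), outputs="text")

if __name__ == "__main__":
    demo.launch()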
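The uuid import exists to make output paths collision-free: each call now writes to memo-{seed}_{unique_id}.mp4, so concurrent users, or repeated runs with the same seed, never overwrite each other's result. A small sketch of the naming scheme in isolation (the ./outputs directory and memo- prefix follow the diff; the helper name is illustrative):

import os
import uuid

def unique_output_path(output_dir: str, seed: int, ext: str = ".mp4") -> str:
    # The seed keeps the name traceable to the run; uuid4 guarantees that
    # two requests with the same seed still get distinct files.
    os.makedirs(output_dir, exist_ok=True)
    return os.path.join(output_dir, f"memo-{seed}_{uuid.uuid4()}{ext}")

print(unique_output_path("./outputs", seed=42))
# e.g. ./outputs/memo-42_1b9d6bcd-bbfd-4b2d-9b5d-ab8dfbbd4bed.mp4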
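For context on the unchanged @torch.inference_mode() decorator around generate: it disables autograd recording for the whole call, which lowers memory use and per-op overhead during pure inference. A minimal sketch of the effect, independent of the MEMO pipeline:

import torch

@torch.inference_mode()
def infer(model: torch.nn.Module, x: torch.Tensor) -> torch.Tensor:
    # No computation graph is recorded here; outputs are inference tensors
    # and cannot participate in a later backward() pass.
    return model(x)

model = torch.nn.Linear(4, 2)
print(infer(model, torch.randn(1, 4)).requires_grad)  # False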