Spaces: Running on L40S
Update hf_gradio_app.py
hf_gradio_app.py (+26 -4)
@@ -1,4 +1,5 @@
 import os, random, time
+import uuid
 from huggingface_hub import snapshot_download
 
 # Download models
@@ -70,7 +71,7 @@ with torch.inference_mode():
     pipeline.to(device=device, dtype=weight_dtype)
 
 @torch.inference_mode()
-def generate(input_video, input_audio, seed):
+def generate(input_video, input_audio, seed, progress=gr.Progress(track_tqdm=True)):
     resolution = 512
     num_generated_frames_per_clip = 16
     fps = 30
@@ -86,7 +87,7 @@ def generate(input_video, input_audio, seed):
     generator = torch.manual_seed(seed)
     img_size = (resolution, resolution)
     pixel_values, face_emb = preprocess_image(face_analysis_model="./checkpoints/misc/face_analysis", image_path=input_video, image_size=resolution)
-
+
     output_dir = "./outputs"
     os.makedirs(output_dir, exist_ok=True)
     cache_dir = os.path.join(output_dir, "audio_preprocess")
@@ -149,7 +150,9 @@ def generate(input_video, input_audio, seed):
     video_frames = video_frames.squeeze(0)
     video_frames = video_frames[:, :audio_length]
 
-
+    # Save the output video
+    unique_id = str(uuid.uuid4())
+    video_path = os.path.join(output_dir, f"memo-{seed}_{unique_id}.mp4")
     tensor_to_video(video_frames, video_path, input_audio, fps=fps)
 
     return video_path
@@ -158,7 +161,26 @@ import gradio as gr
 
 with gr.Blocks(analytics_enabled=False) as demo:
     with gr.Column():
-        gr.Markdown("# MEMO")
+        gr.Markdown("# MEMO: Memory-Guided Diffusion for Expressive Talking Video Generation")
+        gr.HTML("""
+        <div style="display:flex;column-gap:4px;">
+            <a href="https://github.com/memoavatar/memo">
+                <img src='https://img.shields.io/badge/GitHub-Repo-blue'>
+            </a>
+            <a href="https://memoavatar.github.io/">
+                <img src='https://img.shields.io/badge/Project-Page-green'>
+            </a>
+            <a href="https://arxiv.org/abs/2412.04448">
+                <img src='https://img.shields.io/badge/ArXiv-Paper-red'>
+            </a>
+            <a href="https://huggingface.co/spaces/fffiloni/MEMO?duplicate=true">
+                <img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-sm.svg" alt="Duplicate this Space">
+            </a>
+            <a href="https://huggingface.co/fffiloni">
+                <img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/follow-me-on-HF-sm-dark.svg" alt="Follow me on HF">
+            </a>
+        </div>
+        """)
 
         with gr.Row():
             with gr.Column():
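
Why the new progress=gr.Progress(track_tqdm=True) parameter matters: Gradio injects a progress tracker into any event handler that declares a gr.Progress default, and track_tqdm=True mirrors every tqdm bar created during the call (such as the diffusion sampling loop) into the Space's UI. A minimal self-contained sketch of the same pattern, with a hypothetical slow_task standing in for the MEMO pipeline:

import time

import gradio as gr
from tqdm import tqdm

def slow_task(steps, progress=gr.Progress(track_tqdm=True)):
    # track_tqdm=True mirrors this tqdm bar into the Gradio UI;
    # no manual progress(...) calls are needed.
    for _ in tqdm(range(int(steps)), desc="Sampling"):
        time.sleep(0.05)  # stand-in for one diffusion step
    return f"Done after {int(steps)} steps"

demo = gr.Interface(fn=slow_task, inputs=gr.Number(value=20), outputs="text")

if __name__ == "__main__":
    demo.launch()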
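The uuid import exists to make output paths collision-free: each call now writes to memo-{seed}_{unique_id}.mp4, so concurrent users, or repeated runs with the same seed, never overwrite each other's result. A small sketch of the naming scheme in isolation (the ./outputs directory and memo- prefix follow the diff; the helper name is illustrative):

import os
import uuid

def unique_output_path(output_dir: str, seed: int, ext: str = ".mp4") -> str:
    # The seed keeps the name traceable to the run; uuid4 guarantees that
    # two requests with the same seed still get distinct files.
    os.makedirs(output_dir, exist_ok=True)
    return os.path.join(output_dir, f"memo-{seed}_{uuid.uuid4()}{ext}")

print(unique_output_path("./outputs", seed=42))
# e.g. ./outputs/memo-42_1b9d6bcd-bbfd-4b2d-9b5d-ab8dfbbd4bed.mp4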
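For context on the unchanged @torch.inference_mode() decorator around generate: it disables autograd recording for the whole call, which lowers memory use and per-op overhead during pure inference. A minimal sketch of the effect, independent of the MEMO pipeline:

import torch

@torch.inference_mode()
def infer(model: torch.nn.Module, x: torch.Tensor) -> torch.Tensor:
    # No computation graph is recorded here; outputs are inference tensors
    # and cannot participate in a later backward() pass.
    return model(x)

model = torch.nn.Linear(4, 2)
print(infer(model, torch.randn(1, 4)).requires_grad)  # False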