"""Gradio front-end for HunyuanVideo text-to-video generation.

On import, this script fetches the HunyuanVideo checkpoint files from the
Hugging Face Hub.  When run as a script it builds a Gradio Blocks UI and
serves it.
"""

import os
import random
import time
from datetime import datetime
from pathlib import Path

import gradio as gr
import spaces
import torch
from huggingface_hub import hf_hub_download
from loguru import logger

from hyvideo.utils.file_utils import save_videos_grid
from hyvideo.config import parse_args
from hyvideo.inference import HunyuanVideoSampler
from hyvideo.constants import NEGATIVE_PROMPT

_REPO_ID = "tencent/HunyuanVideo"
_MODEL_ROOT = "tencent_HunyuanVideo"

# (filename inside the Hub repo, local_dir handed to hf_hub_download).
# NOTE(review): hf_hub_download is documented to recreate the repo-relative
# path of `filename` underneath `local_dir`; the entries whose local_dir is
# already a sub-directory may therefore end up in a doubled sub-path.  The
# arguments are kept exactly as they were -- confirm against the directory
# layout HunyuanVideoSampler.from_pretrained expects before changing them.
_CHECKPOINT_FILES = (
    ("LICENSE", "tencent_HunyuanVideo"),
    ("Notice", "tencent_HunyuanVideo"),
    ("README.md", "tencent_HunyuanVideo"),
    ("config.json", "tencent_HunyuanVideo"),
    (
        "hunyuan-video-t2v-720p/transformers/mp_rank_00_model_states.pt",
        "tencent_HunyuanVideo/hunyuan-video-t2v-720p/transformers",
    ),
    (
        "hunyuan-video-t2v-720p/transformers/mp_rank_00_model_states_fp8.pt",
        "tencent_HunyuanVideo/hunyuan-video-t2v-720p/transformers",
    ),
    (
        "hunyuan-video-t2v-720p/transformers/mp_rank_00_model_states_fp8_map.pt",
        "tencent_HunyuanVideo/hunyuan-video-t2v-720p/transformers",
    ),
    (
        "hunyuan-video-t2v-720p/vae/config.json",
        "tencent_HunyuanVideo/hunyuan-video-t2v-720p/vae",
    ),
    (
        "hunyuan-video-t2v-720p/vae/pytorch_model.pt",
        "tencent_HunyuanVideo/hunyuan-video-t2v-720p/vae",
    ),
)

# Downloads run at import time, in the same order as the original script.
for _filename, _local_dir in _CHECKPOINT_FILES:
    hf_hub_download(repo_id=_REPO_ID, filename=_filename, local_dir=_local_dir)

# Banner shown in the UI when no CUDA device is visible.
_CPU_WARNING_HTML = """

⚠️To use Hunyuan Video, duplicate this space and set a GPU with 80 GB VRAM. You can't use Hunyuan Video directly here because this space runs on a CPU, which is not enough for Hunyuan Video. Please provide feedback if you have issues.

"""


def initialize_model(model_path):
    """Build a ``HunyuanVideoSampler`` from the checkpoints under *model_path*.

    Args:
        model_path: Directory that holds the downloaded model files.

    Returns:
        The sampler returned by ``HunyuanVideoSampler.from_pretrained``.

    Raises:
        ValueError: If *model_path* does not exist.
    """
    args = parse_args()
    models_root_path = Path(model_path)
    if not models_root_path.exists():
        raise ValueError(f"`models_root` not exists: {models_root_path}")

    return HunyuanVideoSampler.from_pretrained(models_root_path, args=args)


@spaces.GPU(duration=120)
def generate_video(
    model,
    prompt,
    resolution,
    video_length,
    seed,
    num_inference_steps,
    guidance_scale,
    flow_shift,
    embedded_guidance_scale
):
    """Generate one video with *model* and save it as an ``.mp4`` file.

    Args:
        model: Object exposing ``predict(...)`` (the sampler built by
            ``initialize_model``).
        prompt: Text prompt.
        resolution: ``"<width>x<height>"`` string, e.g. ``"832x624"``.
        video_length: Number of frames to generate.
        seed: Seed value; ``-1`` or ``None`` lets the model pick one.
        num_inference_steps: Forwarded to ``predict`` as ``infer_steps``.
        guidance_scale: Forwarded to ``predict``.
        flow_shift: Forwarded to ``predict``.
        embedded_guidance_scale: Forwarded to ``predict``.

    Returns:
        Path of the written video file, under ``./gradio_outputs``.
    """
    # The seed comes from a numeric UI field and may arrive as a float;
    # normalise it to an int (presumably what `predict` expects -- confirm
    # against HunyuanVideoSampler.predict).  -1 / empty means "random".
    if seed is None or int(seed) == -1:
        seed = None
    else:
        seed = int(seed)

    width, height = (int(part) for part in resolution.split("x"))
    negative_prompt = ""  # not applicable in the inference

    outputs = model.predict(
        prompt=prompt,
        height=height,
        width=width,
        video_length=video_length,
        seed=seed,
        negative_prompt=negative_prompt,
        infer_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        num_videos_per_prompt=1,
        flow_shift=flow_shift,
        batch_size=1,
        embedded_guidance_scale=embedded_guidance_scale
    )

    # Only the first sample is kept; unsqueeze(0) re-adds a leading dimension
    # before handing it to save_videos_grid.
    sample = outputs['samples'][0].unsqueeze(0)

    save_path = "./gradio_outputs"
    os.makedirs(save_path, exist_ok=True)

    time_flag = datetime.fromtimestamp(time.time()).strftime("%Y-%m-%d-%H:%M:%S")
    # File name: timestamp, the seed reported by the model, and the first
    # 100 characters of the prompt with '/' stripped so it stays one path
    # component.
    video_path = f"{save_path}/{time_flag}_seed{outputs['seeds'][0]}_{outputs['prompts'][0][:100].replace('/','')}.mp4"
    save_videos_grid(sample, video_path, fps=24)
    logger.info(f'Sample saved to: {video_path}')

    return video_path


def create_demo(model_path):
    """Load the model from *model_path* and build the Gradio Blocks UI.

    Args:
        model_path: Directory passed to ``initialize_model``.

    Returns:
        The assembled ``gr.Blocks`` app (not yet launched).
    """
    model = initialize_model(model_path)

    with gr.Blocks() as demo:
        # Warn visitors when the app is running without any CUDA device.
        if torch.cuda.device_count() == 0:
            with gr.Row():
                gr.HTML(_CPU_WARNING_HTML)

        gr.Markdown("# Hunyuan Video Generation")

        with gr.Row():
            with gr.Column():
                prompt = gr.Textbox(label="Prompt", value="A cat walks on the grass, realistic style.")
                with gr.Row():
                    resolution = gr.Dropdown(
                        choices=[
                            # 720p
                            ("1280x720 (16:9, 720p)", "1280x720"),
                            ("720x1280 (9:16, 720p)", "720x1280"),
                            ("1104x832 (4:3, 720p)", "1104x832"),
                            ("832x1104 (3:4, 720p)", "832x1104"),
                            ("960x960 (1:1, 720p)", "960x960"),
                            # 540p
                            ("960x544 (16:9, 540p)", "960x544"),
                            ("544x960 (9:16, 540p)", "544x960"),
                            ("832x624 (4:3, 540p)", "832x624"),
                            ("624x832 (3:4, 540p)", "624x832"),
                            ("720x720 (1:1, 540p)", "720x720"),
                        ],
                        value="832x624",
                        label="Resolution"
                    )
                    video_length = gr.Dropdown(
                        label="Video Length",
                        choices=[
                            ("2s(65f)", 65),
                            ("5s(129f)", 129),
                        ],
                        value=65,
                    )
                num_inference_steps = gr.Slider(1, 100, value=5, step=1, label="Number of Inference Steps")
                show_advanced = gr.Checkbox(label="Show Advanced Options", value=False)
                with gr.Row(visible=False) as advanced_row:
                    with gr.Column():
                        seed = gr.Number(value=-1, label="Seed (-1 for random)")
                        guidance_scale = gr.Slider(1.0, 20.0, value=1.0, step=0.5, label="Guidance Scale")
                        flow_shift = gr.Slider(0.0, 10.0, value=7.0, step=0.1, label="Flow Shift")
                        embedded_guidance_scale = gr.Slider(1.0, 20.0, value=6.0, step=0.5, label="Embedded Guidance Scale")
                # Toggle the advanced-options row with the checkbox.
                show_advanced.change(fn=lambda x: gr.Row(visible=x), inputs=[show_advanced], outputs=[advanced_row])
                generate_btn = gr.Button(value = "🚀 Generate Video", variant = "primary")

            with gr.Column():
                output = gr.Video(label = "Generated Video", autoplay = True)

        # The model is bound through the closure; the UI components supply
        # the remaining arguments in generate_video's parameter order.
        generate_btn.click(
            fn=lambda *inputs: generate_video(model, *inputs),
            inputs=[
                prompt,
                resolution,
                video_length,
                seed,
                num_inference_steps,
                guidance_scale,
                flow_shift,
                embedded_guidance_scale
            ],
            outputs=output
        )

    return demo


if __name__ == "__main__":
    os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
    demo = create_demo(_MODEL_ROOT)
    # NOTE(review): the meaning of queue()'s first positional argument
    # differs between Gradio major versions -- confirm 10 is bound to the
    # intended parameter for the installed Gradio.
    demo.queue(10).launch()