Spaces:
Runtime error
Runtime error
File size: 2,422 Bytes
413d4d0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
import os
from typing import Union
import torch
from huggingface_hub import snapshot_download, hf_hub_download
from PIL import Image
from videogen_hub import MODEL_PATH
class I2VGenXL:
    """Wrapper around diffusers' I2VGenXL image-to-video pipeline (ali-vilab/i2vgen-xl)."""

    def __init__(self):
        """
        Download the ali-vilab/i2vgen-xl checkpoint from the Hugging Face Hub
        (cached under MODEL_PATH/i2vgen-xl) and build an I2VGenXLPipeline
        from it in float16.
        """
        from diffusers import I2VGenXLPipeline

        model_path = os.path.join(MODEL_PATH, "i2vgen-xl")
        model_path = snapshot_download(
            "ali-vilab/i2vgen-xl",
            local_dir=model_path,
            ignore_patterns=["*fp16*", "*png"],
        )
        # NOTE(review): the snapshot above *excludes* "*fp16*" files, yet
        # variant="fp16" below asks from_pretrained for .fp16 weight shards.
        # On a fresh download these two settings look contradictory — verify
        # which variant is actually intended.
        self.pipeline = I2VGenXLPipeline.from_pretrained(
            model_path, torch_dtype=torch.float16, variant="fp16"
        )

    def infer_one_video(
        self,
        input_image: "Image.Image",
        prompt: Union[str, None] = None,
        size: tuple = (320, 512),
        seconds: int = 2,
        fps: int = 8,
        seed: int = 42,
    ):
        """
        Generate one video conditioned on a first-frame image and an optional
        text prompt.

        Args:
            input_image (Image.Image): First frame the video is conditioned on.
            prompt (str, optional): Text prompt guiding generation; when None,
                the image alone drives the result. Defaults to None.
            size (sequence, optional): Output resolution as (height, width).
                Defaults to (320, 512).
            seconds (int, optional): Clip length in seconds. Defaults to 2.
            fps (int, optional): Frames per second; the pipeline renders
                seconds * fps frames in total. Defaults to 8.
            seed (int, optional): RNG seed for reproducible generation.
                Defaults to 42.

        Returns:
            The pipeline's output object (for diffusers' I2VGenXLPipeline this
            carries the generated frames), not a bare tensor — see the
            diffusers documentation for the exact type.
        """
        # A dedicated generator keeps seeding local to this call instead of
        # clobbering torch's global RNG state (torch.manual_seed would).
        generator = torch.Generator().manual_seed(seed)
        return self.pipeline(
            prompt=prompt,
            image=input_image,
            height=size[0],
            width=size[1],
            target_fps=fps,
            num_frames=seconds * fps,
            generator=generator,
        )
|