VoiceClone

Running on Zero

File size: 6,153 Bytes

import os
import uuid
import GPUtil
import gradio as gr
import psutil
import spaces
from videosys import CogVideoXConfig, CogVideoXPABConfig, VideoSysEngine
from transformers import pipeline

# Point GRADIO_TEMP_DIR at ".tmp_outputs" under the current working directory
# (the same folder name that generate() writes its videos into).
os.environ["GRADIO_TEMP_DIR"] = os.path.join(os.getcwd(), ".tmp_outputs")
# Ask the PyTorch CUDA allocator for expandable segments.
# NOTE(review): presumably to reduce memory fragmentation — confirm.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# Translator setup: a Korean-to-English translation pipeline
# (Helsinki-NLP/opus-mt-ko-en), created once at import time.
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")

def translate_to_english(text):
    """Return *text* in English, translating only when it contains Hangul.

    The input is scanned for a character in the precomposed Hangul syllable
    range (U+AC00 through U+D7A3). If none is found, the text is returned
    unchanged. Otherwise the whole text is handed to the module-level
    ``translator`` pipeline (``max_length=512``) and the
    ``translation_text`` of its first result is returned.
    """
    for ch in text:
        if '\uAC00' <= ch <= '\uD7A3':
            break
    else:
        # The scan finished without meeting a Hangul syllable.
        return text

    outputs = translator(text, max_length=512)
    return outputs[0]['translation_text']

def load_model(model_name, enable_video_sys=False, pab_threshold=None, pab_range=2):
    """Build a VideoSys engine for a CogVideoX model.

    Args:
        model_name: Model identifier forwarded to ``CogVideoXConfig``.
        enable_video_sys: Forwarded as ``enable_pab`` to ``CogVideoXConfig``.
        pab_threshold: Value forwarded as ``spatial_threshold`` to
            ``CogVideoXPABConfig``. When omitted or ``None`` it defaults to
            ``[100, 850]``.
        pab_range: Value forwarded as ``spatial_range`` to
            ``CogVideoXPABConfig``.

    Returns:
        A ``VideoSysEngine`` constructed from the resulting config.
    """
    # Build the default inside the call: a list literal in the signature is
    # created once and shared by every call, so a mutation by any consumer
    # would leak into later calls.
    if pab_threshold is None:
        pab_threshold = [100, 850]

    pab_config = CogVideoXPABConfig(spatial_threshold=pab_threshold, spatial_range=pab_range)
    config = CogVideoXConfig(model_name, enable_pab=enable_video_sys, pab_config=pab_config)
    return VideoSysEngine(config)

def generate(engine, prompt, num_inference_steps=50, guidance_scale=6.0):
    """Generate one video for *prompt* and return the path of the saved file.

    Args:
        engine: Object exposing ``generate(...)`` (whose result has a
            ``video`` sequence) and ``save_video(video, path)``.
        prompt: Text prompt; it is passed through ``translate_to_english``
            before being handed to the engine.
        num_inference_steps: Number of inference steps. Coerced to ``int``
            before use.
        guidance_scale: Forwarded unchanged to ``engine.generate``.

    Returns:
        Path of the ``.mp4`` file written under ``./.tmp_outputs``; the file
        name is a random UUID hex string.
    """
    translated_prompt = translate_to_english(prompt)

    # The UI's step field is a gr.Number without ``precision``, so a
    # non-integer value can arrive here; a step count must be integral.
    result = engine.generate(
        translated_prompt,
        num_inference_steps=int(num_inference_steps),
        guidance_scale=guidance_scale,
    )
    video = result.video[0]

    output_dir = "./.tmp_outputs"
    # Nothing visible here guarantees the folder exists before the first save.
    os.makedirs(output_dir, exist_ok=True)
    unique_filename = f"{uuid.uuid4().hex}.mp4"
    output_path = os.path.join(output_dir, unique_filename)

    engine.save_video(video, output_path)
    return output_path

@spaces.GPU()
def generate_vanilla(model_name, prompt, num_inference_steps, guidance_scale, progress=gr.Progress(track_tqdm=True)):
    """Generate a video with the plain engine (``load_model`` defaults).

    A new engine is loaded for ``model_name`` on every call and then handed
    to ``generate`` together with the prompt and sampling settings.

    ``progress`` is not used in the body.
    NOTE(review): presumably declared so Gradio mirrors tqdm progress in the
    UI — confirm.

    Returns:
        The video path returned by ``generate``.
    """
    return generate(
        load_model(model_name),
        prompt,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
    )

@spaces.GPU()
def generate_vs(
    model_name,
    prompt,
    num_inference_steps,
    guidance_scale,
    threshold_start,
    threshold_end,
    gap,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate a video with an engine loaded with ``enable_video_sys=True``.

    ``threshold_end`` and ``threshold_start`` are converted to ``int`` and
    passed, in that order, as the ``pab_threshold`` list; ``gap`` is converted
    to ``int`` and passed as ``pab_range``.

    ``progress`` is not used in the body.
    NOTE(review): presumably declared so Gradio mirrors tqdm progress in the
    UI — confirm.

    Returns:
        The video path returned by ``generate``.
    """
    pab_engine = load_model(
        model_name,
        enable_video_sys=True,
        # The list is ordered [end, start], like load_model's [100, 850] default.
        pab_threshold=[int(threshold_end), int(threshold_start)],
        pab_range=int(gap),
    )
    return generate(pab_engine, prompt, num_inference_steps, guidance_scale)

def get_server_status():
    """Collect a snapshot of host resource usage as display strings.

    CPU, memory and disk (root filesystem ``"/"``) percentages come from
    ``psutil``. GPU memory is read from the first GPU reported by
    ``GPUtil.getGPUs()``; that lookup is best-effort and falls back to a
    Korean placeholder message when no GPU is listed or the query fails.

    Returns:
        A dict with the keys ``"cpu"``, ``"memory"``, ``"disk"`` and
        ``"gpu_memory"``, each mapped to a formatted string.
    """
    cpu_percent = psutil.cpu_percent()
    memory = psutil.virtual_memory()
    disk = psutil.disk_usage("/")
    try:
        gpus = GPUtil.getGPUs()
        if gpus:
            # Only the first reported GPU is shown.
            gpu = gpus[0]
            gpu_memory = f"{gpu.memoryUsed}/{gpu.memoryTotal}MB ({gpu.memoryUtil*100:.1f}%)"
        else:
            # Message reads "GPU not found".
            gpu_memory = "GPU를 찾을 수 없음"
    except Exception:
        # Stay best-effort for ordinary failures, but unlike a bare ``except:``
        # let KeyboardInterrupt / SystemExit propagate.
        # Message reads "GPU information unavailable".
        gpu_memory = "GPU 정보를 사용할 수 없음"

    return {
        "cpu": f"{cpu_percent}%",
        "memory": f"{memory.percent}%",
        "disk": f"{disk.percent}%",
        "gpu_memory": gpu_memory,
    }

def update_server_status():
    """Return the current server status as a 4-tuple of display strings.

    The values are taken from ``get_server_status()`` in the order
    CPU, memory, disk, GPU memory.
    """
    snapshot = get_server_status()
    ordered_keys = ("cpu", "memory", "disk", "gpu_memory")
    return tuple(snapshot[key] for key in ordered_keys)

# Custom stylesheet handed to gr.Blocks below; it hides the page's <footer> element.
css = """
footer {
    visibility: hidden;
}
"""

# Build the Gradio UI. The first column holds the prompt, generation parameters,
# action buttons and the server-status panel; the second column holds the two
# video outputs. Labels are in Korean; English glosses are given in comments.
with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
    with gr.Row():
        with gr.Column():
            # Prompt box: label reads "Prompt (within 200 words)", default "Sunset over the sea."
            prompt = gr.Textbox(label="프롬프트 (200단어 이내)", value="바다 위의 일몰.", lines=3)

            with gr.Column():
                # Section heading: "Generation parameters".
                gr.Markdown("**생성 매개변수**<br>")
                with gr.Row():
                    # Model selector ("Model type"); the chosen string is passed to load_model().
                    model_name = gr.Radio(
                        ["THUDM/CogVideoX-2b", "THUDM/CogVideoX-5b"], label="모델 유형", value="THUDM/CogVideoX-2b"
                    )
                with gr.Row():
                    # "Inference steps" and "Guidance scale"; neither sets `precision`.
                    num_inference_steps = gr.Number(label="추론 단계", value=50)
                    guidance_scale = gr.Number(label="가이던스 스케일", value=6.0)
                with gr.Row():
                    # PAB settings, used only by the VideoSys button:
                    # "PAB broadcast range", "PAB start timestep", "PAB end timestep".
                    pab_range = gr.Number(
                        label="PAB 브로드캐스트 범위", value=2, precision=0, info="브로드캐스트 타임스텝 범위."
                    )
                    pab_threshold_start = gr.Number(label="PAB 시작 타임스텝", value=850, info="1000 단계에서 시작.")
                    pab_threshold_end = gr.Number(label="PAB 종료 타임스텝", value=100, info="0 단계에서 종료.")
                with gr.Row():
                    # Buttons: "Generate video with VideoSys (faster)" and "Generate video (original)".
                    generate_button_vs = gr.Button("⚡️ VideoSys로 비디오 생성 (더 빠름)")
                    generate_button = gr.Button("🎬 비디오 생성 (원본)")
                with gr.Column(elem_classes="server-status"):
                    # Server-status panel ("Server status"): CPU, memory, disk, GPU memory.
                    gr.Markdown("#### 서버 상태")

                    with gr.Row():
                        cpu_status = gr.Textbox(label="CPU", scale=1)
                        memory_status = gr.Textbox(label="메모리", scale=1)

                    with gr.Row():
                        disk_status = gr.Textbox(label="디스크", scale=1)
                        gpu_status = gr.Textbox(label="GPU 메모리", scale=1)

                    with gr.Row():
                        # "Refresh" button for the status panel.
                        refresh_button = gr.Button("새로고침")

        with gr.Column():
            # Output players: VideoSys result first, plain CogVideoX result second.
            with gr.Row():
                video_output_vs = gr.Video(label="VideoSys를 사용한 CogVideoX", width=720, height=480)
            with gr.Row():
                video_output = gr.Video(label="CogVideoX", width=720, height=480)

    # Event wiring. Both generation handlers share concurrency_id="gen" with
    # concurrency_limit=1.
    # NOTE(review): presumably so only one generation runs at a time — confirm.
    generate_button.click(
        generate_vanilla,
        inputs=[model_name, prompt, num_inference_steps, guidance_scale],
        outputs=[video_output],
        concurrency_id="gen",
        concurrency_limit=1,
    )

    # The input order matches generate_vs's positional parameters
    # (..., threshold_start, threshold_end, gap).
    generate_button_vs.click(
        generate_vs,
        inputs=[
            model_name,
            prompt,
            num_inference_steps,
            guidance_scale,
            pab_threshold_start,
            pab_threshold_end,
            pab_range,
        ],
        outputs=[video_output_vs],
        concurrency_id="gen",
        concurrency_limit=1,
    )

    # Fill the status boxes on demand and on page load.
    # NOTE(review): every=1 presumably re-runs the load handler each second —
    # confirm against the installed Gradio version.
    refresh_button.click(update_server_status, outputs=[cpu_status, memory_status, disk_status, gpu_status])
    demo.load(update_server_status, outputs=[cpu_status, memory_status, disk_status, gpu_status], every=1)

# Script entry point: configure the request queue (max_size=10,
# default_concurrency_limit=1) and then start the app.
if __name__ == "__main__":
    demo.queue(max_size=10, default_concurrency_limit=1)
    demo.launch()