sketch-to-3d

Running on Zero

App Files Community

linoyts HF staff committed 23 days ago

Commit

7cb2038

verified ·

1 Parent(s): b7b00e2

add scribble controlnet

Browse files

Files changed (1) hide show

app.py +59 -3

app.py CHANGED Viewed

@@ -16,10 +16,41 @@ from trellis.representations import Gaussian, MeshExtractResult
 from trellis.utils import render_utils, postprocessing_utils
 MAX_SEED = np.iinfo(np.int32).max
 TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
 os.makedirs(TMP_DIR, exist_ok=True)
 def start_session(req: gr.Request):
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
@@ -31,7 +62,11 @@ def end_session(req: gr.Request):
     shutil.rmtree(user_dir)
-def preprocess_image(image: Image.Image) -> Image.Image:
     """
     Preprocess the input image.
@@ -41,6 +76,21 @@ def preprocess_image(image: Image.Image) -> Image.Image:
     Returns:
         Image.Image: The preprocessed image.
     """
     processed_image = pipeline.preprocess_image(image)
     return processed_image
@@ -268,7 +318,9 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
         with gr.Column():
             with gr.Tabs() as input_tabs:
                 with gr.Tab(label="Single Image", id=0) as single_image_input_tab:
-                    image_prompt = gr.Image(label="Image Prompt", format="png", image_mode="RGBA", type="pil", height=300)
                 with gr.Tab(label="Multiple Images", id=1) as multiimage_input_tab:
                     multiimage_prompt = gr.Gallery(label="Image Prompt", format="png", type="pil", height=300, columns=3)
                     gr.Markdown("""
@@ -352,7 +404,7 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
     image_prompt.upload(
         preprocess_image,
-        inputs=[image_prompt],
         outputs=[image_prompt],
     )
     multiimage_prompt.upload(
@@ -365,6 +417,10 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
         get_seed,
         inputs=[randomize_seed, seed],
         outputs=[seed],
     ).then(
         image_to_3d,
         inputs=[image_prompt, multiimage_prompt, is_multiimage, seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps, multiimage_algo],

 from trellis.utils import render_utils, postprocessing_utils
+import os
+import random
+import torch
+import torchvision.transforms.functional as TF
+from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
+from diffusers import DDIMScheduler, EulerAncestralDiscreteScheduler
+from controlnet_aux import PidiNetDetector, HEDdetector
+from diffusers.utils import load_image
+from huggingface_hub import HfApi
+from pathlib import Path
+from PIL import Image, ImageOps
+import torch
+import numpy as np
+import cv2
+import os
+import random
 MAX_SEED = np.iinfo(np.int32).max
 TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
 os.makedirs(TMP_DIR, exist_ok=True)
+vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
+pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
+    "sd-community/sdxl-flash",
+    controlnet=controlnet,
+    vae=vae,
+    torch_dtype=torch.float16,
+    # scheduler=eulera_scheduler,
+)
+pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
+pipe.to(device)
 def start_session(req: gr.Request):
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     shutil.rmtree(user_dir)
+def preprocess_image(image: Image.Image,
+                     prompt: str,
+                    num_steps: int = 25,
+                    guidance_scale: float = 5,
+                    controlnet_conditioning_scale: float = 1.0,) -> Image.Image:
     """
     Preprocess the input image.
     Returns:
         Image.Image: The preprocessed image.
     """
+    width, height  = image['composite'].size
+    ratio = np.sqrt(1024. * 1024. / (width * height))
+    new_width, new_height = int(width * ratio), int(height * ratio)
+    image = image['composite'].resize((new_width, new_height))
+    image = pipe(
+        prompt=prompt,
+        negative_prompt=negative_prompt,
+        image=image,
+        num_inference_steps=num_steps,
+        controlnet_conditioning_scale=controlnet_conditioning_scale,
+        guidance_scale=guidance_scale,
+        width=new_width,
+        height=new_height,).images[0]
     processed_image = pipeline.preprocess_image(image)
     return processed_image
         with gr.Column():
             with gr.Tabs() as input_tabs:
                 with gr.Tab(label="Single Image", id=0) as single_image_input_tab:
+                    #image_prompt = gr.Image(label="Image Prompt", format="png", image_mode="RGBA", type="pil", height=300)
+                    image_prompt = image = gr.ImageEditor(type="pil", image_mode="L", crop_size=(512, 512))
+                    prompt = gr.Textbox(label="Prompt")
                 with gr.Tab(label="Multiple Images", id=1) as multiimage_input_tab:
                     multiimage_prompt = gr.Gallery(label="Image Prompt", format="png", type="pil", height=300, columns=3)
                     gr.Markdown("""
     image_prompt.upload(
         preprocess_image,
+        inputs=[image_prompt, prompt],
         outputs=[image_prompt],
     )
     multiimage_prompt.upload(
         get_seed,
         inputs=[randomize_seed, seed],
         outputs=[seed],
+    ).then(
+        preprocess_image,
+        inputs=[image_prompt, prompt],
+        outputs=[image_prompt],
     ).then(
         image_to_3d,
         inputs=[image_prompt, multiimage_prompt, is_multiimage, seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps, multiimage_algo],