# ldm3d-gaudi / app_main.py
# Repository page header (not part of the program):
#   estellea's picture · Update app_main.py · 016532e verified
from optimum.habana.diffusers import GaudiDDIMScheduler, GaudiStableDiffusionLDM3DPipeline
import gradio as gr
import torch
from PIL import Image
import base64
from io import BytesIO
from tempfile import NamedTemporaryFile
from pathlib import Path
# Target device and checkpoint used by the demo.
device = "hpu"
model_name = "Intel/ldm3d-pano"

# Make sure the local "tmp" directory exists before anything is written to it.
Path("tmp").mkdir(exist_ok=True)
print(f"Device is {device}")

# Load the scheduler from the checkpoint's "scheduler" subfolder, then build
# the LDM3D pipeline around it and move the pipeline to the target device.
scheduler = GaudiDDIMScheduler.from_pretrained(
    model_name,
    subfolder="scheduler",
)
pipe = GaudiStableDiffusionLDM3DPipeline.from_pretrained(
    model_name,
    scheduler=scheduler,
    use_habana=True,
    use_hpu_graphs=True,
    gaudi_config="Habana/stable-diffusion",
)
pipe.to(device)
def get_iframe(rgb_path: str, depth_path: str, viewer_mode: str = "6DOF"):
    """Build the ``<iframe>`` markup that embeds the 3D viewer page.

    The two image locations are passed to the viewer page through the
    ``data-rgb`` and ``data-depth`` attributes, each written as
    ``/file=<path>``.

    Args:
        rgb_path: Path of the saved RGB image.
        depth_path: Path of the saved depth image.
        viewer_mode: ``"6DOF"`` selects ``static/three6dof.html``; any other
            value selects ``static/depthmap.html``.

    Returns:
        The iframe element as an HTML string.
    """
    if viewer_mode == "6DOF":
        viewer_src = "file=static/three6dof.html"
    else:
        viewer_src = "file=static/depthmap.html"

    rgb_url = f"/file={rgb_path}"
    depth_url = f"/file={depth_path}"

    # Both viewer pages take the same attributes; only the page differs.
    return (
        f'<iframe src="{viewer_src}" width="100%" height="500px" '
        f'data-rgb="{rgb_url}" data-depth="{depth_url}"></iframe>'
    )
def _save_png(image) -> str:
    """Save *image* to a new ``.png`` file inside ``tmp/`` and return its path."""
    # delete=False: the path is returned to the caller and used after this
    # function has exited, so the file must not vanish when the handle closes.
    with NamedTemporaryFile(suffix=".png", delete=False, dir="tmp") as tmp_file:
        image.save(tmp_file.name)
    return tmp_file.name


def predict(
    prompt: str,
    negative_prompt: str,
    guidance_scale: float = 5.0,
    seed: int = 0,
    randomize_seed: bool = True,
):
    """Generate an RGB + depth image pair for *prompt* and the viewer markup.

    Args:
        prompt: Text prompt passed to the pipeline.
        negative_prompt: Negative prompt passed to the pipeline.
        guidance_scale: Guidance scale passed to the pipeline.
        seed: Seed to use when ``randomize_seed`` is false.
        randomize_seed: When true, ignore ``seed`` and draw a fresh
            non-deterministic seed for this call.

    Returns:
        A 4-tuple ``(rgb_path, depth_path, used_seed, iframe)``: the paths of
        the two saved PNG files, the seed the generator was actually
        initialised with, and the HTML produced by ``get_iframe``.
    """
    if randomize_seed:
        # A freshly constructed torch.Generator always starts from the same
        # default seed, so it must be re-seeded explicitly to get a different
        # result on each call.
        generator = torch.Generator()
        generator.seed()
    else:
        # int(): UI widgets may deliver the slider value as a float.
        generator = torch.manual_seed(int(seed))

    # Read the seed *before* sampling and without side effects.
    # Generator.seed() would re-seed the generator and return an unrelated
    # value, so it must not be used to report the seed of this run.
    used_seed = generator.initial_seed()

    output = pipe(
        prompt,
        width=1024,
        height=512,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        generator=generator,
        num_inference_steps=50,
    )  # type: ignore
    rgb_image, depth_image = output.rgb[0], output.depth[0]  # type: ignore

    rgb_path = _save_png(rgb_image)
    depth_path = _save_png(depth_image)

    iframe = get_iframe(rgb_path, depth_path)
    return rgb_path, depth_path, used_seed, iframe
# Gradio UI: controls on the left, 3D viewer and generated images on the right.
# NOTE(review): the source had lost its indentation; the nesting below is
# reconstructed from the order of the components - confirm against the
# deployed layout.
with gr.Blocks() as block:
    # The markdown text is kept flush-left so the rendered content is unchanged.
    gr.Markdown(
        """
## LDM3d Demo
[Model card](https://huggingface.co/Intel/ldm3d-pano)
[Diffusers docs](https://huggingface.co/docs/diffusers/main/en/api/pipelines/stable_diffusion/ldm3d_diffusion)
For better results, specify "360 view of" or "panoramic view of" in the prompt
"""
    )

    with gr.Row():
        # Left column: generation controls.
        with gr.Column(scale=1):
            prompt = gr.Textbox(label="Prompt")
            negative_prompt = gr.Textbox(label="Negative Prompt")
            guidance_scale = gr.Slider(
                label="Guidance Scale",
                minimum=0,
                maximum=10,
                step=0.1,
                value=5.0,
            )
            randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=2**64 - 1,
                step=1,
            )
            generated_seed = gr.Number(label="Generated Seed")
            markdown = gr.Markdown(label="Output Box")
            with gr.Row():
                new_btn = gr.Button("New Image")

        # Right column: viewer iframe plus the two generated images.
        with gr.Column(scale=2):
            html = gr.HTML(height='50%')
            with gr.Row():
                rgb = gr.Image(label="RGB Image", type="filepath")
                depth = gr.Image(label="Depth Image", type="filepath")

    # The example runner and the button feed predict() the same components
    # and route its four return values to the same outputs.
    predict_inputs = [prompt, negative_prompt, guidance_scale, seed, randomize_seed]
    predict_outputs = [rgb, depth, generated_seed, html]

    gr.Examples(
        examples=[["360 view of a large bedroom", "", 7.0, 42, False]],
        inputs=predict_inputs,
        outputs=predict_outputs,
        fn=predict,
        cache_examples=True,
    )
    new_btn.click(
        fn=predict,
        inputs=predict_inputs,
        outputs=predict_outputs,
    )

block.launch()