Spaces:

kadirnar
/

Video-Diffusion-WebUI

Runtime error

App Files Files Community

kadirnar committed on Mar 10, 2023

Commit

31f45e8

1 Parent(s): 54948a8

Upload 7 files

Browse files

Files changed (7) hide show

app.py +4 -4
inpaint_zoom/app/__init__.py +0 -0
inpaint_zoom/app/zoom_in_app.py +195 -0
inpaint_zoom/app/zoom_out_app.py +144 -0
inpaint_zoom/utils/__init__.py +0 -0
inpaint_zoom/utils/zoom_in_utils.py +69 -0
inpaint_zoom/utils/zoom_out_utils.py +45 -0

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
-from inpaint_zoom.zoom_out_app import stable_diffusion_text2img_app
 import gradio as gr
@@ -23,8 +23,8 @@ with app:
     with gr.Row():
         with gr.Column():
             with gr.Tab('Zoom Out'):
-                stable_diffusion_text2img_app()
             with gr.Tab('Zoom In'):
-                pass
 app.launch(debug=True)

+from inpaint_zoom.app.zoom_out_app import stable_diffusion_zoom_out_app
+from inpaint_zoom.app.zoom_in_app import stable_diffusion_zoom_in_app
 import gradio as gr
     with gr.Row():
         with gr.Column():
             with gr.Tab('Zoom Out'):
+                stable_diffusion_zoom_out_app()
             with gr.Tab('Zoom In'):
+                stable_diffusion_zoom_in_app()
 app.launch(debug=True)

inpaint_zoom/app/__init__.py ADDED Viewed

File without changes

inpaint_zoom/app/zoom_in_app.py ADDED Viewed

	@@ -0,0 +1,195 @@

+from inpaint_zoom.utils.zoom_in_utils import image_grid, shrink_and_paste_on_blank, dummy, write_video
+from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
+from PIL import Image
+import gradio as gr
+import numpy as np
+import torch
+import os
+# Expose only GPU 0 to CUDA for this process.
+os.environ["CUDA_VISIBLE_DEVICES"]="0"
+# Inpainting checkpoints offered in the model dropdown; the first entry is the default.
+stable_paint_model_list = [
+  "stabilityai/stable-diffusion-2-inpainting",
+  "runwayml/stable-diffusion-inpainting"
+]
+# Example prompts; the first entry pre-fills the prompt textbox.
+stable_paint_prompt_list = [
+        "Ancient underground architectural ruins of Hong Kong in a flooded apocalypse landscape of dead skyscrapers",
+        "A beautiful landscape of a mountain range with a lake in the foreground",
+]
+# Default negative prompt; the first word was truncated to "lurry" and is restored to "blurry".
+stable_paint_negative_prompt_list = [
+        "blurry, bad art, blurred, text, watermark",
+    ]
+def stable_diffusion_zoom_in(
+    model_id,
+    prompt,
+    negative_prompt,
+    guidance_scale,
+    num_inference_steps,
+    ):
+    """
+    Render a zoom video with a Stable Diffusion inpainting pipeline.
+
+    The frames are produced as a zoom-out sequence: each outpainting step
+    shrinks the previous image, inpaints the new border, and adds
+    interpolated crops between the two images. The frame list is then handed
+    to write_video with its default arguments.
+
+    :param model_id: model id passed to DiffusionPipeline.from_pretrained
+    :param prompt: text prompt used for every pipeline call
+    :param negative_prompt: negative prompt used for every pipeline call
+    :param guidance_scale: guidance scale forwarded to every pipeline call
+    :param num_inference_steps: step count forwarded to every pipeline call
+    :return: path of the written video file ("infinite_zoom_out.mp4")
+    """
+    # NOTE(review): the pipeline is loaded again on every call of this function.
+    pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16, revision="fp16")
+    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
+    pipe = pipe.to("cuda")
+    # dummy returns the images unchanged together with False.
+    pipe.safety_checker = dummy
+    pipe.enable_attention_slicing()
+    g_cuda = torch.Generator(device='cuda')
+    num_init_images = 2
+    seed = 9999
+    height = 512
+    width = height
+    # Start from a new RGBA canvas; 255 - alpha is used as the inpainting mask.
+    # Presumably the mask is all white here (a new canvas having zero alpha) - TODO confirm.
+    current_image = Image.new(mode="RGBA", size=(height, width))
+    mask_image = np.array(current_image)[:,:,3]
+    mask_image = Image.fromarray(255-mask_image).convert("RGB")
+    current_image = current_image.convert("RGB")
+    # Seeded generation of the candidate start images; [0] is the first field
+    # of the pipeline output, indexed below as a list of images.
+    init_images =  pipe(prompt=[prompt]*num_init_images,
+                        negative_prompt=[negative_prompt]*num_init_images,
+                        image=current_image,
+                        guidance_scale = guidance_scale,
+                        height = height,
+                        width = width,
+                        generator = g_cuda.manual_seed(seed),
+                        mask_image=mask_image,
+                        num_inference_steps=num_inference_steps)[0]
+    # NOTE(review): the grid returned by image_grid is discarded.
+    image_grid(init_images, rows=1, cols=num_init_images)
+    # 1-based choice of the start image, converted to a 0-based index below.
+    init_image_selected = 1 #@param
+    if num_init_images == 1:
+        init_image_selected = 0
+    else:
+        init_image_selected = init_image_selected - 1
+    num_outpainting_steps = 20 #@param
+    mask_width = 128 #@param
+    num_interpol_frames = 30 #@param
+    current_image = init_images[init_image_selected]
+    all_frames = []
+    all_frames.append(current_image)
+    for i in range(num_outpainting_steps):
+        print('Generating image: ' + str(i+1) + ' / ' + str(num_outpainting_steps))
+        # Keep the full-size previous image for the high-resolution paste below.
+        prev_image_fix = current_image
+        prev_image = shrink_and_paste_on_blank(current_image, mask_width)
+        current_image = prev_image
+        #create mask (black image with white mask_width width edges)
+        mask_image = np.array(current_image)[:,:,3]
+        mask_image = Image.fromarray(255-mask_image).convert("RGB")
+        #inpainting step
+        current_image = current_image.convert("RGB")
+        images = pipe(prompt=prompt,
+                        negative_prompt=negative_prompt,
+                        image=current_image,
+                        guidance_scale = guidance_scale,
+                        height = height,
+                        width = width,
+                        #this can make the whole thing deterministic but the output less exciting
+                        #generator = g_cuda.manual_seed(seed),
+                        mask_image=mask_image,
+                        num_inference_steps=num_inference_steps)[0]
+        current_image = images[0]
+        # Paste the shrunk previous image back over the result, using its own alpha as the paste mask.
+        current_image.paste(prev_image, mask=prev_image)
+        #interpolation steps between 2 inpainted images (=sequential zoom and crop)
+        for j in range(num_interpol_frames - 1):
+            interpol_image = current_image
+            # Crop margin per side; it decreases from just under mask_width
+            # towards 0 as j grows.
+            interpol_width = round(
+                (1- ( 1-2*mask_width/height )**( 1-(j+1)/num_interpol_frames ) )*height/2
+                )
+            interpol_image = interpol_image.crop((interpol_width,
+                                                interpol_width,
+                                                width - interpol_width,
+                                                height - interpol_width))
+            interpol_image = interpol_image.resize((height, width))
+            #paste the higher resolution previous image in the middle to avoid drop in quality caused by zooming
+            # Border width left around the previous image once the crop is scaled back up.
+            interpol_width2 = round(
+                ( 1 - (height-2*mask_width) / (height-2*interpol_width) ) / 2*height
+                )
+            prev_image_fix_crop = shrink_and_paste_on_blank(prev_image_fix, interpol_width2)
+            interpol_image.paste(prev_image_fix_crop, mask = prev_image_fix_crop)
+            all_frames.append(interpol_image)
+        all_frames.append(current_image)
+    video_file_name = "infinite_zoom_out"
+    fps = 30
+    save_path = video_file_name + ".mp4"
+    # write_video is called without its reversed argument, so its default applies.
+    write_video(save_path, all_frames, fps)
+    return save_path
+def stable_diffusion_zoom_in_app():
+    """
+    Build the Gradio controls of the zoom-in tab.
+
+    Creates the model dropdown, prompt boxes, advanced sliders, the trigger
+    button and the video output, then connects the button to
+    stable_diffusion_zoom_in. Returns nothing.
+    """
+    with gr.Blocks():
+        with gr.Row():
+            with gr.Column():
+                model_dropdown = gr.Dropdown(
+                    label='Text-Image Model Id',
+                    choices=stable_paint_model_list,
+                    value=stable_paint_model_list[0],
+                )
+                prompt_box = gr.Textbox(
+                    label='Prompt',
+                    lines=1,
+                    value=stable_paint_prompt_list[0],
+                )
+                negative_prompt_box = gr.Textbox(
+                    label='Negative Prompt',
+                    lines=1,
+                    value=stable_paint_negative_prompt_list[0],
+                )
+                with gr.Accordion("Advanced Options", open=False):
+                    guidance_slider = gr.Slider(
+                        label='Guidance Scale',
+                        minimum=0.1, maximum=15, step=0.1, value=7.5,
+                    )
+                    steps_slider = gr.Slider(
+                        label='Num Inference Step',
+                        minimum=1, maximum=100, step=1, value=50,
+                    )
+                generate_button = gr.Button(value='Generator')
+            with gr.Column():
+                video_output = gr.Video(label='Output')
+        # The order matches the positional parameters of stable_diffusion_zoom_in.
+        click_inputs = [
+            model_dropdown,
+            prompt_box,
+            negative_prompt_box,
+            guidance_slider,
+            steps_slider,
+        ]
+        generate_button.click(
+            fn=stable_diffusion_zoom_in,
+            inputs=click_inputs,
+            outputs=video_output,
+        )

inpaint_zoom/app/zoom_out_app.py ADDED Viewed

	@@ -0,0 +1,144 @@

+from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
+from inpaint_zoom.utils.zoom_out_utils import preprocess_image, preprocess_mask_image, write_video, dummy
+from PIL import Image
+import gradio as gr
+import torch
+import os
+# Expose only GPU 0 to CUDA for this process.
+os.environ["CUDA_VISIBLE_DEVICES"]="0"
+# Inpainting checkpoints offered in the model dropdown; the first entry is the default.
+stable_paint_model_list = [
+  "stabilityai/stable-diffusion-2-inpainting",
+  "runwayml/stable-diffusion-inpainting"
+]
+# Example prompts; the first entry pre-fills the prompt textbox.
+stable_paint_prompt_list = [
+        "Ancient underground architectural ruins of Hong Kong in a flooded apocalypse landscape of dead skyscrapers",
+        "A beautiful landscape of a mountain range with a lake in the foreground",
+]
+# Default negative prompt; the first word was truncated to "lurry" and is restored to "blurry".
+stable_paint_negative_prompt_list = [
+        "blurry, bad art, blurred, text, watermark",
+    ]
+def stable_diffusion_zoom_out(
+  model_id,
+  original_prompt,
+  negative_prompt,
+  guidance_scale,
+  num_inference_steps,
+  step_size,
+  num_frames,
+    ):
+    """
+    Render a zoom-out video with a Stable Diffusion inpainting pipeline.
+
+    A first 512x512 image is generated on a blank canvas. Each further frame
+    shrinks the previous frame by step_size pixels per side, inpaints the
+    freed border, and pastes the shrunk previous frame back on top.
+
+    :param model_id: model id passed to DiffusionPipeline.from_pretrained
+    :param original_prompt: text prompt used for every pipeline call
+    :param negative_prompt: negative prompt used for every pipeline call
+    :param guidance_scale: guidance scale forwarded to every pipeline call
+    :param num_inference_steps: step count forwarded to every pipeline call
+    :param step_size: pixels to shrink from each side per frame
+    :param num_frames: number of outpainted frames after the first image
+    :return: path of the written video file ("output.mp4")
+    """
+    image_size = 512
+    # These values come from UI sliders and are used in range() and slicing,
+    # so make sure they are integers even if the UI delivers floats.
+    num_inference_steps = int(num_inference_steps)
+    step_size = int(step_size)
+    num_frames = int(num_frames)
+    pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+    pipe.set_use_memory_efficient_attention_xformers(True)
+    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
+    pipe = pipe.to("cuda")
+    # dummy returns the images unchanged together with False.
+    pipe.safety_checker = dummy
+    new_image = Image.new(mode="RGBA", size=(image_size, image_size))
+    current_image, mask_image = preprocess_mask_image(new_image)
+    current_image = pipe(
+      prompt=[original_prompt],
+      negative_prompt=[negative_prompt],
+      image=current_image,
+      mask_image=mask_image,
+      num_inference_steps=num_inference_steps,
+      guidance_scale=guidance_scale
+    ).images[0]
+    all_frames = [current_image]
+    for _ in range(num_frames):
+        prev_image = preprocess_image(current_image, step_size, image_size)
+        current_image, mask_image = preprocess_mask_image(prev_image)
+        # guidance_scale is forwarded here as well; previously only the first
+        # image honoured the user's setting.
+        current_image = pipe(
+          prompt=[original_prompt],
+          negative_prompt=[negative_prompt],
+          image=current_image,
+          mask_image=mask_image,
+          num_inference_steps=num_inference_steps,
+          guidance_scale=guidance_scale
+        ).images[0]
+        # Paste the shrunk previous frame back, using its own alpha as the paste mask.
+        current_image.paste(prev_image, mask=prev_image)
+        all_frames.append(current_image)
+    save_path = "output.mp4"
+    write_video(save_path, all_frames, fps=30)
+    return save_path
+def stable_diffusion_zoom_out_app():
+    """
+    Build the Gradio controls of the zoom-out tab.
+
+    Creates the model dropdown, prompt boxes, advanced sliders, the trigger
+    button and the video output, then connects the button to
+    stable_diffusion_zoom_out. Returns nothing.
+    """
+    with gr.Blocks():
+        with gr.Row():
+            with gr.Column():
+                model_dropdown = gr.Dropdown(
+                    label='Text-Image Model Id',
+                    choices=stable_paint_model_list,
+                    value=stable_paint_model_list[0],
+                )
+                prompt_box = gr.Textbox(
+                    label='Prompt',
+                    lines=1,
+                    value=stable_paint_prompt_list[0],
+                )
+                negative_prompt_box = gr.Textbox(
+                    label='Negative Prompt',
+                    lines=1,
+                    value=stable_paint_negative_prompt_list[0],
+                )
+                with gr.Accordion("Advanced Options", open=False):
+                    guidance_slider = gr.Slider(
+                        label='Guidance Scale',
+                        minimum=0.1, maximum=15, step=0.1, value=7.5,
+                    )
+                    steps_slider = gr.Slider(
+                        label='Num Inference Step',
+                        minimum=1, maximum=100, step=1, value=50,
+                    )
+                    step_size_slider = gr.Slider(
+                        label='Step Size',
+                        minimum=1, maximum=100, step=1, value=10,
+                    )
+                    frames_slider = gr.Slider(
+                        label='Frames',
+                        minimum=1, maximum=100, step=1, value=10,
+                    )
+                generate_button = gr.Button(value='Generator')
+            with gr.Column():
+                video_output = gr.Video(label="Output Video")
+        # The order matches the positional parameters of stable_diffusion_zoom_out.
+        click_inputs = [
+            model_dropdown,
+            prompt_box,
+            negative_prompt_box,
+            guidance_slider,
+            steps_slider,
+            step_size_slider,
+            frames_slider,
+        ]
+        generate_button.click(
+            fn=stable_diffusion_zoom_out,
+            inputs=click_inputs,
+            outputs=video_output,
+        )

inpaint_zoom/utils/__init__.py ADDED Viewed

File without changes

inpaint_zoom/utils/zoom_in_utils.py ADDED Viewed

	@@ -0,0 +1,69 @@

+from PIL import Image
+import numpy as np
+import cv2
+import os
+# Expose only GPU 0 to CUDA for this process.
+# NOTE(review): nothing in this module calls CUDA itself; presumably kept to mirror the app modules - confirm.
+os.environ["CUDA_VISIBLE_DEVICES"]="0"
+def write_video(file_path, frames, fps, reversed = True):
+  """
+  Writes frames to an mp4 video file
+  :param file_path: Path to output video, must end with .mp4
+  :param frames: List of PIL.Image objects; it is not modified
+  :param fps: Desired frame rate
+  :param reversed: if order of images to be reversed (default = True)
+  :raises ValueError: if frames is empty
+  """
+  # Work on a copy so the caller's list keeps its order
+  # (frames.reverse() used to flip it in place).
+  ordered_frames = list(frames)
+  if not ordered_frames:
+    raise ValueError("frames must contain at least one image")
+  if reversed:
+    ordered_frames.reverse()
+  # The first frame decides the video dimensions.
+  w, h = ordered_frames[0].size
+  fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
+  #fourcc = cv2.VideoWriter_fourcc(*'avc1')
+  writer = cv2.VideoWriter(file_path, fourcc, fps, (w, h))
+  try:
+    for frame in ordered_frames:
+      # PIL delivers RGB, OpenCV expects BGR channel order.
+      np_frame = np.array(frame.convert('RGB'))
+      cv_frame = cv2.cvtColor(np_frame, cv2.COLOR_RGB2BGR)
+      writer.write(cv_frame)
+  finally:
+    # Release the writer even if a frame fails to convert.
+    writer.release()
+def image_grid(imgs, rows, cols):
+  """
+  Arranges images on a single RGB canvas, filling it row by row
+  :param imgs: List of PIL.Image objects; the first one sets the cell size
+  :param rows: Number of grid rows
+  :param cols: Number of grid columns
+  :return: PIL.Image of size (cols*w, rows*h)
+  :raises ValueError: if imgs is empty or its length is not rows*cols
+  """
+  # Explicit check instead of assert, which is stripped under python -O.
+  if not imgs or len(imgs) != rows*cols:
+    raise ValueError("expected rows*cols images (at least one)")
+  w, h = imgs[0].size
+  grid = Image.new('RGB', size=(cols*w, rows*h))
+  for i, img in enumerate(imgs):
+    row, col = divmod(i, cols)
+    grid.paste(img, box=(col*w, row*h))
+  return grid
+def shrink_and_paste_on_blank(current_image, mask_width):
+  """
+  Decreases size of current_image by mask_width pixels from each side,
+  then adds a mask_width width transparent frame,
+  so that the image the function returns is the same size as the input.
+  :param current_image: input image to transform
+  :param mask_width: width in pixels to shrink from each side
+  :return: RGBA PIL.Image with the same size as current_image
+  """
+  height = current_image.height
+  width = current_image.width
+  #shrink down by mask_width
+  # PIL sizes are (width, height); passing (height, width) only worked for
+  # square images and broke the array assignment below otherwise.
+  inner_size = (width-2*mask_width, height-2*mask_width)
+  prev_image = np.array(current_image.resize(inner_size).convert("RGBA"))
+  #create blank canvas: black pixels with alpha 1 (almost fully transparent)
+  blank_image = np.zeros_like(np.array(current_image.convert("RGBA")))
+  blank_image[:,:,3] = 1
+  #paste shrunk image onto blank; numpy indexes rows (height) first
+  blank_image[mask_width:height-mask_width,mask_width:width-mask_width,:] = prev_image
+  return Image.fromarray(blank_image)
+def dummy(images, **kwargs):
+    """
+    Pass-through used in place of a pipeline safety checker.
+    :param images: value handed back unchanged
+    :param kwargs: accepted and ignored
+    :return: tuple (images, False)
+    """
+    flagged = False
+    return images, flagged

inpaint_zoom/utils/zoom_out_utils.py ADDED Viewed

	@@ -0,0 +1,45 @@

+import numpy as np
+import cv2
+from PIL import Image
+def write_video(file_path, frames, fps):
+    """
+    Writes frames to an mp4 video file
+    :param file_path: Path to output video, must end with .mp4
+    :param frames: List of PIL.Image objects
+    :param fps: Desired frame rate
+    :raises ValueError: if frames is empty
+    """
+    if not frames:
+        raise ValueError("frames must contain at least one image")
+    # The first frame decides the video dimensions.
+    w, h = frames[0].size
+    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
+    writer = cv2.VideoWriter(file_path, fourcc, fps, (w, h))
+    try:
+        for frame in frames:
+            # PIL delivers RGB, OpenCV expects BGR channel order.
+            np_frame = np.array(frame.convert('RGB'))
+            cv_frame = cv2.cvtColor(np_frame, cv2.COLOR_RGB2BGR)
+            writer.write(cv_frame)
+    finally:
+        # Release the writer even if a frame fails to convert.
+        writer.release()
+def dummy(images, **kwargs):
+    """
+    Pass-through used in place of a pipeline safety checker.
+    :param images: value handed back unchanged
+    :param kwargs: accepted and ignored
+    :return: tuple (images, False)
+    """
+    flagged = False
+    return images, flagged
+def preprocess_image(current_image, steps, image_size):
+    """
+    Shrinks current_image and centres it on a blank canvas of the original size
+    :param current_image: PIL.Image to shrink
+    :param steps: Border width in pixels left free on each side
+    :param image_size: Edge length used for the resize and paste arithmetic
+    :return: RGBA PIL.Image; border pixels are black with alpha 1
+    """
+    inner_size = image_size - 2*steps
+    # All-zero RGBA canvas shaped like the input, with alpha set to 1 everywhere.
+    canvas = np.zeros_like(np.array(current_image.convert("RGBA")))
+    canvas[:, :, 3] = 1
+    shrunk = current_image.resize((inner_size, inner_size)).convert("RGBA")
+    # Overwrite the centre (all four channels) with the shrunk image.
+    canvas[steps:image_size-steps, steps:image_size-steps, :] = np.array(shrunk)
+    return Image.fromarray(canvas)
+def preprocess_mask_image(current_image):
+    """
+    Splits an image with an alpha channel into an RGB image and an RGB mask
+    :param current_image: PIL.Image whose fourth channel is read as alpha
+    :return: tuple (RGB image, mask image built from 255 - alpha)
+    """
+    # Channel index 3 is read as alpha; the input is assumed to be RGBA.
+    alpha_channel = np.array(current_image)[:, :, 3]
+    inverted_alpha = Image.fromarray(255 - alpha_channel)
+    return current_image.convert("RGB"), inverted_alpha.convert("RGB")