import os # os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID' # os.environ['CUDA_VISIBLE_DEVICES'] = '2' # os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "caching_allocator" import gradio as gr import numpy as np from models import make_inpainting import utils from transformers import MaskFormerImageProcessor, MaskFormerForInstanceSegmentation from PIL import Image import requests from transformers import pipeline import torch import random import io import base64 import json from diffusers import DiffusionPipeline from diffusers import StableDiffusionLatentUpscalePipeline, StableDiffusionPipeline from diffusers import StableDiffusionUpscalePipeline from diffusers import LDMSuperResolutionPipeline import cv2 import onnxruntime from split_image import split def removeFurniture(input_img1, input_img2, positive_prompt, negative_prompt, num_of_images, resolution ): print("removeFurniture") HEIGHT = resolution WIDTH = resolution input_img1 = input_img1.resize((resolution, resolution)) input_img2 = input_img2.resize((resolution, resolution)) canvas_mask = np.array(input_img2) mask = utils.get_mask(canvas_mask) print(input_img1, mask, positive_prompt, negative_prompt) retList= make_inpainting(positive_prompt=positive_prompt, image=input_img1, mask_image=mask, negative_prompt=negative_prompt, num_of_images=num_of_images, resolution=resolution ) # add the rest up to 10 while (len(retList)<10): retList.append(None) return retList def imageToString(img): output = io.BytesIO() img.save(output, format="png") return output.getvalue() def segmentation(img): print("segmentation") # semantic_segmentation = pipeline("image-segmentation", "nvidia/segformer-b1-finetuned-cityscapes-1024-1024") pipe = pipeline("image-segmentation", "facebook/maskformer-swin-large-ade") results = pipe(img) for p in results: p['mask'] = utils.image_to_byte_array(p['mask']) p['mask'] = base64.b64encode(p['mask']).decode("utf-8") #print(results) return json.dumps(results) def upscale1(image, prompt): device = "cuda" if torch.cuda.is_available() else "cpu" print("upscale1", device, image, prompt) # image.thumbnail((512, 512)) # print("resize",image) torch.backends.cuda.matmul.allow_tf32 = True pipe = StableDiffusionUpscalePipeline.from_pretrained("stabilityai/stable-diffusion-x4-upscaler", torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, use_safetensors=True) # pipe = StableDiffusionLatentUpscalePipeline.from_pretrained("stabilityai/sd-x2-latent-upscaler", torch_dtype=torch.float16) pipe = pipe.to(device) pipe.enable_attention_slicing() pipe.enable_xformers_memory_efficient_attention() # pipe.enable_xformers_memory_efficient_attention(attention_op=xformers.ops.MemoryEfficientAttentionFlashAttentionOp) # Workaround for not accepting attention shape using VAE for Flash Attention pipe.vae.enable_xformers_memory_efficient_attention() ret = pipe(prompt=prompt, image=image, num_inference_steps=10, guidance_scale=0) print("ret",ret) upscaled_image = ret.images[0] print("up",upscaled_image) return upscaled_image def upscale2(image, prompt): print("upscale2",image,prompt) device = "cuda" if torch.cuda.is_available() else "cpu" print("device",device) pipe = LDMSuperResolutionPipeline.from_pretrained("CompVis/ldm-super-resolution-4x-openimages", torch_dtype=torch.float16) pipe = pipe.to(device) pipe.enable_attention_slicing() pipe.enable_xformers_memory_efficient_attention(attention_op=xformers.ops.MemoryEfficientAttentionFlashAttentionOp) # Workaround for not accepting attention shape using VAE for Flash Attention pipe.vae.enable_xformers_memory_efficient_attention(attention_op=None) upscaled_image = pipe(image, num_inference_steps=10, eta=1).images[0] return upscaled_image def convert_pil_to_cv2(image): # pil_image = image.convert("RGB") open_cv_image = np.array(image) # RGB to BGR open_cv_image = open_cv_image[:, :, ::-1].copy() return open_cv_image def inference(model_path: str, img_array: np.array) -> np.array: options = onnxruntime.SessionOptions() options.intra_op_num_threads = 1 options.inter_op_num_threads = 1 ort_session = onnxruntime.InferenceSession(model_path, options) ort_inputs = {ort_session.get_inputs()[0].name: img_array} ort_outs = ort_session.run(None, ort_inputs) return ort_outs[0] def post_process(img: np.array) -> np.array: # 1, C, H, W -> C, H, W img = np.squeeze(img) # C, H, W -> H, W, C img = np.transpose(img, (1, 2, 0))[:, :, ::-1].astype(np.uint8) return img def pre_process(img: np.array) -> np.array: # H, W, C -> C, H, W img = np.transpose(img[:, :, 0:3], (2, 0, 1)) # C, H, W -> 1, C, H, W img = np.expand_dims(img, axis=0).astype(np.float32) return img def upscale3(image): print("upscale3",image) model_path = f"up_models/modelx4.ort" img = convert_pil_to_cv2(image) # if img.ndim == 2: # print("upscale3","img.ndim == 2") # img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) # if img.shape[2] == 4: # print("upscale3","img.shape[2] == 4") # alpha = img[:, :, 3] # GRAY # alpha = cv2.cvtColor(alpha, cv2.COLOR_GRAY2BGR) # BGR # alpha_output = post_process(inference(model_path, pre_process(alpha))) # BGR # alpha_output = cv2.cvtColor(alpha_output, cv2.COLOR_BGR2GRAY) # GRAY # img = img[:, :, 0:3] # BGR # image_output = post_process(inference(model_path, pre_process(img))) # BGR # image_output = cv2.cvtColor(image_output, cv2.COLOR_BGR2BGRA) # BGRA # image_output[:, :, 3] = alpha_output # print("upscale3","img.shape[2] == 3") image_output = post_process(inference(model_path, pre_process(img))) # BGR return image_output def split_image(im, rows, cols, should_square, should_quiet=False): im_width, im_height = im.size row_width = int(im_width / cols) row_height = int(im_height / rows) name = "image" ext = ".png" name = os.path.basename(name) images = [] if should_square: min_dimension = min(im_width, im_height) max_dimension = max(im_width, im_height) if not should_quiet: print("Resizing image to a square...") print("Determining background color...") bg_color = split.determine_bg_color(im) if not should_quiet: print("Background color is... " + str(bg_color)) im_r = Image.new("RGBA" if ext == "png" else "RGB", (max_dimension, max_dimension), bg_color) offset = int((max_dimension - min_dimension) / 2) if im_width > im_height: im_r.paste(im, (0, offset)) else: im_r.paste(im, (offset, 0)) im = im_r row_width = int(max_dimension / cols) row_height = int(max_dimension / rows) n = 0 for i in range(0, rows): for j in range(0, cols): box = (j * row_width, i * row_height, j * row_width + row_width, i * row_height + row_height) outp = im.crop(box) outp_path = name + "_" + str(n) + ext if not should_quiet: print("Exporting image tile: " + outp_path) images.append(outp) n += 1 return [img for img in images] def upscale_image(img, rows, up_factor, cols, seed, prompt, negative_prompt, xformers, cpu_offload, attention_slicing, enable_custom_sliders=False, guidance=7, iterations=50): if up_factor==2: model_id = "stabilityai/sd-x2-latent-upscaler" try: pipeline = StableDiffusionLatentUpscalePipeline.from_pretrained(model_id, torch_dtype=torch.float16) except: pipeline = StableDiffusionLatentUpscalePipeline.from_pretrained(model_id, torch_dtype=torch.float16, local_files_only=True) if up_factor==4: model_id = "stabilityai/stable-diffusion-x4-upscaler" try: pipeline = StableDiffusionUpscalePipeline.from_pretrained(model_id, torch_dtype=torch.float16) except: pipeline = StableDiffusionUpscalePipeline.from_pretrained(model_id, torch_dtype=torch.float16, local_files_only=True) pipeline = pipeline.to("cuda") if xformers: pipeline.enable_xformers_memory_efficient_attention() else: pipeline.disable_xformers_memory_efficient_attention() if cpu_offload: try: pipeline.enable_sequential_cpu_offload() except: pass if attention_slicing: pipeline.enable_attention_slicing() else: pipeline.disable_attention_slicing() img = Image.fromarray(img) # load model and scheduler if seed==-1: generator = torch.manual_seed(random.randint(0, 9999999)) else: generator = torch.manual_seed(seed) original_width, original_height = img.size max_dimension = max(original_width, original_height) tiles = split_image(img, rows, cols, True, False) ups_tiles = [] i = 0 for x in tiles: i=i+1 if enable_custom_sliders: ups_tile = pipeline(prompt=prompt,negative_prompt=negative_prompt,guidance_scale=guidance, num_inference_steps=iterations, image=x.convert("RGB"),generator=generator).images[0] else: ups_tile = pipeline(prompt=prompt,negative_prompt=negative_prompt, image=x.convert("RGB"),generator=generator).images[0] ups_tiles.append(ups_tile) # Determine the size of the merged upscaled image total_width = 0 total_height = 0 side = 0 for ups_tile in ups_tiles: side = ups_tile.width break for x in tiles: tsize = x.width break ups_times = abs(side/tsize) new_size = (max_dimension * ups_times, max_dimension * ups_times) total_width = cols*side total_height = rows*side # Create a blank image with the calculated size merged_image = Image.new("RGB", (total_width, total_height)) # Paste each upscaled tile into the blank image current_width = 0 current_height = 0 maximum_width = cols*side for ups_tile in ups_tiles: merged_image.paste(ups_tile, (current_width, current_height)) current_width += ups_tile.width if current_width>=maximum_width: current_width = 0 current_height = current_height+side # Using the center of the image as pivot, crop the image to the original dimension times four crop_left = (new_size[0] - original_width * ups_times) // 2 crop_upper = (new_size[1] - original_height * ups_times) // 2 crop_right = crop_left + original_width * ups_times crop_lower = crop_upper + original_height * ups_times final_img = merged_image.crop((crop_left, crop_upper, crop_right, crop_lower)) # The resulting image should be identical to the original image in proportions / aspect ratio, with no loss of elements. # Save the merged image return final_img def upscale( image, prompt, negative_prompt, rows, up_factor, guidance, iterations, xformers_input, cpu_offload_input, attention_slicing_input): print("upscale", prompt, negative_prompt, rows, up_factor, guidance, iterations, xformers_input, cpu_offload_input, attention_slicing_input) return upscale_image(img=image, rows=rows,cols=rows, up_factor=up_factor, seed=-1, prompt=prompt, negative_prompt=negative_prompt, enable_custom_sliders=True, xformers=xformers_input, cpu_offload=cpu_offload_input, attention_slicing=attention_slicing_input, guidance=guidance, iterations=iterations) # modes = { # '1': '1', # 'img2img': 'Image to Image', # 'inpaint': 'Inpainting', # 'upscale4x': 'Upscale 4x', # } with gr.Blocks() as app: gr.HTML( f""" Running on {"GPU 🔥" if torch.cuda.is_available() else "CPU 🥶"} """ ) with gr.Row(): with gr.Column(): gr.Button("FurnituRemove").click(removeFurniture, inputs=[gr.Image(label="img", type="pil"), gr.Image(label="mask", type="pil"), gr.Textbox(label="positive_prompt",value="empty room"), gr.Textbox(label="negative_prompt",value=""), gr.Number(label="num_of_images",value=2), gr.Number(label="resolution",value=512) ], outputs=[ gr.Image(), gr.Image(), gr.Image(), gr.Image(), gr.Image(), gr.Image(), gr.Image(), gr.Image(), gr.Image(), gr.Image()]) with gr.Column(): gr.Button("Segmentation").click(segmentation, inputs=gr.Image(type="pil"), outputs=gr.JSON()) with gr.Column(): gr.Button("Upscale").click( upscale, inputs=[ gr.Image(label="Source Image to upscale"), gr.Textbox(label="prompt",value="empty room"), gr.Textbox(label="negative prompt",value="jpeg artifacts, lowres, bad quality, watermark, text"), gr.Number(value=2, label="Tile grid dimension amount (number of rows and columns) - X by X "), gr.Slider(2, 4, 2, step=2, label='Upscale 2 or 4'), gr.Slider(2, 15, 7, step=1, label='Guidance Scale: How much the AI influences the Upscaling.'), gr.Slider(2, 100, 10, step=1, label='Number of Iterations'), gr.Checkbox(value=True,label="Enable Xformers memory efficient attention"), gr.Checkbox(value=True,label="Enable sequential CPU offload"), gr.Checkbox(value=True,label="Enable attention slicing") ], outputs=gr.Image()) # app.queue() app.launch() # UP 1