import os
# os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
# os.environ['CUDA_VISIBLE_DEVICES'] = '2'
# os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "caching_allocator"
import gradio as gr
import numpy as np
from models import make_inpainting
import utils

from transformers import pipeline
from PIL import Image
import torch
import random
import io
import base64
import json
from diffusers import StableDiffusionLatentUpscalePipeline, StableDiffusionUpscalePipeline
from diffusers import LDMSuperResolutionPipeline
import cv2
import onnxruntime
from split_image import split

# xformers is optional: upscale1/upscale2 fall back to default attention without it.
try:
    import xformers.ops
except ImportError:
    xformers = None

def removeFurniture(input_img1,
            input_img2,
            positive_prompt,
            negative_prompt,
            num_of_images,
            resolution
            ):
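    """Inpaint the masked region of input_img1 and return exactly 10 values
    (generated images padded with None) to fill the fixed Gradio output slots."""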

    print("removeFurniture")

    input_img1 = input_img1.resize((resolution, resolution))
    input_img2 = input_img2.resize((resolution, resolution))

    canvas_mask = np.array(input_img2)
    mask = utils.get_mask(canvas_mask)

    print(input_img1, mask, positive_prompt, negative_prompt)

    ret_list = make_inpainting(positive_prompt=positive_prompt,
                               image=input_img1,
                               mask_image=mask,
                               negative_prompt=negative_prompt,
                               num_of_images=num_of_images,
                               resolution=resolution)
    # Pad the result list to the 10 outputs the UI expects.
    while len(ret_list) < 10:
        ret_list.append(None)

    return ret_list

def imageToString(img):
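    """Serialize a PIL image to PNG bytes."""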

    output = io.BytesIO()
    img.save(output, format="png")
    return output.getvalue()

def segmentation(img):
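    """Run MaskFormer semantic segmentation and return the results as a JSON
    string, with each segment mask base64-encoded."""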
    print("segmentation")

    # semantic_segmentation = pipeline("image-segmentation", "nvidia/segformer-b1-finetuned-cityscapes-1024-1024")
    pipe = pipeline("image-segmentation", "facebook/maskformer-swin-large-ade")    
    results = pipe(img)
    for p in results:
        p['mask'] = utils.image_to_byte_array(p['mask'])
        p['mask'] = base64.b64encode(p['mask']).decode("utf-8")
    #print(results)
    return json.dumps(results)
    




def upscale1(image, prompt):
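    """Upscale an image 4x with Stable Diffusion x4, guided by a text prompt."""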
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print("upscale1", device, image, prompt)    
    
    # image.thumbnail((512, 512))
    # print("resize",image)

    torch.backends.cuda.matmul.allow_tf32 = True

    pipe = StableDiffusionUpscalePipeline.from_pretrained("stabilityai/stable-diffusion-x4-upscaler",                                                           
                                                          torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                                                          use_safetensors=True)
    # pipe = StableDiffusionLatentUpscalePipeline.from_pretrained("stabilityai/sd-x2-latent-upscaler", torch_dtype=torch.float16)
    pipe = pipe.to(device)
    pipe.enable_attention_slicing()
    try:
        pipe.enable_xformers_memory_efficient_attention()
        # pipe.enable_xformers_memory_efficient_attention(attention_op=xformers.ops.MemoryEfficientAttentionFlashAttentionOp)
        # Workaround for the VAE not accepting the attention shape when using Flash Attention
        pipe.vae.enable_xformers_memory_efficient_attention()
    except Exception:
        # xformers unavailable (e.g. CPU-only run); keep the default attention
        pass

    ret = pipe(prompt=prompt, 
                   image=image,
                   num_inference_steps=10,
                   guidance_scale=0)
    print("ret",ret)
    upscaled_image = ret.images[0]
    print("up",upscaled_image)

    return upscaled_image

def upscale2(image, prompt):
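    """Upscale an image 4x with the LDM super-resolution pipeline (the prompt
    argument is accepted for interface symmetry but currently unused)."""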
    print("upscale2",image,prompt)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print("device",device)

    pipe = LDMSuperResolutionPipeline.from_pretrained("CompVis/ldm-super-resolution-4x-openimages",
                                                      torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32)
    pipe = pipe.to(device)
    pipe.enable_attention_slicing()
    try:
        pipe.enable_xformers_memory_efficient_attention(attention_op=xformers.ops.MemoryEfficientAttentionFlashAttentionOp)
        # Workaround for the VAE not accepting the attention shape when using Flash Attention
        pipe.vae.enable_xformers_memory_efficient_attention(attention_op=None)
    except Exception:
        # xformers unavailable; keep the default attention
        pass

    upscaled_image = pipe(image, num_inference_steps=10, eta=1).images[0]
    return upscaled_image

def convert_pil_to_cv2(image):
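    """Convert a PIL image to an OpenCV-style BGR numpy array."""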
    # pil_image = image.convert("RGB")
    open_cv_image = np.array(image)
    # RGB to BGR
    open_cv_image = open_cv_image[:, :, ::-1].copy()
    return open_cv_image

def inference(model_path: str, img_array: np.ndarray) -> np.ndarray:
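    """Run a single-threaded ONNX Runtime session on a preprocessed image batch."""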
    options = onnxruntime.SessionOptions()
    options.intra_op_num_threads = 1
    options.inter_op_num_threads = 1
    ort_session = onnxruntime.InferenceSession(model_path, options)
    ort_inputs = {ort_session.get_inputs()[0].name: img_array}
    ort_outs = ort_session.run(None, ort_inputs)

    return ort_outs[0]

def post_process(img: np.ndarray) -> np.ndarray:
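    """Convert a (1, C, H, W) model output to an (H, W, C) uint8 image with channels reversed."""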
    # 1, C, H, W -> C, H, W
    img = np.squeeze(img)
    # C, H, W -> H, W, C
    img = np.transpose(img, (1, 2, 0))[:, :, ::-1].astype(np.uint8)
    return img

def pre_process(img: np.ndarray) -> np.ndarray:
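    """Convert an (H, W, C) image to a float32 (1, C, H, W) batch for ONNX inference."""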
    # H, W, C -> C, H, W
    img = np.transpose(img[:, :, 0:3], (2, 0, 1))
    # C, H, W -> 1, C, H, W
    img = np.expand_dims(img, axis=0).astype(np.float32)
    return img

def upscale3(image):
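    """Upscale an image 4x with a local ONNX super-resolution model."""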
    print("upscale3",image)

    model_path = "up_models/modelx4.ort"
    img = convert_pil_to_cv2(image)
    
    # if img.ndim == 2:
    #     print("upscale3","img.ndim == 2")
    #     img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    # if img.shape[2] == 4:
    #     print("upscale3","img.shape[2] == 4")
    #     alpha = img[:, :, 3]  # GRAY
    #     alpha = cv2.cvtColor(alpha, cv2.COLOR_GRAY2BGR)  # BGR
    #     alpha_output = post_process(inference(model_path, pre_process(alpha)))  # BGR
    #     alpha_output = cv2.cvtColor(alpha_output, cv2.COLOR_BGR2GRAY)  # GRAY

    #     img = img[:, :, 0:3]  # BGR
    #     image_output = post_process(inference(model_path, pre_process(img)))  # BGR
    #     image_output = cv2.cvtColor(image_output, cv2.COLOR_BGR2BGRA)  # BGRA
    #     image_output[:, :, 3] = alpha_output

    # print("upscale3","img.shape[2] == 3")
    image_output = post_process(inference(model_path, pre_process(img)))  # BGR

    return image_output



def split_image(im, rows, cols, should_square, should_quiet=False):
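    """Split an image into rows x cols tiles, optionally padding it to a square
    with the detected background color first. Returns the tiles as PIL images."""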
    im_width, im_height = im.size
    row_width = int(im_width / cols)
    row_height = int(im_height / rows)
    name = "image"
    ext = ".png"
    name = os.path.basename(name)
    images = []
    if should_square:
        min_dimension = min(im_width, im_height)
        max_dimension = max(im_width, im_height)
        if not should_quiet:
            print("Resizing image to a square...")
            print("Determining background color...")
        bg_color = split.determine_bg_color(im)
        if not should_quiet:
            print("Background color is... " + str(bg_color))
        im_r = Image.new("RGBA" if ext == ".png" else "RGB",
                         (max_dimension, max_dimension), bg_color)
        offset = int((max_dimension - min_dimension) / 2)
        if im_width > im_height:
            im_r.paste(im, (0, offset))
        else:
            im_r.paste(im, (offset, 0))
        im = im_r
        row_width = int(max_dimension / cols)
        row_height = int(max_dimension / rows)
    n = 0
    for i in range(0, rows):
        for j in range(0, cols):
            box = (j * row_width, i * row_height, j * row_width +
                   row_width, i * row_height + row_height)
            outp = im.crop(box)
            outp_path = name + "_" + str(n) + ext
            if not should_quiet:
                print("Exporting image tile: " + outp_path)
            images.append(outp)
            n += 1
    return images

def upscale_image(img, rows, cols, seed, prompt, negative_prompt, xformers, cpu_offload, attention_slicing, enable_custom_sliders=False, guidance=7, iterations=50):
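    """Tile the input image, upscale each tile 4x with Stable Diffusion,
    stitch the tiles back together, and center-crop to the original aspect ratio."""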
    model_id = "stabilityai/stable-diffusion-x4-upscaler"
    try:
        pipeline = StableDiffusionUpscalePipeline.from_pretrained(model_id, torch_dtype=torch.float16)
    except Exception:
        # Fall back to a locally cached copy if the Hub is unreachable.
        pipeline = StableDiffusionUpscalePipeline.from_pretrained(model_id, torch_dtype=torch.float16, local_files_only=True)
    pipeline = pipeline.to("cuda")
    if xformers:
        pipeline.enable_xformers_memory_efficient_attention()
    else:
        pipeline.disable_xformers_memory_efficient_attention()
    if cpu_offload:
        try:
            pipeline.enable_sequential_cpu_offload()
        except Exception:
            pass
    if attention_slicing:
        pipeline.enable_attention_slicing()
    else:
        pipeline.disable_attention_slicing()
    img = Image.fromarray(img)
    # Choose the generator seed (random when -1 is passed).
    if seed == -1:
        generator = torch.manual_seed(random.randint(0, 9999999))
    else:
        generator = torch.manual_seed(seed)
    
    original_width, original_height = img.size
    max_dimension = max(original_width, original_height)
    tiles = split_image(img, rows, cols, True, False)
    ups_tiles = []
    for tile in tiles:
        if enable_custom_sliders:
            ups_tile = pipeline(prompt=prompt, negative_prompt=negative_prompt, guidance_scale=guidance,
                                num_inference_steps=iterations, image=tile.convert("RGB"), generator=generator).images[0]
        else:
            ups_tile = pipeline(prompt=prompt, negative_prompt=negative_prompt,
                                image=tile.convert("RGB"), generator=generator).images[0]
        ups_tiles.append(ups_tile)
        
    # Determine the size of the merged upscaled image.
    side = ups_tiles[0].width   # width of one upscaled (square) tile
    tsize = tiles[0].width      # width of one input tile
    ups_times = side / tsize    # effective upscale factor (4 for the x4 model)
    new_size = (max_dimension * ups_times, max_dimension * ups_times)
    total_width = cols * side
    total_height = rows * side

    # Create a blank image with the calculated size
    merged_image = Image.new("RGB", (total_width, total_height))

    # Paste each upscaled tile into the blank image
    current_width = 0
    current_height = 0
    maximum_width = cols*side
    for ups_tile in ups_tiles:
        merged_image.paste(ups_tile, (current_width, current_height))
        current_width += ups_tile.width
        if current_width>=maximum_width:
            current_width = 0
            current_height = current_height+side

    # Using the center of the image as pivot, crop back to the original
    # dimensions scaled by the upscale factor.
    crop_left = int((new_size[0] - original_width * ups_times) // 2)
    crop_upper = int((new_size[1] - original_height * ups_times) // 2)
    crop_right = crop_left + int(original_width * ups_times)
    crop_lower = crop_upper + int(original_height * ups_times)
    final_img = merged_image.crop((crop_left, crop_upper, crop_right, crop_lower))

    # The resulting image should be identical to the original image in proportions / aspect ratio, with no loss of elements.
    # Save the merged image
    return final_img

    
def upscale(image, prompt, negative_prompt, rows, guidance, iterations, xformers_input, cpu_offload_input, attention_slicing_input):
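    """Gradio entry point: tiled 4x upscale using a rows x rows grid."""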
    print("upscale", image, prompt, negative_prompt, rows, guidance, iterations, xformers_input, cpu_offload_input, attention_slicing_input)   
    # return upscale1(image, prompt)
    return upscale_image(image,
                         rows=rows,cols=rows,
                         seed=-1,
                         prompt=prompt,
                         guidance=guidance,
                         negative_prompt=negative_prompt,
                         xformers=xformers_input,
                         cpu_offload=cpu_offload_input,
                         attention_slicing=attention_slicing_input,
                         iterations=iterations)

modes = {
    '1': '1',
    'img2img': 'Image to Image',
    'inpaint': 'Inpainting',
    'upscale4x': 'Upscale 4x',
}



with gr.Blocks() as app:
    gr.HTML(
        f"""
          <div>
            Running on <b>{"GPU 🔥" if torch.cuda.is_available() else "CPU 🥶"}</b>
          </div>
        """
    )

    with gr.Row():

        with gr.Column():
            gr.Button("FurnituRemove").click(removeFurniture, 
                                        inputs=[gr.Image(label="img", type="pil"),
                                                gr.Image(label="mask", type="pil"),
                                                gr.Textbox(label="positive_prompt",value="empty room"),
                                                gr.Textbox(label="negative_prompt",value=""),
                                                gr.Number(label="num_of_images",value=2),
                                                gr.Number(label="resolution",value=512)
                                                ], 
                                        outputs=[gr.Image() for _ in range(10)])
        
        with gr.Column():  
            gr.Button("Segmentation").click(segmentation, inputs=gr.Image(type="pil"), outputs=gr.JSON())

        with gr.Column():
            gr.Button("Upscale").click(
                upscale, 
                inputs=[
                    gr.Image(label="Source Image to upscale"),
                    gr.Textbox(label="prompt",value="empty room"),
                    gr.Textbox(label="negative prompt",value="jpeg artifacts, lowres, bad quality, watermark, text"),
                    gr.Number(value=2, label="Tile grid dimension amount (number of rows and columns) - X by X "),
                    gr.Slider(2, 15, 7, step=1, label='Guidance Scale: How much the AI influences the Upscaling.'),
                    gr.Slider(10, 100, 10, step=1, label='Number of Iterations'),
                    gr.Checkbox(value=True,label="Enable Xformers memory efficient attention"),                    
                    gr.Checkbox(value=True,label="Enable sequential CPU offload"),
                    gr.Checkbox(value=True,label="Enable attention slicing")
                    ], 
                outputs=gr.Image())


# app.queue()
app.launch(debug=True, share=True, height=768)
