ginipick committed on
Commit
93f1d2a
1 Parent(s): 4980936

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -940
app.py CHANGED
@@ -1,940 +0,0 @@
1
- import tempfile
2
- import time
3
- from collections.abc import Sequence
4
- from typing import Any, cast
5
- import os
6
- from huggingface_hub import login, hf_hub_download
7
-
8
- import gradio as gr
9
- import numpy as np
10
- import pillow_heif
11
- import spaces
12
- import torch
13
- from gradio_image_annotation import image_annotator
14
- from gradio_imageslider import ImageSlider
15
- from PIL import Image
16
- from pymatting.foreground.estimate_foreground_ml import estimate_foreground_ml
17
- from refiners.fluxion.utils import no_grad
18
- from refiners.solutions import BoxSegmenter
19
- from transformers import GroundingDinoForObjectDetection, GroundingDinoProcessor
20
- from diffusers import FluxPipeline
21
- from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
22
- import gc
23
-
24
- from PIL import Image, ImageDraw, ImageFont
25
- from PIL import Image
26
- from gradio_client import Client, handle_file
27
- import uuid
28
-
29
-
30
def clear_memory():
    """Free unreferenced Python objects and release cached CUDA memory.

    This is a best-effort cleanup: a failure while touching CUDA is reported
    on stdout and otherwise ignored, so callers never see an exception from
    here.
    """
    gc.collect()
    try:
        if torch.cuda.is_available():
            with torch.cuda.device(0):  # explicitly use device 0
                torch.cuda.empty_cache()
    except Exception as exc:
        # Cleanup must not break the request that triggered it, but a bare
        # `except` would also swallow KeyboardInterrupt/SystemExit.
        print(f"Warning: could not clear CUDA cache: {exc}")
39
-
40
# GPU setup: explicitly pin to cuda:0 when CUDA is available, else use CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# CUDA tuning is optional, so a failure here is reported but not fatal.
if torch.cuda.is_available():
    try:
        with torch.cuda.device(0):
            torch.cuda.empty_cache()
            torch.backends.cudnn.benchmark = True
            torch.backends.cuda.matmul.allow_tf32 = True
    except Exception as exc:
        # Include the cause so a misconfigured GPU is diagnosable; a bare
        # `except` would also have hidden KeyboardInterrupt/SystemExit.
        print(f"Warning: Could not configure CUDA settings: {exc}")

# Translation model (Korean -> English), kept on the CPU.
model_name = "Helsinki-NLP/opus-mt-ko-en"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to('cpu')
translator = pipeline("translation", model=model, tokenizer=tokenizer, device=-1)
58
-
59
def translate_to_english(text: str) -> str:
    """Translate *text* to English if it contains Hangul syllables.

    Text without any character in the range '가'..'힣' is returned unchanged.
    Any error raised while checking or translating is printed and the
    original text is returned instead.
    """
    try:
        hangul_first, hangul_last = ord('가'), ord('힣')
        contains_hangul = any(hangul_first <= ord(ch) <= hangul_last for ch in text)
        if not contains_hangul:
            return text
        translated = translator(text, max_length=128)[0]['translation_text']
        print(f"Translated '{text}' to '{translated}'")
        return translated
    except Exception as e:
        print(f"Translation error: {str(e)}")
        return text
70
-
71
# Box given as four ints; the UI describes the order as [xmin, ymin, xmax, ymax].
BoundingBox = tuple[int, int, int, int]

# Register the HEIF and AVIF openers provided by pillow_heif with PIL.
pillow_heif.register_heif_opener()
pillow_heif.register_avif_opener()

# Hugging Face token: required, the app refuses to start without it.
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN is None:
    raise ValueError("Please set the HF_TOKEN environment variable")

try:
    login(token=HF_TOKEN)
except Exception as e:
    # Chain the original exception so the real login failure stays visible.
    raise ValueError(f"Failed to login to Hugging Face: {str(e)}") from e

# Model initialization: build the segmenter on CPU, then move it to `device`.
segmenter = BoxSegmenter(device="cpu")
segmenter.device = device
segmenter.model = segmenter.model.to(device=segmenter.device)

gd_model_path = "IDEA-Research/grounding-dino-base"
gd_processor = GroundingDinoProcessor.from_pretrained(gd_model_path)
gd_model = GroundingDinoForObjectDetection.from_pretrained(gd_model_path, torch_dtype=torch.float32)
gd_model = gd_model.to(device=device)
assert isinstance(gd_model, GroundingDinoForObjectDetection)

# FLUX pipeline initialization.
# `token=` replaces the deprecated `use_auth_token=` keyword.
pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    torch_dtype=torch.float16,
    token=HF_TOKEN,
)
pipe.enable_attention_slicing(slice_size="auto")

# Load the LoRA weights and fuse them into the pipeline.
pipe.load_lora_weights(
    hf_hub_download(
        "ByteDance/Hyper-SD",
        "Hyper-FLUX.1-dev-8steps-lora.safetensors",
        token=HF_TOKEN,
    )
)
pipe.fuse_lora(lora_scale=0.125)

# Moving the pipeline to the GPU is best-effort; on failure it stays where it is.
try:
    if torch.cuda.is_available():
        pipe = pipe.to("cuda:0")  # explicitly cuda:0
except Exception as e:
    print(f"Warning: Could not move pipeline to CUDA: {str(e)}")

# Remote Gradio Space used by remove_background().
client = Client("NabeelShar/BiRefNet_for_text_writing")
123
-
124
class timer:
    """Context manager that prints when a block starts and how long it took.

    Args:
        method_name: Label used in the printed messages.
    """

    def __init__(self, method_name="timed process"):
        self.method = method_name

    def __enter__(self):
        self.start = time.time()
        print(f"{self.method} starts")
        # Return the instance so `with timer(...) as t:` binds something usable.
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Returns None, so an exception raised inside the block still propagates.
        end = time.time()
        print(f"{self.method} took {str(round(end - self.start, 2))}s")
133
-
134
def bbox_union(bboxes: Sequence[list[int]]) -> BoundingBox | None:
    """Return the smallest box enclosing every box in *bboxes*.

    Each box must hold exactly four ints (checked with assertions).
    Returns None when *bboxes* is empty.
    """
    if not bboxes:
        return None
    for box in bboxes:
        assert len(box) == 4
        assert all(isinstance(value, int) for value in box)
    # Transpose the boxes into four coordinate columns.
    col0, col1, col2, col3 = zip(*bboxes)
    return (min(col0), min(col1), max(col2), max(col3))
146
-
147
def corners_to_pixels_format(bboxes: torch.Tensor, width: int, height: int) -> torch.Tensor:
    """Round boxes to int32 and clamp them to the image bounds.

    The last dimension of *bboxes* is unpacked as (x1, y1, x2, y2); x values
    are clamped to [0, width] and y values to [0, height].
    """
    pixels = bboxes.round().to(torch.int32)
    x1, y1, x2, y2 = pixels.unbind(-1)
    clamped = [
        x1.clamp(0, width),
        y1.clamp(0, height),
        x2.clamp(0, width),
        y2.clamp(0, height),
    ]
    return torch.stack(clamped, dim=-1)
150
-
151
def gd_detect(img: Image.Image, prompt: str) -> BoundingBox | None:
    """Run Grounding DINO on *img* for *prompt* and merge the detected boxes.

    The prompt is passed to the processor with a trailing period. The boxes
    from the post-processed result are converted to clamped pixel
    coordinates and merged with bbox_union(), which returns None when there
    are no boxes.
    """
    width, height = img.size
    model_inputs = gd_processor(images=img, text=f"{prompt}.", return_tensors="pt").to(device=device)
    with no_grad():
        model_outputs = gd_model(**model_inputs)
    detections: dict[str, Any] = gd_processor.post_process_grounded_object_detection(
        model_outputs,
        model_inputs["input_ids"],
        target_sizes=[(height, width)],  # passed as (height, width)
    )[0]
    assert "boxes" in detections and isinstance(detections["boxes"], torch.Tensor)
    pixel_boxes = corners_to_pixels_format(detections["boxes"].cpu(), width, height)
    return bbox_union(pixel_boxes.numpy().tolist())
164
-
165
def apply_mask(img: Image.Image, mask_img: Image.Image, defringe: bool = True) -> Image.Image:
    """Return an RGBA image holding *img* pasted through *mask_img*.

    Both images must have the same size (asserted). With *defringe* enabled,
    the colours are first replaced by the result of estimate_foreground_ml()
    computed from the image and the mask.
    """
    assert img.size == mask_img.size
    rgb_img = img.convert("RGB")
    alpha_img = mask_img.convert("L")
    if defringe:
        rgb = np.asarray(rgb_img) / 255.0
        alpha = np.asarray(alpha_img) / 255.0
        foreground = cast(np.ndarray[Any, np.dtype[np.uint8]], estimate_foreground_ml(rgb, alpha))
        rgb_img = Image.fromarray((foreground * 255).astype("uint8"))
    cutout = Image.new("RGBA", rgb_img.size)
    cutout.paste(rgb_img, (0, 0), alpha_img)
    return cutout
176
-
177
-
178
def adjust_size_to_multiple_of_8(width: int, height: int) -> tuple[int, int]:
    """Round *width* and *height* up to the nearest multiple of 8."""

    def _round_up(value: int) -> int:
        # Ceiling division via negated floor division.
        return -(-value // 8) * 8

    return _round_up(width), _round_up(height)
183
-
184
def calculate_dimensions(aspect_ratio: str, base_size: int = 512) -> tuple[int, int]:
    """Return (width, height) for the selected aspect ratio.

    The shorter side is *base_size*; an unrecognised ratio yields a square.
    """
    square = (base_size, base_size)
    sizes = {
        "1:1": square,
        "16:9": (base_size * 16 // 9, base_size),
        "9:16": (base_size, base_size * 16 // 9),
        "4:3": (base_size * 4 // 3, base_size),
    }
    return sizes.get(aspect_ratio, square)
195
-
196
@spaces.GPU(duration=20)  # reduced from 40s to 20s
def generate_background(prompt: str, aspect_ratio: str) -> Image.Image:
    """Generate a background image for *prompt* with the FLUX pipeline.

    The size comes from *aspect_ratio*, is capped at 768 px on the longer
    side and rounded up to multiples of 8. If the pipeline fails, a white
    image of the requested size is returned; any other failure returns a
    white 512x512 image.
    """
    try:
        width, height = adjust_size_to_multiple_of_8(*calculate_dimensions(aspect_ratio))

        max_size = 768
        longest_side = max(width, height)
        if longest_side > max_size:
            ratio = max_size / longest_side
            width, height = adjust_size_to_multiple_of_8(int(width * ratio), int(height * ratio))

        with timer("Background generation"):
            try:
                with torch.inference_mode():
                    return pipe(
                        prompt=prompt,
                        width=width,
                        height=height,
                        num_inference_steps=8,
                        guidance_scale=4.0
                    ).images[0]
            except Exception as e:
                print(f"Pipeline error: {str(e)}")
                return Image.new('RGB', (width, height), 'white')
    except Exception as e:
        print(f"Background generation error: {str(e)}")
        return Image.new('RGB', (512, 512), 'white')
227
-
228
def create_position_grid():
    """Return the HTML markup for a 3x3 grid of position buttons.

    The "bottom-center" button carries data-default="true".
    """
    grid_html = """
    <div class="position-grid" style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 10px; width: 150px; margin: auto;">
        <button class="position-btn" data-pos="top-left">↖</button>
        <button class="position-btn" data-pos="top-center">↑</button>
        <button class="position-btn" data-pos="top-right">↗</button>
        <button class="position-btn" data-pos="middle-left">←</button>
        <button class="position-btn" data-pos="middle-center">•</button>
        <button class="position-btn" data-pos="middle-right">→</button>
        <button class="position-btn" data-pos="bottom-left">↙</button>
        <button class="position-btn" data-pos="bottom-center" data-default="true">↓</button>
        <button class="position-btn" data-pos="bottom-right">↘</button>
    </div>
    """
    return grid_html
242
-
243
def calculate_object_position(position: str, bg_size: tuple[int, int], obj_size: tuple[int, int]) -> tuple[int, int]:
    """Return the (x, y) paste offset that places the object at *position*.

    *position* is "<row>-<column>" with row in top/middle/bottom and column
    in left/center/right. Unknown names fall back to "bottom-center".
    """
    bg_width, bg_height = bg_size
    obj_width, obj_height = obj_size

    free_x = bg_width - obj_width
    free_y = bg_height - obj_height
    column_x = {"left": 0, "center": free_x // 2, "right": free_x}
    row_y = {"top": 0, "middle": free_y // 2, "bottom": free_y}

    anchors = {
        f"{row}-{column}": (x, y)
        for row, y in row_y.items()
        for column, x in column_x.items()
    }
    return anchors.get(position, anchors["bottom-center"])
261
-
262
def resize_object(image: Image.Image, scale_percent: float) -> Image.Image:
    """Return *image* resized to *scale_percent* percent of its size.

    Each side is truncated to an int and kept at a minimum of 1 pixel, so a
    small image at a low scale does not ask for a zero-sized result.
    Resampling uses LANCZOS.
    """
    width = max(1, int(image.width * scale_percent / 100))
    height = max(1, int(image.height * scale_percent / 100))
    return image.resize((width, height), Image.Resampling.LANCZOS)
267
-
268
def combine_with_background(foreground: Image.Image, background: Image.Image,
                            position: str = "bottom-center", scale_percent: float = 100) -> Image.Image:
    """Paste the scaled *foreground* onto *background* at *position*.

    The background is converted to RGBA and the scaled foreground is pasted
    using itself as the mask.
    """
    print(f"Combining with position: {position}, scale: {scale_percent}")

    canvas = background.convert('RGBA')
    sprite = resize_object(foreground, scale_percent)

    offset = calculate_object_position(position, canvas.size, sprite.size)
    print(f"Calculated position coordinates: ({offset[0]}, {offset[1]})")

    canvas.paste(sprite, offset, sprite)
    return canvas
281
-
282
@spaces.GPU(duration=30)  # reduced from 120s to 30s
def _gpu_process(img: Image.Image, prompt: str | BoundingBox | None) -> tuple[Image.Image, BoundingBox | None, list[str]]:
    """Detect (for a text prompt) and segment the object in *img*.

    A string prompt is resolved to a box with gd_detect(); anything else is
    passed to the segmenter as the box directly. Returns the mask, the box
    used and a list of timing strings. Raises gr.Error when a text prompt
    matches nothing; every exception is printed and re-raised.
    """
    time_log: list[str] = []
    try:
        if not isinstance(prompt, str):
            bbox = prompt
        else:
            started = time.time()
            bbox = gd_detect(img, prompt)
            time_log.append(f"detect: {time.time() - started}")
            if not bbox:
                print(time_log[0])
                raise gr.Error("No object detected")

        started = time.time()
        mask = segmenter(img, bbox)
        time_log.append(f"segment: {time.time() - started}")
        return mask, bbox, time_log
    except Exception as e:
        print(f"GPU process error: {str(e)}")
        raise
302
-
303
def _process(img: Image.Image, prompt: str | BoundingBox | None, bg_prompt: str | None = None, aspect_ratio: str = "1:1") -> tuple[tuple[Image.Image, Image.Image, Image.Image], gr.DownloadButton]:
    """Extract the prompted object and build the combined preview image.

    Args:
        img: Input image; downscaled so its longer side is at most 1024 px.
        prompt: Text describing the object, or a bounding box given in the
            coordinates of the image as passed in.
        bg_prompt: When set, a background is generated and returned as the
            combined image; otherwise the cutout is composited on white.
        aspect_ratio: Aspect ratio for the generated background.

    Returns:
        ``(img, combined, masked_alpha)`` and a DownloadButton pointing at a
        PNG copy of ``combined``.

    Raises:
        gr.Error: On any failure; the original message is included.
    """
    try:
        # Limit the input image size.
        max_size = 1024
        if img.width > max_size or img.height > max_size:
            ratio = max_size / max(img.width, img.height)
            new_size = (int(img.width * ratio), int(img.height * ratio))
            img = img.resize(new_size, Image.LANCZOS)
            if prompt is not None and not isinstance(prompt, str):
                # The box was given for the full-size image; scale it with
                # the image so it still covers the same region.
                prompt = tuple(int(round(coord * ratio)) for coord in prompt)

        # Free cached CUDA memory first; a failure here is not fatal.
        try:
            if torch.cuda.is_available():
                current_device = torch.cuda.current_device()
                with torch.cuda.device(current_device):
                    torch.cuda.empty_cache()
        except Exception as e:
            print(f"CUDA memory management failed: {e}")

        with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
            mask, bbox, time_log = _gpu_process(img, prompt)
            masked_alpha = apply_mask(img, mask, defringe=True)

        if bg_prompt:
            background = generate_background(bg_prompt, aspect_ratio)
            combined = background
        else:
            combined = Image.alpha_composite(Image.new("RGBA", masked_alpha.size, "white"), masked_alpha)

        clear_memory()

        # delete=False: the file has to outlive this function so the
        # download button can serve it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp:
            combined.save(temp.name)
            return (img, combined, masked_alpha), gr.DownloadButton(value=temp.name, interactive=True)
    except Exception as e:
        clear_memory()
        print(f"Processing error: {str(e)}")
        raise gr.Error(f"Processing failed: {str(e)}")
340
-
341
def on_change_bbox(prompts: dict[str, Any] | None):
    """Return an update that is interactive only when *prompts* is not None."""
    has_prompts = prompts is not None
    return gr.update(interactive=has_prompts)
343
-
344
-
345
def on_change_prompt(img: Image.Image | None, prompt: str | None, bg_prompt: str | None = None):
    """Return an update that is interactive only when image and prompt are both set.

    *bg_prompt* is accepted but not used.
    """
    ready = bool(img and prompt)
    return gr.update(interactive=ready)
347
-
348
-
349
def process_prompt(img: Image.Image, prompt: str, bg_prompt: str | None = None,
                   aspect_ratio: str = "1:1", position: str = "bottom-center",
                   scale_percent: float = 100) -> tuple[Image.Image, Image.Image]:
    """Handle the "Process" button: extract the object and optionally composite it.

    Prompts are translated to English first. Without *bg_prompt* the result
    of _process() is returned as is. With it, the extracted object is scaled
    and pasted onto the generated background at *position* (invalid values
    fall back to "bottom-center"); if that step fails, the plain background
    is returned.

    Returns:
        ``(combined_image, extracted_object)``.

    Raises:
        gr.Error: When inputs are missing or processing fails.
    """
    try:
        if img is None or prompt.strip() == "":
            raise gr.Error("Please provide both image and prompt")

        print(f"Processing with position: {position}, scale: {scale_percent}")  # for debugging

        try:
            prompt = translate_to_english(prompt)
            if bg_prompt:
                bg_prompt = translate_to_english(bg_prompt)
        except Exception as e:
            print(f"Translation error (continuing with original text): {str(e)}")

        results, _ = _process(img, prompt, bg_prompt, aspect_ratio)
        _, combined_or_background, extracted = results

        if not bg_prompt:
            return combined_or_background, extracted  # default return

        try:
            print(f"Using position: {position}")  # for debugging
            # Validate the position value.
            valid_positions = ["top-left", "top-center", "top-right",
                               "middle-left", "middle-center", "middle-right",
                               "bottom-left", "bottom-center", "bottom-right"]
            if position not in valid_positions:
                position = "bottom-center"
                print(f"Invalid position, using default: {position}")

            combined = combine_with_background(
                foreground=extracted,
                background=combined_or_background,
                position=position,
                scale_percent=scale_percent
            )
            return combined, extracted
        except Exception as e:
            print(f"Combination error: {str(e)}")
            return combined_or_background, extracted
    except Exception as e:
        print(f"Error in process_prompt: {str(e)}")
        raise gr.Error(str(e))
    finally:
        clear_memory()
395
-
396
-
397
def process_bbox(img: Image.Image, box_input: str) -> tuple[Image.Image, Image.Image]:
    """Extract the object inside a user-typed bounding box.

    Args:
        img: Input image.
        box_input: Text of a Python list literal ``[xmin, ymin, xmax, ymax]``.

    Returns:
        ``(combined_image, extracted_object)`` from _process().

    Raises:
        gr.Error: When inputs are missing, the box is malformed, or
            processing fails.
    """
    import ast  # local import: only needed to parse the box literal

    try:
        if img is None or box_input.strip() == "":
            raise gr.Error("Please provide both image and bounding box coordinates")

        try:
            # SECURITY: box_input is user-typed text. eval() would execute
            # arbitrary code; literal_eval only accepts Python literals.
            coords = ast.literal_eval(box_input)
            if not isinstance(coords, list) or len(coords) != 4:
                raise ValueError("Invalid box format")
            bbox = tuple(int(x) for x in coords)
        except Exception as e:
            raise gr.Error("Invalid box format. Please provide [xmin, ymin, xmax, ymax]") from e

        # Process the image
        results, _ = _process(img, bbox)

        # Return only the combined image and the extracted image.
        return results[1], results[2]
    except Exception as e:
        raise gr.Error(str(e))
417
-
418
- # Event handler functions 수정
419
def update_process_button(img, prompt):
    """Enable and highlight the process button once image and prompt are both set."""
    ready = bool(img and prompt)
    variant = "primary" if ready else "secondary"
    return gr.update(interactive=ready, variant=variant)
424
-
425
def update_box_button(img, box_input):
    """Enable the box button only when *box_input* parses to a 4-item list.

    Any parse failure or missing input yields a disabled, secondary button.
    """
    import ast  # local import: only needed to parse the box literal

    disabled = dict(interactive=False, variant="secondary")
    try:
        if img and box_input:
            # SECURITY: user-typed text; eval() would execute arbitrary
            # code, literal_eval only accepts Python literals.
            coords = ast.literal_eval(box_input)
            if isinstance(coords, list) and len(coords) == 4:
                return gr.update(interactive=True, variant="primary")
        return gr.update(**disabled)
    except Exception:
        # Malformed input while the user is still typing is expected.
        return gr.update(**disabled)
434
-
435
-
436
- css = """
437
- footer {display: none}
438
- .main-title {
439
- text-align: center;
440
- margin: 1em 0;
441
- padding: 1.5em;
442
- background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
443
- border-radius: 15px;
444
- box-shadow: 0 4px 6px rgba(0,0,0,0.1);
445
- }
446
- .main-title h1 {
447
- color: #2196F3;
448
- font-size: 2.8em;
449
- margin-bottom: 0.3em;
450
- font-weight: 700;
451
- }
452
- .main-title p {
453
- color: #555;
454
- font-size: 1.3em;
455
- line-height: 1.4;
456
- }
457
- .container {
458
- max-width: 1200px;
459
- margin: auto;
460
- padding: 20px;
461
- }
462
- .input-panel, .output-panel {
463
- background: white;
464
- padding: 1.5em;
465
- border-radius: 12px;
466
- box-shadow: 0 2px 8px rgba(0,0,0,0.08);
467
- margin-bottom: 1em;
468
- }
469
- .controls-panel {
470
- background: #f8f9fa;
471
- padding: 1em;
472
- border-radius: 8px;
473
- margin: 1em 0;
474
- }
475
- .image-display {
476
- min-height: 512px;
477
- display: flex;
478
- align-items: center;
479
- justify-content: center;
480
- background: #fafafa;
481
- border-radius: 8px;
482
- margin: 1em 0;
483
- }
484
- .example-section {
485
- text-align: center;
486
- padding: 2em;
487
- background: #f5f5f5;
488
- border-radius: 12px;
489
- margin-top: 2em;
490
- }
491
- .example-section img {
492
- max-width: 100%;
493
- border-radius: 8px;
494
- box-shadow: 0 4px 8px rgba(0,0,0,0.1);
495
- }
496
- .accordion {
497
- border: 1px solid #e0e0e0;
498
- border-radius: 8px;
499
- margin: 1em 0;
500
- }
501
- .accordion-header {
502
- padding: 1em;
503
- background: #f5f5f5;
504
- cursor: pointer;
505
- }
506
- .accordion-content {
507
- padding: 1em;
508
- display: none;
509
- }
510
- .accordion.open .accordion-content {
511
- display: block;
512
- }
513
- .position-grid {
514
- display: grid;
515
- grid-template-columns: repeat(3, 1fr);
516
- gap: 8px;
517
- margin: 1em 0;
518
- }
519
-
520
-
521
- .position-btn {
522
- padding: 10px;
523
- border: 1px solid #ddd;
524
- border-radius: 4px;
525
- background: white;
526
- cursor: pointer;
527
- transition: all 0.3s ease;
528
- width: 40px;
529
- height: 40px;
530
- display: flex;
531
- align-items: center;
532
- justify-content: center;
533
- }
534
-
535
- .position-btn:hover {
536
- background: #e3f2fd;
537
- }
538
-
539
- .position-btn.selected {
540
- background-color: #2196F3;
541
- color: white;
542
- border-color: #1976D2;
543
- }
544
- """
545
-
546
-
547
def add_text_with_stroke(draw, text, x, y, font, text_color, stroke_width):
    """Draw *text* once for every offset in a square around (x, y).

    Offsets run from -stroke_width to +stroke_width on both axes and every
    copy uses *text_color*, so a width of 0 draws the text exactly once.
    """
    span = range(-stroke_width, stroke_width + 1)
    anchors = [(x + dx, y + dy) for dx in span for dy in span]
    for anchor in anchors:
        draw.text(anchor, text, font=font, fill=text_color)
553
-
554
def remove_background(image):
    """Send *image* to the remote background-removal Space and open the result.

    The image is written to a temporary PNG for upload, and that file is
    deleted once the request has finished, so repeated calls do not leave
    files behind.

    Returns:
        The image opened from the first item of the client's result.
    """
    # delete=False so the path can be reopened for upload after closing;
    # the file is removed in the finally block below.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
        filename = tmp.name
    try:
        image.save(filename)
        # Call gradio client for background removal
        result = client.predict(images=handle_file(filename), api_name="/image")
        return Image.open(result[0])
    finally:
        if os.path.exists(filename):
            os.remove(filename)
561
-
562
def superimpose(image_with_text, overlay_image):
    """Paste *overlay_image* onto *image_with_text* at (0, 0) and return it.

    The overlay is converted to RGBA and used as its own paste mask.
    *image_with_text* is modified in place and returned.
    """
    rgba_overlay = overlay_image.convert("RGBA")
    image_with_text.paste(rgba_overlay, (0, 0), rgba_overlay)
    return image_with_text
570
-
571
def add_text_to_image(
    input_image,
    text,
    font_size,
    color,
    opacity,
    x_position,
    y_position,
    thickness,
    text_position_type,
    font_choice  # font selection
):
    """
    Add text to an image with customizable properties.

    Args:
        input_image: PIL image or numpy array; None returns None.
        text: Text to draw.
        font_size: Font size passed to the font loader.
        color: One of the names in the colour map; unknown names use white.
        opacity: Alpha value used for the text colour.
        x_position: Horizontal placement as a percentage of the free width.
        y_position: Vertical placement as a percentage of the free height.
        thickness: Offset range used to thicken the text.
        text_position_type: "Text Behind Image" re-pastes the
            background-removed subject on top of the text; any other value
            leaves the text on top.
        font_choice: Key into the font table; unknown keys use DejaVuSans.

    Returns:
        The RGB result, or *input_image* unchanged when anything fails.
    """
    try:
        if input_image is None:
            return None

        # Convert to a PIL Image object.
        if isinstance(input_image, Image.Image):
            image = input_image.copy()
        elif isinstance(input_image, np.ndarray):
            image = Image.fromarray(input_image)
        else:
            raise ValueError("Unsupported image type")

        # Work in RGBA so the text overlay can be alpha-composited.
        if image.mode != 'RGBA':
            image = image.convert('RGBA')

        text_behind = text_position_type == "Text Behind Image"
        if text_behind:
            # Remove the background of the original image.
            overlay_image = remove_background(image)

        # Create the transparent text overlay.
        txt_overlay = Image.new('RGBA', image.size, (255, 255, 255, 0))
        draw = ImageDraw.Draw(txt_overlay)

        # Font setup: chosen font, then arial.ttf, then PIL's default.
        font_files = {
            "Default": "DejaVuSans.ttf",
            "Korean Regular": "ko-Regular.ttf",
            "Korean Son": "ko-son.ttf"
        }

        try:
            font_file = font_files.get(font_choice, "DejaVuSans.ttf")
            font = ImageFont.truetype(font_file, int(font_size))
        except Exception as e:
            print(f"Font loading error ({font_choice}): {str(e)}")
            try:
                font = ImageFont.truetype("arial.ttf", int(font_size))
            except Exception:
                # `except Exception` keeps the fallback without swallowing
                # KeyboardInterrupt/SystemExit the way a bare except does.
                print("Using default font")
                font = ImageFont.load_default()

        # Colour setup.
        color_map = {
            'White': (255, 255, 255),
            'Black': (0, 0, 0),
            'Red': (255, 0, 0),
            'Green': (0, 255, 0),
            'Blue': (0, 0, 255),
            'Yellow': (255, 255, 0),
            'Purple': (128, 0, 128)
        }
        rgb_color = color_map.get(color, (255, 255, 255))

        # Measure the text.
        text_bbox = draw.textbbox((0, 0), text, font=font)
        text_width = text_bbox[2] - text_bbox[0]
        text_height = text_bbox[3] - text_bbox[1]

        # Position: a percentage of the space left after the text itself.
        actual_x = int((image.width - text_width) * (x_position / 100))
        actual_y = int((image.height - text_height) * (y_position / 100))

        # Text colour with alpha.
        text_color = (*rgb_color, int(opacity))

        # Draw the text.
        add_text_with_stroke(
            draw,
            text,
            actual_x,
            actual_y,
            font,
            text_color,
            int(thickness)
        )

        # Both modes start by compositing the text over the image.
        output_image = Image.alpha_composite(image, txt_overlay)
        if text_behind:
            # Paste the cut-out subject back on top so it covers the text.
            output_image = superimpose(output_image, overlay_image)

        # Convert to RGB.
        output_image = output_image.convert('RGB')

        return output_image

    except Exception as e:
        print(f"Error in add_text_to_image: {str(e)}")
        return input_image
681
-
682
-
683
def update_position(new_position):
    """Log the newly selected position and return it unchanged."""
    message = f"Position updated to: {new_position}"
    print(message)
    return new_position
687
-
688
def update_controls(bg_prompt):
    """Show or hide two controls depending on whether a background prompt is set.

    Returns two visibility updates, in order: aspect_ratio, object_controls.
    """
    show = bool(bg_prompt)
    aspect_ratio_update = gr.update(visible=show)
    object_controls_update = gr.update(visible=show)
    return [aspect_ratio_update, object_controls_update]
695
-
696
# Gradio UI: inputs on the left, results and text options on the right.
with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
    # Single source of truth for the selected object position. It is written
    # by the arrow buttons and read by process_prompt.
    position = gr.State(value="bottom-center")

    gr.HTML("""
    <div class="main-title">
        <h1>🎨 GiniGen Canvas-o3</h1>
        <p>Remove background of specified objects, generate new backgrounds, and insert text over or behind images with prompts.</p>
    </div>
    """)

    with gr.Row(equal_height=True):
        # Left panel (input)
        with gr.Column(scale=1):
            with gr.Group(elem_classes="input-panel"):
                input_image = gr.Image(
                    type="pil",
                    label="Upload Image",
                    interactive=True,
                    height=400
                )
                text_prompt = gr.Textbox(
                    label="Object to Extract",
                    placeholder="Enter what you want to extract...",
                    interactive=True
                )
                with gr.Row():
                    bg_prompt = gr.Textbox(
                        label="Background Prompt (optional)",
                        placeholder="Describe the background...",
                        interactive=True,
                        scale=3
                    )
                    aspect_ratio = gr.Dropdown(
                        choices=["1:1", "16:9", "9:16", "4:3"],
                        value="1:1",
                        label="Aspect Ratio",
                        interactive=True,
                        visible=True,
                        scale=1
                    )

                # Hidden until a background prompt is entered (see update_controls).
                with gr.Group(elem_classes="controls-panel", visible=False) as object_controls:
                    with gr.Column(scale=1):
                        with gr.Row():
                            btn_top_left = gr.Button("↖", elem_classes="position-btn")
                            btn_top_center = gr.Button("↑", elem_classes="position-btn")
                            btn_top_right = gr.Button("↗", elem_classes="position-btn")
                        with gr.Row():
                            btn_middle_left = gr.Button("←", elem_classes="position-btn")
                            btn_middle_center = gr.Button("•", elem_classes="position-btn")
                            btn_middle_right = gr.Button("→", elem_classes="position-btn")
                        with gr.Row():
                            btn_bottom_left = gr.Button("↙", elem_classes="position-btn")
                            # The label is already the positional `value`;
                            # passing value="selected" as well would raise
                            # TypeError. Mark the default through the
                            # "selected" CSS class instead.
                            btn_bottom_center = gr.Button("↓", elem_classes=["position-btn", "selected"])
                            btn_bottom_right = gr.Button("↘", elem_classes="position-btn")
                    with gr.Column(scale=1):
                        scale_slider = gr.Slider(
                            minimum=10,
                            maximum=200,
                            value=50,
                            step=5,
                            label="Object Size (%)"
                        )

                process_btn = gr.Button(
                    "Process",
                    variant="primary",
                    interactive=False,
                    size="lg"
                )

        # Right panel (output)
        with gr.Column(scale=1):
            with gr.Group(elem_classes="output-panel"):
                with gr.Tab("Result"):
                    combined_image = gr.Image(
                        label="Combined Result",
                        show_download_button=True,
                        type="pil",
                        height=400
                    )

                    # Text insertion options live in an accordion.
                    with gr.Accordion("Text Insertion Options", open=False):
                        with gr.Group():
                            with gr.Row():
                                text_input = gr.Textbox(
                                    label="Text Content",
                                    placeholder="Enter text to add..."
                                )
                                text_position_type = gr.Radio(
                                    choices=["Text Over Image", "Text Behind Image"],
                                    value="Text Over Image",
                                    label="Text Position"
                                )

                            with gr.Row():
                                with gr.Column(scale=1):
                                    font_choice = gr.Dropdown(
                                        choices=["Default", "Korean Regular", "Korean Son"],
                                        value="Default",
                                        label="Font Selection",
                                        interactive=True
                                    )
                                    font_size = gr.Slider(
                                        minimum=10,
                                        maximum=200,
                                        value=40,
                                        step=5,
                                        label="Font Size"
                                    )
                                    color_dropdown = gr.Dropdown(
                                        choices=["White", "Black", "Red", "Green", "Blue", "Yellow", "Purple"],
                                        value="White",
                                        label="Text Color"
                                    )
                                    thickness = gr.Slider(
                                        minimum=0,
                                        maximum=10,
                                        value=1,
                                        step=1,
                                        label="Text Thickness"
                                    )
                                with gr.Column(scale=1):
                                    opacity_slider = gr.Slider(
                                        minimum=0,
                                        maximum=255,
                                        value=255,
                                        step=1,
                                        label="Opacity"
                                    )
                                    x_position = gr.Slider(
                                        minimum=0,
                                        maximum=100,
                                        value=50,
                                        step=1,
                                        label="X Position (%)"
                                    )
                                    y_position = gr.Slider(
                                        minimum=0,
                                        maximum=100,
                                        value=50,
                                        step=1,
                                        label="Y Position (%)"
                                    )
                            add_text_btn = gr.Button("Apply Text", variant="primary")

                    extracted_image = gr.Image(
                        label="Extracted Object",
                        show_download_button=True,
                        type="pil",
                        height=200
                    )

    # Extra style for the "selected" CSS class.
    gr.HTML("""
    <style>
    .position-btn.selected {
        background-color: #2196F3 !important;
        color: white !important;
    }
    </style>
    """)

    # Bind the position buttons to their position names.
    position_mapping = {
        btn_top_left: "top-left",
        btn_top_center: "top-center",
        btn_top_right: "top-right",
        btn_middle_left: "middle-left",
        btn_middle_center: "middle-center",
        btn_middle_right: "middle-right",
        btn_bottom_left: "bottom-left",
        btn_bottom_center: "bottom-center",
        btn_bottom_right: "bottom-right"
    }

    for btn, pos in position_mapping.items():
        btn.click(
            # Bind `pos` as a default to avoid the late-binding closure pitfall.
            fn=lambda pos=pos: update_position(pos),
            outputs=position
        )

    # Event bindings
    bg_prompt.change(
        fn=update_controls,
        inputs=bg_prompt,
        outputs=[aspect_ratio, object_controls],
        queue=False
    )

    input_image.change(
        fn=update_process_button,
        inputs=[input_image, text_prompt],
        outputs=process_btn,
        queue=False
    )

    text_prompt.change(
        fn=update_process_button,
        inputs=[input_image, text_prompt],
        outputs=process_btn,
        queue=False
    )

    process_btn.click(
        fn=process_prompt,
        inputs=[
            input_image,
            text_prompt,
            bg_prompt,
            aspect_ratio,
            position,
            scale_slider
        ],
        outputs=[combined_image, extracted_image],
        queue=True
    )

    add_text_btn.click(
        fn=add_text_to_image,
        inputs=[
            combined_image,
            text_input,
            font_size,
            color_dropdown,
            opacity_slider,
            x_position,
            y_position,
            thickness,
            text_position_type,
            font_choice
        ],
        outputs=combined_image
    )

demo.queue(max_size=5)
demo.launch(
    server_name="0.0.0.0",
    server_port=7860,
    share=False,
    max_threads=2
)