Spaces:
Running
on
Zero
Running
on
Zero
lixiang46
committed on
Commit
·
08f2519
1
Parent(s):
88a3aee
add ipa
Browse files
- app.py +41 -16
- image/bird.png +0 -3
- image/dog.png +0 -3
app.py
CHANGED
@@ -23,15 +23,21 @@ device = "cuda"
|
|
23 |
ckpt_dir = snapshot_download(repo_id="Kwai-Kolors/Kolors")
|
24 |
ckpt_dir_depth = snapshot_download(repo_id="Kwai-Kolors/Kolors-ControlNet-Depth")
|
25 |
ckpt_dir_canny = snapshot_download(repo_id="Kwai-Kolors/Kolors-ControlNet-Canny")
|
|
|
26 |
|
27 |
text_encoder = ChatGLMModel.from_pretrained(f'{ckpt_dir}/text_encoder', torch_dtype=torch.float16).half().to(device)
|
28 |
tokenizer = ChatGLMTokenizer.from_pretrained(f'{ckpt_dir}/text_encoder')
|
29 |
vae = AutoencoderKL.from_pretrained(f"{ckpt_dir}/vae", revision=None).half().to(device)
|
30 |
scheduler = EulerDiscreteScheduler.from_pretrained(f"{ckpt_dir}/scheduler")
|
31 |
unet = UNet2DConditionModel.from_pretrained(f"{ckpt_dir}/unet", revision=None).half().to(device)
|
|
|
32 |
controlnet_depth = ControlNetModel.from_pretrained(f"{ckpt_dir_depth}", revision=None).half().to(device)
|
33 |
controlnet_canny = ControlNetModel.from_pretrained(f"{ckpt_dir_canny}", revision=None).half().to(device)
|
34 |
|
|
|
|
|
|
|
|
|
35 |
pipe_depth = StableDiffusionXLControlNetImg2ImgPipeline(
|
36 |
vae=vae,
|
37 |
controlnet = controlnet_depth,
|
@@ -52,6 +58,14 @@ pipe_canny = StableDiffusionXLControlNetImg2ImgPipeline(
|
|
52 |
force_zeros_for_empty_prompt=False
|
53 |
)
|
54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
@spaces.GPU
|
56 |
def process_canny_condition(image, canny_threods=[100,200]):
|
57 |
np_image = image.copy()
|
@@ -77,6 +91,7 @@ MAX_IMAGE_SIZE = 1024
|
|
77 |
@spaces.GPU
|
78 |
def infer_depth(prompt,
|
79 |
image = None,
|
|
|
80 |
negative_prompt = "nsfw,脸部阴影,低分辨率,jpeg伪影、模糊、糟糕,黑脸,霓虹灯",
|
81 |
seed = 397886929,
|
82 |
randomize_seed = False,
|
@@ -84,19 +99,22 @@ def infer_depth(prompt,
|
|
84 |
num_inference_steps = 50,
|
85 |
controlnet_conditioning_scale = 0.7,
|
86 |
control_guidance_end = 0.9,
|
87 |
-
strength = 1.0
|
|
|
88 |
):
|
89 |
if randomize_seed:
|
90 |
seed = random.randint(0, MAX_SEED)
|
91 |
generator = torch.Generator().manual_seed(seed)
|
92 |
init_image = resize_image(image, MAX_IMAGE_SIZE)
|
93 |
-
pipe = pipe_depth.to("cuda")
|
|
|
94 |
condi_img = process_depth_condition_midas( np.array(init_image), MAX_IMAGE_SIZE)
|
95 |
image = pipe(
|
96 |
prompt= prompt ,
|
97 |
image = init_image,
|
98 |
controlnet_conditioning_scale = controlnet_conditioning_scale,
|
99 |
control_guidance_end = control_guidance_end,
|
|
|
100 |
strength= strength ,
|
101 |
control_image = condi_img,
|
102 |
negative_prompt= negative_prompt ,
|
@@ -110,6 +128,7 @@ def infer_depth(prompt,
|
|
110 |
@spaces.GPU
|
111 |
def infer_canny(prompt,
|
112 |
image = None,
|
|
|
113 |
negative_prompt = "nsfw,脸部阴影,低分辨率,jpeg伪影、模糊、糟糕,黑脸,霓虹灯",
|
114 |
seed = 397886929,
|
115 |
randomize_seed = False,
|
@@ -117,19 +136,22 @@ def infer_canny(prompt,
|
|
117 |
num_inference_steps = 50,
|
118 |
controlnet_conditioning_scale = 0.7,
|
119 |
control_guidance_end = 0.9,
|
120 |
-
strength = 1.0
|
|
|
121 |
):
|
122 |
if randomize_seed:
|
123 |
seed = random.randint(0, MAX_SEED)
|
124 |
generator = torch.Generator().manual_seed(seed)
|
125 |
init_image = resize_image(image, MAX_IMAGE_SIZE)
|
126 |
-
pipe = pipe_canny.to("cuda")
|
|
|
127 |
condi_img = process_canny_condition(np.array(init_image))
|
128 |
image = pipe(
|
129 |
prompt= prompt ,
|
130 |
image = init_image,
|
131 |
controlnet_conditioning_scale = controlnet_conditioning_scale,
|
132 |
control_guidance_end = control_guidance_end,
|
|
|
133 |
strength= strength ,
|
134 |
control_image = condi_img,
|
135 |
negative_prompt= negative_prompt ,
|
@@ -141,17 +163,13 @@ def infer_canny(prompt,
|
|
141 |
return [condi_img, image], seed
|
142 |
|
143 |
canny_examples = [
|
144 |
-
["
|
145 |
-
"image/
|
146 |
-
["全景,一只可爱的白色小狗坐在杯子里,看向镜头,动漫风格,3d渲染,辛烷值渲染",
|
147 |
-
"image/dog.png"]
|
148 |
]
|
149 |
|
150 |
depth_examples = [
|
151 |
-
["
|
152 |
-
"image/
|
153 |
-
["一只颜色鲜艳的小鸟,高品质,超清晰,色彩鲜艳,超高分辨率,最佳品质,8k,高清,4K",
|
154 |
-
"image/bird.png"]
|
155 |
]
|
156 |
|
157 |
css="""
|
@@ -239,6 +257,13 @@ with gr.Blocks(css=css) as Kolors:
|
|
239 |
step=0.1,
|
240 |
value=1.0,
|
241 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
242 |
with gr.Row():
|
243 |
canny_button = gr.Button("Canny", elem_id="button")
|
244 |
depth_button = gr.Button("Depth", elem_id="button")
|
@@ -251,7 +276,7 @@ with gr.Blocks(css=css) as Kolors:
|
|
251 |
gr.Examples(
|
252 |
fn = infer_canny,
|
253 |
examples = canny_examples,
|
254 |
-
inputs = [prompt, image],
|
255 |
outputs = [result, seed_used],
|
256 |
label = "Canny"
|
257 |
)
|
@@ -259,20 +284,20 @@ with gr.Blocks(css=css) as Kolors:
|
|
259 |
gr.Examples(
|
260 |
fn = infer_depth,
|
261 |
examples = depth_examples,
|
262 |
-
inputs = [prompt, image],
|
263 |
outputs = [result, seed_used],
|
264 |
label = "Depth"
|
265 |
)
|
266 |
|
267 |
canny_button.click(
|
268 |
fn = infer_canny,
|
269 |
-
inputs = [prompt, image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength],
|
270 |
outputs = [result, seed_used]
|
271 |
)
|
272 |
|
273 |
depth_button.click(
|
274 |
fn = infer_depth,
|
275 |
-
inputs = [prompt, image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength],
|
276 |
outputs = [result, seed_used]
|
277 |
)
|
278 |
|
|
|
23 |
ckpt_dir = snapshot_download(repo_id="Kwai-Kolors/Kolors")
|
24 |
ckpt_dir_depth = snapshot_download(repo_id="Kwai-Kolors/Kolors-ControlNet-Depth")
|
25 |
ckpt_dir_canny = snapshot_download(repo_id="Kwai-Kolors/Kolors-ControlNet-Canny")
|
26 |
+
ckpt_dir_ipa = snapshot_download(repo_id="Kwai-Kolors/Kolors-IP-Adapter-Plus")
|
27 |
|
28 |
text_encoder = ChatGLMModel.from_pretrained(f'{ckpt_dir}/text_encoder', torch_dtype=torch.float16).half().to(device)
|
29 |
tokenizer = ChatGLMTokenizer.from_pretrained(f'{ckpt_dir}/text_encoder')
|
30 |
vae = AutoencoderKL.from_pretrained(f"{ckpt_dir}/vae", revision=None).half().to(device)
|
31 |
scheduler = EulerDiscreteScheduler.from_pretrained(f"{ckpt_dir}/scheduler")
|
32 |
unet = UNet2DConditionModel.from_pretrained(f"{ckpt_dir}/unet", revision=None).half().to(device)
|
33 |
+
|
34 |
controlnet_depth = ControlNetModel.from_pretrained(f"{ckpt_dir_depth}", revision=None).half().to(device)
|
35 |
controlnet_canny = ControlNetModel.from_pretrained(f"{ckpt_dir_canny}", revision=None).half().to(device)
|
36 |
|
37 |
+
image_encoder = CLIPVisionModelWithProjection.from_pretrained(f'{ckpt_dir_ipa}/weights/Kolors-IP-Adapter-Plus/image_encoder', ignore_mismatched_sizes=True).to(dtype=torch.float16, device=device)
|
38 |
+
ip_img_size = 336
|
39 |
+
clip_image_processor = CLIPImageProcessor(size=ip_img_size, crop_size=ip_img_size )
|
40 |
+
|
41 |
pipe_depth = StableDiffusionXLControlNetImg2ImgPipeline(
|
42 |
vae=vae,
|
43 |
controlnet = controlnet_depth,
|
|
|
58 |
force_zeros_for_empty_prompt=False
|
59 |
)
|
60 |
|
61 |
+
@spaces.GPU
|
62 |
+
def load_ipa(pipe):
|
63 |
+
if hasattr(pipe.unet, 'encoder_hid_proj'):
|
64 |
+
pipe.unet.text_encoder_hid_proj = pipe.unet.encoder_hid_proj
|
65 |
+
|
66 |
+
pipe.load_ip_adapter( f'{ckpt_dir_ipa}/weights/Kolors-IP-Adapter-Plus' , subfolder="", weight_name=["ip_adapter_plus_general.bin"])
|
67 |
+
return pipe
|
68 |
+
|
69 |
@spaces.GPU
|
70 |
def process_canny_condition(image, canny_threods=[100,200]):
|
71 |
np_image = image.copy()
|
|
|
91 |
@spaces.GPU
|
92 |
def infer_depth(prompt,
|
93 |
image = None,
|
94 |
+
ipa_img = None,
|
95 |
negative_prompt = "nsfw,脸部阴影,低分辨率,jpeg伪影、模糊、糟糕,黑脸,霓虹灯",
|
96 |
seed = 397886929,
|
97 |
randomize_seed = False,
|
|
|
99 |
num_inference_steps = 50,
|
100 |
controlnet_conditioning_scale = 0.7,
|
101 |
control_guidance_end = 0.9,
|
102 |
+
strength = 1.0,
|
103 |
+
ip_scale = 0.5,
|
104 |
):
|
105 |
if randomize_seed:
|
106 |
seed = random.randint(0, MAX_SEED)
|
107 |
generator = torch.Generator().manual_seed(seed)
|
108 |
init_image = resize_image(image, MAX_IMAGE_SIZE)
|
109 |
+
pipe = load_ipa(pipe_depth).to("cuda")
|
110 |
+
pipe.set_ip_adapter_scale([ip_scale])
|
111 |
condi_img = process_depth_condition_midas( np.array(init_image), MAX_IMAGE_SIZE)
|
112 |
image = pipe(
|
113 |
prompt= prompt ,
|
114 |
image = init_image,
|
115 |
controlnet_conditioning_scale = controlnet_conditioning_scale,
|
116 |
control_guidance_end = control_guidance_end,
|
117 |
+
ip_adapter_image=[ipa_img],
|
118 |
strength= strength ,
|
119 |
control_image = condi_img,
|
120 |
negative_prompt= negative_prompt ,
|
|
|
128 |
@spaces.GPU
|
129 |
def infer_canny(prompt,
|
130 |
image = None,
|
131 |
+
ipa_img = None,
|
132 |
negative_prompt = "nsfw,脸部阴影,低分辨率,jpeg伪影、模糊、糟糕,黑脸,霓虹灯",
|
133 |
seed = 397886929,
|
134 |
randomize_seed = False,
|
|
|
136 |
num_inference_steps = 50,
|
137 |
controlnet_conditioning_scale = 0.7,
|
138 |
control_guidance_end = 0.9,
|
139 |
+
strength = 1.0,
|
140 |
+
ip_scale = 0.5,
|
141 |
):
|
142 |
if randomize_seed:
|
143 |
seed = random.randint(0, MAX_SEED)
|
144 |
generator = torch.Generator().manual_seed(seed)
|
145 |
init_image = resize_image(image, MAX_IMAGE_SIZE)
|
146 |
+
pipe = load_ipa(pipe_canny).to("cuda")
|
147 |
+
pipe.set_ip_adapter_scale([ip_scale])
|
148 |
condi_img = process_canny_condition(np.array(init_image))
|
149 |
image = pipe(
|
150 |
prompt= prompt ,
|
151 |
image = init_image,
|
152 |
controlnet_conditioning_scale = controlnet_conditioning_scale,
|
153 |
control_guidance_end = control_guidance_end,
|
154 |
+
ip_adapter_image=[ipa_img],
|
155 |
strength= strength ,
|
156 |
control_image = condi_img,
|
157 |
negative_prompt= negative_prompt ,
|
|
|
163 |
return [condi_img, image], seed
|
164 |
|
165 |
canny_examples = [
|
166 |
+
["一个红色头发的女孩,唯美风景,清新明亮,斑驳的光影,最好的质量,超细节,8K画质",
|
167 |
+
"image/woman_2.png", "image/2.png"],
|
|
|
|
|
168 |
]
|
169 |
|
170 |
depth_examples = [
|
171 |
+
["一个漂亮的女孩,最好的质量,超细节,8K画质",
|
172 |
+
"image/1.png","image/woman_1.png"],
|
|
|
|
|
173 |
]
|
174 |
|
175 |
css="""
|
|
|
257 |
step=0.1,
|
258 |
value=1.0,
|
259 |
)
|
260 |
+
ip_scale = gr.Slider(
|
261 |
+
label="IP_Scale",
|
262 |
+
minimum=0.0,
|
263 |
+
maximum=1.0,
|
264 |
+
step=0.1,
|
265 |
+
value=0.5,
|
266 |
+
)
|
267 |
with gr.Row():
|
268 |
canny_button = gr.Button("Canny", elem_id="button")
|
269 |
depth_button = gr.Button("Depth", elem_id="button")
|
|
|
276 |
gr.Examples(
|
277 |
fn = infer_canny,
|
278 |
examples = canny_examples,
|
279 |
+
inputs = [prompt, image, ipa_image],
|
280 |
outputs = [result, seed_used],
|
281 |
label = "Canny"
|
282 |
)
|
|
|
284 |
gr.Examples(
|
285 |
fn = infer_depth,
|
286 |
examples = depth_examples,
|
287 |
+
inputs = [prompt, image, ipa_image],
|
288 |
outputs = [result, seed_used],
|
289 |
label = "Depth"
|
290 |
)
|
291 |
|
292 |
canny_button.click(
|
293 |
fn = infer_canny,
|
294 |
+
inputs = [prompt, image, ipa_image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength, ip_scale],
|
295 |
outputs = [result, seed_used]
|
296 |
)
|
297 |
|
298 |
depth_button.click(
|
299 |
fn = infer_depth,
|
300 |
+
inputs = [prompt, image, ipa_image, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength, ip_scale],
|
301 |
outputs = [result, seed_used]
|
302 |
)
|
303 |
|
image/bird.png
DELETED
Git LFS Details
|
image/dog.png
DELETED
Git LFS Details
|