Spaces:
Running
on
Zero
Running
on
Zero
adaface-neurips
committed on
Commit
·
d2b3308
1
Parent(s):
a461e74
fix style switching bugs
Browse files
adaface/adaface_wrapper.py
CHANGED
@@ -4,6 +4,7 @@ from transformers import CLIPTextModel
|
|
4 |
from diffusers import (
|
5 |
StableDiffusionPipeline,
|
6 |
StableDiffusionImg2ImgPipeline,
|
|
|
7 |
StableDiffusion3Pipeline,
|
8 |
#FluxPipeline,
|
9 |
DDIMScheduler,
|
@@ -25,7 +26,7 @@ class AdaFaceWrapper(nn.Module):
|
|
25 |
main_unet_filepath=None, unet_types=None, extra_unet_dirpaths=None, unet_weights=None,
|
26 |
device='cuda', is_training=False):
|
27 |
'''
|
28 |
-
pipeline_name: "text2img", "img2img", "text2img3", "flux", or None.
|
29 |
If None, it's used only as a face encoder, and the unet and vae are
|
30 |
removed from the pipeline to release RAM.
|
31 |
'''
|
@@ -64,6 +65,13 @@ class AdaFaceWrapper(nn.Module):
|
|
64 |
self.encoders_num_id_vecs = self.id2ada_prompt_encoder.encoders_num_id_vecs
|
65 |
self.extend_tokenizer_and_text_encoder()
|
66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
def initialize_pipeline(self):
|
68 |
self.id2ada_prompt_encoder = create_id2ada_prompt_encoder(self.adaface_encoder_types,
|
69 |
self.adaface_ckpt_paths,
|
@@ -95,6 +103,8 @@ class AdaFaceWrapper(nn.Module):
|
|
95 |
PipelineClass = StableDiffusionImg2ImgPipeline
|
96 |
elif self.pipeline_name == "text2img":
|
97 |
PipelineClass = StableDiffusionPipeline
|
|
|
|
|
98 |
elif self.pipeline_name == "text2img3":
|
99 |
PipelineClass = StableDiffusion3Pipeline
|
100 |
#elif self.pipeline_name == "flux":
|
@@ -109,6 +119,7 @@ class AdaFaceWrapper(nn.Module):
|
|
109 |
if self.base_model_path is None:
|
110 |
base_model_path_dict = {
|
111 |
'text2img': 'models/sd15-dste8-vae.safetensors',
|
|
|
112 |
'text2img3': 'stabilityai/stable-diffusion-3-medium-diffusers',
|
113 |
'flux': 'black-forest-labs/FLUX.1-schnell',
|
114 |
}
|
@@ -156,7 +167,7 @@ class AdaFaceWrapper(nn.Module):
|
|
156 |
pipeline.vae = None
|
157 |
print("Removed UNet and VAE from the pipeline.")
|
158 |
|
159 |
-
if self.pipeline_name not in ["text2img3", "flux"]:
|
160 |
noise_scheduler = DDIMScheduler(
|
161 |
num_train_timesteps=1000,
|
162 |
beta_start=0.00085,
|
@@ -277,6 +288,8 @@ class AdaFaceWrapper(nn.Module):
|
|
277 |
|
278 |
return prompt
|
279 |
|
|
|
|
|
280 |
# avg_at_stage: 'id_emb', 'img_prompt_emb', or None.
|
281 |
# avg_at_stage == ada_prompt_emb usually produces the worst results.
|
282 |
# id_emb is slightly better than img_prompt_emb, but sometimes img_prompt_emb is better.
|
@@ -297,14 +310,18 @@ class AdaFaceWrapper(nn.Module):
|
|
297 |
if all_adaface_subj_embs is None:
|
298 |
return None
|
299 |
|
300 |
-
|
301 |
-
|
|
|
|
|
|
|
|
|
302 |
|
303 |
if update_text_encoder:
|
304 |
self.update_text_encoder_subj_embeddings(all_adaface_subj_embs)
|
305 |
return all_adaface_subj_embs
|
306 |
|
307 |
-
def diffusers_encode_prompts(self, prompt, negative_prompt, device):
|
308 |
# pooled_prompt_embeds_, negative_pooled_prompt_embeds_ are used by text2img3 and flux.
|
309 |
pooled_prompt_embeds_, negative_pooled_prompt_embeds_ = None, None
|
310 |
|
@@ -318,17 +335,26 @@ class AdaFaceWrapper(nn.Module):
|
|
318 |
prompt_embeds_ = prompt_embeds_.unsqueeze(0)
|
319 |
negative_prompt_embeds_ = negative_prompt_embeds_.unsqueeze(0)
|
320 |
else:
|
321 |
-
if self.pipeline_name in ["text2img3", "flux"]:
|
322 |
-
|
323 |
-
# pooled_prompt_embeds_, negative_pooled_prompt_embeds_: [1, 2048]
|
324 |
# CLIP Text Encoder prompt uses a maximum sequence length of 77.
|
325 |
# T5 Text Encoder prompt uses a maximum sequence length of 256.
|
326 |
# 333 = 256 + 77.
|
327 |
prompt_t5 = prompt + "".join([", "] * 256)
|
328 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
329 |
prompt_embeds_, negative_prompt_embeds_, \
|
330 |
pooled_prompt_embeds_, negative_pooled_prompt_embeds_ = \
|
331 |
-
self.pipeline.encode_prompt(prompt,
|
332 |
num_images_per_prompt=1,
|
333 |
do_classifier_free_guidance=True,
|
334 |
negative_prompt=negative_prompt)
|
@@ -362,6 +388,7 @@ class AdaFaceWrapper(nn.Module):
|
|
362 |
if device is None:
|
363 |
device = self.device
|
364 |
|
|
|
365 |
prompt = self.update_prompt(prompt, placeholder_tokens_pos=placeholder_tokens_pos)
|
366 |
if verbose:
|
367 |
print(f"Subject prompt:\n{prompt}")
|
@@ -373,11 +400,11 @@ class AdaFaceWrapper(nn.Module):
|
|
373 |
negative_prompt = self.update_prompt(negative_prompt0, placeholder_tokens_pos='prepend')
|
374 |
null_negative_prompt = self.update_prompt(negative_prompt0, placeholder_tokens_pos='prepend',
|
375 |
use_null_placeholders=True)
|
376 |
-
if verbose:
|
377 |
-
|
378 |
-
|
379 |
-
#print(f"Null negative prompt:\n{null_negative_prompt}")
|
380 |
|
|
|
381 |
else:
|
382 |
null_negative_prompt = None
|
383 |
|
@@ -386,11 +413,11 @@ class AdaFaceWrapper(nn.Module):
|
|
386 |
self.pipeline.text_encoder.to(device)
|
387 |
|
388 |
prompt_embeds_, negative_prompt_embeds_, pooled_prompt_embeds_, negative_pooled_prompt_embeds_ = \
|
389 |
-
self.diffusers_encode_prompts(prompt, negative_prompt, device)
|
390 |
|
391 |
if 0 < do_neg_id_prompt_weight < 1:
|
392 |
_, negative_prompt_embeds_null, _, _ = \
|
393 |
-
self.diffusers_encode_prompts(prompt, null_negative_prompt, device)
|
394 |
negative_prompt_embeds_ = negative_prompt_embeds_ * do_neg_id_prompt_weight + \
|
395 |
negative_prompt_embeds_null * (1 - do_neg_id_prompt_weight)
|
396 |
|
@@ -399,9 +426,8 @@ class AdaFaceWrapper(nn.Module):
|
|
399 |
# ref_img_strength is used only in the img2img pipeline.
|
400 |
def forward(self, noise, prompt, negative_prompt=None,
|
401 |
placeholder_tokens_pos='append',
|
402 |
-
guidance_scale=6.0,
|
403 |
do_neg_id_prompt_weight=0,
|
404 |
-
out_image_count=4,
|
405 |
ref_img_strength=0.8, generator=None, verbose=False):
|
406 |
noise = noise.to(device=self.device, dtype=torch.float16)
|
407 |
|
@@ -419,7 +445,7 @@ class AdaFaceWrapper(nn.Module):
|
|
419 |
if negative_prompt_embeds_ is not None:
|
420 |
negative_prompt_embeds_ = negative_prompt_embeds_.repeat(out_image_count, 1, 1)
|
421 |
|
422 |
-
if self.pipeline_name
|
423 |
pooled_prompt_embeds_ = pooled_prompt_embeds_.repeat(out_image_count, 1)
|
424 |
negative_pooled_prompt_embeds_ = negative_pooled_prompt_embeds_.repeat(out_image_count, 1)
|
425 |
|
|
|
4 |
from diffusers import (
|
5 |
StableDiffusionPipeline,
|
6 |
StableDiffusionImg2ImgPipeline,
|
7 |
+
StableDiffusionXLPipeline,
|
8 |
StableDiffusion3Pipeline,
|
9 |
#FluxPipeline,
|
10 |
DDIMScheduler,
|
|
|
26 |
main_unet_filepath=None, unet_types=None, extra_unet_dirpaths=None, unet_weights=None,
|
27 |
device='cuda', is_training=False):
|
28 |
'''
|
29 |
+
pipeline_name: "text2img", "text2imgxl", "img2img", "text2img3", "flux", or None.
|
30 |
If None, it's used only as a face encoder, and the unet and vae are
|
31 |
removed from the pipeline to release RAM.
|
32 |
'''
|
|
|
65 |
self.encoders_num_id_vecs = self.id2ada_prompt_encoder.encoders_num_id_vecs
|
66 |
self.extend_tokenizer_and_text_encoder()
|
67 |
|
68 |
+
def to(self, device):
|
69 |
+
self.device = device
|
70 |
+
self.id2ada_prompt_encoder.to(device)
|
71 |
+
self.pipeline.to(device)
|
72 |
+
print(f"Moved AdaFaceWrapper to {device}.")
|
73 |
+
return self
|
74 |
+
|
75 |
def initialize_pipeline(self):
|
76 |
self.id2ada_prompt_encoder = create_id2ada_prompt_encoder(self.adaface_encoder_types,
|
77 |
self.adaface_ckpt_paths,
|
|
|
103 |
PipelineClass = StableDiffusionImg2ImgPipeline
|
104 |
elif self.pipeline_name == "text2img":
|
105 |
PipelineClass = StableDiffusionPipeline
|
106 |
+
elif self.pipeline_name == "text2imgxl":
|
107 |
+
PipelineClass = StableDiffusionXLPipeline
|
108 |
elif self.pipeline_name == "text2img3":
|
109 |
PipelineClass = StableDiffusion3Pipeline
|
110 |
#elif self.pipeline_name == "flux":
|
|
|
119 |
if self.base_model_path is None:
|
120 |
base_model_path_dict = {
|
121 |
'text2img': 'models/sd15-dste8-vae.safetensors',
|
122 |
+
'text2imgxl': 'stabilityai/stable-diffusion-xl-base-1.0',
|
123 |
'text2img3': 'stabilityai/stable-diffusion-3-medium-diffusers',
|
124 |
'flux': 'black-forest-labs/FLUX.1-schnell',
|
125 |
}
|
|
|
167 |
pipeline.vae = None
|
168 |
print("Removed UNet and VAE from the pipeline.")
|
169 |
|
170 |
+
if self.pipeline_name not in ["text2imgxl", "text2img3", "flux"]:
|
171 |
noise_scheduler = DDIMScheduler(
|
172 |
num_train_timesteps=1000,
|
173 |
beta_start=0.00085,
|
|
|
288 |
|
289 |
return prompt
|
290 |
|
291 |
+
# If face_id_embs is None, then it extracts face_id_embs from the images,
|
292 |
+
# then maps them to ada prompt embeddings.
|
293 |
# avg_at_stage: 'id_emb', 'img_prompt_emb', or None.
|
294 |
# avg_at_stage == ada_prompt_emb usually produces the worst results.
|
295 |
# id_emb is slightly better than img_prompt_emb, but sometimes img_prompt_emb is better.
|
|
|
310 |
if all_adaface_subj_embs is None:
|
311 |
return None
|
312 |
|
313 |
+
if all_adaface_subj_embs.ndim == 4:
|
314 |
+
# [1, 1, 16, 768] -> [16, 768]
|
315 |
+
all_adaface_subj_embs = all_adaface_subj_embs.squeeze(0).squeeze(0)
|
316 |
+
elif all_adaface_subj_embs.ndim == 3:
|
317 |
+
# [1, 16, 768] -> [16, 768]
|
318 |
+
all_adaface_subj_embs = all_adaface_subj_embs.squeeze(0)
|
319 |
|
320 |
if update_text_encoder:
|
321 |
self.update_text_encoder_subj_embeddings(all_adaface_subj_embs)
|
322 |
return all_adaface_subj_embs
|
323 |
|
324 |
+
def diffusers_encode_prompts(self, prompt, plain_prompt, negative_prompt, device):
|
325 |
# pooled_prompt_embeds_, negative_pooled_prompt_embeds_ are used by text2imgxl, text2img3 and flux.
|
326 |
pooled_prompt_embeds_, negative_pooled_prompt_embeds_ = None, None
|
327 |
|
|
|
335 |
prompt_embeds_ = prompt_embeds_.unsqueeze(0)
|
336 |
negative_prompt_embeds_ = negative_prompt_embeds_.unsqueeze(0)
|
337 |
else:
|
338 |
+
if self.pipeline_name in ["text2imgxl", "text2img3", "flux"]:
|
339 |
+
prompt_2 = plain_prompt
|
|
|
340 |
# CLIP Text Encoder prompt uses a maximum sequence length of 77.
|
341 |
# T5 Text Encoder prompt uses a maximum sequence length of 256.
|
342 |
# 333 = 256 + 77.
|
343 |
prompt_t5 = prompt + "".join([", "] * 256)
|
344 |
+
|
345 |
+
# prompt_embeds_, negative_prompt_embeds_: [1, 333, 4096]
|
346 |
+
# pooled_prompt_embeds_, negative_pooled_prompt_embeds_: [1, 2048]
|
347 |
+
if self.pipeline_name == "text2imgxl":
|
348 |
+
prompt_embeds_, negative_prompt_embeds_, \
|
349 |
+
pooled_prompt_embeds_, negative_pooled_prompt_embeds_ = \
|
350 |
+
self.pipeline.encode_prompt(prompt, prompt_2, device=device,
|
351 |
+
num_images_per_prompt=1,
|
352 |
+
do_classifier_free_guidance=True,
|
353 |
+
negative_prompt=negative_prompt)
|
354 |
+
elif self.pipeline_name == "text2img3":
|
355 |
prompt_embeds_, negative_prompt_embeds_, \
|
356 |
pooled_prompt_embeds_, negative_pooled_prompt_embeds_ = \
|
357 |
+
self.pipeline.encode_prompt(prompt, prompt_2, prompt_t5, device=device,
|
358 |
num_images_per_prompt=1,
|
359 |
do_classifier_free_guidance=True,
|
360 |
negative_prompt=negative_prompt)
|
|
|
388 |
if device is None:
|
389 |
device = self.device
|
390 |
|
391 |
+
plain_prompt = prompt
|
392 |
prompt = self.update_prompt(prompt, placeholder_tokens_pos=placeholder_tokens_pos)
|
393 |
if verbose:
|
394 |
print(f"Subject prompt:\n{prompt}")
|
|
|
400 |
negative_prompt = self.update_prompt(negative_prompt0, placeholder_tokens_pos='prepend')
|
401 |
null_negative_prompt = self.update_prompt(negative_prompt0, placeholder_tokens_pos='prepend',
|
402 |
use_null_placeholders=True)
|
403 |
+
''' if verbose:
|
404 |
+
print(f"Negative prompt:\n{negative_prompt}")
|
405 |
+
print(f"Null negative prompt:\n{null_negative_prompt}")
|
|
|
406 |
|
407 |
+
'''
|
408 |
else:
|
409 |
null_negative_prompt = None
|
410 |
|
|
|
413 |
self.pipeline.text_encoder.to(device)
|
414 |
|
415 |
prompt_embeds_, negative_prompt_embeds_, pooled_prompt_embeds_, negative_pooled_prompt_embeds_ = \
|
416 |
+
self.diffusers_encode_prompts(prompt, plain_prompt, negative_prompt, device)
|
417 |
|
418 |
if 0 < do_neg_id_prompt_weight < 1:
|
419 |
_, negative_prompt_embeds_null, _, _ = \
|
420 |
+
self.diffusers_encode_prompts(prompt, plain_prompt, null_negative_prompt, device)
|
421 |
negative_prompt_embeds_ = negative_prompt_embeds_ * do_neg_id_prompt_weight + \
|
422 |
negative_prompt_embeds_null * (1 - do_neg_id_prompt_weight)
|
423 |
|
|
|
426 |
# ref_img_strength is used only in the img2img pipeline.
|
427 |
def forward(self, noise, prompt, negative_prompt=None,
|
428 |
placeholder_tokens_pos='append',
|
|
|
429 |
do_neg_id_prompt_weight=0,
|
430 |
+
guidance_scale=6.0, out_image_count=4,
|
431 |
ref_img_strength=0.8, generator=None, verbose=False):
|
432 |
noise = noise.to(device=self.device, dtype=torch.float16)
|
433 |
|
|
|
445 |
if negative_prompt_embeds_ is not None:
|
446 |
negative_prompt_embeds_ = negative_prompt_embeds_.repeat(out_image_count, 1, 1)
|
447 |
|
448 |
+
if self.pipeline_name in ["text2imgxl", "text2img3"]:
|
449 |
pooled_prompt_embeds_ = pooled_prompt_embeds_.repeat(out_image_count, 1)
|
450 |
negative_pooled_prompt_embeds_ = negative_pooled_prompt_embeds_.repeat(out_image_count, 1)
|
451 |
|
app.py
CHANGED
@@ -24,7 +24,7 @@ parser = argparse.ArgumentParser()
|
|
24 |
parser.add_argument("--adaface_encoder_types", type=str, nargs="+", default=["consistentID", "arc2face"],
|
25 |
choices=["arc2face", "consistentID"], help="Type(s) of the ID2Ada prompt encoders")
|
26 |
parser.add_argument('--adaface_ckpt_path', type=str,
|
27 |
-
default='models/adaface/VGGface2_HQ_masks2024-10-
|
28 |
parser.add_argument('--model_style_type', type=str, default='realistic',
|
29 |
choices=["realistic", "anime", "photorealistic"], help="Type of the base model")
|
30 |
parser.add_argument("--guidance_scale", type=float, default=6.0,
|
@@ -51,10 +51,10 @@ device = "cuda" if args.gpu is None else f"cuda:{args.gpu}"
|
|
51 |
global adaface, id_animator
|
52 |
|
53 |
base_model_path = model_style_type2base_model_path[args.model_style_type]
|
54 |
-
id_animator = load_model(model_style_type=args.model_style_type, device=
|
55 |
adaface = AdaFaceWrapper(pipeline_name="text2img", base_model_path=base_model_path,
|
56 |
adaface_encoder_types=args.adaface_encoder_types,
|
57 |
-
adaface_ckpt_paths=[args.adaface_ckpt_path], device=
|
58 |
|
59 |
basedir = os.getcwd()
|
60 |
savedir = os.path.join(basedir,'samples')
|
@@ -80,11 +80,14 @@ def get_clicked_image(data: gr.SelectData):
|
|
80 |
|
81 |
@spaces.GPU
|
82 |
def gen_init_images(uploaded_image_paths, prompt, guidance_scale, do_neg_id_prompt_weight, out_image_count=4):
|
83 |
-
global adaface, id_animator
|
84 |
if uploaded_image_paths is None:
|
85 |
print("No image uploaded")
|
86 |
return None, None, None
|
87 |
|
|
|
|
|
|
|
|
|
88 |
# uploaded_image_paths is a list of tuples:
|
89 |
# [('/tmp/gradio/249981e66a7c665aaaf1c7eaeb24949af4366c88/jensen huang.jpg', None)]
|
90 |
# Extract the file paths.
|
@@ -132,7 +135,10 @@ def generate_video(image_container, uploaded_image_paths, init_img_file_paths, i
|
|
132 |
seed, attn_scale, image_embed_cfg_begin_scale, image_embed_cfg_end_scale,
|
133 |
is_adaface_enabled, adaface_ckpt_path, adaface_power_scale,
|
134 |
id_animator_anneal_steps, progress=gr.Progress(track_tqdm=True)):
|
|
|
135 |
global adaface, id_animator
|
|
|
|
|
136 |
|
137 |
if prompt is None:
|
138 |
prompt = ""
|
@@ -220,7 +226,7 @@ def check_prompt_and_model_type(prompt, model_style_type):
|
|
220 |
id_animator = load_model(model_style_type=model_style_type, device=device)
|
221 |
adaface = AdaFaceWrapper(pipeline_name="text2img", base_model_path=base_model_path,
|
222 |
adaface_encoder_types=args.adaface_encoder_types,
|
223 |
-
adaface_ckpt_paths=[args.adaface_ckpt_path], device=
|
224 |
# Update base model type.
|
225 |
args.model_style_type = model_style_type
|
226 |
|
@@ -243,7 +249,7 @@ with gr.Blocks(css=css, theme=gr.themes.Origin()) as demo:
|
|
243 |
|
244 |
❗️**Tips**❗️
|
245 |
- You can upload one or more subject images for generating ID-specific video.
|
246 |
-
- If the face dominates the video frames, try
|
247 |
- If the face loses focus, try increasing the guidance scale.
|
248 |
- If the motion is weird, e.g., the prompt is "... running", try increasing the number of sampling steps.
|
249 |
- Usage explanations and demos: [Readme](https://huggingface.co/spaces/adaface-neurips/adaface-animate/blob/main/README2.md).
|
@@ -344,7 +350,7 @@ with gr.Blocks(css=css, theme=gr.themes.Origin()) as demo:
|
|
344 |
maximum=0.9,
|
345 |
step=0.1,
|
346 |
value=args.do_neg_id_prompt_weight,
|
347 |
-
visible=
|
348 |
)
|
349 |
|
350 |
seed = gr.Slider(
|
|
|
24 |
parser.add_argument("--adaface_encoder_types", type=str, nargs="+", default=["consistentID", "arc2face"],
|
25 |
choices=["arc2face", "consistentID"], help="Type(s) of the ID2Ada prompt encoders")
|
26 |
parser.add_argument('--adaface_ckpt_path', type=str,
|
27 |
+
default='models/adaface/VGGface2_HQ_masks2024-10-14T16-09-24_zero3-ada-3500.pt')
|
28 |
parser.add_argument('--model_style_type', type=str, default='realistic',
|
29 |
choices=["realistic", "anime", "photorealistic"], help="Type of the base model")
|
30 |
parser.add_argument("--guidance_scale", type=float, default=6.0,
|
|
|
51 |
global adaface, id_animator
|
52 |
|
53 |
base_model_path = model_style_type2base_model_path[args.model_style_type]
|
54 |
+
id_animator = load_model(model_style_type=args.model_style_type, device='cpu')
|
55 |
adaface = AdaFaceWrapper(pipeline_name="text2img", base_model_path=base_model_path,
|
56 |
adaface_encoder_types=args.adaface_encoder_types,
|
57 |
+
adaface_ckpt_paths=[args.adaface_ckpt_path], device='cpu')
|
58 |
|
59 |
basedir = os.getcwd()
|
60 |
savedir = os.path.join(basedir,'samples')
|
|
|
80 |
|
81 |
@spaces.GPU
|
82 |
def gen_init_images(uploaded_image_paths, prompt, guidance_scale, do_neg_id_prompt_weight, out_image_count=4):
|
|
|
83 |
if uploaded_image_paths is None:
|
84 |
print("No image uploaded")
|
85 |
return None, None, None
|
86 |
|
87 |
+
global adaface, id_animator
|
88 |
+
adaface.to(device)
|
89 |
+
id_animator.to(device)
|
90 |
+
|
91 |
# uploaded_image_paths is a list of tuples:
|
92 |
# [('/tmp/gradio/249981e66a7c665aaaf1c7eaeb24949af4366c88/jensen huang.jpg', None)]
|
93 |
# Extract the file paths.
|
|
|
135 |
seed, attn_scale, image_embed_cfg_begin_scale, image_embed_cfg_end_scale,
|
136 |
is_adaface_enabled, adaface_ckpt_path, adaface_power_scale,
|
137 |
id_animator_anneal_steps, progress=gr.Progress(track_tqdm=True)):
|
138 |
+
|
139 |
global adaface, id_animator
|
140 |
+
adaface.to(device)
|
141 |
+
id_animator.to(device)
|
142 |
|
143 |
if prompt is None:
|
144 |
prompt = ""
|
|
|
226 |
id_animator = load_model(model_style_type=model_style_type, device=device)
|
227 |
adaface = AdaFaceWrapper(pipeline_name="text2img", base_model_path=base_model_path,
|
228 |
adaface_encoder_types=args.adaface_encoder_types,
|
229 |
+
adaface_ckpt_paths=[args.adaface_ckpt_path], device='cpu')
|
230 |
# Update base model type.
|
231 |
args.model_style_type = model_style_type
|
232 |
|
|
|
249 |
|
250 |
❗️**Tips**❗️
|
251 |
- You can upload one or more subject images for generating ID-specific video.
|
252 |
+
- If the face dominates the video frames, try increasing the 'Weight of ID prompt in the negative prompt'.
|
253 |
- If the face loses focus, try increasing the guidance scale.
|
254 |
- If the motion is weird, e.g., the prompt is "... running", try increasing the number of sampling steps.
|
255 |
- Usage explanations and demos: [Readme](https://huggingface.co/spaces/adaface-neurips/adaface-animate/blob/main/README2.md).
|
|
|
350 |
maximum=0.9,
|
351 |
step=0.1,
|
352 |
value=args.do_neg_id_prompt_weight,
|
353 |
+
visible=True
|
354 |
)
|
355 |
|
356 |
seed = gr.Slider(
|
faceadapter/face_adapter.py
CHANGED
@@ -1,11 +1,8 @@
|
|
1 |
-
import os
|
2 |
from typing import List
|
3 |
|
4 |
import torch
|
5 |
-
from
|
6 |
-
from diffusers.pipelines.controlnet import MultiControlNetModel
|
7 |
from PIL import Image
|
8 |
-
from safetensors import safe_open
|
9 |
from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
|
10 |
from .attention_processor import LoRAFaceAttnProcessor
|
11 |
|
@@ -78,6 +75,13 @@ class FaceAdapterLora:
|
|
78 |
|
79 |
self.load_face_adapter()
|
80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
def init_proj(self):
|
82 |
image_proj_model = ImageProjModel(
|
83 |
cross_attention_dim=self.pipe.unet.config.cross_attention_dim,
|
|
|
|
|
1 |
from typing import List
|
2 |
|
3 |
import torch
|
4 |
+
from torch import nn
|
|
|
5 |
from PIL import Image
|
|
|
6 |
from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
|
7 |
from .attention_processor import LoRAFaceAttnProcessor
|
8 |
|
|
|
75 |
|
76 |
self.load_face_adapter()
|
77 |
|
78 |
+
def to(self, device):
|
79 |
+
self.device = device
|
80 |
+
self.pipe = self.pipe.to(device)
|
81 |
+
self.image_encoder = self.image_encoder.to(device)
|
82 |
+
self.image_proj_model = self.image_proj_model.to(device)
|
83 |
+
return self
|
84 |
+
|
85 |
def init_proj(self):
|
86 |
image_proj_model = ImageProjModel(
|
87 |
cross_attention_dim=self.pipe.unet.config.cross_attention_dim,
|
models/adaface/{VGGface2_HQ_masks2024-10-13T11-21-07_zero3-ada-9000.pt → VGGface2_HQ_masks2024-10-14T16-09-24_zero3-ada-3500.pt}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d991773c169ecfa01ce1ec365d7791d9b732212b35118d0795f21cc0c1a99e77
|
3 |
+
size 1814921754
|