adaface-neurips committed
Commit 61fbdeb · 1 Parent(s): d890920

Increase ID-Animator Image Embedding scales
Files changed:
- adaface/adaface-infer.py          +0  -131
- adaface/adaface-translate.py      +0  -208
- adaface/adaface_wrapper.py        +2  -6
- adaface/arc2face_models.py        +1  -1
- adaface/face_id_to_ada_prompt.py  +1  -3
- adaface/unet_teachers.py          +1  -1
- adaface/util.py                   +1  -1
- app.py                            +4  -4
adaface/adaface-infer.py  DELETED
@@ -1,131 +0,0 @@
-from adaface.adaface_wrapper import AdaFaceWrapper
-import torch
-#import torch.nn.functional as F
-from PIL import Image
-import numpy as np
-import os, argparse, glob, re
-
-def save_images(images, num_images_per_row, subject_name, prompt, noise_level, save_dir = "samples-ada"):
-    if num_images_per_row > len(images):
-        num_images_per_row = len(images)
-
-    os.makedirs(save_dir, exist_ok=True)
-
-    num_columns = int(np.ceil(len(images) / num_images_per_row))
-    # Save 4 images as a grid image in save_dir
-    grid_image = Image.new('RGB', (512 * num_images_per_row, 512 * num_columns))
-    for i, image in enumerate(images):
-        image = image.resize((512, 512))
-        grid_image.paste(image, (512 * (i % num_images_per_row), 512 * (i // num_images_per_row)))
-
-    prompt_sig = prompt.replace(" ", "_").replace(",", "_")
-    grid_filepath = os.path.join(save_dir, f"{subject_name}-{prompt_sig}-noise{noise_level:.02f}.png")
-    if os.path.exists(grid_filepath):
-        grid_count = 2
-        grid_filepath = os.path.join(save_dir, f'{subject_name}-{prompt_sig}-noise{noise_level:.02f}-{grid_count}.jpg')
-        while os.path.exists(grid_filepath):
-            grid_count += 1
-            grid_filepath = os.path.join(save_dir, f'{subject_name}-{prompt_sig}-noise{noise_level:.02f}-{grid_count}.jpg')
-
-    grid_image.save(grid_filepath)
-    print(f"Saved to {grid_filepath}")
-
-def seed_everything(seed):
-    np.random.seed(seed)
-    torch.manual_seed(seed)
-    torch.cuda.manual_seed_all(seed)
-    torch.backends.cudnn.deterministic = True
-    torch.backends.cudnn.benchmark = False
-    os.environ["PL_GLOBAL_SEED"] = str(seed)
-
-def parse_args():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--base_model_path", type=str, default='models/sd15-dste8-vae.safetensors',
-                        help="Type of checkpoints to use (default: SD 1.5)")
-    parser.add_argument("--embman_ckpt", type=str, required=True,
-                        help="Path to the checkpoint of the embedding manager")
-    parser.add_argument("--subject", type=str, required=True)
-    parser.add_argument("--example_image_count", type=int, default=-1, help="Number of example images to use")
-    parser.add_argument("--out_image_count", type=int, default=4, help="Number of images to generate")
-    parser.add_argument("--prompt", type=str, default="a woman z in superman costume")
-    parser.add_argument("--noise", dest='noise_level', type=float, default=0)
-    parser.add_argument("--randface", action="store_true")
-    parser.add_argument("--scale", dest='guidance_scale', type=float, default=4,
-                        help="Guidance scale for the diffusion model")
-    parser.add_argument("--id_cfg_scale", type=float, default=1,
-                        help="CFG scale when generating the identity embeddings")
-
-    parser.add_argument("--subject_string",
-                        type=str, default="z",
-                        help="Subject placeholder string used in prompts to denote the concept.")
-    parser.add_argument("--num_vectors", type=int, default=16,
-                        help="Number of vectors used to represent the subject.")
-    parser.add_argument("--num_images_per_row", type=int, default=4,
-                        help="Number of images to display in a row in the output grid image.")
-    parser.add_argument("--num_inference_steps", type=int, default=50,
-                        help="Number of DDIM inference steps")
-    parser.add_argument("--device", type=str, default="cuda", help="Device to run the model on")
-    parser.add_argument("--seed", type=int, default=42,
-                        help="the seed (for reproducible sampling). Set to -1 to disable.")
-    args = parser.parse_args()
-
-    return args
-
-if __name__ == "__main__":
-    args = parse_args()
-    if args.seed != -1:
-        seed_everything(args.seed)
-
-    if re.match(r"^\d+$", args.device):
-        args.device = f"cuda:{args.device}"
-    print(f"Using device {args.device}")
-
-    adaface = AdaFaceWrapper("text2img", args.base_model_path, args.embman_ckpt, args.device,
-                             args.subject_string, args.num_vectors, args.num_inference_steps)
-
-    if not args.randface:
-        image_folder = args.subject
-        if image_folder.endswith("/"):
-            image_folder = image_folder[:-1]
-
-        if os.path.isfile(image_folder):
-            # Get the second to the last part of the path
-            subject_name = os.path.basename(os.path.dirname(image_folder))
-            image_paths = [image_folder]
-
-        else:
-            subject_name = os.path.basename(image_folder)
-            image_types = ["*.jpg", "*.png", "*.jpeg"]
-            alltype_image_paths = []
-            for image_type in image_types:
-                # glob returns the full path.
-                image_paths = glob.glob(os.path.join(image_folder, image_type))
-                if len(image_paths) > 0:
-                    alltype_image_paths.extend(image_paths)
-
-            # Filter out images of "*_mask.png"
-            alltype_image_paths = [image_path for image_path in alltype_image_paths if "_mask.png" not in image_path]
-
-            # image_paths contain at most args.example_image_count full image paths.
-            if args.example_image_count > 0:
-                image_paths = alltype_image_paths[:args.example_image_count]
-            else:
-                image_paths = alltype_image_paths
-    else:
-        subject_name = None
-        image_paths = None
-        image_folder = None
-
-    subject_name = "randface-" + str(torch.seed()) if args.randface else subject_name
-    rand_face_embs = torch.randn(1, 512)
-
-    pre_face_embs = rand_face_embs if args.randface else None
-    noise = torch.randn(args.out_image_count, 4, 64, 64).cuda()
-    # args.noise_level: the *relative* std of the noise added to the face embeddings.
-    # A noise level of 0.08 could change gender, but 0.06 is usually safe.
-    # adaface_subj_embs is not used. It is generated for the purpose of updating the text encoder (within this function call).
-    adaface_subj_embs = adaface.generate_adaface_embeddings(image_paths, image_folder, pre_face_embs, args.randface,
-                                                            out_id_embs_scale=args.id_cfg_scale, noise_level=args.noise_level,
-                                                            update_text_encoder=True)
-    images = adaface(noise, args.prompt, args.guidance_scale, args.out_image_count, verbose=True)
-    save_images(images, args.num_images_per_row, subject_name, f"guide{args.guidance_scale}", args.noise_level)
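For reference, the deleted script's --noise flag is documented in its own comments as the relative std of Gaussian noise added to the face ID embeddings (0.06 is described as usually safe, 0.08 as able to change gender). A minimal sketch of that idea, assuming a plain Gaussian perturbation scaled by the embeddings' own std; the real logic lives inside generate_adaface_embeddings / adaface/util.py and may differ:

import torch

def add_relative_noise(face_embs: torch.Tensor, noise_level: float) -> torch.Tensor:
    # Hypothetical helper: the added noise's std is noise_level times the std of the
    # embeddings themselves ("relative" std), as the deleted script's comments describe.
    if noise_level <= 0:
        return face_embs
    return face_embs + noise_level * face_embs.std() * torch.randn_like(face_embs)

face_embs = torch.randn(1, 512)            # same shape as rand_face_embs above
perturbed = add_relative_noise(face_embs, noise_level=0.06)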
adaface/adaface-translate.py  DELETED
@@ -1,208 +0,0 @@
-from adaface.adaface_wrapper import AdaFaceWrapper
-import torch
-#import torch.nn.functional as F
-from PIL import Image
-import numpy as np
-import os, argparse, glob, re, shutil
-
-def str2bool(v):
-    if isinstance(v, bool):
-        return v
-    if v.lower() in ("yes", "true", "t", "y", "1"):
-        return True
-    elif v.lower() in ("no", "false", "f", "n", "0"):
-        return False
-    else:
-        raise argparse.ArgumentTypeError("Boolean value expected.")
-
-def seed_everything(seed):
-    np.random.seed(seed)
-    torch.manual_seed(seed)
-    torch.cuda.manual_seed_all(seed)
-    torch.backends.cudnn.deterministic = True
-    torch.backends.cudnn.benchmark = False
-    os.environ["PL_GLOBAL_SEED"] = str(seed)
-
-def parse_args():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--base_model_path", type=str, default='models/realisticvision/realisticVisionV40_v40VAE.safetensors',
-                        help="Path to the UNet checkpoint (default: RealisticVision 4.0)")
-    parser.add_argument("--embman_ckpt", type=str, required=True,
-                        help="Path to the checkpoint of the embedding manager")
-    parser.add_argument("--in_folder", type=str, required=True, help="Path to the folder containing input images")
-    # If True, the input folder contains images of mixed subjects.
-    # If False, the input folder contains multiple subfolders, each of which contains images of the same subject.
-    parser.add_argument("--is_mix_subj_folder", type=str2bool, const=True, default=False, nargs="?",
-                        help="Whether the input folder contains images of mixed subjects")
-    parser.add_argument("--max_images_per_subject", type=int, default=5, help="Number of example images used per subject")
-    parser.add_argument("--trans_subject_count", type=int, default=-1, help="Number of example images to be translated")
-    parser.add_argument("--out_folder", type=str, required=True, help="Path to the folder saving output images")
-    parser.add_argument("--out_count_per_input_image", type=int, default=1, help="Number of output images to generate per input image")
-    parser.add_argument("--copy_masks", action="store_true", help="Copy the mask images to the output folder")
-    parser.add_argument("--noise", dest='noise_level', type=float, default=0)
-    parser.add_argument("--scale", dest='guidance_scale', type=float, default=4,
-                        help="Guidance scale for the diffusion model")
-    parser.add_argument("--ref_img_strength", type=float, default=0.8,
-                        help="Strength of the reference image in the output image.")
-    parser.add_argument("--subject_string",
-                        type=str, default="z",
-                        help="Subject placeholder string used in prompts to denote the concept.")
-    parser.add_argument("--num_vectors", type=int, default=16,
-                        help="Number of vectors used to represent the subject.")
-    parser.add_argument("--prompt", type=str, default="a person z")
-    parser.add_argument("--num_images_per_row", type=int, default=4,
-                        help="Number of images to display in a row in the output grid image.")
-    parser.add_argument("--num_inference_steps", type=int, default=50,
-                        help="Number of DDIM inference steps")
-    parser.add_argument("--num_gpus", type=int, default=1, help="Number of GPUs to use. If num_gpus > 1, use accelerate for distributed execution.")
-    parser.add_argument("--device", type=str, default="cuda", help="Device to run the model on")
-    parser.add_argument("--seed", type=int, default=42,
-                        help="the seed (for reproducible sampling). Set to -1 to disable.")
-    args = parser.parse_args()
-
-    return args
-
-if __name__ == "__main__":
-    args = parse_args()
-    if args.seed != -1:
-        seed_everything(args.seed)
-
-    # screen -dm -L -Logfile trans_rv4-2.txt accelerate launch --multi_gpu --num_processes=2 scripts/adaface-translate.py
-    # --embman_ckpt logs/subjects-celebrity2024-05-16T17-22-46_zero3-ada/checkpoints/embeddings_gs-30000.pt
-    # --base_model_path models/realisticvision/realisticVisionV40_v40VAE.safetensors --in_folder /data/username/VGGface2_HQ_masks/
-    # --is_mix_subj_folder 0 --out_folder /data/username/VGGface2_HQ_masks_rv4a --copy_masks --num_gpus 2
-    if args.num_gpus > 1:
-        from accelerate import PartialState
-        distributed_state = PartialState()
-        args.device = distributed_state.device
-        process_index = distributed_state.process_index
-    elif re.match(r"^\d+$", args.device):
-        args.device = f"cuda:{args.device}"
-        distributed_state = None
-        process_index = 0
-
-    adaface = AdaFaceWrapper("img2img", args.base_model_path, args.embman_ckpt, args.device,
-                             args.subject_string, args.num_vectors, args.num_inference_steps)
-
-    in_folder = args.in_folder
-    if os.path.isfile(in_folder):
-        subject_folders = [ os.path.dirname(in_folder) ]
-        images_by_subject = [[in_folder]]
-    else:
-        if not args.is_mix_subj_folder:
-            in_folders = [in_folder]
-        else:
-            in_folders = [ os.path.join(in_folder, subfolder) for subfolder in sorted(os.listdir(in_folder)) ]
-
-        images_by_subject = []
-        subject_folders = []
-        for in_folder in in_folders:
-            image_types = ["*.jpg", "*.png", "*.jpeg"]
-            alltype_image_paths = []
-            for image_type in image_types:
-                # glob returns the full path.
-                image_paths = glob.glob(os.path.join(in_folder, image_type))
-                if len(image_paths) > 0:
-                    alltype_image_paths.extend(image_paths)
-
-            # Filter out images of "*_mask.png"
-            alltype_image_paths = [image_path for image_path in alltype_image_paths if "_mask.png" not in image_path]
-            alltype_image_paths = sorted(alltype_image_paths)
-
-            if not args.is_mix_subj_folder:
-                # image_paths contain at most args.max_images_per_subject full image paths.
-                if args.max_images_per_subject > 0:
-                    image_paths = alltype_image_paths[:args.max_images_per_subject]
-                else:
-                    image_paths = alltype_image_paths
-
-                images_by_subject.append(image_paths)
-                subject_folders.append(in_folder)
-            else:
-                # Each image in the folder is treated as an individual subject.
-                images_by_subject.extend([[image_path] for image_path in alltype_image_paths])
-                subject_folders.extend([in_folder] * len(alltype_image_paths))
-
-            if args.trans_subject_count > 0 and len(subject_folders) >= args.trans_subject_count:
-                break
-
-    if args.trans_subject_count > 0:
-        images_by_subject = images_by_subject[:args.trans_subject_count]
-        subject_folders = subject_folders[:args.trans_subject_count]
-
-    out_image_count = 0
-    out_mask_count = 0
-    if not args.out_folder.endswith("/"):
-        args.out_folder += "/"
-
-    if args.num_gpus > 1:
-        # Split the subjects across the GPUs.
-        subject_folders = subject_folders[process_index::args.num_gpus]
-        images_by_subject = images_by_subject[process_index::args.num_gpus]
-        #subject_folders, images_by_subject = distributed_state.split_between_processes(zip(subject_folders, images_by_subject))
-
-    for (subject_folder, image_paths) in zip(subject_folders, images_by_subject):
-        # If is_mix_subj_folder, then image_paths only contains 1 image, and we use the file name as the signature of the image.
-        # Otherwise, we use the folder name as the signature of the images.
-        images_sig = subject_folder if not args.is_mix_subj_folder else os.path.basename(image_paths[0])
-
-        print(f"Translating {images_sig}...")
-        with torch.no_grad():
-            adaface_subj_embs = adaface.generate_adaface_embeddings(image_paths, subject_folder, None, False,
-                                                                    out_id_embs_scale=1, noise_level=args.noise_level,
-                                                                    update_text_encoder=True)
-
-        # Replace the first occurrence of "in_folder" with "out_folder" in the path of the subject_folder.
-        subject_out_folder = subject_folder.replace(args.in_folder, args.out_folder, 1)
-        if not os.path.exists(subject_out_folder):
-            os.makedirs(subject_out_folder)
-        print(f"Output images will be saved to {subject_out_folder}")
-
-        in_images = []
-        for image_path in image_paths:
-            image = Image.open(image_path).convert("RGB").resize((512, 512))
-            # [512, 512, 3] -> [3, 512, 512].
-            image = np.array(image).transpose(2, 0, 1)
-            # Convert the image to a tensor of shape (1, 3, 512, 512) and move it to the GPU.
-            image = torch.tensor(image).unsqueeze(0).float().cuda()
-            in_images.append(image)
-
-        # Put all input images of the subject into a batch. This assumes max_images_per_subject is small.
-        # NOTE: For simplicity, we do not check overly large batch sizes.
-        in_images = torch.cat(in_images, dim=0)
-        # in_images: [5, 3, 512, 512].
-        # Normalize the pixel values to [0, 1].
-        in_images = in_images / 255.0
-        num_out_images = len(in_images) * args.out_count_per_input_image
-
-        with torch.no_grad():
-            # args.noise_level: the *relative* std of the noise added to the face embeddings.
-            # A noise level of 0.08 could change gender, but 0.06 is usually safe.
-            # The returned adaface_subj_embs are already incorporated in the text encoder, and not used explicitly.
-            # NOTE: We assume out_count_per_input_image == 1, so that the output images are of the same number as the input images.
-            out_images = adaface(in_images, args.prompt, args.guidance_scale, num_out_images, ref_img_strength=args.ref_img_strength)
-
-            for img_i, img in enumerate(out_images):
-                # out_images: subj_1, subj_2, ..., subj_n, subj_1, subj_2, ..., subj_n, ...
-                subj_i = img_i % len(in_images)
-                copy_i = img_i // len(in_images)
-                image_filename_stem, image_fileext = os.path.splitext(os.path.basename(image_paths[subj_i]))
-                if copy_i == 0:
-                    img.save(os.path.join(subject_out_folder, f"{image_filename_stem}{image_fileext}"))
-                else:
-                    img.save(os.path.join(subject_out_folder, f"{image_filename_stem}_{copy_i}{image_fileext}"))
-
-                if args.copy_masks:
-                    mask_path = image_paths[subj_i].replace(image_fileext, "_mask.png")
-                    if os.path.exists(mask_path):
-                        if copy_i == 0:
-                            shutil.copy(mask_path, subject_out_folder)
-                        else:
-                            mask_filename_stem = image_filename_stem
-                            shutil.copy(mask_path, os.path.join(subject_out_folder, f"{mask_filename_stem}_{copy_i}_mask.png"))
-
-                        out_mask_count += 1
-
-        out_image_count += len(out_images)
-
-    print(f"{out_image_count} output images and {out_mask_count} masks saved to {args.out_folder}")
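The deleted translate script shards its work across GPUs by process index when launched through accelerate (see the commented screen/accelerate command near the top of the file). A minimal sketch of that round-robin pattern, using placeholder folder names and assuming the process is started with `accelerate launch --multi_gpu --num_processes=N`:

from accelerate import PartialState

distributed_state = PartialState()                  # one process per GPU under accelerate
process_index = distributed_state.process_index
num_processes = distributed_state.num_processes

subject_folders = ["subj_a", "subj_b", "subj_c", "subj_d"]   # placeholder work items
# Each process keeps every num_processes-th item starting at its own rank,
# mirroring subject_folders[process_index::args.num_gpus] in the deleted script.
my_subjects = subject_folders[process_index::num_processes]
print(f"[rank {process_index}] handling {my_subjects}")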
adaface/adaface_wrapper.py  CHANGED
@@ -15,10 +15,6 @@ from adaface.face_id_to_ada_prompt import create_id2ada_prompt_encoder
 from safetensors.torch import load_file as safetensors_load_file
 import re, os
 import numpy as np
-import sys
-# Monkey patch the missing ldm module in the old arc2face adaface checkpoint.
-sys.modules['ldm'] = sys.modules['adaface']
-sys.modules['ldm.modules'] = sys.modules['adaface']
 
 class AdaFaceWrapper(nn.Module):
     def __init__(self, pipeline_name, base_model_path, adaface_encoder_types,
@@ -101,8 +97,8 @@ class AdaFaceWrapper(nn.Module):
             PipelineClass = StableDiffusionPipeline
         elif self.pipeline_name == "text2img3":
             PipelineClass = StableDiffusion3Pipeline
-        elif self.pipeline_name == "flux":
-            PipelineClass = FluxPipeline
+        #elif self.pipeline_name == "flux":
+        #    PipelineClass = FluxPipeline
         # pipeline_name is None means only use this instance to generate adaface embeddings, not to generate images.
         elif self.pipeline_name is None:
             PipelineClass = StableDiffusionPipeline
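Context for the removed monkey patch (a sketch of the general trick, not part of the current wrapper): older Arc2Face/AdaFace checkpoints were pickled while their classes lived under a package named `ldm`, so `torch.load` would try to import `ldm.*` and fail. Aliasing that name onto the package that now hosts the classes lets unpickling resolve the stale references; the checkpoint path below is hypothetical.

import sys
import adaface                      # assumes the adaface package is importable

sys.modules['ldm'] = adaface        # stale "ldm.*" lookups now resolve against adaface
sys.modules['ldm.modules'] = adaface
# After this, torch.load("old_arc2face_checkpoint.pt") can unpickle objects whose
# pickled class paths still start with "ldm."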
adaface/arc2face_models.py  CHANGED
@@ -16,7 +16,7 @@ _expand_mask = AttentionMaskConverter._expand_mask
 
 from .util import perturb_tensor
 
-def create_arc2face_pipeline(base_model_path="models/
+def create_arc2face_pipeline(base_model_path="models/sd15-dste8-vae.safetensors",
                              dtype=torch.float16, unet_only=False):
     unet = UNet2DConditionModel.from_pretrained(
         'models/arc2face', subfolder="arc2face", torch_dtype=dtype
adaface/face_id_to_ada_prompt.py  CHANGED
@@ -672,9 +672,7 @@ class ConsistentID_ID2AdaPrompt(FaceID2AdaPrompt):
         # are not used and will be released soon.
         # Only the consistentID modules and bise_net are used.
         assert base_model_path is not None, "base_model_path should be provided."
-        pipe = ConsistentIDPipeline.from_single_file(
-            base_model_path,
-        )
+        pipe = ConsistentIDPipeline.from_single_file(base_model_path)
         pipe.load_ConsistentID_model(consistentID_weight_path="./models/ConsistentID/ConsistentID-v1.bin",
                                      bise_net_weight_path="./models/ConsistentID/BiSeNet_pretrained_for_ConsistentID.pth")
         pipe.to(dtype=self.dtype)
adaface/unet_teachers.py  CHANGED
@@ -192,7 +192,7 @@ class UNetEnsembleTeacher(UNetTeacher):
         self.unet = UNetEnsemble(unets, unet_types, extra_unet_dirpaths, unet_weights, device)
 
 class ConsistentIDTeacher(UNetTeacher):
-    def __init__(self, base_model_path="models/
+    def __init__(self, base_model_path="models/sd15-dste8-vae.safetensors", **kwargs):
         super().__init__(**kwargs)
         self.name = "consistentID"
         ### Load base model
adaface/util.py  CHANGED
@@ -223,7 +223,7 @@ class UNetEnsemble(nn.Module):
         else:
             return UNet2DConditionOutput(sample=sample)
 
-def create_consistentid_pipeline(base_model_path="models/
+def create_consistentid_pipeline(base_model_path="models/sd15-dste8-vae.safetensors",
                                  dtype=torch.float16, unet_only=False):
     pipe = ConsistentIDPipeline.from_single_file(
         base_model_path,
app.py  CHANGED
@@ -315,7 +315,7 @@ with gr.Blocks(css=css) as demo:
             minimum=30,
             maximum=80,
             step=1,
-            value=
+            value=40,
         )
 
         submit = gr.Button("Generate Video")
@@ -355,15 +355,15 @@
             minimum=0.3,
             maximum=1.5,
             step=0.1,
-            value=1,
+            value=1.2,
         )
         image_embed_cfg_end_scale = gr.Slider(
             label="ID-Animator Image Embedding Final Scale",
             info="The scale of the ID-Animator image embedding (influencing coarse facial features and poses)",
             minimum=0.0,
-            maximum=
+            maximum=1.5,
             step=0.1,
-            value=0.
+            value=0.8,
         )
 
         id_animator_anneal_steps = gr.Slider(
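The commit's point is visible in the second app.py hunk: the ID-Animator image-embedding start scale default rises to 1.2 and the final-scale slider gets a 0.8 default, with its maximum raised to 1.5. A minimal sketch of how the two values might interact, assuming a simple linear anneal over the `id_animator_anneal_steps` slider's value; the actual schedule used by app.py is not part of this diff, so the function below is illustrative only:

def image_embed_scale_at_step(step: int, anneal_steps: int,
                              start_scale: float = 1.2, end_scale: float = 0.8) -> float:
    # Hypothetical linear anneal from the "start" to the "final" slider value.
    if anneal_steps <= 0 or step >= anneal_steps:
        return end_scale
    t = step / anneal_steps                  # 0.0 at the first step, approaching 1.0
    return start_scale + t * (end_scale - start_scale)

# e.g. with anneal_steps=20: step 0 -> 1.2, step 10 -> 1.0, step 20 and later -> 0.8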