adaface-neurips committed
Commit 61fbdeb · Parent: d890920

Increase ID-Animator Image Embedding scales

adaface/adaface-infer.py DELETED
@@ -1,131 +0,0 @@
-from adaface.adaface_wrapper import AdaFaceWrapper
-import torch
-#import torch.nn.functional as F
-from PIL import Image
-import numpy as np
-import os, argparse, glob, re
-
-def save_images(images, num_images_per_row, subject_name, prompt, noise_level, save_dir = "samples-ada"):
-    if num_images_per_row > len(images):
-        num_images_per_row = len(images)
-
-    os.makedirs(save_dir, exist_ok=True)
-
-    num_columns = int(np.ceil(len(images) / num_images_per_row))
-    # Save 4 images as a grid image in save_dir
-    grid_image = Image.new('RGB', (512 * num_images_per_row, 512 * num_columns))
-    for i, image in enumerate(images):
-        image = image.resize((512, 512))
-        grid_image.paste(image, (512 * (i % num_images_per_row), 512 * (i // num_images_per_row)))
-
-    prompt_sig = prompt.replace(" ", "_").replace(",", "_")
-    grid_filepath = os.path.join(save_dir, f"{subject_name}-{prompt_sig}-noise{noise_level:.02f}.png")
-    if os.path.exists(grid_filepath):
-        grid_count = 2
-        grid_filepath = os.path.join(save_dir, f'{subject_name}-{prompt_sig}-noise{noise_level:.02f}-{grid_count}.jpg')
-        while os.path.exists(grid_filepath):
-            grid_count += 1
-            grid_filepath = os.path.join(save_dir, f'{subject_name}-{prompt_sig}-noise{noise_level:.02f}-{grid_count}.jpg')
-
-    grid_image.save(grid_filepath)
-    print(f"Saved to {grid_filepath}")
-
-def seed_everything(seed):
-    np.random.seed(seed)
-    torch.manual_seed(seed)
-    torch.cuda.manual_seed_all(seed)
-    torch.backends.cudnn.deterministic = True
-    torch.backends.cudnn.benchmark = False
-    os.environ["PL_GLOBAL_SEED"] = str(seed)
-
-def parse_args():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--base_model_path", type=str, default='models/sd15-dste8-vae.safetensors',
-                        help="Type of checkpoints to use (default: SD 1.5)")
-    parser.add_argument("--embman_ckpt", type=str, required=True,
-                        help="Path to the checkpoint of the embedding manager")
-    parser.add_argument("--subject", type=str, required=True)
-    parser.add_argument("--example_image_count", type=int, default=-1, help="Number of example images to use")
-    parser.add_argument("--out_image_count", type=int, default=4, help="Number of images to generate")
-    parser.add_argument("--prompt", type=str, default="a woman z in superman costume")
-    parser.add_argument("--noise", dest='noise_level', type=float, default=0)
-    parser.add_argument("--randface", action="store_true")
-    parser.add_argument("--scale", dest='guidance_scale', type=float, default=4,
-                        help="Guidance scale for the diffusion model")
-    parser.add_argument("--id_cfg_scale", type=float, default=1,
-                        help="CFG scale when generating the identity embeddings")
-
-    parser.add_argument("--subject_string",
-                        type=str, default="z",
-                        help="Subject placeholder string used in prompts to denote the concept.")
-    parser.add_argument("--num_vectors", type=int, default=16,
-                        help="Number of vectors used to represent the subject.")
-    parser.add_argument("--num_images_per_row", type=int, default=4,
-                        help="Number of images to display in a row in the output grid image.")
-    parser.add_argument("--num_inference_steps", type=int, default=50,
-                        help="Number of DDIM inference steps")
-    parser.add_argument("--device", type=str, default="cuda", help="Device to run the model on")
-    parser.add_argument("--seed", type=int, default=42,
-                        help="the seed (for reproducible sampling). Set to -1 to disable.")
-    args = parser.parse_args()
-
-    return args
-
-if __name__ == "__main__":
-    args = parse_args()
-    if args.seed != -1:
-        seed_everything(args.seed)
-
-    if re.match(r"^\d+$", args.device):
-        args.device = f"cuda:{args.device}"
-    print(f"Using device {args.device}")
-
-    adaface = AdaFaceWrapper("text2img", args.base_model_path, args.embman_ckpt, args.device,
-                             args.subject_string, args.num_vectors, args.num_inference_steps)
-
-    if not args.randface:
-        image_folder = args.subject
-        if image_folder.endswith("/"):
-            image_folder = image_folder[:-1]
-
-        if os.path.isfile(image_folder):
-            # Get the second to the last part of the path
-            subject_name = os.path.basename(os.path.dirname(image_folder))
-            image_paths = [image_folder]
-
-        else:
-            subject_name = os.path.basename(image_folder)
-            image_types = ["*.jpg", "*.png", "*.jpeg"]
-            alltype_image_paths = []
-            for image_type in image_types:
-                # glob returns the full path.
-                image_paths = glob.glob(os.path.join(image_folder, image_type))
-                if len(image_paths) > 0:
-                    alltype_image_paths.extend(image_paths)
-
-            # Filter out images of "*_mask.png"
-            alltype_image_paths = [image_path for image_path in alltype_image_paths if "_mask.png" not in image_path]
-
-            # image_paths contain at most args.example_image_count full image paths.
-            if args.example_image_count > 0:
-                image_paths = alltype_image_paths[:args.example_image_count]
-            else:
-                image_paths = alltype_image_paths
-    else:
-        subject_name = None
-        image_paths = None
-        image_folder = None
-
-    subject_name = "randface-" + str(torch.seed()) if args.randface else subject_name
-    rand_face_embs = torch.randn(1, 512)
-
-    pre_face_embs = rand_face_embs if args.randface else None
-    noise = torch.randn(args.out_image_count, 4, 64, 64).cuda()
-    # args.noise_level: the *relative* std of the noise added to the face embeddings.
-    # A noise level of 0.08 could change gender, but 0.06 is usually safe.
-    # adaface_subj_embs is not used. It is generated for the purpose of updating the text encoder (within this function call).
-    adaface_subj_embs = adaface.generate_adaface_embeddings(image_paths, image_folder, pre_face_embs, args.randface,
-                                                            out_id_embs_scale=args.id_cfg_scale, noise_level=args.noise_level,
-                                                            update_text_encoder=True)
-    images = adaface(noise, args.prompt, args.guidance_scale, args.out_image_count, verbose=True)
-    save_images(images, args.num_images_per_row, subject_name, f"guide{args.guidance_scale}", args.noise_level)
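
Aside: the deleted script's comments define noise_level as the *relative* std of noise added to the face embeddings (0.06 is usually safe; 0.08 could change gender). A minimal sketch of such a relative-std perturbation, assuming the wrapper perturbs embeddings this way (add_relative_noise is an illustrative name, not a repo function):

import torch

def add_relative_noise(face_embs: torch.Tensor, noise_level: float) -> torch.Tensor:
    # The noise std is scaled by the embedding's own std, so noise_level
    # expresses perturbation strength relative to the embedding magnitude.
    if noise_level <= 0:
        return face_embs
    return face_embs + torch.randn_like(face_embs) * face_embs.std() * noise_level
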
adaface/adaface-translate.py DELETED
@@ -1,208 +0,0 @@
-from adaface.adaface_wrapper import AdaFaceWrapper
-import torch
-#import torch.nn.functional as F
-from PIL import Image
-import numpy as np
-import os, argparse, glob, re, shutil
-
-def str2bool(v):
-    if isinstance(v, bool):
-        return v
-    if v.lower() in ("yes", "true", "t", "y", "1"):
-        return True
-    elif v.lower() in ("no", "false", "f", "n", "0"):
-        return False
-    else:
-        raise argparse.ArgumentTypeError("Boolean value expected.")
-
-def seed_everything(seed):
-    np.random.seed(seed)
-    torch.manual_seed(seed)
-    torch.cuda.manual_seed_all(seed)
-    torch.backends.cudnn.deterministic = True
-    torch.backends.cudnn.benchmark = False
-    os.environ["PL_GLOBAL_SEED"] = str(seed)
-
-def parse_args():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--base_model_path", type=str, default='models/realisticvision/realisticVisionV40_v40VAE.safetensors',
-                        help="Path to the UNet checkpoint (default: RealisticVision 4.0)")
-    parser.add_argument("--embman_ckpt", type=str, required=True,
-                        help="Path to the checkpoint of the embedding manager")
-    parser.add_argument("--in_folder", type=str, required=True, help="Path to the folder containing input images")
-    # If True, the input folder contains images of mixed subjects.
-    # If False, the input folder contains multiple subfolders, each of which contains images of the same subject.
-    parser.add_argument("--is_mix_subj_folder", type=str2bool, const=True, default=False, nargs="?",
-                        help="Whether the input folder contains images of mixed subjects")
-    parser.add_argument("--max_images_per_subject", type=int, default=5, help="Number of example images used per subject")
-    parser.add_argument("--trans_subject_count", type=int, default=-1, help="Number of subjects to be translated")
-    parser.add_argument("--out_folder", type=str, required=True, help="Path to the folder saving output images")
-    parser.add_argument("--out_count_per_input_image", type=int, default=1, help="Number of output images to generate per input image")
-    parser.add_argument("--copy_masks", action="store_true", help="Copy the mask images to the output folder")
-    parser.add_argument("--noise", dest='noise_level', type=float, default=0)
-    parser.add_argument("--scale", dest='guidance_scale', type=float, default=4,
-                        help="Guidance scale for the diffusion model")
-    parser.add_argument("--ref_img_strength", type=float, default=0.8,
-                        help="Strength of the reference image in the output image.")
-    parser.add_argument("--subject_string",
-                        type=str, default="z",
-                        help="Subject placeholder string used in prompts to denote the concept.")
-    parser.add_argument("--num_vectors", type=int, default=16,
-                        help="Number of vectors used to represent the subject.")
-    parser.add_argument("--prompt", type=str, default="a person z")
-    parser.add_argument("--num_images_per_row", type=int, default=4,
-                        help="Number of images to display in a row in the output grid image.")
-    parser.add_argument("--num_inference_steps", type=int, default=50,
-                        help="Number of DDIM inference steps")
-    parser.add_argument("--num_gpus", type=int, default=1, help="Number of GPUs to use. If num_gpus > 1, use accelerate for distributed execution.")
-    parser.add_argument("--device", type=str, default="cuda", help="Device to run the model on")
-    parser.add_argument("--seed", type=int, default=42,
-                        help="the seed (for reproducible sampling). Set to -1 to disable.")
-    args = parser.parse_args()
-
-    return args
-
-if __name__ == "__main__":
-    args = parse_args()
-    if args.seed != -1:
-        seed_everything(args.seed)
-
-    # screen -dm -L -Logfile trans_rv4-2.txt accelerate launch --multi_gpu --num_processes=2 scripts/adaface-translate.py
-    # --embman_ckpt logs/subjects-celebrity2024-05-16T17-22-46_zero3-ada/checkpoints/embeddings_gs-30000.pt
-    # --base_model_path models/realisticvision/realisticVisionV40_v40VAE.safetensors --in_folder /data/username/VGGface2_HQ_masks/
-    # --is_mix_subj_folder 0 --out_folder /data/username/VGGface2_HQ_masks_rv4a --copy_masks --num_gpus 2
-    if args.num_gpus > 1:
-        from accelerate import PartialState
-        distributed_state = PartialState()
-        args.device = distributed_state.device
-        process_index = distributed_state.process_index
-    elif re.match(r"^\d+$", args.device):
-        args.device = f"cuda:{args.device}"
-        distributed_state = None
-        process_index = 0
-
-    adaface = AdaFaceWrapper("img2img", args.base_model_path, args.embman_ckpt, args.device,
-                             args.subject_string, args.num_vectors, args.num_inference_steps)
-
-    in_folder = args.in_folder
-    if os.path.isfile(in_folder):
-        subject_folders = [ os.path.dirname(in_folder) ]
-        images_by_subject = [[in_folder]]
-    else:
-        if not args.is_mix_subj_folder:
-            in_folders = [in_folder]
-        else:
-            in_folders = [ os.path.join(in_folder, subfolder) for subfolder in sorted(os.listdir(in_folder)) ]
-
-        images_by_subject = []
-        subject_folders = []
-        for in_folder in in_folders:
-            image_types = ["*.jpg", "*.png", "*.jpeg"]
-            alltype_image_paths = []
-            for image_type in image_types:
-                # glob returns the full path.
-                image_paths = glob.glob(os.path.join(in_folder, image_type))
-                if len(image_paths) > 0:
-                    alltype_image_paths.extend(image_paths)
-
-            # Filter out images of "*_mask.png"
-            alltype_image_paths = [image_path for image_path in alltype_image_paths if "_mask.png" not in image_path]
-            alltype_image_paths = sorted(alltype_image_paths)
-
-            if not args.is_mix_subj_folder:
-                # image_paths contain at most args.max_images_per_subject full image paths.
-                if args.max_images_per_subject > 0:
-                    image_paths = alltype_image_paths[:args.max_images_per_subject]
-                else:
-                    image_paths = alltype_image_paths
-
-                images_by_subject.append(image_paths)
-                subject_folders.append(in_folder)
-            else:
-                # Each image in the folder is treated as an individual subject.
-                images_by_subject.extend([[image_path] for image_path in alltype_image_paths])
-                subject_folders.extend([in_folder] * len(alltype_image_paths))
-
-            if args.trans_subject_count > 0 and len(subject_folders) >= args.trans_subject_count:
-                break
-
-    if args.trans_subject_count > 0:
-        images_by_subject = images_by_subject[:args.trans_subject_count]
-        subject_folders = subject_folders[:args.trans_subject_count]
-
-    out_image_count = 0
-    out_mask_count = 0
-    if not args.out_folder.endswith("/"):
-        args.out_folder += "/"
-
-    if args.num_gpus > 1:
-        # Split the subjects across the GPUs.
-        subject_folders = subject_folders[process_index::args.num_gpus]
-        images_by_subject = images_by_subject[process_index::args.num_gpus]
-        #subject_folders, images_by_subject = distributed_state.split_between_processes(zip(subject_folders, images_by_subject))
-
-    for (subject_folder, image_paths) in zip(subject_folders, images_by_subject):
-        # If is_mix_subj_folder, then image_paths only contains 1 image, and we use the file name as the signature of the image.
-        # Otherwise, we use the folder name as the signature of the images.
-        images_sig = subject_folder if not args.is_mix_subj_folder else os.path.basename(image_paths[0])
-
-        print(f"Translating {images_sig}...")
-        with torch.no_grad():
-            adaface_subj_embs = adaface.generate_adaface_embeddings(image_paths, subject_folder, None, False,
-                                                                    out_id_embs_scale=1, noise_level=args.noise_level,
-                                                                    update_text_encoder=True)
-
-        # Replace the first occurrence of "in_folder" with "out_folder" in the path of the subject_folder.
-        subject_out_folder = subject_folder.replace(args.in_folder, args.out_folder, 1)
-        if not os.path.exists(subject_out_folder):
-            os.makedirs(subject_out_folder)
-        print(f"Output images will be saved to {subject_out_folder}")
-
-        in_images = []
-        for image_path in image_paths:
-            image = Image.open(image_path).convert("RGB").resize((512, 512))
-            # [512, 512, 3] -> [3, 512, 512].
-            image = np.array(image).transpose(2, 0, 1)
-            # Convert the image to a tensor of shape (1, 3, 512, 512) and move it to the GPU.
-            image = torch.tensor(image).unsqueeze(0).float().cuda()
-            in_images.append(image)
-
-        # Put all input images of the subject into a batch. This assumes max_images_per_subject is small.
-        # NOTE: For simplicity, we do not check overly large batch sizes.
-        in_images = torch.cat(in_images, dim=0)
-        # in_images: [5, 3, 512, 512].
-        # Normalize the pixel values to [0, 1].
-        in_images = in_images / 255.0
-        num_out_images = len(in_images) * args.out_count_per_input_image
-
-        with torch.no_grad():
-            # args.noise_level: the *relative* std of the noise added to the face embeddings.
-            # A noise level of 0.08 could change gender, but 0.06 is usually safe.
-            # The returned adaface_subj_embs are already incorporated in the text encoder, and not used explicitly.
-            # NOTE: We assume out_count_per_input_image == 1, so that the output images are of the same number as the input images.
-            out_images = adaface(in_images, args.prompt, args.guidance_scale, num_out_images, ref_img_strength=args.ref_img_strength)

-        for img_i, img in enumerate(out_images):
-            # out_images: subj_1, subj_2, ..., subj_n, subj_1, subj_2, ..., subj_n, ...
-            subj_i = img_i % len(in_images)
-            copy_i = img_i // len(in_images)
-            image_filename_stem, image_fileext = os.path.splitext(os.path.basename(image_paths[subj_i]))
-            if copy_i == 0:
-                img.save(os.path.join(subject_out_folder, f"{image_filename_stem}{image_fileext}"))
-            else:
-                img.save(os.path.join(subject_out_folder, f"{image_filename_stem}_{copy_i}{image_fileext}"))
-
-            if args.copy_masks:
-                mask_path = image_paths[subj_i].replace(image_fileext, "_mask.png")
-                if os.path.exists(mask_path):
-                    if copy_i == 0:
-                        shutil.copy(mask_path, subject_out_folder)
-                    else:
-                        mask_filename_stem = image_filename_stem
-                        shutil.copy(mask_path, os.path.join(subject_out_folder, f"{mask_filename_stem}_{copy_i}_mask.png"))
-
-                    out_mask_count += 1
-
-        out_image_count += len(out_images)
-
-    print(f"{out_image_count} output images and {out_mask_count} masks saved to {args.out_folder}")
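
Aside: the deleted script shards work across GPUs with plain strided slicing, subject_folders[process_index::args.num_gpus]. A tiny self-contained illustration of that round-robin split (the subject names are made up):

num_gpus = 2
subjects = ["alice", "bob", "carol", "dave", "eve"]
for process_index in range(num_gpus):
    # Each process takes every num_gpus-th subject, offset by its own index;
    # together the shards cover every subject exactly once.
    print(process_index, subjects[process_index::num_gpus])
# 0 ['alice', 'carol', 'eve']
# 1 ['bob', 'dave']
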
adaface/adaface_wrapper.py CHANGED
@@ -15,10 +15,6 @@ from adaface.face_id_to_ada_prompt import create_id2ada_prompt_encoder
 from safetensors.torch import load_file as safetensors_load_file
 import re, os
 import numpy as np
-import sys
-# Monkey patch the missing ldm module in the old arc2face adaface checkpoint.
-sys.modules['ldm'] = sys.modules['adaface']
-sys.modules['ldm.modules'] = sys.modules['adaface']
 
 class AdaFaceWrapper(nn.Module):
     def __init__(self, pipeline_name, base_model_path, adaface_encoder_types,
@@ -101,8 +97,8 @@ class AdaFaceWrapper(nn.Module):
             PipelineClass = StableDiffusionPipeline
         elif self.pipeline_name == "text2img3":
             PipelineClass = StableDiffusion3Pipeline
-        elif self.pipeline_name == "flux":
-            PipelineClass = FluxPipeline
+        #elif self.pipeline_name == "flux":
+        #    PipelineClass = FluxPipeline
         # pipeline_name is None means only use this instance to generate adaface embeddings, not to generate images.
         elif self.pipeline_name is None:
             PipelineClass = StableDiffusionPipeline
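
Aside: the removed monkey patch existed because torch.load unpickles objects by importing the module path recorded at save time; old arc2face AdaFace checkpoints reference ldm.modules, which no longer exists, so the patch aliased those names to the adaface package. A minimal standalone illustration of the aliasing mechanism (adaface_stub is a placeholder name, not the real package):

import sys, types

pkg = types.ModuleType("adaface_stub")
sys.modules["ldm"] = pkg   # alias the legacy top-level name to the stand-in module
import ldm                 # the import machinery finds the alias in sys.modules
assert ldm is pkg
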
adaface/arc2face_models.py CHANGED
@@ -16,7 +16,7 @@ _expand_mask = AttentionMaskConverter._expand_mask
 
 from .util import perturb_tensor
 
-def create_arc2face_pipeline(base_model_path="models/ensemble/sd15-dste8-vae.safetensors",
+def create_arc2face_pipeline(base_model_path="models/sd15-dste8-vae.safetensors",
                              dtype=torch.float16, unet_only=False):
     unet = UNet2DConditionModel.from_pretrained(
         'models/arc2face', subfolder="arc2face", torch_dtype=dtype
adaface/face_id_to_ada_prompt.py CHANGED
@@ -672,9 +672,7 @@ class ConsistentID_ID2AdaPrompt(FaceID2AdaPrompt):
         # are not used and will be released soon.
         # Only the consistentID modules and bise_net are used.
         assert base_model_path is not None, "base_model_path should be provided."
-        pipe = ConsistentIDPipeline.from_single_file(
-            base_model_path,
-        )
+        pipe = ConsistentIDPipeline.from_single_file(base_model_path)
         pipe.load_ConsistentID_model(consistentID_weight_path="./models/ConsistentID/ConsistentID-v1.bin",
                                      bise_net_weight_path="./models/ConsistentID/BiSeNet_pretrained_for_ConsistentID.pth")
         pipe.to(dtype=self.dtype)
adaface/unet_teachers.py CHANGED
@@ -192,7 +192,7 @@ class UNetEnsembleTeacher(UNetTeacher):
         self.unet = UNetEnsemble(unets, unet_types, extra_unet_dirpaths, unet_weights, device)
 
 class ConsistentIDTeacher(UNetTeacher):
-    def __init__(self, base_model_path="models/ensemble/sd15-dste8-vae.safetensors", **kwargs):
+    def __init__(self, base_model_path="models/sd15-dste8-vae.safetensors", **kwargs):
         super().__init__(**kwargs)
         self.name = "consistentID"
         ### Load base model
adaface/util.py CHANGED
@@ -223,7 +223,7 @@ class UNetEnsemble(nn.Module):
         else:
             return UNet2DConditionOutput(sample=sample)
 
-def create_consistentid_pipeline(base_model_path="models/ensemble/sd15-dste8-vae.safetensors",
+def create_consistentid_pipeline(base_model_path="models/sd15-dste8-vae.safetensors",
                                  dtype=torch.float16, unet_only=False):
     pipe = ConsistentIDPipeline.from_single_file(
         base_model_path,
app.py CHANGED
@@ -315,7 +315,7 @@ with gr.Blocks(css=css) as demo:
         minimum=30,
         maximum=80,
         step=1,
-        value=50,
+        value=40,
     )
 
     submit = gr.Button("Generate Video")
@@ -355,15 +355,15 @@ with gr.Blocks(css=css) as demo:
         minimum=0.3,
         maximum=1.5,
         step=0.1,
-        value=1,
+        value=1.2,
     )
     image_embed_cfg_end_scale = gr.Slider(
         label="ID-Animator Image Embedding Final Scale",
         info="The scale of the ID-Animator image embedding (influencing coarse facial features and poses)",
         minimum=0.0,
-        maximum=0.6,
+        maximum=1.5,
         step=0.1,
-        value=0.5,
+        value=0.8,
     )
 
     id_animator_anneal_steps = gr.Slider(
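
Aside: the two sliders set a start and a final scale for the ID-Animator image embedding, and id_animator_anneal_steps controls the transition between them. A hedged sketch of one plausible schedule, assuming linear annealing over the denoising steps (the function name and defaults are illustrative, not app.py's actual code):

def image_embed_scale_at_step(step: int, anneal_steps: int,
                              start_scale: float = 1.2, end_scale: float = 0.8) -> float:
    # Interpolate linearly from start_scale to end_scale over anneal_steps,
    # then hold end_scale for the remaining denoising steps.
    if anneal_steps <= 0 or step >= anneal_steps:
        return end_scale
    frac = step / anneal_steps
    return start_scale + (end_scale - start_scale) * frac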