Spaces: Running on Zero

adaface-neurips committed · commit a29cf91 · Parent(s): 61fbdeb

Allow dynamically changing base model style type, support anime style, upgrade adaface model

Browse files:
- adaface/adaface_wrapper.py +16 -7
- adaface/face_id_to_ada_prompt.py +11 -2
- app.py +81 -40
- infer.py +6 -6
- models/adaface/{VGGface2_HQ_masks2024-10-05T09-28-53_zero3-ada-28000.pt → VGGface2_HQ_masks2024-10-08T14-42-05_zero3-ada-24500.pt} +2 -2
- models/{rv51/realisticVisionV51_v51VAE.safetensors → aingdiffusion/aingdiffusion_v170_ar.safetensors} +1 -1
- models/rv51/realisticVisionV51_v51VAE_dste8.safetensors +3 -0
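In short, the base checkpoint is now selected through a style-type key instead of a hard-coded path. Below is a minimal sketch of the new selection flow, assuming only the model_style_type2base_model_path mapping and the load_model() signature introduced in infer.py further down; the "anime" key and the "cuda" device are example values.

from infer import load_model, model_style_type2base_model_path

# Supported style keys after this commit: "realistic", "anime", "photorealistic".
model_style_type = "anime"
base_model_path = model_style_type2base_model_path[model_style_type]

# load_model() resolves the same mapping internally; app.py assigns its return value
# to id_animator once at startup and again whenever the style dropdown changes.
id_animator = load_model(model_style_type=model_style_type, device="cuda")
print(f"Loaded {model_style_type} base model from {base_model_path}")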
adaface/adaface_wrapper.py
CHANGED

@@ -247,7 +247,7 @@ class AdaFaceWrapper(nn.Module):
             token_embeds[token_id] = subj_embs[i]
         print(f"Updated {len(self.placeholder_token_ids)} tokens ({self.all_placeholder_tokens_str}) in the text encoder.")
 
-    def update_prompt(self, prompt):
+    def update_prompt(self, prompt, placeholder_tokens_pos='postpend'):
         if prompt is None:
             prompt = ""
 
@@ -259,7 +259,10 @@ class AdaFaceWrapper(nn.Module):
         # When we do joint training, seems both work better if they are appended to the prompt.
         # Therefore we simply appended all placeholder_tokens_str's to the prompt.
         # NOTE: Prepending them hurts compositional prompts.
-        prompt = prompt + " " + self.all_placeholder_tokens_str
+        if placeholder_tokens_pos == 'prepend':
+            prompt = self.all_placeholder_tokens_str + " " + prompt
+        elif placeholder_tokens_pos == 'postpend':
+            prompt = prompt + " " + self.all_placeholder_tokens_str
 
         return prompt
 
@@ -290,14 +293,16 @@ class AdaFaceWrapper(nn.Module):
         self.update_text_encoder_subj_embeddings(all_adaface_subj_embs)
         return all_adaface_subj_embs
 
-    def encode_prompt(self, prompt, negative_prompt=None, device=None, verbose=False):
+    def encode_prompt(self, prompt, negative_prompt=None,
+                      placeholder_tokens_pos='postpend',
+                      device=None, verbose=False):
         if negative_prompt is None:
             negative_prompt = self.negative_prompt
 
         if device is None:
             device = self.device
 
-        prompt = self.update_prompt(prompt)
+        prompt = self.update_prompt(prompt, placeholder_tokens_pos=placeholder_tokens_pos)
         if verbose:
             print(f"Subject prompt: {prompt}")
 
@@ -350,8 +355,10 @@ class AdaFaceWrapper(nn.Module):
         return prompt_embeds_, negative_prompt_embeds_, pooled_prompt_embeds_, negative_pooled_prompt_embeds_
 
     # ref_img_strength is used only in the img2img pipeline.
-    def forward(self, noise, prompt, negative_prompt=None,
-                guidance_scale=6.0, out_image_count=4, ref_img_strength=0.8, generator=None, verbose=False):
+    def forward(self, noise, prompt, negative_prompt=None,
+                placeholder_tokens_pos='postpend',
+                guidance_scale=6.0, out_image_count=4,
+                ref_img_strength=0.8, generator=None, verbose=False):
         noise = noise.to(device=self.device, dtype=torch.float16)
 
         if negative_prompt is None:
@@ -359,7 +366,9 @@ class AdaFaceWrapper(nn.Module):
         # prompt_embeds_, negative_prompt_embeds_: [1, 77, 768]
         prompt_embeds_, negative_prompt_embeds_, \
         pooled_prompt_embeds_, negative_pooled_prompt_embeds_ = \
-            self.encode_prompt(prompt, negative_prompt, device=self.device, verbose=verbose)
+            self.encode_prompt(prompt, negative_prompt,
+                               placeholder_tokens_pos=placeholder_tokens_pos,
+                               device=self.device, verbose=verbose)
         # Repeat the prompt embeddings for all images in the batch.
         prompt_embeds_ = prompt_embeds_.repeat(out_image_count, 1, 1)
         if negative_prompt_embeds_ is not None:
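A quick illustration of the new placeholder_tokens_pos argument (a sketch only; adaface is assumed to be an initialized AdaFaceWrapper and noise a noise tensor as in app.py):

# 'postpend' (the default) appends the subject placeholder tokens to the prompt,
# matching the previous behavior; 'prepend' puts them in front, which app.py now
# uses for the initial "face portrait" images.
prompt_embeds, neg_embeds, pooled_embeds, neg_pooled_embeds = \
    adaface.encode_prompt("face portrait, walking on the beach",
                          placeholder_tokens_pos='prepend', verbose=True)

samples = adaface(noise, "walking on the beach",
                  placeholder_tokens_pos='postpend', out_image_count=4)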
adaface/face_id_to_ada_prompt.py
CHANGED

@@ -863,8 +863,17 @@ class Joint_FaceID2AdaPrompt(FaceID2AdaPrompt):
             ckpt_subj_basis_generator.initialize_static_img_suffix_embs(self.encoders_num_static_img_suffix_embs[i],
                                                                         img_prompt_dim=self.output_dim)
 
-            subj_basis_generator.
+            if subj_basis_generator.prompt2token_proj_attention_multipliers \
+              == [1] * 12:
+                subj_basis_generator.extend_prompt2token_proj_attention(\
+                    ckpt_subj_basis_generator.prompt2token_proj_attention_multipliers, -1, -1, 1, perturb_std=0)
+            elif subj_basis_generator.prompt2token_proj_attention_multipliers \
+              != ckpt_subj_basis_generator.prompt2token_proj_attention_multipliers:
+                raise ValueError("Inconsistent prompt2token_proj_attention_multipliers.")
+
+            assert subj_basis_generator.prompt2token_proj_attention_multipliers \
+                   == ckpt_subj_basis_generator.prompt2token_proj_attention_multipliers, \
+                   "Inconsistent prompt2token_proj_attention_multipliers."
             subj_basis_generator.load_state_dict(ckpt_subj_basis_generator.state_dict())
 
             # extend_prompt2token_proj_attention_multiplier is an integer >= 1.
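The checkpoint-loading change above only enforces a compatibility rule before load_state_dict(). Restated as a small self-contained sketch (names follow the diff; [1] * 12 is treated as the untouched default, presumably one multiplier per attention layer):

def load_ckpt_into_generator(subj_basis_generator, ckpt_subj_basis_generator):
    default_multipliers = [1] * 12
    live_mults = subj_basis_generator.prompt2token_proj_attention_multipliers
    ckpt_mults = ckpt_subj_basis_generator.prompt2token_proj_attention_multipliers

    if live_mults == default_multipliers:
        # Fresh generator: grow its attention layers to match the checkpoint first.
        subj_basis_generator.extend_prompt2token_proj_attention(ckpt_mults, -1, -1, 1, perturb_std=0)
    elif live_mults != ckpt_mults:
        # Already extended, but with different multipliers: the state dicts cannot match.
        raise ValueError("Inconsistent prompt2token_proj_attention_multipliers.")

    subj_basis_generator.load_state_dict(ckpt_subj_basis_generator.state_dict())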
app.py
CHANGED

@@ -7,7 +7,7 @@ from animatediff.utils.util import save_videos_grid
 from adaface.adaface_wrapper import AdaFaceWrapper
 
 import random
-from infer import load_model
+from infer import load_model, model_style_type2base_model_path
 MAX_SEED=10000
 import uuid
 from insightface.app import FaceAnalysis
@@ -24,20 +24,13 @@ parser = argparse.ArgumentParser()
 parser.add_argument("--adaface_encoder_types", type=str, nargs="+", default=["consistentID", "arc2face"],
                     choices=["arc2face", "consistentID"], help="Type(s) of the ID2Ada prompt encoders")
 parser.add_argument('--adaface_ckpt_path', type=str,
-                    default='models/adaface/VGGface2_HQ_masks2024-10-05T09-28-53_zero3-ada-28000.pt')
-parser.add_argument('--base_model_type', type=str,
-                    choices=["sar", "rv51"])
+                    default='models/adaface/VGGface2_HQ_masks2024-10-08T14-42-05_zero3-ada-24500.pt')
+parser.add_argument('--model_style_type', type=str, default='realistic',
+                    choices=["realistic", "anime", "photorealistic"], help="Type of the base model")
 parser.add_argument('--gpu', type=int, default=None)
 parser.add_argument('--ip', type=str, default="0.0.0.0")
 args = parser.parse_args()
 
-base_model_type_to_path = {
-    "sd15": "models/sd15-dste8-vae.safetensors",    # LDM format. Needs to be converted.
-    "sar":  "models/sar/sar.safetensors",           # LDM format. Needs to be converted.
-    "rv51": "models/rv51/realisticVisionV51_v51VAE.safetensors"
-}
-
 def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
@@ -50,16 +43,16 @@ app = FaceAnalysis(name="buffalo_l", root='models/insightface', providers=['CUDA
 app.prepare(ctx_id=0, det_size=(320, 320))
 device = "cuda" if args.gpu is None else f"cuda:{args.gpu}"
 
-base_model_path = base_model_type_to_path[args.base_model_type]
+global adaface, id_animator
 
+base_model_path = model_style_type2base_model_path[args.model_style_type]
+id_animator = load_model(model_style_type=args.model_style_type, device=device)
 adaface = AdaFaceWrapper(pipeline_name="text2img", base_model_path=base_model_path,
                          adaface_encoder_types=args.adaface_encoder_types,
                          adaface_ckpt_paths=[args.adaface_ckpt_path], device=device)
 
-basedir
-savedir
+basedir = os.getcwd()
+savedir = os.path.join(basedir,'samples')
 os.makedirs(savedir, exist_ok=True)
 
 #print(f"### Cleaning cached examples ...")
@@ -81,10 +74,23 @@ def get_clicked_image(data: gr.SelectData):
     return data.index
 
 @spaces.GPU
-def gen_init_images(uploaded_image_paths, prompt, out_image_count=3):
+def gen_init_images(uploaded_image_paths, model_style_type, prompt, out_image_count=3):
+    global adaface, id_animator
     if uploaded_image_paths is None:
         print("No image uploaded")
         return None, None, None
+
+    model_style_type = model_style_type.lower()
+    base_model_path = model_style_type2base_model_path[model_style_type]
+    # If the base model type is changed, reload the model.
+    if model_style_type != args.model_style_type:
+        id_animator = load_model(model_style_type=model_style_type, device=device)
+        adaface = AdaFaceWrapper(pipeline_name="text2img", base_model_path=base_model_path,
+                                 adaface_encoder_types=args.adaface_encoder_types,
+                                 adaface_ckpt_paths=[args.adaface_ckpt_path], device=device)
+        # Update base model type.
+        args.model_style_type = model_style_type
+
     # uploaded_image_paths is a list of tuples:
     # [('/tmp/gradio/249981e66a7c665aaaf1c7eaeb24949af4366c88/jensen huang.jpg', None)]
     # Extract the file paths.
@@ -98,9 +104,20 @@ def gen_init_images(uploaded_image_paths, prompt, out_image_count=3):
 
     # Generate two images each time for the user to select from.
     noise = torch.randn(out_image_count, 3, 512, 512)
+
+    enhance_face = True
+    if enhance_face and "face portrait" not in prompt:
+        if "portrait" in prompt:
+            # Enhance the face features by replacing "portrait" with "face portrait".
+            prompt = prompt.replace("portrait", "face portrait")
+        else:
+            prompt = "face portrait, " + prompt
+
     # samples: A list of PIL Image instances.
     with torch.no_grad():
-        samples = adaface(noise, prompt,
+        samples = adaface(noise, prompt,
+                          placeholder_tokens_pos='prepend',
+                          out_image_count=out_image_count, verbose=True)
 
     face_paths = []
     for sample in samples:
@@ -114,13 +131,25 @@ def gen_init_images(uploaded_image_paths, prompt, out_image_count=3):
     return gr.update(value=face_paths, visible=True), gr.update(value=face_paths, visible=False), gr.update(visible=True)
 
 @spaces.GPU(duration=90)
-def generate_image(image_container, uploaded_image_paths, init_img_file_paths, init_img_selected_idx,
-                   init_image_strength, init_image_final_weight,
+def generate_video(image_container, uploaded_image_paths, init_img_file_paths, init_img_selected_idx,
+                   init_image_strength, init_image_final_weight, model_style_type,
                    prompt, negative_prompt, num_steps, video_length, guidance_scale, seed,
                    attn_scale, image_embed_cfg_begin_scale, image_embed_cfg_end_scale,
                    is_adaface_enabled, adaface_ckpt_path, adaface_power_scale,
                    id_animator_anneal_steps, progress=gr.Progress(track_tqdm=True)):
 
+    global adaface, id_animator
+    model_style_type = model_style_type.lower()
+    base_model_path = model_style_type2base_model_path[model_style_type]
+    # If the base model type is changed, reload the model.
+    if model_style_type != args.model_style_type:
+        id_animator = load_model(model_style_type=model_style_type, device=device)
+        adaface = AdaFaceWrapper(pipeline_name="text2img", base_model_path=base_model_path,
+                                 adaface_encoder_types=args.adaface_encoder_types,
+                                 adaface_ckpt_paths=[args.adaface_ckpt_path], device=device)
+        # Update base model type.
+        args.model_style_type = model_style_type
+
     if prompt is None:
         prompt = ""
 
@@ -145,7 +174,8 @@ def generate_image(image_container, uploaded_image_paths, init_img_file_paths, i
     else:
         if (adaface_ckpt_path is not None and adaface_ckpt_path.strip() != '') \
           and (adaface_ckpt_path != args.adaface_ckpt_path):
+            args.adaface_ckpt_path = adaface_ckpt_path
+            # Reload the adaface model weights.
            adaface.id2ada_prompt_encoder.load_adaface_ckpt(adaface_ckpt_path)
 
     with torch.no_grad():
@@ -154,7 +184,9 @@ def generate_image(image_container, uploaded_image_paths, init_img_file_paths, i
                                                    update_text_encoder=True)
 
        # adaface_prompt_embeds: [1, 77, 768].
-        adaface_prompt_embeds, _, _, _ = adaface.encode_prompt(prompt,
+        adaface_prompt_embeds, _, _, _ = adaface.encode_prompt(prompt,
+                                                               placeholder_tokens_pos='prepend',
+                                                               verbose=True)
 
     image_embed_cfg_scales = (image_embed_cfg_begin_scale, image_embed_cfg_end_scale)
 
@@ -252,20 +284,20 @@ with gr.Blocks(css=css) as demo:
 
        prompt = gr.Dropdown(label="Prompt",
                   info="Try something like 'man/woman walking on the beach'.",
-                   value="((best quality)), ((masterpiece)), ((realistic)), highlighted hair, futuristic silver armor suit, confident stance, high-resolution, living room, smiling, head tilted, perfect smooth skin",
+                   value="portrait, ((best quality)), ((masterpiece)), ((realistic)), highlighted hair, futuristic silver armor suit, confident stance, high-resolution, living room, smiling, head tilted, perfect smooth skin",
                   allow_custom_value=True,
                   filterable=False,
                   choices=[
-                        "((best quality)), ((masterpiece)), ((realistic)), highlighted hair, futuristic silver armor suit, confident stance, high-resolution, living room, smiling, head tilted, perfect smooth skin",
-                        "walking on the beach, sunset, orange sky, eye level shot",
-                        "in a white apron and chef hat, garnishing a gourmet dish, full body view, long shot",
-                        "dancing pose among folks in a park, waving hands",
-                        "in iron man costume flying pose, the sky ablaze with hues of orange and purple, full body view, long shot",
-                        "jedi wielding a lightsaber, star wars, full body view, eye level shot",
-                        "playing guitar on a boat, ocean waves",
-                        "with a passion for reading, curled up with a book in a cozy nook near a window",
-                        "running pose in a park, eye level shot",
-                        "in superman costume flying pose, the sky ablaze with hues of orange and purple, full body view, long shot"
+                        "portrait, ((best quality)), ((masterpiece)), ((realistic)), highlighted hair, futuristic silver armor suit, confident stance, high-resolution, living room, smiling, head tilted, perfect smooth skin",
+                        "portrait, walking on the beach, sunset, orange sky, eye level shot",
+                        "portrait, in a white apron and chef hat, garnishing a gourmet dish, full body view, long shot",
+                        "portrait, dancing pose among folks in a park, waving hands",
+                        "portrait, in iron man costume flying pose, the sky ablaze with hues of orange and purple, full body view, long shot",
+                        "portrait, jedi wielding a lightsaber, star wars, full body view, eye level shot",
+                        "portrait, playing guitar on a boat, ocean waves",
+                        "portrait, with a passion for reading, curled up with a book in a cozy nook near a window",
+                        "portrait, running pose in a park, eye level shot",
+                        "portrait, in superman costume flying pose, the sky ablaze with hues of orange and purple, full body view, long shot"
                        ])
 
        init_image_strength = gr.Slider(
@@ -285,6 +317,14 @@ with gr.Blocks(css=css) as demo:
            value=0.1,
        )
 
+        model_style_type = gr.Dropdown(
+            label="Base Model Style Type",
+            info="Switching the base model type will take 10~20 seconds to reload the model",
+            value=args.model_style_type,
+            choices=["Realistic", "Anime", "Photorealistic"],
+            allow_custom_value=False,
+            filterable=False,
+        )
        guidance_scale = gr.Slider(
            label="Guidance scale",
            minimum=1.0,
@@ -352,18 +392,18 @@ with gr.Blocks(css=css) as demo:
            image_embed_cfg_begin_scale = gr.Slider(
                label="ID-Animator Image Embedding Initial Scale",
                info="The scale of the ID-Animator image embedding (influencing coarse facial features and poses)",
-                minimum=0.
+                minimum=0.6,
                maximum=1.5,
                step=0.1,
-                value=1.
+                value=1.0,
            )
            image_embed_cfg_end_scale = gr.Slider(
                label="ID-Animator Image Embedding Final Scale",
                info="The scale of the ID-Animator image embedding (influencing coarse facial features and poses)",
-                minimum=0.
+                minimum=0.3,
                maximum=1.5,
                step=0.1,
-                value=0.
+                value=0.5,
            )
 
            id_animator_anneal_steps = gr.Slider(
@@ -393,7 +433,7 @@ with gr.Blocks(css=css) as demo:
    init_img_files.upload(fn=swap_to_gallery, inputs=init_img_files, outputs=[uploaded_init_img_gallery, init_clear_button_column, init_img_files])
    remove_init_and_reupload.click(fn=remove_back_to_files, outputs=[uploaded_init_img_gallery, init_clear_button_column,
                                                                     init_img_files, init_img_selected_idx])
-    gen_init.click(fn=gen_init_images, inputs=[uploaded_files_gallery, prompt],
+    gen_init.click(fn=gen_init_images, inputs=[uploaded_files_gallery, model_style_type, prompt],
                   outputs=[uploaded_init_img_gallery, init_img_files, init_clear_button_column])
    uploaded_init_img_gallery.select(fn=get_clicked_image, inputs=None, outputs=init_img_selected_idx)
 
@@ -405,8 +445,9 @@ with gr.Blocks(css=css) as demo:
        queue=False,
        api_name=False,
    ).then(
-        fn=generate_image,
-        inputs=[image_container, files, init_img_files, init_img_selected_idx, init_image_strength,
+        fn=generate_video,
+        inputs=[image_container, files, init_img_files, init_img_selected_idx, init_image_strength,
+                init_image_final_weight, model_style_type,
                prompt, negative_prompt, num_steps, video_length, guidance_scale,
                seed, attn_scale, image_embed_cfg_begin_scale, image_embed_cfg_end_scale,
                is_adaface_enabled, adaface_ckpt_path, adaface_power_scale, id_animator_anneal_steps],
infer.py
CHANGED

@@ -9,19 +9,19 @@ from safetensors import safe_open
 from animatediff.utils.convert_from_ckpt import convert_ldm_unet_checkpoint, convert_ldm_clip_checkpoint, convert_ldm_vae_checkpoint
 from faceadapter.face_adapter import FaceAdapterPlusForVideoLora
 
+model_style_type2base_model_path = {
+    "realistic":      "models/rv51/realisticVisionV51_v51VAE_dste8.safetensors",
+    "anime":          "models/aingdiffusion/aingdiffusion_v170_ar.safetensors",
+    "photorealistic": "models/sar/sar.safetensors"   # LDM format. Needs to be converted.
 }
 
-def load_model(
+def load_model(model_style_type="realistic", device="cuda"):
     inference_config = "inference-v2.yaml"
     sd_version = "animatediff/sd"
     id_ckpt = "models/animator.ckpt"
     image_encoder_path = "models/image_encoder"
 
-    base_model_path
+    base_model_path = model_style_type2base_model_path[model_style_type]
 
     motion_module_path="models/v3_sd15_mm.ckpt"
     motion_lora_path = "models/v3_sd15_adapter.ckpt"
models/adaface/{VGGface2_HQ_masks2024-10-05T09-28-53_zero3-ada-28000.pt → VGGface2_HQ_masks2024-10-08T14-42-05_zero3-ada-24500.pt}
RENAMED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:c66b1847072c66deaa38b9ec91c0d76ac5274dec8d02444fc9672f0defa4d156
+size 1814921594
models/{rv51/realisticVisionV51_v51VAE.safetensors → aingdiffusion/aingdiffusion_v170_ar.safetensors}
RENAMED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:883af0939ef9bbb7ca03e90e778512258be26be7bef9276768c1594f9b7d3590
 size 2132625894
models/rv51/realisticVisionV51_v51VAE_dste8.safetensors
ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a96d832b0df00b72e762486cec30311f4c706871f50120fc5dab6f60cf044a33
+size 2132625894