Update webgui.py
webgui.py
CHANGED
@@ -35,6 +35,7 @@ huggingface_hub.snapshot_download(
     local_dir_use_symlinks=False,
 )
 
+os.environ["CUDA_VISIBLE_DEVICES"] = ""
 is_shared_ui = True if "fffiloni/EchoMimic" in os.environ['SPACE_ID'] else False
 available_property = False if is_shared_ui else True
 advanced_settings_label = "Advanced Configuration (only for duplicated spaces)" if is_shared_ui else "Advanced Configuration"
@@ -73,13 +74,16 @@ else:
 device = "cuda"
 if not torch.cuda.is_available():
     device = "cpu"
+device = "cpu"
+torch.cuda.is_available = lambda : False
 
 inference_config_path = config.inference_config
 infer_config = OmegaConf.load(inference_config_path)
 
 ############# model_init started #############
 ## vae init
-vae = AutoencoderKL.from_pretrained(config.pretrained_vae_path).to("cuda", dtype=weight_dtype)
+# vae = AutoencoderKL.from_pretrained(config.pretrained_vae_path).to("cuda", dtype=weight_dtype)
+vae = AutoencoderKL.from_pretrained(config.pretrained_vae_path).to("cpu", dtype=weight_dtype)
 
 ## reference net init
 reference_unet = UNet2DConditionModel.from_pretrained(
@@ -113,7 +117,8 @@ else:
 denoising_unet.load_state_dict(torch.load(config.denoising_unet_path, map_location="cpu"), strict=False)
 
 ## face locator init
-face_locator = FaceLocator(320, conditioning_channels=1, block_out_channels=(16, 32, 96, 256)).to(dtype=weight_dtype, device="cuda")
+# face_locator = FaceLocator(320, conditioning_channels=1, block_out_channels=(16, 32, 96, 256)).to(dtype=weight_dtype, device="cuda")
+face_locator = FaceLocator(320, conditioning_channels=1, block_out_channels=(16, 32, 96, 256)).to(dtype=weight_dtype, device="cpu")
 face_locator.load_state_dict(torch.load(config.face_locator_path))
 
 ## load audio processor params
@@ -134,7 +139,7 @@ pipe = Audio2VideoPipeline(
     audio_guider=audio_processor,
     face_locator=face_locator,
     scheduler=scheduler,
-).to("
+).to("cpu", dtype=weight_dtype)
 
 def select_face(det_bboxes, probs):
     ## max face from faces that the prob is above 0.8
@@ -182,7 +187,8 @@ def process_video(uploaded_img, uploaded_audio, width, height, length, seed, fac
     face_mask = cv2.resize(face_mask, (width, height))
 
     ref_image_pil = Image.fromarray(face_img[:, :, [2, 1, 0]])
-    face_mask_tensor = torch.Tensor(face_mask).to(dtype=weight_dtype, device="cuda").unsqueeze(0).unsqueeze(0).unsqueeze(0) / 255.0
+    # face_mask_tensor = torch.Tensor(face_mask).to(dtype=weight_dtype, device="cuda").unsqueeze(0).unsqueeze(0).unsqueeze(0) / 255.0
+    face_mask_tensor = torch.Tensor(face_mask).to(dtype=weight_dtype, device="cpu").unsqueeze(0).unsqueeze(0).unsqueeze(0) / 255.0
 
     video = pipe(
         ref_image_pil,