AIfehack committed
Commit cb9893e · verified · Parent: de254ad

Update webgui.py

Files changed (1)
  1. webgui.py +10 -4
webgui.py CHANGED
@@ -35,6 +35,7 @@ huggingface_hub.snapshot_download(
     local_dir_use_symlinks=False,
 )
 
+os.environ["CUDA_VISIBLE_DEVICES"] = ""
 is_shared_ui = True if "fffiloni/EchoMimic" in os.environ['SPACE_ID'] else False
 available_property = False if is_shared_ui else True
 advanced_settings_label = "Advanced Configuration (only for duplicated spaces)" if is_shared_ui else "Advanced Configuration"
@@ -73,13 +74,16 @@ else:
 device = "cuda"
 if not torch.cuda.is_available():
     device = "cpu"
+device = "cpu"
+torch.cuda.is_available = lambda : False
 
 inference_config_path = config.inference_config
 infer_config = OmegaConf.load(inference_config_path)
 
 ############# model_init started #############
 ## vae init
-vae = AutoencoderKL.from_pretrained(config.pretrained_vae_path).to("cuda", dtype=weight_dtype)
+# vae = AutoencoderKL.from_pretrained(config.pretrained_vae_path).to("cuda", dtype=weight_dtype)
+vae = AutoencoderKL.from_pretrained(config.pretrained_vae_path).to("cpu", dtype=weight_dtype)
 
 ## reference net init
 reference_unet = UNet2DConditionModel.from_pretrained(
@@ -113,7 +117,8 @@ else:
 denoising_unet.load_state_dict(torch.load(config.denoising_unet_path, map_location="cpu"), strict=False)
 
 ## face locator init
-face_locator = FaceLocator(320, conditioning_channels=1, block_out_channels=(16, 32, 96, 256)).to(dtype=weight_dtype, device="cuda")
+# face_locator = FaceLocator(320, conditioning_channels=1, block_out_channels=(16, 32, 96, 256)).to(dtype=weight_dtype, device="cuda")
+face_locator = FaceLocator(320, conditioning_channels=1, block_out_channels=(16, 32, 96, 256)).to(dtype=weight_dtype, device="cpu")
 face_locator.load_state_dict(torch.load(config.face_locator_path))
 
 ## load audio processor params
@@ -134,7 +139,7 @@ pipe = Audio2VideoPipeline(
     audio_guider=audio_processor,
     face_locator=face_locator,
     scheduler=scheduler,
-).to("cuda", dtype=weight_dtype)
+).to("cpu", dtype=weight_dtype)
 
 def select_face(det_bboxes, probs):
     ## max face from faces that the prob is above 0.8
@@ -182,7 +187,8 @@ def process_video(uploaded_img, uploaded_audio, width, height, length, seed, fac
     face_mask = cv2.resize(face_mask, (width, height))
 
     ref_image_pil = Image.fromarray(face_img[:, :, [2, 1, 0]])
-    face_mask_tensor = torch.Tensor(face_mask).to(dtype=weight_dtype, device="cuda").unsqueeze(0).unsqueeze(0).unsqueeze(0) / 255.0
+    # face_mask_tensor = torch.Tensor(face_mask).to(dtype=weight_dtype, device="cuda").unsqueeze(0).unsqueeze(0).unsqueeze(0) / 255.0
+    face_mask_tensor = torch.Tensor(face_mask).to(dtype=weight_dtype, device="cpu").unsqueeze(0).unsqueeze(0).unsqueeze(0) / 255.0
 
     video = pipe(
         ref_image_pil,
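For reference, a minimal sketch of the CPU-forcing pattern this commit applies, assuming plain PyTorch; the Linear layer is a stand-in for EchoMimic's real VAE/UNet modules, and the device/weight_dtype names are illustrative, not taken from the file:

import os

# Hide all CUDA devices before torch initializes any GPU context.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

import torch

# Mirror the diff's monkey-patch so downstream "if torch.cuda.is_available()"
# checks in third-party code also fall back to CPU.
torch.cuda.is_available = lambda: False

# Pick the device and dtype once; fp32 is the safer choice on CPU, where
# fp16 kernels are often missing or slow.
device = torch.device("cpu")
weight_dtype = torch.float32

# Stand-in module: every model in the pipeline would be moved the same way,
# which avoids the per-line "cuda" -> "cpu" string edits in the diff above.
model = torch.nn.Linear(4, 2).to(device, dtype=weight_dtype)
x = torch.randn(1, 4, device=device, dtype=weight_dtype)
with torch.no_grad():
    print(model(x).shape)  # torch.Size([1, 2])

Routing every .to(...) call through a single device variable would let the same file run on either backend without touching each model-init line individually.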