FSFM-3C committed on
Commit 2fa1887 · 1 Parent(s): 4413e3a
Files changed (2)
  1. app.py +10 -8
  2. engine_finetune.py +1 -1
app.py CHANGED
@@ -273,6 +273,7 @@ def extract_face_from_fixed_num_frames(src_video, dst_path, num_frames=None, dev

     video_capture.release()
     # cv2.destroyAllWindows()
+    return frame_index


 def FSFM3C_video_detection(video):
@@ -286,7 +287,7 @@ def FSFM3C_video_detection(video):
     frame_path = os.path.join(FRAME_SAVE_PATH, str(num_files))
     os.makedirs(frame_path, exist_ok=True)
     os.makedirs(os.path.join(frame_path, '0'), exist_ok=True)
-    extract_face_from_fixed_num_frames(video, frame_path, num_frames=num_frames, device=device)
+    frame_index = extract_face_from_fixed_num_frames(video, frame_path, num_frames=num_frames, device=device)

     args.data_path = frame_path
     args.batch_size = 32
@@ -300,9 +301,10 @@ def FSFM3C_video_detection(video):
         drop_last=False
     )

-    frame_preds_list, video_y_pred_list = test_all(data_loader_val, model, device)
+    frame_preds_list, video_pred_list = test_all(data_loader_val, model, device)

-    return video_y_pred_list
+
+    return video_pred_list


 def FSFM3C_image_detection(image):
@@ -333,9 +335,9 @@ def FSFM3C_image_detection(image):
         drop_last=False
     )

-    frame_preds_list, video_y_pred_list = test_all(data_loader_val, model, device)
+    frame_preds_list, video_pred_list = test_all(data_loader_val, model, device)

-    return video_y_pred_list
+    return video_pred_list


 # WebUI
@@ -348,11 +350,11 @@ with gr.Blocks() as demo:

     gr.Markdown("- <b>V1.0 [2024-12] (Current):</b> "
                 "Create this page with basic detectors (simply fine-tuned models) that follow the paper implementation. "
-                "<b>Notes:</b> Performance is limited because no any optimization of data, models, hyperparameters, etc. is done for downstream tasks. <br>"
-                "- </b>(TODO):</b> Update practical models, and optimized interfaces, and provide more functions such as visualizations, a unified detector, and multi-modal diagnosis.")
+                "<b>Notes:</b> Performance is limited because no optimization of data, models, hyperparameters, etc. is done for downstream tasks. <br>"
+                "- <b>[TODO]:</b> Update practical models and optimized interfaces, and provide more functions such as visualizations, a unified detector, and multi-modal diagnosis.")

     gr.Markdown(
-        "> Please provide an <b>image</b> or a <b>video (<100s </b>, default to uniform sampling 32 frames)</b> and </b>select the model</b> for detection. <br>"
+        "> Please provide an <b>image</b> or a <b>video</b> (<100s, 32 frames sampled uniformly by default) and <b>select the model</b> for detection. <br>"
         "- <b>DfD-Checkpoint_Fine-tuned_on_FF++</b> for deepfake detection, FSFM VIT-B fine-tuned on the FF++_c23 dataset (train&val sets of 4 manipulations, 32 frames per video) <br>"
         "- <b>FAS-Checkpoint_Fine-tuned_on_MCIO</b> for face anti-spoofing, FSFM VIT-B fine-tuned on the MCIO datasets (2 frames per video) ")
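The app.py change above makes extract_face_from_fixed_num_frames return frame_index, which FSFM3C_video_detection now captures. For readers unfamiliar with the pipeline, here is a minimal, self-contained sketch of the uniform frame sampling the UI text refers to ("32 frames sampled uniformly by default"); the helper name uniform_frame_indices is hypothetical, and the real function also crops faces and handles the device, which is not reproduced here.

# Illustrative sketch only (not the repository's implementation): choose
# num_frames indices spread evenly across a clip, as a uniform-sampling
# extractor would before returning the sampled indices.
import numpy as np

def uniform_frame_indices(total_frames: int, num_frames: int = 32) -> list:
    """Return up to num_frames indices spread evenly over [0, total_frames)."""
    if total_frames <= 0:
        return []
    num_frames = min(num_frames, total_frames)
    return np.linspace(0, total_frames - 1, num=num_frames, dtype=int).tolist()

print(uniform_frame_indices(100, num_frames=8))  # [0, 14, 28, 42, 56, 70, 84, 99]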
engine_finetune.py CHANGED
@@ -320,4 +320,4 @@ def test_all(data_loader, model, device):
     # video_auc=metric_logger.video_auc, video_eer=metric_logger.video_eer))

     # return {k: meter.global_avg for k, meter in metric_logger.meters.items()}
-    return frame_preds_list, video_y_pred_list
+    return frame_preds_list, video_pred_list
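The rename in engine_finetune.py is mirrored by both call sites in app.py, so test_all consistently returns (frame_preds_list, video_pred_list). As a rough illustration of that contract, the sketch below uses a hypothetical aggregate_video_preds helper that averages per-frame scores into per-video scores; the real test_all evaluates the model over a DataLoader and may aggregate differently.

# Hypothetical illustration of the (frame_preds_list, video_pred_list) return
# contract after the rename; per-video scores here are simply the mean of each
# video's frame scores.
from typing import List, Tuple

def aggregate_video_preds(frame_preds_list: List[float],
                          frames_per_video: int) -> Tuple[List[float], List[float]]:
    video_pred_list = []
    for start in range(0, len(frame_preds_list), frames_per_video):
        chunk = frame_preds_list[start:start + frames_per_video]
        video_pred_list.append(sum(chunk) / len(chunk))
    return frame_preds_list, video_pred_list

# Two videos, four frames each (illustrative fake-probability scores).
frame_preds, video_preds = aggregate_video_preds(
    [0.9, 0.8, 0.7, 0.95, 0.1, 0.2, 0.15, 0.05], frames_per_video=4)
print(video_preds)  # [0.8375, 0.125]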