Spaces:

Wolowolo
/

FSFM-deepfake_diffusion_spoof_face_detection

Running

App Files Files Community

FSFM-3C committed 27 days ago

Commit

4413e3a

1 Parent(s): f711a0a

modified: app.py

Browse files

Files changed (1) hide show

app.py +35 -30

app.py CHANGED Viewed

@@ -7,8 +7,10 @@
 # pip uninstall nvidia_cublas_cu11
 import sys
 sys.path.append('..')
 import os
 os.system(f'pip install dlib')
 import torch
 import numpy as np
@@ -24,14 +26,13 @@ from engine_finetune import test_all
 import dlib
 from huggingface_hub import hf_hub_download
 P = os.path.abspath(__file__)
 FRAME_SAVE_PATH = os.path.join(P[:-6], 'frame')
 CKPT_SAVE_PATH = os.path.join(P[:-6], 'checkpoints')
 CKPT_LIST = ['DfD-Checkpoint_Fine-tuned_on_FF++',
              'FAS-Checkpoint_Fine-tuned_on_MCIO']
 CKPT_NAME = {'DfD-Checkpoint_Fine-tuned_on_FF++': 'finetuned_models/FF++_c23_32frames/checkpoint-min_val_loss.pth',
-             'FAS-Checkpoint_Fine-tuned_on_MCIO': 'finetuned_models/MCIO_protocol/Both_MCIO/checkpoint-min_val_loss.pth' }
 os.makedirs(FRAME_SAVE_PATH, exist_ok=True)
 os.makedirs(CKPT_SAVE_PATH, exist_ok=True)
@@ -170,13 +171,14 @@ model = models_vit.__dict__['vit_base_patch16'](
     global_pool=args.global_pool,
 )
 def load_model(ckpt):
-    if ckpt=='choose from here' or 'continuously updating...':
         return gr.update()
     args.resume = os.path.join(CKPT_SAVE_PATH, ckpt)
     if os.path.isfile(args.resume) == False:
         hf_hub_download(local_dir=CKPT_SAVE_PATH,
-                        repo_id='Wolowolo/fsfm-3c/'+ CKPT_NAME[ckpt],
                         filename=ckpt)
     checkpoint = torch.load(args.resume, map_location='cpu')
     model.load_state_dict(checkpoint['model'])
@@ -230,14 +232,16 @@ def extract_face(frame):
         return Image.fromarray(cropped_face)
     else:
         return None
 def get_frame_index_uniform_sample(total_frame_num, extract_frame_num):
     """Pick evenly spaced frame indices spanning a whole video.

     Args:
         total_frame_num: Number of frames in the video.
         extract_frame_num: How many indices to return.

     Returns:
         A list of ``extract_frame_num`` Python ints running from 0 to
         ``total_frame_num - 1`` inclusive. Indices may repeat when more
         samples are requested than there are frames.
     """
     sampled_indices = np.linspace(
         start=0,
         stop=total_frame_num - 1,
         num=extract_frame_num,
         dtype=int,
     )
     return sampled_indices.tolist()
 import cv2
 def extract_face_from_fixed_num_frames(src_video, dst_path, num_frames=None, device='cpu'):
     """
     1) extract specific num of frames from videos in [1st(index 0) frame, last frame] with uniform sample interval
@@ -255,7 +259,7 @@ def extract_face_from_fixed_num_frames(src_video, dst_path, num_frames=None, dev
     for frame_index in frame_indices:
         video_capture.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
         ret, frame = video_capture.read()
-        image = Image.fromarray(cv2.cvtColor(frame,cv2.COLOR_BGR2RGB))
         img = extract_face(image)
         if img == None:
             continue
@@ -263,27 +267,27 @@ def extract_face_from_fixed_num_frames(src_video, dst_path, num_frames=None, dev
         if not ret:
             continue
         save_img_name = f"frame_{frame_index}.png"
         img.save(os.path.join(dst_path, '0', save_img_name))
         # cv2.imwrite(os.path.join(dst_path, '0', save_img_name), frame)
     video_capture.release()
     # cv2.destroyAllWindows()
 def FSFM3C_video_detection(video):
     model.to(device)
     # extract frames
     num_frames = 32
     files = os.listdir(FRAME_SAVE_PATH)
     num_files = len(files)
     frame_path = os.path.join(FRAME_SAVE_PATH, str(num_files))
     os.makedirs(frame_path, exist_ok=True)
     os.makedirs(os.path.join(frame_path, '0'), exist_ok=True)
     extract_face_from_fixed_num_frames(video, frame_path, num_frames=num_frames, device=device)
     args.data_path = frame_path
     args.batch_size = 32
     dataset_val = build_dataset(is_train=False, args=args)
@@ -295,7 +299,7 @@ def FSFM3C_video_detection(video):
         pin_memory=args.pin_mem,
         drop_last=False
     )
     frame_preds_list, video_y_pred_list = test_all(data_loader_val, model, device)
     return video_y_pred_list
@@ -303,20 +307,20 @@ def FSFM3C_video_detection(video):
 def FSFM3C_image_detection(image):
     model.to(device)
     files = os.listdir(FRAME_SAVE_PATH)
     num_files = len(files)
-    frame_path = os.path.join(FRAME_SAVE_PATH, str(num_files))
     os.makedirs(frame_path, exist_ok=True)
     os.makedirs(os.path.join(frame_path, '0'), exist_ok=True)
     save_img_name = f"frame_0.png"
     img = extract_face(image)
     if img is None:
         return ['Invalid Input']
     img = img.resize((224, 224), Image.BICUBIC)
     img.save(os.path.join(frame_path, '0', save_img_name))
     args.data_path = frame_path
     args.batch_size = 1
     dataset_val = build_dataset(is_train=False, args=args)
@@ -328,7 +332,7 @@ def FSFM3C_image_detection(image):
         pin_memory=args.pin_mem,
         drop_last=False
     )
     frame_preds_list, video_y_pred_list = test_all(data_loader_val, model, device)
     return video_y_pred_list
@@ -336,7 +340,8 @@ def FSFM3C_image_detection(image):
 # WebUI
 with gr.Blocks() as demo:
-    gr.HTML("<h1 style='text-align: center;'>🦱 Real Facial Image&Video Detection <br> Against Face Forgery and Spoofing (Deepfake/Diffusion/Presentation-attacks)</h1>")
     gr.Markdown("### -Powered by the fine-tuned model that is pre-trained from [FSFM-3C](https://fsfm-3c.github.io/)")
     gr.Markdown("### Release:")
@@ -346,25 +351,26 @@ with gr.Blocks() as demo:
                 "<b>Notes:</b> Performance is limited because no any optimization of data, models, hyperparameters, etc. is done for downstream tasks. <br>"
                 "- </b>(TODO):</b> Update practical models, and optimized interfaces, and provide more functions such as visualizations, a unified detector, and multi-modal diagnosis.")
-    gr.Markdown("> Please provide an <b>image</b> or a <b>video (<100s </b>, default to uniform sampling 32 frames)</b> and </b>select the model</b> for detection. <br>"
-                "- <b>DfD-Checkpoint_Fine-tuned_on_FF++</b> for deepfake detection, FSFM VIT-B fine-tuned on the FF++_c23 dataset (train&val sets of  4 manipulations, 32 frames per video) <br>"
-                "- <b>FAS-Checkpoint_Fine-tuned_on_MCIO</b> for face anti-spoofing, FSFM VIT-B fine-tuned on the MCIO datasets (2 frames per video) ")
     with gr.Column():
         ckpt_select_dropdown = gr.Dropdown(
-            label = "Select the Model Checkpoint for Detection (🖱️ below)",
-            choices = ['choose from here'] + CKPT_LIST + ['continuously updating...'],
-            multiselect = False,
-            value = 'choose from here',
-            interactive = True,
-            )
         with gr.Row(elem_classes="center-align"):
             with gr.Column(scale=5):
                 gr.Markdown(
                     "## Image Detection"
                 )
                 image = gr.Image(label="Upload/Capture/Paste your image", type="pil")
-                image_submit_btn = gr.Button("Submit")
                 output_results_image = gr.Textbox(label="Detection Result")
             with gr.Column(scale=5):
                 gr.Markdown(
@@ -390,7 +396,6 @@ with gr.Blocks() as demo:
         outputs=[ckpt_select_dropdown],
     )
 if __name__ == "__main__":
     gr.close_all()
     demo.queue()

 # pip uninstall nvidia_cublas_cu11
 import sys
 sys.path.append('..')
 import os
 os.system(f'pip install dlib')
 import torch
 import numpy as np
 import dlib
 from huggingface_hub import hf_hub_download
 P = os.path.abspath(__file__)
 FRAME_SAVE_PATH = os.path.join(P[:-6], 'frame')
 CKPT_SAVE_PATH = os.path.join(P[:-6], 'checkpoints')
 CKPT_LIST = ['DfD-Checkpoint_Fine-tuned_on_FF++',
              'FAS-Checkpoint_Fine-tuned_on_MCIO']
 CKPT_NAME = {'DfD-Checkpoint_Fine-tuned_on_FF++': 'finetuned_models/FF++_c23_32frames/checkpoint-min_val_loss.pth',
+             'FAS-Checkpoint_Fine-tuned_on_MCIO': 'finetuned_models/MCIO_protocol/Both_MCIO/checkpoint-min_val_loss.pth'}
 os.makedirs(FRAME_SAVE_PATH, exist_ok=True)
 os.makedirs(CKPT_SAVE_PATH, exist_ok=True)
     global_pool=args.global_pool,
 )
 def load_model(ckpt):
+    if ckpt == 'choose from here' or 'continuously updating...':
         return gr.update()
     args.resume = os.path.join(CKPT_SAVE_PATH, ckpt)
     if os.path.isfile(args.resume) == False:
         hf_hub_download(local_dir=CKPT_SAVE_PATH,
+                        repo_id='Wolowolo/fsfm-3c/' + CKPT_NAME[ckpt],
                         filename=ckpt)
     checkpoint = torch.load(args.resume, map_location='cpu')
     model.load_state_dict(checkpoint['model'])
         return Image.fromarray(cropped_face)
     else:
         return None
 def get_frame_index_uniform_sample(total_frame_num, extract_frame_num):
     """Pick evenly spaced frame indices spanning a whole video.

     Args:
         total_frame_num: Number of frames in the video.
         extract_frame_num: How many indices to return.

     Returns:
         A list of ``extract_frame_num`` Python ints running from 0 to
         ``total_frame_num - 1`` inclusive. Indices may repeat when more
         samples are requested than there are frames.
     """
     sampled_indices = np.linspace(
         start=0,
         stop=total_frame_num - 1,
         num=extract_frame_num,
         dtype=int,
     )
     return sampled_indices.tolist()
 import cv2
 def extract_face_from_fixed_num_frames(src_video, dst_path, num_frames=None, device='cpu'):
     """
     1) extract specific num of frames from videos in [1st(index 0) frame, last frame] with uniform sample interval
     for frame_index in frame_indices:
         video_capture.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
         ret, frame = video_capture.read()
+        image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
         img = extract_face(image)
         if img == None:
             continue
         if not ret:
             continue
         save_img_name = f"frame_{frame_index}.png"
         img.save(os.path.join(dst_path, '0', save_img_name))
         # cv2.imwrite(os.path.join(dst_path, '0', save_img_name), frame)
     video_capture.release()
     # cv2.destroyAllWindows()
 def FSFM3C_video_detection(video):
     model.to(device)
     # extract frames
     num_frames = 32
     files = os.listdir(FRAME_SAVE_PATH)
     num_files = len(files)
     frame_path = os.path.join(FRAME_SAVE_PATH, str(num_files))
     os.makedirs(frame_path, exist_ok=True)
     os.makedirs(os.path.join(frame_path, '0'), exist_ok=True)
     extract_face_from_fixed_num_frames(video, frame_path, num_frames=num_frames, device=device)
     args.data_path = frame_path
     args.batch_size = 32
     dataset_val = build_dataset(is_train=False, args=args)
         pin_memory=args.pin_mem,
         drop_last=False
     )
     frame_preds_list, video_y_pred_list = test_all(data_loader_val, model, device)
     return video_y_pred_list
 def FSFM3C_image_detection(image):
     model.to(device)
     files = os.listdir(FRAME_SAVE_PATH)
     num_files = len(files)
+    frame_path = os.path.join(FRAME_SAVE_PATH, str(num_files))
     os.makedirs(frame_path, exist_ok=True)
     os.makedirs(os.path.join(frame_path, '0'), exist_ok=True)
     save_img_name = f"frame_0.png"
     img = extract_face(image)
     if img is None:
         return ['Invalid Input']
     img = img.resize((224, 224), Image.BICUBIC)
     img.save(os.path.join(frame_path, '0', save_img_name))
     args.data_path = frame_path
     args.batch_size = 1
     dataset_val = build_dataset(is_train=False, args=args)
         pin_memory=args.pin_mem,
         drop_last=False
     )
     frame_preds_list, video_y_pred_list = test_all(data_loader_val, model, device)
     return video_y_pred_list
 # WebUI
 with gr.Blocks() as demo:
+    gr.HTML(
+        "<h1 style='text-align: center;'>🦱 Real Facial Image&Video Detection <br> Against Face Forgery and Spoofing (Deepfake/Diffusion/Presentation-attacks)</h1>")
     gr.Markdown("### -Powered by the fine-tuned model that is pre-trained from [FSFM-3C](https://fsfm-3c.github.io/)")
     gr.Markdown("### Release:")
                 "<b>Notes:</b> Performance is limited because no any optimization of data, models, hyperparameters, etc. is done for downstream tasks. <br>"
                 "- </b>(TODO):</b> Update practical models, and optimized interfaces, and provide more functions such as visualizations, a unified detector, and multi-modal diagnosis.")
+    gr.Markdown(
+        "> Please provide an <b>image</b> or a <b>video (<100s </b>, default to uniform sampling 32 frames)</b> and </b>select the model</b> for detection. <br>"
+        "- <b>DfD-Checkpoint_Fine-tuned_on_FF++</b> for deepfake detection, FSFM VIT-B fine-tuned on the FF++_c23 dataset (train&val sets of  4 manipulations, 32 frames per video) <br>"
+        "- <b>FAS-Checkpoint_Fine-tuned_on_MCIO</b> for face anti-spoofing, FSFM VIT-B fine-tuned on the MCIO datasets (2 frames per video) ")
     with gr.Column():
         ckpt_select_dropdown = gr.Dropdown(
+            label="Select the Model Checkpoint for Detection (🖱️ below)",
+            choices=['choose from here'] + CKPT_LIST + ['continuously updating...'],
+            multiselect=False,
+            value='choose from here',
+            interactive=True,
+        )
         with gr.Row(elem_classes="center-align"):
             with gr.Column(scale=5):
                 gr.Markdown(
                     "## Image Detection"
                 )
                 image = gr.Image(label="Upload/Capture/Paste your image", type="pil")
+                image_submit_btn = gr.Button("Submit")
                 output_results_image = gr.Textbox(label="Detection Result")
             with gr.Column(scale=5):
                 gr.Markdown(
         outputs=[ckpt_select_dropdown],
     )
 if __name__ == "__main__":
     gr.close_all()
     demo.queue()