Peleck committed on
Commit
fa8453f
·
1 Parent(s): 7592833
.gitignore ADDED
@@ -0,0 +1,4 @@
1
+
2
+ *.pth
3
+ *.onnx
4
+ *.pyc
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
  title: Swap-mukham WIP
3
- emoji: 🔥
4
- colorFrom: yellow
5
- colorTo: red
6
  sdk: gradio
7
  sdk_version: 3.40.1
8
  app_file: app.py
 
1
  ---
2
  title: Swap-mukham WIP
3
+ emoji: 😊
4
+ colorFrom: blue
5
+ colorTo: black
6
  sdk: gradio
7
  sdk_version: 3.40.1
8
  app_file: app.py
app.py ADDED
@@ -0,0 +1,1134 @@
1
+ import os
2
+ import cv2
3
+ import time
4
+ import shutil
5
+ import base64
6
+ import datetime
7
+ import argparse
8
+ import numpy as np
9
+ import gradio as gr
10
+ from tqdm import tqdm
11
+ import concurrent.futures
12
+
13
+ import threading
14
+ cv_reader_lock = threading.Lock()
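+ # NOTE: cv2.VideoCapture is not thread-safe; cv_reader_lock serializes frame seeks/reads across the worker threads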
15
+
16
+ ## ------------------------------ USER ARGS ------------------------------
17
+
18
+ parser = argparse.ArgumentParser(description="Swap-Mukham Face Swapper")
19
+ parser.add_argument("--out_dir", help="Default Output directory", default=os.getcwd())
20
+ parser.add_argument("--max_threads", type=int, help="Max num of threads to use", default=2)
21
+ parser.add_argument("--colab", action="store_true", help="Colab mode", default=False)
22
+ parser.add_argument("--cpu", action="store_true", help="Enable cpu mode", default=False)
23
+ parser.add_argument("--prefer_text_widget", action="store_true", help="Replaces target video widget with text widget", default=False)
24
+ user_args = parser.parse_args()
25
+
26
+ USE_CPU = True  # hard-coded to CPU; the --cpu flag above is effectively ignored
27
+
28
+ if not USE_CPU:
29
+ import torch
30
+
31
+ import default_paths as dp
32
+ import global_variables as gv
33
+
34
+ from swap_mukham import SwapMukham
35
+ from nsfw_checker import NSFWChecker
36
+
37
+ from face_parsing import mask_regions_to_list
38
+
39
+ from utils.device import get_device_and_provider, device_types_list
40
+ from utils.image import (
41
+ image_mask_overlay,
42
+ resize_image_by_resolution,
43
+ resolution_map,
44
+ fast_pil_encode,
45
+ fast_numpy_encode,
46
+ get_crf_for_resolution,
47
+ )
48
+ from utils.io import (
49
+ open_directory,
50
+ get_images_from_directory,
51
+ copy_files_to_directory,
52
+ create_directory,
53
+ get_single_video_frame,
54
+ ffmpeg_merge_frames,
55
+ ffmpeg_mux_audio,
56
+ add_datetime_to_filename,
57
+ )
58
+
59
+ gr.processing_utils.encode_pil_to_base64 = fast_pil_encode
60
+ gr.processing_utils.encode_array_to_base64 = fast_numpy_encode
61
+
62
+ gv.USE_COLAB = user_args.colab
63
+ gv.MAX_THREADS = user_args.max_threads
64
+ gv.DEFAULT_OUTPUT_PATH = user_args.out_dir
65
+
66
+ PREFER_TEXT_WIDGET = user_args.prefer_text_widget
67
+
68
+ WORKSPACE = None
69
+ OUTPUT_FILE = None
70
+
71
+ preferred_device = "cpu" if USE_CPU else "cuda"
72
+ DEVICE_LIST = device_types_list
73
+ DEVICE, PROVIDER, OPTIONS = get_device_and_provider(device=preferred_device)
74
+ SWAP_MUKHAM = SwapMukham(device=DEVICE)
75
+
76
+ IS_RUNNING = False
77
+ CURRENT_FRAME = None
78
+ COLLECTED_FACES = []
79
+ FOREGROUND_MASK_DICT = {}
80
+ NSFW_CACHE = {}
81
+
82
+
83
+ ## ------------------------------ MAIN PROCESS ------------------------------
84
+
85
+
86
+ def process(
87
+ test_mode,
88
+ target_type,
89
+ image_path,
90
+ video_path,
91
+ directory_path,
92
+ source_path,
93
+ use_foreground_mask,
94
+ img_fg_mask,
95
+ fg_mask_softness,
96
+ output_path,
97
+ output_name,
98
+ use_datetime_suffix,
99
+ sequence_output_format,
100
+ keep_output_sequence,
101
+ swap_condition,
102
+ age,
103
+ distance,
104
+ face_enhancer_name,
105
+ face_upscaler_opacity,
106
+ use_face_parsing,
107
+ parse_from_target,
108
+ mask_regions,
109
+ mask_blur_amount,
110
+ mask_erode_amount,
111
+ swap_iteration,
112
+ face_scale,
113
+ use_laplacian_blending,
114
+ crop_top,
115
+ crop_bott,
116
+ crop_left,
117
+ crop_right,
118
+ current_idx,
119
+ number_of_threads,
120
+ use_frame_selection,
121
+ frame_selection_ranges,
122
+ video_quality,
123
+ face_detection_condition,
124
+ face_detection_size,
125
+ face_detection_threshold,
126
+ averaging_method,
127
+ progress=gr.Progress(track_tqdm=True),
128
+ *specifics,
129
+ ):
130
+ global WORKSPACE
131
+ global OUTPUT_FILE
132
+ global PREVIEW
133
+ WORKSPACE, OUTPUT_FILE, PREVIEW = None, None, None
134
+
135
+ global IS_RUNNING
136
+ IS_RUNNING = True
137
+
138
+ ## ------------------------------ GUI UPDATE FUNC ------------------------------
139
+ def ui_before():
140
+ return (
141
+ gr.update(visible=True, value=None),
142
+ gr.update(interactive=False),
143
+ gr.update(interactive=False),
144
+ gr.update(visible=False, value=None),
145
+ )
146
+
147
+ def ui_after():
148
+ return (
149
+ gr.update(visible=True, value=PREVIEW),
150
+ gr.update(interactive=True),
151
+ gr.update(interactive=True),
152
+ gr.update(visible=False, value=None),
153
+ )
154
+
155
+ def ui_after_vid():
156
+ return (
157
+ gr.update(visible=False),
158
+ gr.update(interactive=True),
159
+ gr.update(interactive=True),
160
+ gr.update(value=OUTPUT_FILE, visible=True),
161
+ )
162
+
163
+ if not test_mode:
164
+ yield ui_before() # resets ui preview
165
+ progress(0, desc="Processing")
166
+
167
+ start_time = time.time()
168
+ total_exec_time = lambda start_time: divmod(time.time() - start_time, 60)
169
+ get_finsh_text = (
170
+ lambda start_time: f"Completed in {int(total_exec_time(start_time)[0])} min {int(total_exec_time(start_time)[1])} sec."
171
+ )
172
+
173
+ ## ------------------------------ PREPARE INPUTS ------------------------------
174
+
175
+ if use_datetime_suffix:
176
+ output_name = add_datetime_to_filename(output_name)
177
+
178
+ mask_regions = mask_regions_to_list(mask_regions)
179
+
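+ # *specifics packs the dynamically created widgets: NUM_OF_SRC_SPECIFIC source-file lists followed by NUM_OF_SRC_SPECIFIC specific-face images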
180
+ specifics = list(specifics)
181
+ half = len(specifics) // 2
182
+ if swap_condition == "specific face":
183
+ source_specifics = [
184
+ ([s.name for s in src] if src is not None else None, spc) for src, spc in zip(specifics[:half], specifics[half:])
185
+ ]
186
+ else:
187
+ source_paths = [i.name for i in source_path]
188
+ source_specifics = [(source_paths, None)]
189
+
190
+ if crop_top > crop_bott:
191
+ crop_top, crop_bott = crop_bott, crop_top
192
+ if crop_left > crop_right:
193
+ crop_left, crop_right = crop_right, crop_left
194
+ crop_mask = (crop_top, 511 - crop_bott, crop_left, 511 - crop_right)
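+ # the crop sliders run 0-511 over the aligned face crop; bottom/right are converted into margins measured from the opposite edge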
195
+
196
+ input_args = {
197
+ "similarity": distance,
198
+ "age": age,
199
+ "face_scale": face_scale,
200
+ "num_of_pass": swap_iteration,
201
+ "face_upscaler_opacity": face_upscaler_opacity,
202
+ "mask_crop_values": crop_mask,
203
+ "mask_erode_amount": mask_erode_amount,
204
+ "mask_blur_amount": mask_blur_amount,
205
+ "use_laplacian_blending": use_laplacian_blending,
206
+ "swap_condition": swap_condition,
207
+ "face_parse_regions": mask_regions,
208
+ "use_face_parsing": use_face_parsing,
209
+ "face_detection_size": [int(face_detection_size), int(face_detection_size)],
210
+ "face_detection_threshold": face_detection_threshold,
211
+ "face_detection_condition": face_detection_condition,
212
+ "parse_from_target": parse_from_target,
213
+ "averaging_method": averaging_method,
214
+ }
215
+
216
+ SWAP_MUKHAM.set_values(input_args)
217
+ if (
218
+ SWAP_MUKHAM.face_upscaler is None
219
+ or SWAP_MUKHAM.face_upscaler_name != face_enhancer_name
220
+ ):
221
+ SWAP_MUKHAM.load_face_upscaler(face_enhancer_name, device=DEVICE)
222
+ if SWAP_MUKHAM.face_parser is None and use_face_parsing:
223
+ SWAP_MUKHAM.load_face_parser(device=DEVICE)
224
+ SWAP_MUKHAM.analyse_source_faces(source_specifics)
225
+
226
+ mask = None
227
+ if use_foreground_mask and img_fg_mask is not None:
228
+ mask = img_fg_mask.get("mask", None)
229
+ mask = cv2.cvtColor(mask, cv2.COLOR_BGRA2RGB)
230
+ if fg_mask_softness > 0:
231
+ mask = cv2.blur(mask, (int(fg_mask_softness), int(fg_mask_softness)))
232
+ mask = mask.astype("float32") / 255.0
233
+
234
+ def nsfw_assertion(is_nsfw):
235
+ if is_nsfw:
236
+ message = "NSFW content detected!"
237
+ gr.Info(message)
238
+ assert not is_nsfw, message
239
+
240
+ ## ------------------------------ IMAGE ------------------------------
241
+
242
+ if target_type == "Image" and not test_mode:
243
+ target = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
244
+
245
+ is_nsfw = SWAP_MUKHAM.nsfw_detector.check_image(target)
246
+ nsfw_assertion(is_nsfw)
247
+
248
+ output = SWAP_MUKHAM.process_frame(
249
+ [target, mask]
250
+ )
251
+ output_file = os.path.join(output_path, output_name + ".png")
252
+ cv2.imwrite(output_file, output)
253
+
254
+ PREVIEW = output
255
+ OUTPUT_FILE = output_file
256
+ WORKSPACE = output_path
257
+
258
+ gr.Info(get_finsh_text(start_time))
259
+ yield ui_after()
260
+
261
+ ## ------------------------------ VIDEO ------------------------------
262
+
263
+ elif target_type == "Video" and not test_mode:
264
+ video_path = video_path.replace('"', '').strip()
265
+
266
+ if video_path in NSFW_CACHE.keys():
267
+ nsfw_assertion(NSFW_CACHE.get(video_path))
268
+ else:
269
+ is_nsfw = SWAP_MUKHAM.nsfw_detector.check_video(video_path)
270
+ NSFW_CACHE[video_path] = is_nsfw
271
+ nsfw_assertion(is_nsfw)
272
+
273
+ temp_path = os.path.join(output_path, output_name)
274
+ os.makedirs(temp_path, exist_ok=True)
275
+
276
+ cap = cv2.VideoCapture(video_path)
277
+ total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
278
+ width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
279
+ height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
280
+ fps = cap.get(cv2.CAP_PROP_FPS)
281
+
282
+ is_in_range = lambda idx: any([int(rng[0]) <= idx <= int(rng[1]) for rng in frame_selection_ranges]) if use_frame_selection else True
283
+
284
+ print("[ Swapping process started ]")
285
+
286
+ def swap_video_func(frame_index):
287
+ if IS_RUNNING:
288
+ with cv_reader_lock:
289
+ cap.set(cv2.CAP_PROP_POS_FRAMES, int(frame_index))
290
+ valid_frame, frame = cap.read()
291
+
292
+ if valid_frame:
293
+ if is_in_range(frame_index):
294
+ mask = FOREGROUND_MASK_DICT.get(frame_index, None) if use_foreground_mask else None
295
+ output = SWAP_MUKHAM.process_frame([frame, mask])
296
+ else:
297
+ output = frame
298
+ frame_path = os.path.join(temp_path, f"frame_{frame_index}.{sequence_output_format}")
299
+ if sequence_output_format == "jpg":
300
+ cv2.imwrite(frame_path, output, [int(cv2.IMWRITE_JPEG_QUALITY), 100])
301
+ else:
302
+ cv2.imwrite(frame_path, output)
303
+
304
+ with concurrent.futures.ThreadPoolExecutor(max_workers=number_of_threads) as executor:
305
+ futures = [executor.submit(swap_video_func, idx) for idx in range(total_frames)]
306
+
307
+ with tqdm(total=total_frames, desc="Processing") as pbar:
308
+ for future in concurrent.futures.as_completed(futures):
309
+ future.result()
310
+ pbar.update(1)
311
+
312
+ cap.release()
313
+
314
+ if IS_RUNNING:
315
+ print("[ Merging image sequence ]")
316
+ progress(0, desc="Merging image sequence")
317
+ WORKSPACE = output_path
318
+ out_without_audio = output_name + "_without_audio" + ".mp4"
319
+ destination = os.path.join(output_path, out_without_audio)
320
+ crf = get_crf_for_resolution(max(width,height), video_quality)
321
+ ret, destination = ffmpeg_merge_frames(
322
+ temp_path, f"frame_%d.{sequence_output_format}", destination, fps=fps, crf=crf, ffmpeg_path=dp.FFMPEG_PATH
323
+ )
324
+ OUTPUT_FILE = destination
325
+
326
+ if ret:
327
+ print("[ Merging audio ]")
328
+ progress(0, desc="Merging audio")
329
+ OUTPUT_FILE = destination
330
+ out_with_audio = out_without_audio.replace("_without_audio", "")
331
+ _ret, _destination = ffmpeg_mux_audio(
332
+ video_path, out_without_audio, out_with_audio, ffmpeg_path=dp.FFMPEG_PATH
333
+ )
334
+
335
+ if _ret:
336
+ OUTPUT_FILE = _destination
337
+ os.remove(out_without_audio)
338
+
339
+ if os.path.exists(temp_path) and not keep_output_sequence:
340
+ print("[ Removing temporary files ]")
341
+ progress(0, desc="Removing temporary files")
342
+ shutil.rmtree(temp_path)
343
+
344
+ finish_text = get_finsh_text(start_time)
345
+ print(f"[ {finish_text} ]")
346
+ gr.Info(finish_text)
347
+ yield ui_after_vid()
348
+
349
+ ## ------------------------------ DIRECTORY ------------------------------
350
+
351
+ elif target_type == "Directory" and not test_mode:
352
+ temp_path = os.path.join(output_path, output_name)
353
+ temp_path = create_directory(temp_path, remove_existing=True)
354
+
355
+ directory_path = directory_path.replace('"', '').strip()
356
+ image_paths = get_images_from_directory(directory_path)
357
+
358
+ is_nsfw = SWAP_MUKHAM.nsfw_detector.check_image_paths(image_paths)
359
+ nsfw_assertion(is_nsfw)
360
+
361
+ new_image_paths = copy_files_to_directory(image_paths, temp_path)
362
+
363
+ def swap_func(img_path):
364
+ if IS_RUNNING:
365
+ frame = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
366
+ output = SWAP_MUKHAM.process_frame([frame, None])
367
+ cv2.imwrite(img_path, output)
368
+
369
+ with concurrent.futures.ThreadPoolExecutor(max_workers=number_of_threads) as executor:
370
+ futures = [executor.submit(swap_func, img_path) for img_path in new_image_paths]
371
+
372
+ with tqdm(total=len(new_image_paths), desc="Processing") as pbar:
373
+ for future in concurrent.futures.as_completed(futures):
374
+ future.result()
375
+ pbar.update(1)
376
+
377
+ PREVIEW = cv2.imread(new_image_paths[-1])
378
+ WORKSPACE = temp_path
379
+ OUTPUT_FILE = new_image_paths[-1]
380
+
381
+ gr.Info(get_finsh_text(start_time))
382
+ yield ui_after()
383
+
384
+ ## ------------------------------ STREAM ------------------------------
385
+
386
+ elif target_type == "Stream" and not test_mode:
387
+ pass
388
+
389
+ ## ------------------------------ TEST ------------------------------
390
+
391
+ if test_mode and target_type == "Video":
392
+ mask = None
393
+ if use_foreground_mask:  # match the video path: only fetch a painted foreground mask when the toggle is enabled
394
+ mask = FOREGROUND_MASK_DICT.get(current_idx, None)
395
+ if CURRENT_FRAME is not None and isinstance(CURRENT_FRAME, np.ndarray):
396
+ PREVIEW = SWAP_MUKHAM.process_frame(
397
+ [CURRENT_FRAME[:, :, ::-1], mask]
398
+ )
399
+ gr.Info(get_finsh_text(start_time))
400
+ yield ui_after()
401
+
402
+
403
+ ## ------------------------------ GRADIO GUI ------------------------------
404
+
405
+ css = """
406
+
407
+ div.gradio-container{
408
+ max-width: unset !important;
409
+ }
410
+
411
+ footer{
412
+ display:none !important
413
+ }
414
+
415
+ #slider_row {
416
+ display: flex;
417
+ flex-wrap: wrap;
418
+ justify-content: space-between;
419
+ }
420
+
421
+ #refresh_slider {
422
+ flex: 0 1 20%;
423
+ display: flex;
424
+ align-items: center;
425
+ }
426
+
427
+ #frame_slider {
428
+ flex: 1 0 80%;
429
+ display: flex;
430
+ align-items: center;
431
+ }
432
+
433
+ """
434
+
435
+ WIDGET_PREVIEW_HEIGHT = 450
436
+
437
+ with gr.Blocks(css=css, theme=gr.themes.Default()) as interface:
438
+ gr.Markdown("# πŸ—Ώ Swap Mukham")
439
+ gr.Markdown("### Single image face swapper")
440
+ with gr.Row():
441
+ with gr.Row():
442
+ with gr.Column(scale=0.35):
443
+ with gr.Tabs():
444
+ with gr.TabItem("📄 Input"):
445
+ swap_condition = gr.Dropdown(
446
+ gv.FACE_DETECT_CONDITIONS,
447
+ info="Choose which face or faces in the target image to swap.",
448
+ multiselect=False,
449
+ show_label=False,
450
+ value=gv.FACE_DETECT_CONDITIONS[0],
451
+ interactive=True,
452
+ )
453
+ age = gr.Number(
454
+ value=25, label="Value", interactive=True, visible=False
455
+ )
456
+
457
+ ## ------------------------------ SOURCE IMAGE ------------------------------
458
+
459
+ source_image_input = gr.Files(
460
+ label="Source face", type="file", interactive=True,
461
+ )
462
+
463
+ ## ------------------------------ SOURCE SPECIFIC ------------------------------
464
+
465
+ with gr.Box(visible=False) as specific_face:
466
+ for i in range(gv.NUM_OF_SRC_SPECIFIC):
467
+ idx = i + 1
468
+ code = "\n"
469
+ code += f"with gr.Tab(label='{idx}'):"
470
+ code += "\n\twith gr.Row():"
471
+ code += f"\n\t\tsrc{idx} = gr.Files(interactive=True, type='file', label='Source Face {idx}')"
472
+ code += f"\n\t\ttrg{idx} = gr.Image(interactive=True, type='numpy', label='Specific Face {idx}')"
473
+ exec(code)
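+ # the src1..srcN / trg1..trgN widgets are generated via exec so the tab count follows gv.NUM_OF_SRC_SPECIFIC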
474
+
475
+ ## ------------------------------ TARGET TYPE ------------------------------
476
+
477
+ with gr.Group():
478
+ target_type = gr.Radio(
479
+ ["Image", "Video", "Directory"],
480
+ label="Target Type",
481
+ value="Video",
482
+ )
483
+
484
+ ## ------------------------------ TARGET IMAGE ------------------------------
485
+
486
+ with gr.Box(visible=False) as input_image_group:
487
+ target_image_input = gr.Image(
488
+ label="Target Image",
489
+ interactive=True,
490
+ type="filepath",
491
+ height=200
492
+ )
493
+
494
+ ## ------------------------------ TARGET VIDEO ------------------------------
495
+
496
+ with gr.Box(visible=True) as input_video_group:
497
+ with gr.Column():
498
+ video_widget = gr.Text if PREFER_TEXT_WIDGET else gr.Video
499
+ video_input = video_widget(
500
+ label="Target Video", interactive=True,
501
+ )
502
+
503
+ ## ------------------------------ FRAME SELECTION ------------------------------
504
+
505
+ with gr.Accordion("Frame Selection", open=False):
506
+ use_frame_selection = gr.Checkbox(
507
+ label="Use frame selection", value=False, interactive=True,
508
+ )
509
+ frame_selection_ranges = gr.Numpy(
510
+ headers=["Start Frame", "End Frame"],
511
+ datatype=["number", "number"],
512
+ row_count=1,
513
+ col_count=(2, "fixed"),
514
+ interactive=True
515
+ )
516
+
517
+ ## ------------------------------ TARGET DIRECTORY ------------------------------
518
+
519
+ with gr.Box(visible=False) as input_directory_group:
520
+ directory_input = gr.Text(
521
+ label="Target Image Directory", interactive=True
522
+ )
523
+
524
+ ## ------------------------------ TAB MODEL ------------------------------
525
+
526
+ with gr.TabItem("🎚️ Model"):
527
+ with gr.Accordion("Detection", open=False):
528
+ face_detection_condition = gr.Dropdown(
529
+ gv.SINGLE_FACE_DETECT_CONDITIONS,
530
+ label="Condition",
531
+ value=gv.DETECT_CONDITION,
532
+ interactive=True,
533
+ info="This condition is only used when multiple faces are detected on source or specific image.",
534
+ )
535
+ face_detection_size = gr.Number(
536
+ label="Detection Size",
537
+ value=gv.DETECT_SIZE,
538
+ interactive=True,
539
+ )
540
+ face_detection_threshold = gr.Number(
541
+ label="Detection Threshold",
542
+ value=gv.DETECT_THRESHOLD,
543
+ interactive=True,
544
+ )
545
+ face_scale = gr.Slider(
546
+ label="Landmark Scale",
547
+ minimum=0,
548
+ maximum=2,
549
+ value=1,
550
+ interactive=True,
551
+ )
552
+ with gr.Accordion("Embedding/Recognition", open=True):
553
+ averaging_method = gr.Dropdown(
554
+ gv.AVERAGING_METHODS,
555
+ label="Averaging Method",
556
+ value=gv.AVERAGING_METHOD,
557
+ interactive=True,
558
+ )
559
+ distance_slider = gr.Slider(
560
+ minimum=0,
561
+ maximum=2,
562
+ value=0.65,
563
+ interactive=True,
564
+ label="Specific-Target Distance",
565
+ )
566
+ with gr.Accordion("Swapper", open=True):
567
+ with gr.Row():
568
+ swap_iteration = gr.Slider(
569
+ label="Swap Iteration",
570
+ minimum=1,
571
+ maximum=4,
572
+ value=1,
573
+ step=1,
574
+ interactive=True,
575
+ )
576
+
577
+ ## ------------------------------ TAB POST-PROCESS ------------------------------
578
+
579
+ with gr.TabItem("🪄 Post-Process"):
580
+ with gr.Row():
581
+ face_enhancer_name = gr.Dropdown(
582
+ gv.FACE_ENHANCER_LIST,
583
+ label="Face Enhancer",
584
+ value="NONE",
585
+ multiselect=False,
586
+ interactive=True,
587
+ )
588
+ face_upscaler_opacity = gr.Slider(
589
+ label="Opacity",
590
+ minimum=0,
591
+ maximum=1,
592
+ value=1,
593
+ step=0.001,
594
+ interactive=True,
595
+ )
596
+
597
+ with gr.Accordion("Face Mask", open=False):
598
+ with gr.Group():
599
+ with gr.Row():
600
+ use_face_parsing_mask = gr.Checkbox(
601
+ label="Enable Face Parsing",
602
+ value=False,
603
+ interactive=True,
604
+ )
605
+ parse_from_target = gr.Checkbox(
606
+ label="Parse from target",
607
+ value=False,
608
+ interactive=True,
609
+ )
610
+ mask_regions = gr.Dropdown(
611
+ gv.MASK_REGIONS,
612
+ value=gv.MASK_REGIONS_DEFAULT,
613
+ multiselect=True,
614
+ label="Include",
615
+ interactive=True,
616
+ )
617
+
618
+ with gr.Accordion("Crop Face Bounding-Box", open=False):
619
+ with gr.Group():
620
+ with gr.Row():
621
+ crop_top = gr.Slider(
622
+ label="Top",
623
+ minimum=0,
624
+ maximum=511,
625
+ value=0,
626
+ step=1,
627
+ interactive=True,
628
+ )
629
+ crop_bott = gr.Slider(
630
+ label="Bottom",
631
+ minimum=0,
632
+ maximum=511,
633
+ value=511,
634
+ step=1,
635
+ interactive=True,
636
+ )
637
+ with gr.Row():
638
+ crop_left = gr.Slider(
639
+ label="Left",
640
+ minimum=0,
641
+ maximum=511,
642
+ value=0,
643
+ step=1,
644
+ interactive=True,
645
+ )
646
+ crop_right = gr.Slider(
647
+ label="Right",
648
+ minimum=0,
649
+ maximum=511,
650
+ value=511,
651
+ step=1,
652
+ interactive=True,
653
+ )
654
+
655
+ with gr.Row():
656
+ mask_erode_amount = gr.Slider(
657
+ label="Mask Erode",
658
+ minimum=0,
659
+ maximum=1,
660
+ value=gv.MASK_ERODE_AMOUNT,
661
+ step=0.001,
662
+ interactive=True,
663
+ )
664
+
665
+ mask_blur_amount = gr.Slider(
666
+ label="Mask Blur",
667
+ minimum=0,
668
+ maximum=1,
669
+ value=gv.MASK_BLUR_AMOUNT,
670
+ step=0.001,
671
+ interactive=True,
672
+ )
673
+
674
+ use_laplacian_blending = gr.Checkbox(
675
+ label="Laplacian Blending",
676
+ value=True,
677
+ interactive=True,
678
+ )
679
+
680
+ ## ------------------------------ TAB OUTPUT ------------------------------
681
+
682
+ with gr.TabItem("📤 Output"):
683
+ output_directory = gr.Text(
684
+ label="Output Directory",
685
+ value=gv.DEFAULT_OUTPUT_PATH,
686
+ interactive=True,
687
+ )
688
+ with gr.Group():
689
+ output_name = gr.Text(
690
+ label="Output Name", value="Result", interactive=True
691
+ )
692
+ use_datetime_suffix = gr.Checkbox(
693
+ label="Suffix date-time", value=True, interactive=True
694
+ )
695
+ with gr.Accordion("Video settings", open=True):
696
+ with gr.Row():
697
+ sequence_output_format = gr.Dropdown(
698
+ ["jpg", "png"],
699
+ label="Sequence format",
700
+ value="jpg",
701
+ interactive=True,
702
+ )
703
+ video_quality = gr.Dropdown(
704
+ gv.VIDEO_QUALITY_LIST,
705
+ label="Quality",
706
+ value=gv.VIDEO_QUALITY,
707
+ interactive=True
708
+ )
709
+ keep_output_sequence = gr.Checkbox(
710
+ label="Keep output sequence", value=False, interactive=True
711
+ )
712
+
713
+ ## ------------------------------ TAB PERFORMANCE ------------------------------
714
+ with gr.TabItem("🛠️ Performance"):
715
+ preview_resolution = gr.Dropdown(
716
+ gv.RESOLUTIONS,
717
+ label="Preview Resolution",
718
+ value="Original",
719
+ interactive=True,
720
+ )
721
+ number_of_threads = gr.Number(
722
+ step=1,
723
+ interactive=True,
724
+ label="Max number of threads",
725
+ value=gv.MAX_THREADS,
726
+ minimum=1,
727
+ )
728
+ with gr.Box():
729
+ with gr.Column():
730
+ with gr.Row():
731
+ face_analyser_device = gr.Radio(
732
+ DEVICE_LIST,
733
+ label="Face detection & recognition",
734
+ value=DEVICE,
735
+ interactive=True,
736
+ )
737
+ face_analyser_device_submit = gr.Button("Apply")
738
+ with gr.Row():
739
+ face_swapper_device = gr.Radio(
740
+ DEVICE_LIST,
741
+ label="Face swapper",
742
+ value=DEVICE,
743
+ interactive=True,
744
+ )
745
+ face_swapper_device_submit = gr.Button("Apply")
746
+ with gr.Row():
747
+ face_parser_device = gr.Radio(
748
+ DEVICE_LIST,
749
+ label="Face parsing",
750
+ value=DEVICE,
751
+ interactive=True,
752
+ )
753
+ face_parser_device_submit = gr.Button("Apply")
754
+ with gr.Row():
755
+ face_upscaler_device = gr.Radio(
756
+ DEVICE_LIST,
757
+ label="Face upscaler",
758
+ value=DEVICE,
759
+ interactive=True,
760
+ )
761
+ face_upscaler_device_submit = gr.Button("Apply")
762
+
763
+ face_analyser_device_submit.click(
764
+ fn=lambda d: SWAP_MUKHAM.load_face_analyser(
765
+ device=d
766
+ ),
767
+ inputs=[face_analyser_device],
768
+ )
769
+ face_swapper_device_submit.click(
770
+ fn=lambda d: SWAP_MUKHAM.load_face_swapper(
771
+ device=d
772
+ ),
773
+ inputs=[face_swapper_device],
774
+ )
775
+ face_parser_device_submit.click(
776
+ fn=lambda d: SWAP_MUKHAM.load_face_parser(device=d),
777
+ inputs=[face_parser_device],
778
+ )
779
+ face_upscaler_device_submit.click(
780
+ fn=lambda n, d: SWAP_MUKHAM.load_face_upscaler(
781
+ n, device=d
782
+ ),
783
+ inputs=[face_enhancer_name, face_upscaler_device],
784
+ )
785
+
786
+ ## ------------------------------ SWAP, CANCEL, FRAME SLIDER ------------------------------
787
+
788
+ with gr.Column(scale=0.65):
789
+ with gr.Row():
790
+ swap_button = gr.Button("✨ Swap", variant="primary")
791
+ cancel_button = gr.Button("⛔ Cancel")
792
+ collect_faces = gr.Button("👨 Collect Faces")
793
+ test_swap = gr.Button("🧪 Test Swap")
794
+
795
+ with gr.Box() as frame_slider_box:
796
+ with gr.Row(elem_id="slider_row", equal_height=True):
797
+ set_slider_range_btn = gr.Button(
798
+ "Set Range", interactive=True, elem_id="refresh_slider"
799
+ )
800
+ frame_slider = gr.Slider(
801
+ label="Frame",
802
+ minimum=0,
803
+ maximum=1,
804
+ value=0,
805
+ step=1,
806
+ interactive=True,
807
+ elem_id="frame_slider",
808
+ )
809
+
810
+ ## ------------------------------ PREVIEW ------------------------------
811
+
812
+ with gr.Tabs():
813
+ with gr.TabItem("Preview"):
814
+
815
+ preview_image = gr.Image(
816
+ label="Preview", type="numpy", interactive=False, height=WIDGET_PREVIEW_HEIGHT,
817
+ )
818
+
819
+ preview_video = gr.Video(
820
+ label="Output", interactive=False, visible=False, height=WIDGET_PREVIEW_HEIGHT,
821
+ )
822
+ preview_enabled_text = gr.Markdown(
823
+ "Disable paint foreground to preview !", visible=False
824
+ )
825
+ with gr.Row():
826
+ output_directory_button = gr.Button(
827
+ "πŸ“‚", interactive=False, visible=not gv.USE_COLAB
828
+ )
829
+ output_video_button = gr.Button(
830
+ "🎬", interactive=False, visible=not gv.USE_COLAB
831
+ )
832
+
833
+ output_directory_button.click(
834
+ lambda: open_directory(path=WORKSPACE),
835
+ inputs=None,
836
+ outputs=None,
837
+ )
838
+ output_video_button.click(
839
+ lambda: open_directory(path=OUTPUT_FILE),
840
+ inputs=None,
841
+ outputs=None,
842
+ )
843
+
844
+ ## ------------------------------ FOREGROUND MASK ------------------------------
845
+
846
+ with gr.TabItem("Paint Foreground"):
847
+ with gr.Box() as fg_mask_group:
848
+ with gr.Row():
849
+ with gr.Row():
850
+ use_foreground_mask = gr.Checkbox(
851
+ label="Use foreground mask", value=False, interactive=True)
852
+ fg_mask_softness = gr.Slider(
853
+ label="Mask Softness",
854
+ minimum=0,
855
+ maximum=200,
856
+ value=1,
857
+ step=1,
858
+ interactive=True,
859
+ )
860
+ add_fg_mask_btn = gr.Button("Add", interactive=True)
861
+ del_fg_mask_btn = gr.Button("Del", interactive=True)
862
+ img_fg_mask = gr.Image(
863
+ label="Paint Mask",
864
+ tool="sketch",
865
+ interactive=True,
866
+ type="numpy",
867
+ height=WIDGET_PREVIEW_HEIGHT,
868
+ )
869
+
870
+ ## ------------------------------ COLLECT FACE ------------------------------
871
+
872
+ with gr.TabItem("Collected Faces"):
873
+ collected_faces = gr.Gallery(
874
+ label="Faces",
875
+ show_label=False,
876
+ elem_id="gallery",
877
+ columns=[6], rows=[6], object_fit="contain", height=WIDGET_PREVIEW_HEIGHT,
878
+ )
879
+
880
+ ## ------------------------------ FOOTER LINKS ------------------------------
881
+
882
+ with gr.Row(variant='panel'):
883
+ gr.HTML(
884
+ """
885
+ <div style="display: flex; flex-direction: row; justify-content: center;">
886
+ <h3 style="margin-right: 10px;"><a href="https://github.com/sponsors/harisreedhar" style="text-decoration: none;">🀝 Sponsor</a></h3>
887
+ <h3 style="margin-right: 10px;"><a href="https://github.com/harisreedhar/Swap-Mukham" style="text-decoration: none;">πŸ‘¨β€πŸ’» Source</a></h3>
888
+ <h3 style="margin-right: 10px;"><a href="https://github.com/harisreedhar/Swap-Mukham#disclaimer" style="text-decoration: none;">⚠️ Disclaimer</a></h3>
889
+ <h3 style="margin-right: 10px;"><a href="https://colab.research.google.com/github/harisreedhar/Swap-Mukham/blob/main/swap_mukham_colab.ipynb" style="text-decoration: none;">🌐 Colab</a></h3>
890
+ <h3><a href="https://github.com/harisreedhar/Swap-Mukham#acknowledgements" style="text-decoration: none;">πŸ€— Acknowledgements</a></h3>
891
+ </div>
892
+ """
893
+ )
894
+
895
+ ## ------------------------------ GRADIO EVENTS ------------------------------
896
+
897
+ def on_target_type_change(value):
898
+ visibility = {
899
+ "Image": (True, False, False, False, True, False, False, False),
900
+ "Video": (False, True, False, True, True, True, True, True),
901
+ "Directory": (False, False, True, False, False, False, False, False),
902
+ "Stream": (False, False, True, False, False, False, False, False),
903
+ }
904
+ return list(gr.update(visible=i) for i in visibility[value])
905
+
906
+ target_type.change(
907
+ on_target_type_change,
908
+ inputs=[target_type],
909
+ outputs=[
910
+ input_image_group,
911
+ input_video_group,
912
+ input_directory_group,
913
+ frame_slider_box,
914
+ fg_mask_group,
915
+ add_fg_mask_btn,
916
+ del_fg_mask_btn,
917
+ test_swap,
918
+ ],
919
+ )
920
+
921
+ target_image_input.change(
922
+ lambda inp: gr.update(value=inp),
923
+ inputs=[target_image_input],
924
+ outputs=[img_fg_mask]
925
+ )
926
+
927
+ def on_swap_condition_change(value):
928
+ visibility = {
929
+ "age less than": (True, False, True),
930
+ "age greater than": (True, False, True),
931
+ "specific face": (False, True, False),
932
+ }
933
+ return tuple(
934
+ gr.update(visible=i) for i in visibility.get(value, (False, False, True))
935
+ )
936
+
937
+ swap_condition.change(
938
+ on_swap_condition_change,
939
+ inputs=[swap_condition],
940
+ outputs=[age, specific_face, source_image_input],
941
+ )
942
+
943
+ def on_set_slider_range(video_path):
944
+ if video_path is None or not os.path.exists(video_path):
945
+ gr.Info("Check video path")
946
+ else:
947
+ try:
948
+ cap = cv2.VideoCapture(video_path)
949
+ fps = cap.get(cv2.CAP_PROP_FPS)
950
+ total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
951
+ cap.release()
952
+ if total_frames > 0:
953
+ total_frames -= 1
954
+ return gr.Slider.update(
955
+ minimum=0, maximum=total_frames, value=0, interactive=True
956
+ )
957
+ gr.Info("Error fetching video")
958
+ except Exception:
959
+ gr.Info("Error fetching video")
960
+
961
+ set_slider_range_event = set_slider_range_btn.click(
962
+ on_set_slider_range,
963
+ inputs=[video_input],
964
+ outputs=[frame_slider],
965
+ )
966
+
967
+ def update_preview(video_path, frame_index, use_foreground_mask, resolution):
968
+ if not os.path.exists(video_path):
969
+ yield gr.update(value=None), gr.update(value=None), gr.update(visible=False)
970
+ else:
971
+ frame = get_single_video_frame(video_path, frame_index)
972
+ if frame is not None:
973
+ if use_foreground_mask:
974
+ overlayed_image = frame
975
+ if frame_index in FOREGROUND_MASK_DICT.keys():
976
+ mask = FOREGROUND_MASK_DICT.get(frame_index, None)
977
+ if mask is not None:
978
+ overlayed_image = image_mask_overlay(frame, mask)
979
+ yield gr.update(value=None), gr.update(value=None), gr.update(visible=False) # clear previous mask
980
+ frame = resize_image_by_resolution(frame, resolution)
981
+ yield gr.update(value=frame[:, :, ::-1]), gr.update(
982
+ value=overlayed_image[:, :, ::-1], visible=True
983
+ ), gr.update(visible=False)
984
+ else:
985
+ frame = resize_image_by_resolution(frame, resolution)
986
+ yield gr.update(value=frame[:, :, ::-1]), gr.update(value=None), gr.update(
987
+ visible=False
988
+ )
989
+
990
+ global CURRENT_FRAME
991
+ CURRENT_FRAME = frame
992
+
993
+ frame_slider_event = frame_slider.change(
994
+ fn=update_preview,
995
+ inputs=[video_input, frame_slider, use_foreground_mask, preview_resolution],
996
+ outputs=[preview_image, img_fg_mask, preview_video],
997
+ show_progress=False,
998
+ )
999
+
1000
+ def add_foreground_mask(fg, frame_index, softness):
1001
+ if fg is not None:
1002
+ mask = fg.get("mask", None)
1003
+ if mask is not None:
1004
+ alpha_rgb = cv2.cvtColor(mask, cv2.COLOR_BGRA2RGB)
1005
+ alpha_rgb = cv2.blur(alpha_rgb, (softness, softness))
1006
+ FOREGROUND_MASK_DICT[frame_index] = alpha_rgb.astype("float32") / 255.0
1007
+ gr.Info(f"saved mask index {frame_index}")
1008
+
1009
+ add_foreground_mask_event = add_fg_mask_btn.click(
1010
+ fn=add_foreground_mask,
1011
+ inputs=[img_fg_mask, frame_slider, fg_mask_softness],
1012
+ ).then(
1013
+ fn=update_preview,
1014
+ inputs=[video_input, frame_slider, use_foreground_mask, preview_resolution],
1015
+ outputs=[preview_image, img_fg_mask, preview_video],
1016
+ show_progress=False,
1017
+ )
1018
+
1019
+ def delete_foreground_mask(frame_index):
1020
+ if frame_index in FOREGROUND_MASK_DICT.keys():
1021
+ FOREGROUND_MASK_DICT.pop(frame_index)
1022
+ gr.Info(f"Deleted mask index {frame_index}")
1023
+
1024
+ del_custom_mask_event = del_fg_mask_btn.click(
1025
+ fn=delete_foreground_mask, inputs=[frame_slider]
1026
+ ).then(
1027
+ fn=update_preview,
1028
+ inputs=[video_input, frame_slider, use_foreground_mask, preview_resolution],
1029
+ outputs=[preview_image, img_fg_mask, preview_video],
1030
+ show_progress=False,
1031
+ )
1032
+
1033
+ def get_collected_faces(image):
1034
+ if image is not None:
1035
+ gr.Info(f"Collecting faces...")
1036
+ faces = SWAP_MUKHAM.collect_heads(image)
1037
+ COLLECTED_FACES.extend(faces)
1038
+ yield COLLECTED_FACES
1039
+ gr.Info(f"Collected {len(faces)} faces")
1040
+
1041
+ collect_faces.click(get_collected_faces, inputs=[preview_image], outputs=[collected_faces])
1042
+
1043
+ src_specific_inputs = []
1044
+ gen_variable_txt = ",".join(
1045
+ [f"src{i+1}" for i in range(gv.NUM_OF_SRC_SPECIFIC)]
1046
+ + [f"trg{i+1}" for i in range(gv.NUM_OF_SRC_SPECIFIC)]
1047
+ )
1048
+ exec(f"src_specific_inputs = ({gen_variable_txt})")
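+ # gathers the dynamically generated src/trg widgets into one flat tuple that is appended to swap_inputs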
1049
+
1050
+ test_mode = gr.Checkbox(value=False, visible=False)
1051
+
1052
+ swap_inputs = [
1053
+ test_mode,
1054
+ target_type,
1055
+ target_image_input,
1056
+ video_input,
1057
+ directory_input,
1058
+ source_image_input,
1059
+ use_foreground_mask,
1060
+ img_fg_mask,
1061
+ fg_mask_softness,
1062
+ output_directory,
1063
+ output_name,
1064
+ use_datetime_suffix,
1065
+ sequence_output_format,
1066
+ keep_output_sequence,
1067
+ swap_condition,
1068
+ age,
1069
+ distance_slider,
1070
+ face_enhancer_name,
1071
+ face_upscaler_opacity,
1072
+ use_face_parsing_mask,
1073
+ parse_from_target,
1074
+ mask_regions,
1075
+ mask_blur_amount,
1076
+ mask_erode_amount,
1077
+ swap_iteration,
1078
+ face_scale,
1079
+ use_laplacian_blending,
1080
+ crop_top,
1081
+ crop_bott,
1082
+ crop_left,
1083
+ crop_right,
1084
+ frame_slider,
1085
+ number_of_threads,
1086
+ use_frame_selection,
1087
+ frame_selection_ranges,
1088
+ video_quality,
1089
+ face_detection_condition,
1090
+ face_detection_size,
1091
+ face_detection_threshold,
1092
+ averaging_method,
1093
+ *src_specific_inputs,
1094
+ ]
1095
+
1096
+ swap_outputs = [
1097
+ preview_image,
1098
+ output_directory_button,
1099
+ output_video_button,
1100
+ preview_video,
1101
+ ]
1102
+
1103
+ swap_event = swap_button.click(fn=process, inputs=swap_inputs, outputs=swap_outputs)
1104
+
1105
+ test_swap_settings = list(swap_inputs)  # copy the list so replacing test_mode below does not mutate swap_inputs
1106
+ test_swap_settings[0] = gr.Checkbox(value=True, visible=False)
1107
+
1108
+ test_swap_event = test_swap.click(
1109
+ fn=update_preview,
1110
+ inputs=[video_input, frame_slider, use_foreground_mask, preview_resolution],
1111
+ outputs=[preview_image, img_fg_mask, preview_video],
1112
+ show_progress=False,
1113
+ ).then(
1114
+ fn=process, inputs=test_swap_settings, outputs=swap_outputs, show_progress=True
1115
+ )
1116
+
1117
+ def stop_running():
1118
+ global IS_RUNNING
1119
+ IS_RUNNING = False
1120
+ print("[ Process cancelled ]")
1121
+ gr.Info("Process cancelled")
1122
+
1123
+ cancel_button.click(
1124
+ fn=stop_running,
1125
+ inputs=None,
1126
+ cancels=[swap_event, set_slider_range_event, test_swap_event],
1127
+ show_progress=True,
1128
+ )
1129
+
1130
+ if __name__ == "__main__":
1131
+ if gv.USE_COLAB:
1132
+ print("Running in colab mode")
1133
+
1134
+ interface.queue(concurrency_count=2, max_size=20).launch(share=gv.USE_COLAB)
assets/images/loading.gif ADDED
assets/images/logo.png ADDED
assets/pretrained_models/readme.md ADDED
@@ -0,0 +1 @@
1
+
change_log.md ADDED
@@ -0,0 +1,5 @@
1
+ # Change-log
2
+
3
+ ## 30/07/2023
4
+ - Change the existing NSFW filter to Yahoo's open_nsfw
5
+ - Add CodeFormer support
default_paths.py ADDED
@@ -0,0 +1,18 @@
1
+ import os
2
+
3
+ FFMPEG_PATH = "./ffmpeg/ffmpeg" if os.path.exists("./ffmpeg/ffmpeg") else None
4
+
5
+ INSWAPPER_PATH = "./assets/pretrained_models/inswapper_128.onnx"
6
+ FACE_PARSER_PATH = "./assets/pretrained_models/faceparser.onnx"
7
+ ARCFACE_PATH = "./assets/pretrained_models/w600k_r50.onnx"
8
+ RETINAFACE_PATH = "./assets/pretrained_models/det_10g.onnx"
9
+ OPEN_NSFW_PATH = "./assets/pretrained_models/open-nsfw.onnx"
10
+ GENDERAGE_PATH = "./assets/pretrained_models/gender_age.onnx"
11
+
12
+ CODEFORMER_PATH = "./assets/pretrained_models/codeformer.onnx"
13
+ GFPGAN_V14_PATH = "./assets/pretrained_models/GFPGANv1.4.onnx"
14
+ GFPGAN_V13_PATH = "./assets/pretrained_models/GFPGANv1.3.onnx"
15
+ GFPGAN_V12_PATH = "./assets/pretrained_models/GFPGANv1.2.onnx"
16
+ GPEN_BFR_512_PATH = "./assets/pretrained_models/GPEN-BFR-512.onnx"
17
+ GPEN_BFR_256_PATH = "./assets/pretrained_models/GPEN-BFR-256.onnx"
18
+ RESTOREFORMER_PATH = "./assets/pretrained_models/restoreformer.onnx"
face_analyser.py ADDED
@@ -0,0 +1,168 @@
1
+ import os
2
+ import cv2
3
+ import threading
4
+ import numpy as np
5
+ from tqdm import tqdm
6
+ import concurrent.futures
7
+ import default_paths as dp
8
+ from dataclasses import dataclass
9
+ from utils.arcface import ArcFace
10
+ from utils.gender_age import GenderAge
11
+ from utils.retinaface import RetinaFace
12
+
13
+ cache = {}
14
+
15
+ @dataclass
16
+ class Face:
17
+ bbox: np.ndarray
18
+ kps: np.ndarray
19
+ det_score: float
20
+ embedding: np.ndarray
21
+ gender: int
22
+ age: int
23
+
24
+ def __getitem__(self, key):
25
+ return getattr(self, key)
26
+
27
+ def __setitem__(self, key, value):
28
+ if hasattr(self, key):
29
+ setattr(self, key, value)
30
+ else:
31
+ raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{key}'")
32
+
33
+ single_face_detect_conditions = [
34
+ "best detection",
35
+ "left most",
36
+ "right most",
37
+ "top most",
38
+ "bottom most",
39
+ "middle",
40
+ "biggest",
41
+ "smallest",
42
+ ]
43
+
44
+ multi_face_detect_conditions = [
45
+ "all face",
46
+ "specific face",
47
+ "age less than",
48
+ "age greater than",
49
+ "all male",
50
+ "all female"
51
+ ]
52
+
53
+ face_detect_conditions = multi_face_detect_conditions + single_face_detect_conditions
54
+
55
+
56
+ def get_single_face(faces, method="best detection"):
57
+ total_faces = len(faces)
58
+
59
+ if total_faces == 0:
60
+ return None
61
+
62
+ if total_faces == 1:
63
+ return faces[0]
64
+
65
+ if method == "best detection":
66
+ return sorted(faces, key=lambda face: face["det_score"])[-1]
67
+ elif method == "left most":
68
+ return sorted(faces, key=lambda face: face["bbox"][0])[0]
69
+ elif method == "right most":
70
+ return sorted(faces, key=lambda face: face["bbox"][0])[-1]
71
+ elif method == "top most":
72
+ return sorted(faces, key=lambda face: face["bbox"][1])[0]
73
+ elif method == "bottom most":
74
+ return sorted(faces, key=lambda face: face["bbox"][1])[-1]
75
+ elif method == "middle":
76
+ return sorted(faces, key=lambda face: (
77
+ (face["bbox"][0] + face["bbox"][2]) / 2 - 0.5) ** 2 +
78
+ ((face["bbox"][1] + face["bbox"][3]) / 2 - 0.5) ** 2)[len(faces) // 2]
79
+ elif method == "biggest":
80
+ return sorted(faces, key=lambda face: (face["bbox"][2] - face["bbox"][0]) * (face["bbox"][3] - face["bbox"][1]))[-1]
81
+ elif method == "smallest":
82
+ return sorted(faces, key=lambda face: (face["bbox"][2] - face["bbox"][0]) * (face["bbox"][3] - face["bbox"][1]))[0]
83
+
84
+ def filter_face_by_age(faces, age, method="age less than"):
85
+ if method == "age less than":
86
+ return [face for face in faces if face["age"] < age]
87
+ elif method == "age greater than":
88
+ return [face for face in faces if face["age"] > age]
89
+ elif method == "age equals to":
90
+ return [face for face in faces if face["age"] == age]
91
+
92
+ def cosine_distance(a, b):
93
+ a = a / np.linalg.norm(a)  # copy instead of /=, so the caller's embedding is not modified in place
94
+ b = b / np.linalg.norm(b)
95
+ return 1 - np.dot(a, b)
96
+
97
+ def is_similar_face(face1, face2, threshold=0.6):
98
+ distance = cosine_distance(face1["embedding"], face2["embedding"])
99
+ return distance < threshold
100
+
101
+
102
+ class AnalyseFace:
103
+ def __init__(self, provider=["CPUExecutionProvider"], session_options=None):
104
+ self.detector = RetinaFace(model_file=dp.RETINAFACE_PATH, provider=provider, session_options=session_options)
105
+ self.recognizer = ArcFace(model_file=dp.ARCFACE_PATH, provider=provider, session_options=session_options)
106
+ self.gender_age = GenderAge(model_file=dp.GENDERAGE_PATH, provider=provider, session_options=session_options)
107
+ self.detect_condition = "best detection"
108
+ self.detection_size = (640, 640)
109
+ self.detection_threshold = 0.5
110
+
111
+ def analyser(self, img, skip_task=[]):
112
+ bboxes, kpss = self.detector.detect(img, input_size=self.detection_size, det_thresh=self.detection_threshold)
113
+ faces = []
114
+ for i in range(bboxes.shape[0]):
115
+ feat, gender, age = None, None, None
116
+ bbox = bboxes[i, 0:4]
117
+ det_score = bboxes[i, 4]
118
+ kps = None
119
+ if kpss is not None:
120
+ kps = kpss[i]
121
+ if 'embedding' not in skip_task:
122
+ feat = self.recognizer.get(img, kpss[i])
123
+ if 'gender_age' not in skip_task:
124
+ gender, age = self.gender_age.predict(img, kpss[i])
125
+ face = Face(bbox=bbox, kps=kps, det_score=det_score, embedding=feat, gender=gender, age=age)
126
+ faces.append(face)
127
+ return faces
128
+
129
+ def get_faces(self, image, scale=1., skip_task=[]):
130
+ if isinstance(image, str):
131
+ image = cv2.imread(image)
132
+
133
+ faces = self.analyser(image, skip_task=skip_task)
134
+
135
+ if scale != 1: # landmark-scale
136
+ for i, face in enumerate(faces):
137
+ landmark = face['kps']
138
+ center = np.mean(landmark, axis=0)
139
+ landmark = center + (landmark - center) * scale
140
+ faces[i]['kps'] = landmark
141
+
142
+ return faces
143
+
144
+ def get_face(self, image, scale=1., skip_task=[]):
145
+ faces = self.get_faces(image, scale=scale, skip_task=skip_task)
146
+ return get_single_face(faces, method=self.detect_condition)
147
+
148
+ def get_averaged_face(self, images, method="mean"):
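+ # detect one face per image and average (mean or median) their embeddings into a single identity vector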
149
+ if not isinstance(images, list):
150
+ images = [images]
151
+
152
+ face = self.get_face(images[0], scale=1., skip_task=[])
153
+
154
+ if len(images) > 1:
155
+ embeddings = [face['embedding']]
156
+
157
+ for image in images[1:]:
158
+ face = self.get_face(image, scale=1., skip_task=[])
159
+ embeddings.append(face['embedding'])
160
+
161
+ if method == "mean":
162
+ avg_embedding = np.mean(embeddings, axis=0)
163
+ elif method == "median":
164
+ avg_embedding = np.median(embeddings, axis=0)
165
+
166
+ face['embedding'] = avg_embedding
167
+
168
+ return face
face_parsing.py ADDED
@@ -0,0 +1,55 @@
1
+ import cv2
2
+ import onnxruntime
3
+ import numpy as np
4
+
5
+ mask_regions = {
6
+ "Background":0,
7
+ "Skin":1,
8
+ "L-Eyebrow":2,
9
+ "R-Eyebrow":3,
10
+ "L-Eye":4,
11
+ "R-Eye":5,
12
+ "Eye-G":6,
13
+ "L-Ear":7,
14
+ "R-Ear":8,
15
+ "Ear-R":9,
16
+ "Nose":10,
17
+ "Mouth":11,
18
+ "U-Lip":12,
19
+ "L-Lip":13,
20
+ "Neck":14,
21
+ "Neck-L":15,
22
+ "Cloth":16,
23
+ "Hair":17,
24
+ "Hat":18
25
+ }
26
+
27
+
28
+ class FaceParser:
29
+ def __init__(self, model_path=None, provider=['CPUExecutionProvider'], session_options=None):
30
+ self.session_options = session_options
31
+ if self.session_options is None:
32
+ self.session_options = onnxruntime.SessionOptions()
33
+ self.session = onnxruntime.InferenceSession(model_path, sess_options=self.session_options, providers=provider)
34
+ self.mean = np.array([0.485, 0.456, 0.406]).reshape((1, 1, 3))
35
+ self.std = np.array([0.229, 0.224, 0.225]).reshape((1, 1, 3))
36
+
37
+ def parse(self, img, regions=[1,2,3,4,5,10,11,12,13]):
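+ # resize to 512x512, normalize with ImageNet statistics, and return a binary float mask covering the requested region ids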
38
+ img = cv2.resize(img, (512, 512), interpolation=cv2.INTER_LINEAR)
39
+ img = img.astype(np.float32)[:,:,::-1] / 255.0
40
+ img = (img - self.mean) / self.std
41
+ img = np.expand_dims(img.transpose((2, 0, 1)), axis=0).astype(np.float32)
42
+
43
+ out = self.session.run(None, {'input':img})[0]
44
+ out = out.squeeze(0).argmax(0)
45
+ out = np.isin(out, regions).astype('float32')
46
+
47
+ return out.clip(0, 1)
48
+
49
+
50
+ def mask_regions_to_list(values):
51
+ out_ids = []
52
+ for value in values:
53
+ if value in mask_regions.keys():
54
+ out_ids.append(mask_regions.get(value))
55
+ return out_ids
face_swapper.py ADDED
@@ -0,0 +1,43 @@
1
+ import time
2
+ import onnx
3
+ import cv2
4
+ import onnxruntime
5
+ import numpy as np
6
+ from onnx import numpy_helper
7
+ from numpy.linalg import norm as l2norm
8
+ from utils.face_alignment import norm_crop2
9
+
10
+
11
+ class Inswapper():
12
+ def __init__(self, model_file=None, provider=['CPUExecutionProvider'], session_options=None):
13
+ self.model_file = model_file
14
+ model = onnx.load(self.model_file)
15
+ graph = model.graph
16
+ self.emap = numpy_helper.to_array(graph.initializer[-1])
17
+
18
+ self.session_options = session_options
19
+ if self.session_options is None:
20
+ self.session_options = onnxruntime.SessionOptions()
21
+ self.session = onnxruntime.InferenceSession(self.model_file, sess_options=self.session_options, providers=provider)
22
+
23
+ def forward(self, frame, target, source, n_pass=1):
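+ # align the target face to 128x128, project the source embedding through the model's emap, and run the swap n_pass times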
24
+ trg, matrix = norm_crop2(frame, target['kps'], 128)
25
+
26
+ latent = source['embedding'].reshape((1, -1))
27
+ latent = np.dot(latent, self.emap)
28
+ latent /= np.linalg.norm(latent)
29
+
30
+ blob = trg.astype('float32') / 255
31
+ blob = blob[:, :, ::-1]
32
+ blob = np.expand_dims(blob, axis=0).transpose(0, 3, 1, 2)
33
+
34
+ for _ in range(max(int(n_pass),1)):
35
+ blob = self.session.run(['output'], {'target': blob, 'source': latent})[0]
36
+
37
+ out = blob[0].transpose((1, 2, 0))
38
+ out = (out * 255).clip(0,255)
39
+ out = out.astype('uint8')[:, :, ::-1]
40
+
41
+ del blob, latent
42
+
43
+ return trg, out, matrix
face_upscaler.py ADDED
@@ -0,0 +1,72 @@
1
+ import os
2
+ import cv2
3
+ import default_paths as dp
4
+ from upscaler.GPEN import GPEN
5
+ from upscaler.GFPGAN import GFPGAN
6
+ from upscaler.codeformer import CodeFormer
7
+ from upscaler.restoreformer import RestoreFormer
8
+
9
+ def gfpgan_runner(img, model):
10
+ img = model.enhance(img)
11
+ return img
12
+
13
+
14
+ def codeformer_runner(img, model):
15
+ img = model.enhance(img, w=0.9)
16
+ return img
17
+
18
+
19
+ def gpen_runner(img, model):
20
+ img = model.enhance(img)
21
+ return img
22
+
23
+
24
+ def restoreformer_runner(img, model):
25
+ img = model.enhance(img)
26
+ return img
27
+
28
+
29
+ supported_upscalers = {
30
+ "CodeFormer": (dp.CODEFORMER_PATH, codeformer_runner),
31
+ "GFPGANv1.4": (dp.GFPGAN_V14_PATH, gfpgan_runner),
32
+ "GFPGANv1.3": (dp.GFPGAN_V13_PATH, gfpgan_runner),
33
+ "GFPGANv1.2": (dp.GFPGAN_V12_PATH, gfpgan_runner),
34
+ "GPEN-BFR-512": (dp.GPEN_BFR_512_PATH, gpen_runner),
35
+ "GPEN-BFR-256": (dp.GPEN_BFR_256_PATH, gpen_runner),
36
+ "RestoreFormer": (dp.RESTOREFORMER_PATH, gpen_runner),
37
+ }
38
+
39
+ cv2_upscalers = ["LANCZOS4", "CUBIC", "NEAREST"]
40
+
41
+ def get_available_upscalers_names():
42
+ available = []
43
+ for name, data in supported_upscalers.items():
44
+ if os.path.exists(data[0]):
45
+ available.append(name)
46
+ return available
47
+
48
+
49
+ def load_face_upscaler(name='GFPGAN', provider=["CPUExecutionProvider"], session_options=None):
50
+ assert name in get_available_upscalers_names() + cv2_upscalers, f"Face upscaler {name} unavailable."
51
+ if name in supported_upscalers.keys():
52
+ model_path, model_runner = supported_upscalers.get(name)
53
+ if name == 'CodeFormer':
54
+ model = CodeFormer(model_path=model_path, provider=provider, session_options=session_options)
55
+ elif name.startswith('GFPGAN'):
56
+ model = GFPGAN(model_path=model_path, provider=provider, session_options=session_options)
57
+ elif name.startswith('GPEN'):
58
+ model = GPEN(model_path=model_path, provider=provider, session_options=session_options)
59
+ elif name == "RestoreFormer":
60
+ model = RestoreFormer(model_path=model_path, provider=provider, session_options=session_options)
61
+ elif name == 'LANCZOS4':
62
+ model = None
63
+ model_runner = lambda img, _: cv2.resize(img, (512,512), interpolation=cv2.INTER_LANCZOS4)
64
+ elif name == 'CUBIC':
65
+ model = None
66
+ model_runner = lambda img, _: cv2.resize(img, (512,512), interpolation=cv2.INTER_CUBIC)
67
+ elif name == 'NEAREST':
68
+ model = None
69
+ model_runner = lambda img, _: cv2.resize(img, (512,512), interpolation=cv2.INTER_NEAREST)
70
+ else:
71
+ model = None
72
+ return (model, model_runner)
global_variables.py ADDED
@@ -0,0 +1,36 @@
1
+ import os
2
+ from face_parsing import mask_regions
3
+ from utils.image import resolution_map
4
+ from face_upscaler import get_available_upscalers_names, cv2_upscalers
5
+ from face_analyser import single_face_detect_conditions, face_detect_conditions
6
+
7
+ DEFAULT_OUTPUT_PATH = os.getcwd()
8
+
9
+ MASK_BLUR_AMOUNT = 0.1
10
+ MASK_ERODE_AMOUNT = 0.15
11
+ MASK_REGIONS_DEFAULT = ["Skin", "R-Eyebrow", "L-Eyebrow", "L-Eye", "R-Eye", "Nose", "Mouth", "L-Lip", "U-Lip"]
12
+ MASK_REGIONS = list(mask_regions.keys())
13
+
14
+ NSFW_DETECTOR = None
15
+
16
+ FACE_ENHANCER_LIST = ["NONE"]
17
+ FACE_ENHANCER_LIST.extend(get_available_upscalers_names())
18
+ FACE_ENHANCER_LIST.extend(cv2_upscalers)
19
+
20
+ RESOLUTIONS = list(resolution_map.keys())
21
+
22
+ SINGLE_FACE_DETECT_CONDITIONS = single_face_detect_conditions
23
+ FACE_DETECT_CONDITIONS = face_detect_conditions
24
+ DETECT_CONDITION = "best detection"
25
+ DETECT_SIZE = 640
26
+ DETECT_THRESHOLD = 0.6
27
+
28
+ NUM_OF_SRC_SPECIFIC = 10
29
+
30
+ MAX_THREADS = 2
31
+
32
+ VIDEO_QUALITY_LIST = ["poor", "low", "medium", "high", "best"]
33
+ VIDEO_QUALITY = "high"
34
+
35
+ AVERAGING_METHODS = ["mean", "median"]
36
+ AVERAGING_METHOD = "mean"
nsfw_checker/LICENSE.md ADDED
@@ -0,0 +1,11 @@
1
+
2
+ Copyright 2016, Yahoo Inc.
3
+
4
+ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
5
+
6
+ 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7
+
8
+ 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
9
+
10
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
11
+
nsfw_checker/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from . opennsfw import NSFWChecker
nsfw_checker/opennsfw.py ADDED
@@ -0,0 +1,65 @@
1
+ import cv2
2
+ import onnx
3
+ import onnxruntime
4
+ import numpy as np
5
+ from tqdm import tqdm
6
+
7
+ # https://github.com/yahoo/open_nsfw
8
+
9
+ def prepare_image(img):
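+ # open_nsfw expects a 224x224 BGR image with the Caffe-style channel means (104, 117, 123) subtracted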
10
+ img = cv2.resize(img, (224,224)).astype('float32')
11
+ img -= np.array([104, 117, 123], dtype=np.float32)
12
+ img = np.expand_dims(img, axis=0)
13
+ return img
14
+
15
+ class NSFWChecker:
16
+ def __init__(self, model_path=None, provider=["CPUExecutionProvider"], session_options=None):
17
+ model = onnx.load(model_path)
18
+ self.input_name = model.graph.input[0].name
19
+ self.session_options = session_options
20
+ if self.session_options is None:
21
+ self.session_options = onnxruntime.SessionOptions()
22
+ self.session = onnxruntime.InferenceSession(model_path, sess_options=self.session_options, providers=provider)
23
+
24
+ def check_image(self, image, threshold=0.9):
25
+ if isinstance(image, str):
26
+ image = cv2.imread(image)
27
+ img = prepare_image(image)
28
+ score = self.session.run(None, {self.input_name:img})[0][0][1]
29
+ if score >= threshold:
30
+ return True
31
+ return False
32
+
33
+ def check_video(self, video_path, threshold=0.9, max_frames=100):
34
+ cap = cv2.VideoCapture(video_path)
35
+ total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
36
+
37
+ max_frames = min(total_frames, max_frames)
38
+ indexes = np.arange(total_frames, dtype=int)
39
+ shuffled_indexes = np.random.permutation(indexes)[:max_frames]
40
+
41
+ for idx in tqdm(shuffled_indexes, desc="Checking"):
42
+ cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
43
+ valid_frame, frame = cap.read()
44
+ if valid_frame:
45
+ img = prepare_image(frame)
46
+ score = self.session.run(None, {self.input_name:img})[0][0][1]
47
+ if score >= threshold:
48
+ cap.release()
49
+ return True
50
+ cap.release()
51
+ return False
52
+
53
+ def check_image_paths(self, image_paths, threshold=0.9, max_frames=100):
54
+ total_frames = len(image_paths)
55
+ max_frames = min(total_frames, max_frames)
56
+ indexes = np.arange(total_frames, dtype=int)
57
+ shuffled_indexes = np.random.permutation(indexes)[:max_frames]
58
+
59
+ for idx in tqdm(shuffled_indexes, desc="Checking"):
60
+ frame = cv2.imread(image_paths[idx])
61
+ img = prepare_image(frame)
62
+ score = self.session.run(None, {self.input_name:img})[0][0][1]
63
+ if score >= threshold:
64
+ return True
65
+ return False
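
`NSFWChecker` loads the open_nsfw ONNX graph once and scores resized 224x224 BGR frames against a threshold. A minimal usage sketch follows; the model path mirrors the location the Colab cell further down downloads to, and the sample file names are placeholders.

```python
# Minimal sketch: gate an image and a video before swapping.
# Assumptions: the open-nsfw model sits at assets/pretrained_models/open-nsfw.onnx
# and "target.jpg"/"target.mp4" exist on disk.
from nsfw_checker import NSFWChecker

checker = NSFWChecker(
    model_path="assets/pretrained_models/open-nsfw.onnx",
    provider=["CPUExecutionProvider"],
)

if checker.check_image("target.jpg", threshold=0.9):
    print("NSFW image detected, skipping.")

if checker.check_video("target.mp4", threshold=0.9, max_frames=100):
    print("NSFW frames detected, skipping.")
```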
requirements.txt ADDED
@@ -0,0 +1,6 @@
1
+ gradio>=3.40
2
+ numpy>=1.25.2
3
+ opencv-python>=4.7.0.72
4
+ opencv-python-headless>=4.7.0.72
5
+ onnx==1.14.0
6
+ onnxruntime==1.15.0
swap_mukham.py ADDED
@@ -0,0 +1,195 @@
1
+ import cv2
2
+ import numpy as np
3
+
4
+ import default_paths as dp
5
+ from utils.device import get_device_and_provider
6
+ from utils.face_alignment import get_cropped_head
7
+ from utils.image import paste_to_whole, mix_two_image
8
+
9
+ from face_swapper import Inswapper
10
+ from face_parsing import FaceParser
11
+ from face_upscaler import get_available_upscalers_names, cv2_upscalers, load_face_upscaler
12
+ from face_analyser import AnalyseFace, single_face_detect_conditions, face_detect_conditions, get_single_face, is_similar_face
13
+
14
+ from nsfw_checker import NSFWChecker
15
+
16
+ get_device_name = lambda x: x.lower().replace("executionprovider", "")
17
+
18
+ class SwapMukham:
19
+ def __init__(self, device='cpu'):
20
+ self.load_nsfw_detector(device=device)
21
+ self.load_face_swapper(device=device)
22
+ self.load_face_analyser(device=device)
23
+ # self.load_face_parser(device=device)
24
+ # self.load_face_upscaler(device=device)
25
+
26
+ self.face_parser = None
27
+ self.face_upscaler = None
28
+ self.face_upscaler_name = ""
29
+
30
+ def set_values(self, args):
31
+ self.age = args.get('age', 0)
32
+ self.detect_condition = args.get('detect_condition', "left most")
33
+ self.similarity = args.get('similarity', 0.6)
34
+ self.swap_condition = args.get('swap_condition', 'left most')
35
+ self.face_scale = args.get('face_scale', 1.0)
36
+ self.num_of_pass = args.get('num_of_pass', 1)
37
+ self.mask_crop_values = args.get('mask_crop_values', (0,0,0,0))
38
+ self.mask_erode_amount = args.get('mask_erode_amount', 0.1)
39
+ self.mask_blur_amount = args.get('mask_blur_amount', 0.1)
40
+ self.use_laplacian_blending = args.get('use_laplacian_blending', False)
41
+ self.use_face_parsing = args.get('use_face_parsing', False)
42
+ self.face_parse_regions = args.get('face_parse_regions', [1,2,3,4,5,10,11,12,13])
43
+ self.face_upscaler_opacity = args.get('face_upscaler_opacity', 1.)
44
+ self.parse_from_target = args.get('parse_from_target', False)
45
+ self.averaging_method = args.get('averaging_method', 'mean')
46
+
47
+ self.analyser.detection_threshold = args.get('face_detection_threshold', 0.5)
48
+ self.analyser.detection_size = args.get('face_detection_size', (640, 640))
49
+ self.analyser.detect_condition = args.get('face_detection_condition', 'best detection')
50
+
51
+ def load_nsfw_detector(self, device='cpu'):
52
+ device, provider, options = get_device_and_provider(device=device)
53
+ self.nsfw_detector = NSFWChecker(model_path=dp.OPEN_NSFW_PATH, provider=provider, session_options=options)
54
+ _device = get_device_name(self.nsfw_detector.session.get_providers()[0])
55
+ print(f"[{_device}] NSFW detector model loaded.")
56
+
57
+ def load_face_swapper(self, device='cpu'):
58
+ device, provider, options = get_device_and_provider(device=device)
59
+ self.swapper = Inswapper(model_file=dp.INSWAPPER_PATH, provider=provider, session_options=options)
60
+ _device = get_device_name(self.swapper.session.get_providers()[0])
61
+ print(f"[{_device}] Face swapper model loaded.")
62
+
63
+ def load_face_analyser(self, device='cpu'):
64
+ device, provider, options = get_device_and_provider(device=device)
65
+ self.analyser = AnalyseFace(provider=provider, session_options=options)
66
+ _device_d = get_device_name(self.analyser.detector.session.get_providers()[0])
67
+ print(f"[{_device_d}] Face detection model loaded.")
68
+ _device_r = get_device_name(self.analyser.recognizer.session.get_providers()[0])
69
+ print(f"[{_device_r}] Face recognition model loaded.")
70
+ _device_g = get_device_name(self.analyser.gender_age.session.get_providers()[0])
71
+ print(f"[{_device_g}] Gender & Age detection model loaded.")
72
+
73
+ def load_face_parser(self, device='cpu'):
74
+ device, provider, options = get_device_and_provider(device=device)
75
+ self.face_parser = FaceParser(model_path=dp.FACE_PARSER_PATH, provider=provider, session_options=options)
76
+ _device = get_device_name(self.face_parser.session.get_providers()[0])
77
+ print(f"[{_device}] Face parsing model loaded.")
78
+
79
+ def load_face_upscaler(self, name, device='cpu'):
80
+ device, provider, options = get_device_and_provider(device=device)
81
+ if name in get_available_upscalers_names():
82
+ self.face_upscaler = load_face_upscaler(name=name, provider=provider, session_options=options)
83
+ self.face_upscaler_name = name
84
+ _device = get_device_name(self.face_upscaler[0].session.get_providers()[0])
85
+ print(f"[{_device}] Face upscaler model ({name}) loaded.")
86
+ else:
87
+ self.face_upscaler_name = ""
88
+ self.face_upscaler = None
89
+
90
+ def collect_heads(self, frame):
91
+ faces = self.analyser.get_faces(frame, skip_task=['embedding', 'gender_age'])
92
+ return [get_cropped_head(frame, face.kps) for face in faces if face["det_score"] > 0.5]
93
+
94
+ def analyse_source_faces(self, source_specific):
95
+ analysed_source_specific = []
96
+ for i, (source, specific) in enumerate(source_specific):
97
+ if source is not None:
98
+ analysed_source = self.analyser.get_averaged_face(source, method=self.averaging_method)
99
+ if specific is not None:
100
+ analysed_specific = self.analyser.get_face(specific)
101
+ else:
102
+ analysed_specific = None
103
+ analysed_source_specific.append((analysed_source, analysed_specific))
104
+ self.analysed_source_specific = analysed_source_specific
105
+
106
+ def process_frame(self, data):
107
+ frame, custom_mask = data
108
+
109
+ if len(frame.shape) == 2 or (len(frame.shape) == 3 and frame.shape[2] == 1):
110
+ frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR)
111
+
112
+ alpha = None
113
+ if frame.shape[2] == 4:
114
+ alpha = frame[:, :, 3]
115
+ frame = frame[:, :, :3]
116
+
117
+ _frame = frame.copy()
118
+ condition = self.swap_condition
119
+
120
+ skip_task = []
121
+ if condition != "specific face":
122
+ skip_task.append('embedding')
123
+ if condition not in ['age less than', 'age greater than', 'all male', 'all female']:
124
+ skip_task.append('gender_age')
125
+
126
+ analysed_target_faces = self.analyser.get_faces(frame, scale=self.face_scale, skip_task=skip_task)
127
+
128
+ for analysed_target in analysed_target_faces:
129
+ if (condition == "all face" or
130
+ (condition == "age less than" and analysed_target["age"] <= self.age) or
131
+ (condition == "age greater than" and analysed_target["age"] > self.age) or
132
+ (condition == "all male" and analysed_target["gender"] == 1) or
133
+ (condition == "all female" and analysed_target["gender"] == 0)):
134
+
135
+ trg_face = analysed_target
136
+ src_face = self.analysed_source_specific[0][0]
137
+ _frame = self.swap_face(_frame, trg_face, src_face)
138
+
139
+ elif condition == "specific face":
140
+ for analysed_source, analysed_specific in self.analysed_source_specific:
141
+ if is_similar_face(analysed_specific, analysed_target, threshold=self.similarity):
142
+ trg_face = analysed_target
143
+ src_face = analysed_source
144
+ _frame = self.swap_face(_frame, trg_face, src_face)
145
+
146
+ if condition in single_face_detect_conditions and len(analysed_target_faces) > 0:
147
+ analysed_target = get_single_face(analysed_target_faces, method=condition)
148
+ trg_face = analysed_target
149
+ src_face = self.analysed_source_specific[0][0]
150
+ _frame = self.swap_face(_frame, trg_face, src_face)
151
+
152
+ if custom_mask is not None:
153
+ _mask = cv2.resize(custom_mask, _frame.shape[:2][::-1])
154
+ _frame = _mask * frame.astype('float32') + (1 - _mask) * _frame.astype('float32')
155
+ _frame = _frame.clip(0,255).astype('uint8')
156
+
157
+ if alpha is not None:
158
+ _frame = np.dstack((_frame, alpha))
159
+
160
+ return _frame
161
+
162
+ def swap_face(self, frame, trg_face, src_face):
163
+ target_face, generated_face, matrix = self.swapper.forward(frame, trg_face, src_face, n_pass=self.num_of_pass)
164
+ upscaled_face, matrix = self.upscale_face(generated_face, matrix)
165
+ if self.parse_from_target:
166
+ mask = self.face_parsed_mask(target_face)
167
+ else:
168
+ mask = self.face_parsed_mask(upscaled_face)
169
+ result = paste_to_whole(
170
+ upscaled_face,
171
+ frame,
172
+ matrix,
173
+ mask=mask,
174
+ crop_mask=self.mask_crop_values,
175
+ blur_amount=self.mask_blur_amount,
176
+ erode_amount = self.mask_erode_amount
177
+ )
178
+ return result
179
+
180
+ def upscale_face(self, face, matrix):
181
+ face_size = face.shape[0]
182
+ _face = cv2.resize(face, (512,512))
183
+ if self.face_upscaler is not None:
184
+ model, runner = self.face_upscaler
185
+ face = runner(face, model)
186
+ upscaled_face = cv2.resize(face, (512,512))
187
+ upscaled_face = mix_two_image(_face, upscaled_face, self.face_upscaler_opacity)
188
+ return upscaled_face, matrix * (512/face_size)
189
+
190
+ def face_parsed_mask(self, face):
191
+ if self.face_parser is not None and self.use_face_parsing:
192
+ mask = self.face_parser.parse(face, regions=self.face_parse_regions)
193
+ else:
194
+ mask = None
195
+ return mask
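
`SwapMukham` wires the analyser, swapper, and optional parser/upscaler into a per-frame pipeline: `set_values` configures the run, `analyse_source_faces` caches source embeddings, and `process_frame` performs the swap. The sketch below is a rough single-image walkthrough, not the exact `app.py` flow; the shape passed to `analyse_source_faces` (a single source image per pair) and the file names are assumptions.

```python
# Rough single-image sketch of the SwapMukham flow (not the exact app.py code path).
# Assumptions: the models resolved via default_paths are present, and
# get_averaged_face accepts a single source image here; file names are placeholders.
import cv2
from swap_mukham import SwapMukham

sm = SwapMukham(device="cpu")
sm.set_values({
    "swap_condition": "all face",   # swap every detected face in the frame
    "face_scale": 1.0,
    "num_of_pass": 1,
    "averaging_method": "mean",
})

source = cv2.imread("source_face.jpg")
frame = cv2.imread("target_frame.jpg")

# (source, specific) pairs; "specific" is only consulted for the
# "specific face" condition, so it can stay None here.
sm.analyse_source_faces([(source, None)])

swapped = sm.process_frame((frame, None))  # second element is an optional custom mask
cv2.imwrite("swapped.jpg", swapped)
```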
swap_mukham_colab.ipynb ADDED
@@ -0,0 +1,183 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {
6
+ "colab_type": "text",
7
+ "id": "view-in-github"
8
+ },
9
+ "source": [
10
+ "<a href=\"https://colab.research.google.com/github/harisreedhar/Swap-Mukham/blob/main/swap_mukham_colab.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "markdown",
15
+ "metadata": {
16
+ "id": "bypvIQG5RHl9"
17
+ },
18
+ "source": [
19
+ "# 🗿 **Swap-Mukham**\n",
20
+ "*Face swap app based on insightface inswapper.*\n",
21
+ "- [Github](https://github.com/harisreedhar/Swap-Mukham)\n",
22
+ "- [Disclaimer](https://github.com/harisreedhar/Swap-Mukham#disclaimer)"
23
+ ]
24
+ },
25
+ {
26
+ "cell_type": "markdown",
27
+ "metadata": {
28
+ "id": "csC_DX5zWLEU"
29
+ },
30
+ "source": [
31
+ "# Clone Repository"
32
+ ]
33
+ },
34
+ {
35
+ "cell_type": "code",
36
+ "execution_count": null,
37
+ "metadata": {
38
+ "id": "klcx2cKDKX5x"
39
+ },
40
+ "outputs": [],
41
+ "source": [
42
+ "#@title\n",
43
+ "! git clone https://github.com/harisreedhar/Swap-Mukham"
44
+ ]
45
+ },
46
+ {
47
+ "cell_type": "markdown",
48
+ "metadata": {
49
+ "id": "bebBDddfWTXf"
50
+ },
51
+ "source": [
52
+ "# Install Requirements"
53
+ ]
54
+ },
55
+ {
56
+ "cell_type": "code",
57
+ "execution_count": null,
58
+ "metadata": {
59
+ "id": "VgTpg7EsTN3o"
60
+ },
61
+ "outputs": [],
62
+ "source": [
63
+ "#@title\n",
64
+ "%cd Swap-Mukham/\n",
65
+ "print(\"Installing requirements...\")\n",
66
+ "!pip install -r requirements.txt -q\n",
67
+ "!pip install gdown\n",
68
+ "print(\"Installing requirements done.\")"
69
+ ]
70
+ },
71
+ {
72
+ "cell_type": "markdown",
73
+ "metadata": {
74
+ "id": "T9L6tgD0Wats"
75
+ },
76
+ "source": [
77
+ "# Download Models"
78
+ ]
79
+ },
80
+ {
81
+ "cell_type": "code",
82
+ "execution_count": null,
83
+ "metadata": {
84
+ "id": "17MZO9OvUQAk"
85
+ },
86
+ "outputs": [],
87
+ "source": [
88
+ "#@title\n",
89
+ "inswapper_model = \"https://huggingface.co/deepinsight/inswapper/resolve/main/inswapper_128.onnx\" #@param {type:\"string\"}\n",
90
+ "gfpgan_model = \"https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.4.pth\" #@param {type:\"string\"}\n",
91
+ "face_parser_model = \"https://drive.google.com/uc?id=154JgKpzCPW82qINcVieuPH3fZ2e0P812\" #@param {type:\"string\"}\n",
92
+ "real_esrgan_2x_model = \"https://huggingface.co/ai-forever/Real-ESRGAN/resolve/main/RealESRGAN_x2.pth\" #@param {type:\"string\"}\n",
93
+ "real_esrgan_4x_model = \"https://huggingface.co/ai-forever/Real-ESRGAN/resolve/main/RealESRGAN_x4.pth\" #@param {type:\"string\"}\n",
94
+ "real_esrgan_8x_model = \"https://huggingface.co/ai-forever/Real-ESRGAN/resolve/main/RealESRGAN_x8.pth\" #@param {type:\"string\"}\n",
95
+ "codeformer_model = \"https://huggingface.co/bluefoxcreation/Codeformer-ONNX/resolve/main/codeformer.onnx\" #@param {type:\"string\"}\n",
96
+ "nsfw_det_model = \"https://huggingface.co/bluefoxcreation/open-nsfw/resolve/main/open-nsfw.onnx\" #@param {type:\"string\"}\n",
97
+ "import gdown\n",
98
+ "import urllib.request\n",
99
+ "print(\"Downloading swapper model...\")\n",
100
+ "urllib.request.urlretrieve(inswapper_model, \"/content/Swap-Mukham/assets/pretrained_models/inswapper_128.onnx\")\n",
101
+ "print(\"Downloading gfpgan model...\")\n",
102
+ "urllib.request.urlretrieve(gfpgan_model, \"/content/Swap-Mukham/assets/pretrained_models/GFPGANv1.4.pth\")\n",
103
+ "print(\"Downloading face parsing model...\")\n",
104
+ "gdown.download(face_parser_model, \"/content/Swap-Mukham/assets/pretrained_models/79999_iter.pth\")\n",
105
+ "print(\"Downloading realesrgan 2x model...\")\n",
106
+ "urllib.request.urlretrieve(real_esrgan_2x_model, \"/content/Swap-Mukham/assets/pretrained_models/RealESRGAN_x2.pth\")\n",
107
+ "print(\"Downloading realesrgan 4x model...\")\n",
108
+ "urllib.request.urlretrieve(real_esrgan_4x_model, \"/content/Swap-Mukham/assets/pretrained_models/RealESRGAN_x4.pth\")\n",
109
+ "print(\"Downloading realesrgan 8x model...\")\n",
110
+ "urllib.request.urlretrieve(real_esrgan_8x_model, \"/content/Swap-Mukham/assets/pretrained_models/RealESRGAN_x8.pth\")\n",
111
+ "print(\"Downloading codeformer...\")\n",
112
+ "urllib.request.urlretrieve(codeformer_model, \"/content/Swap-Mukham/assets/pretrained_models/codeformer.onnx\")\n",
113
+ "print(\"Downloading NSFW detector model...\")\n",
114
+ "urllib.request.urlretrieve(nsfw_det_model, \"/content/Swap-Mukham/assets/pretrained_models/open-nsfw.onnx\")\n",
115
+ "print(\"Downloading models done.\")"
116
+ ]
117
+ },
118
+ {
119
+ "cell_type": "markdown",
120
+ "metadata": {
121
+ "id": "uEcCUw0Co6bE"
122
+ },
123
+ "source": [
124
+ "# Mount Google drive (optional)"
125
+ ]
126
+ },
127
+ {
128
+ "cell_type": "code",
129
+ "execution_count": null,
130
+ "metadata": {
131
+ "id": "4KssYYippDMw"
132
+ },
133
+ "outputs": [],
134
+ "source": [
135
+ "from google.colab import auth, drive\n",
136
+ "auth.authenticate_user()\n",
137
+ "drive.mount('/content/drive')"
138
+ ]
139
+ },
140
+ {
141
+ "cell_type": "markdown",
142
+ "metadata": {
143
+ "id": "-Tn68Ayqdrlk"
144
+ },
145
+ "source": [
146
+ "# Run App\n",
147
+ "\n"
148
+ ]
149
+ },
150
+ {
151
+ "cell_type": "code",
152
+ "execution_count": null,
153
+ "metadata": {
154
+ "id": "6dpBjbfVOrrc"
155
+ },
156
+ "outputs": [],
157
+ "source": [
158
+ "#@title\n",
159
+ "default_output_path = \"/content/Swap-Mukham\" #@param {type:\"string\"}\n",
160
+ "\n",
161
+ "command = f\"python app.py --cuda --colab --out_dir {default_output_path}\"\n",
162
+ "!{command}"
163
+ ]
164
+ }
165
+ ],
166
+ "metadata": {
167
+ "accelerator": "GPU",
168
+ "colab": {
169
+ "gpuType": "T4",
170
+ "include_colab_link": true,
171
+ "provenance": []
172
+ },
173
+ "kernelspec": {
174
+ "display_name": "Python 3",
175
+ "name": "python3"
176
+ },
177
+ "language_info": {
178
+ "name": "python"
179
+ }
180
+ },
181
+ "nbformat": 4,
182
+ "nbformat_minor": 0
183
+ }
upscaler/GFPGAN.py ADDED
@@ -0,0 +1,41 @@
1
+ import cv2
2
+ import torch
3
+ import onnxruntime
4
+ import numpy as np
5
+ import threading
6
+ import time
7
+
8
+ # gfpgan converted to onnx
9
+ # using https://github.com/xuanandsix/GFPGAN-onnxruntime-demo
10
+ # same inference code for GFPGANv1.2, GFPGANv1.3, GFPGANv1.4
11
+
12
+ lock = threading.Lock()
13
+
14
+ class GFPGAN:
15
+ def __init__(self, model_path="GFPGANv1.4.onnx", provider=["CPUExecutionProvider"], session_options=None):
16
+ self.session_options = session_options
17
+ if self.session_options is None:
18
+ self.session_options = onnxruntime.SessionOptions()
19
+ self.session = onnxruntime.InferenceSession(model_path, sess_options=self.session_options, providers=provider)
20
+ self.resolution = self.session.get_inputs()[0].shape[-2:]
21
+
22
+ def preprocess(self, img):
23
+ img = cv2.resize(img, self.resolution, interpolation=cv2.INTER_LINEAR)
24
+ img = img.astype(np.float32)[:,:,::-1] / 255.0
25
+ img = img.transpose((2, 0, 1))
26
+ img = (img - 0.5) / 0.5
27
+ img = np.expand_dims(img, axis=0).astype(np.float32)
28
+ return img
29
+
30
+ def postprocess(self, img):
31
+ img = (img.transpose(1,2,0).clip(-1,1) + 1) * 0.5
32
+ img = (img * 255)[:,:,::-1]
33
+ img = img.clip(0, 255).astype('uint8')
34
+ return img
35
+
36
+ def enhance(self, img):
37
+ img = self.preprocess(img)
38
+ with lock:
39
+ output = self.session.run(None, {'input':img})[0][0]
40
+ output = self.postprocess(output)
41
+ return output
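
A short sketch of how this restorer is typically driven on a cropped face; the ONNX model path is an assumption (the app resolves the real location itself), and the output comes back at the model's own resolution.

```python
# Hedged sketch: restore a single cropped face with the GFPGAN ONNX model.
# The model path is an assumption; any GFPGANv1.2/1.3/1.4 ONNX export works here.
import cv2
from upscaler.GFPGAN import GFPGAN

restorer = GFPGAN(
    model_path="assets/pretrained_models/GFPGANv1.4.onnx",
    provider=["CPUExecutionProvider"],
)

face = cv2.imread("cropped_face.jpg")      # BGR crop of any size
restored = restorer.enhance(face)          # returned at the model resolution (e.g. 512x512)
cv2.imwrite("restored_face.jpg", restored)
```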
upscaler/GPEN.py ADDED
@@ -0,0 +1,37 @@
1
+ import cv2
2
+ import torch
3
+ import onnxruntime
4
+ import numpy as np
5
+ import threading
6
+ import time
7
+
8
+ lock = threading.Lock()
9
+
10
+ class GPEN:
11
+ def __init__(self, model_path="GPEN-BFR-512.onnx", provider=["CPUExecutionProvider"], session_options=None):
12
+ self.session_options = session_options
13
+ if self.session_options is None:
14
+ self.session_options = onnxruntime.SessionOptions()
15
+ self.session = onnxruntime.InferenceSession(model_path, sess_options=self.session_options, providers=provider)
16
+ self.resolution = self.session.get_inputs()[0].shape[-2:]
17
+
18
+ def preprocess(self, img):
19
+ img = cv2.resize(img, self.resolution, interpolation=cv2.INTER_LINEAR)
20
+ img = img.astype(np.float32)[:,:,::-1] / 255.0
21
+ img = img.transpose((2, 0, 1))
22
+ img = (img - 0.5) / 0.5
23
+ img = np.expand_dims(img, axis=0).astype(np.float32)
24
+ return img
25
+
26
+ def postprocess(self, img):
27
+ img = (img.transpose(1,2,0).clip(-1,1) + 1) * 0.5
28
+ img = (img * 255)[:,:,::-1]
29
+ img = img.clip(0, 255).astype('uint8')
30
+ return img
31
+
32
+ def enhance(self, img):
33
+ img = self.preprocess(img)
34
+ with lock:
35
+ output = self.session.run(None, {'input':img})[0][0]
36
+ output = self.postprocess(output)
37
+ return output
upscaler/__init__.py ADDED
File without changes
upscaler/codeformer.py ADDED
@@ -0,0 +1,41 @@
1
+ import cv2
2
+ import torch
3
+ import onnxruntime
4
+ import numpy as np
5
+ import threading
6
+ import time
7
+
8
+ # codeformer converted to onnx
9
+ # using https://github.com/redthing1/CodeFormer
10
+
11
+ lock = threading.Lock()
12
+
13
+ class CodeFormer:
14
+ def __init__(self, model_path="codeformer.onnx", provider=["CPUExecutionProvider"], session_options=None):
15
+ self.session_options = session_options
16
+ if self.session_options is None:
17
+ self.session_options = onnxruntime.SessionOptions()
18
+ self.session = onnxruntime.InferenceSession(model_path, sess_options=self.session_options, providers=provider)
19
+ self.resolution = self.session.get_inputs()[0].shape[-2:]
20
+
21
+ def preprocess(self, img, w):
22
+ img = cv2.resize(img, self.resolution, interpolation=cv2.INTER_LINEAR)
23
+ img = img.astype(np.float32)[:,:,::-1] / 255.0
24
+ img = img.transpose((2, 0, 1))
25
+ img = (img - 0.5) / 0.5
26
+ img = np.expand_dims(img, axis=0).astype(np.float32)
27
+ w = np.array([w], dtype=np.double)
28
+ return img, w
29
+
30
+ def postprocess(self, img):
31
+ img = (img.transpose(1,2,0).clip(-1,1) + 1) * 0.5
32
+ img = (img * 255)[:,:,::-1]
33
+ img = img.clip(0, 255).astype('uint8')
34
+ return img
35
+
36
+ def enhance(self, img, w=0.9):
37
+ img, w = self.preprocess(img, w)
38
+ with lock:
39
+ output = self.session.run(None, {'x':img, 'w':w})[0][0]
40
+ output = self.postprocess(output)
41
+ return output
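
CodeFormer takes an extra fidelity weight `w` alongside the image; in the upstream CodeFormer convention, values nearer 1.0 lean towards identity fidelity while lower values favour restoration quality. A small hedged sketch (the model path is an assumption):

```python
# Hedged sketch: CodeFormer restoration with an explicit fidelity weight.
# Assumption: the ONNX export lives at this path.
import cv2
from upscaler.codeformer import CodeFormer

cf = CodeFormer(model_path="assets/pretrained_models/codeformer.onnx",
                provider=["CPUExecutionProvider"])

face = cv2.imread("cropped_face.jpg")
restored = cf.enhance(face, w=0.7)   # w closer to 1.0 preserves identity more strongly
cv2.imwrite("restored_face.jpg", restored)
```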
upscaler/restoreformer.py ADDED
@@ -0,0 +1,37 @@
1
+ import cv2
2
+ import torch
3
+ import onnxruntime
4
+ import numpy as np
5
+ import threading
6
+ import time
7
+
8
+ lock = threading.Lock()
9
+
10
+ class RestoreFormer:
11
+ def __init__(self, model_path="restoreformer.onnx", provider=["CPUExecutionProvider"], session_options=None):
12
+ self.session_options = session_options
13
+ if self.session_options is None:
14
+ self.session_options = onnxruntime.SessionOptions()
15
+ self.session = onnxruntime.InferenceSession(model_path, sess_options=self.session_options, providers=provider)
16
+ self.resolution = self.session.get_inputs()[0].shape[-2:]
17
+
18
+ def preprocess(self, img):
19
+ img = cv2.resize(img, self.resolution, interpolation=cv2.INTER_LINEAR)
20
+ img = img.astype(np.float32)[:,:,::-1] / 255.0
21
+ img = img.transpose((2, 0, 1))
22
+ img = (img - 0.5) / 0.5
23
+ img = np.expand_dims(img, axis=0).astype(np.float32)
24
+ return img
25
+
26
+ def postprocess(self, img):
27
+ img = (img.transpose(1,2,0).clip(-1,1) + 1) * 0.5
28
+ img = (img * 255)[:,:,::-1]
29
+ img = img.clip(0, 255).astype('uint8')
30
+ return img
31
+
32
+ def enhance(self, img):
33
+ img = self.preprocess(img)
34
+ with lock:
35
+ output = self.session.run(None, {'input':img})[0][0]
36
+ output = self.postprocess(output)
37
+ return output
utils/__init__.py ADDED
File without changes
utils/arcface.py ADDED
@@ -0,0 +1,89 @@
1
+ # -*- coding: utf-8 -*-
2
+ # @Organization : insightface.ai
3
+ # @Author : Jia Guo
4
+ # @Time : 2021-09-18
5
+ # @Function :
6
+
7
+
8
+ import os
9
+ import cv2
10
+ import onnx
11
+ import onnxruntime
12
+ import numpy as np
13
+ import default_paths as dp
14
+ from .face_alignment import norm_crop2
15
+
16
+
17
+ class ArcFace:
18
+ def __init__(self, model_file=None, provider=['CUDAExecutionProvider'], session_options=None):
19
+ assert model_file is not None
20
+ self.model_file = model_file
21
+ self.taskname = 'recognition'
22
+ find_sub = False
23
+ find_mul = False
24
+ model = onnx.load(self.model_file)
25
+ graph = model.graph
26
+ for nid, node in enumerate(graph.node[:8]):
27
+ #print(nid, node.name)
28
+ if node.name.startswith('Sub') or node.name.startswith('_minus'):
29
+ find_sub = True
30
+ if node.name.startswith('Mul') or node.name.startswith('_mul'):
31
+ find_mul = True
32
+ if find_sub and find_mul:
33
+ #mxnet arcface model
34
+ input_mean = 0.0
35
+ input_std = 1.0
36
+ else:
37
+ input_mean = 127.5
38
+ input_std = 127.5
39
+ self.input_mean = input_mean
40
+ self.input_std = input_std
41
+ #print('input mean and std:', self.input_mean, self.input_std)
42
+ self.session_options = session_options
43
+ if self.session_options is None:
44
+ self.session_options = onnxruntime.SessionOptions()
45
+ self.session = onnxruntime.InferenceSession(self.model_file, providers=provider, sess_options=self.session_options)
46
+ input_cfg = self.session.get_inputs()[0]
47
+ input_shape = input_cfg.shape
48
+ input_name = input_cfg.name
49
+ self.input_size = tuple(input_shape[2:4][::-1])
50
+ self.input_shape = input_shape
51
+ outputs = self.session.get_outputs()
52
+ output_names = []
53
+ for out in outputs:
54
+ output_names.append(out.name)
55
+ self.input_name = input_name
56
+ self.output_names = output_names
57
+ assert len(self.output_names)==1
58
+ self.output_shape = outputs[0].shape
59
+
60
+ def prepare(self, ctx_id, **kwargs):
61
+ if ctx_id<0:
62
+ self.session.set_providers(['CPUExecutionProvider'])
63
+
64
+ def get(self, img, kps):
65
+ aimg, matrix = norm_crop2(img, landmark=kps, image_size=self.input_size[0])
66
+ embedding = self.get_feat(aimg).flatten()
67
+ return embedding
68
+
69
+ def compute_sim(self, feat1, feat2):
70
+ from numpy.linalg import norm
71
+ feat1 = feat1.ravel()
72
+ feat2 = feat2.ravel()
73
+ sim = np.dot(feat1, feat2) / (norm(feat1) * norm(feat2))
74
+ return sim
75
+
76
+ def get_feat(self, imgs):
77
+ if not isinstance(imgs, list):
78
+ imgs = [imgs]
79
+ input_size = self.input_size
80
+
81
+ blob = cv2.dnn.blobFromImages(imgs, 1.0 / self.input_std, input_size,
82
+ (self.input_mean, self.input_mean, self.input_mean), swapRB=True)
83
+ net_out = self.session.run(self.output_names, {self.input_name: blob})[0]
84
+ return net_out
85
+
86
+ def forward(self, batch_data):
87
+ blob = (batch_data - self.input_mean) / self.input_std
88
+ net_out = self.session.run(self.output_names, {self.input_name: blob})[0]
89
+ return net_out
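
`compute_sim` is plain cosine similarity between two embeddings; elsewhere in the repo the result is compared against a user threshold (0.6 by default in `set_values`) to decide whether two faces match. A tiny standalone check with made-up vectors:

```python
# Standalone illustration of the cosine similarity used by compute_sim;
# the vectors are made up, real embeddings come from ArcFace.get(img, kps).
import numpy as np

a = np.array([1.0, 0.0, 1.0])
b = np.array([1.0, 0.0, 0.5])
sim = float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))
print(round(sim, 3))  # ~0.949; "same person" checks compare this against a threshold such as 0.6
```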
utils/device.py ADDED
@@ -0,0 +1,32 @@
1
+ import onnx
2
+ import onnxruntime
3
+
4
+ device_types_list = ["cpu", "cuda"]
5
+
6
+ available_providers = onnxruntime.get_available_providers()
7
+
8
+ def get_device_and_provider(device='cpu'):
9
+ options = onnxruntime.SessionOptions()
10
+ options.log_severity_level = 3
11
+ if device == 'cuda':
12
+ if "CUDAExecutionProvider" in available_providers:
13
+ provider = [("CUDAExecutionProvider", {"cudnn_conv_algo_search": "DEFAULT"}), "CPUExecutionProvider"]
14
+ options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL
15
+ else:
16
+ device = 'cpu'
17
+ provider = ["CPUExecutionProvider"]
18
+ else:
19
+ device = 'cpu'
20
+ provider = ["CPUExecutionProvider"]
21
+
22
+ return device, provider, options
23
+
24
+
25
+ data_type_bytes = {'uint8': 1, 'int8': 1, 'uint16': 2, 'int16': 2, 'float16': 2, 'float32': 4}
26
+
27
+
28
+ def estimate_max_batch_size(resolution, chunk_size=1024, data_type='float32', channels=3):
29
+ pixel_size = data_type_bytes.get(data_type, 1)
30
+ image_size = resolution[0] * resolution[1] * pixel_size * channels
31
+ number_of_batches = (chunk_size * 1024 * 1024) // image_size
32
+ return max(number_of_batches, 1)
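
Every loader in the project follows the same pattern: ask `get_device_and_provider` for a (device, provider, options) triple, then hand the provider list and session options to an ONNX Runtime session. A quick sketch:

```python
# Quick sketch: resolve the execution provider once and reuse it for every session.
from utils.device import get_device_and_provider

device, provider, options = get_device_and_provider(device="cuda")
print(device)    # "cuda" only if CUDAExecutionProvider is available, otherwise falls back to "cpu"
print(provider)  # e.g. [("CUDAExecutionProvider", {...}), "CPUExecutionProvider"] or ["CPUExecutionProvider"]
```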
utils/face_alignment.py ADDED
@@ -0,0 +1,73 @@
1
+ import cv2
2
+ import numpy as np
3
+
4
+
5
+ def umeyama(src, dst, estimate_scale):
6
+ num = src.shape[0]
7
+ dim = src.shape[1]
8
+ src_mean = src.mean(axis=0)
9
+ dst_mean = dst.mean(axis=0)
10
+ src_demean = src - src_mean
11
+ dst_demean = dst - dst_mean
12
+ A = np.dot(dst_demean.T, src_demean) / num
13
+ d = np.ones((dim,), dtype=np.double)
14
+ if np.linalg.det(A) < 0:
15
+ d[dim - 1] = -1
16
+ T = np.eye(dim + 1, dtype=np.double)
17
+ U, S, V = np.linalg.svd(A)
18
+ rank = np.linalg.matrix_rank(A)
19
+ if rank == 0:
20
+ return np.nan * T
21
+ elif rank == dim - 1:
22
+ if np.linalg.det(U) * np.linalg.det(V) > 0:
23
+ T[:dim, :dim] = np.dot(U, V)
24
+ else:
25
+ s = d[dim - 1]
26
+ d[dim - 1] = -1
27
+ T[:dim, :dim] = np.dot(U, np.dot(np.diag(d), V))
28
+ d[dim - 1] = s
29
+ else:
30
+ T[:dim, :dim] = np.dot(U, np.dot(np.diag(d), V.T))
31
+ if estimate_scale:
32
+ scale = 1.0 / src_demean.var(axis=0).sum() * np.dot(S, d)
33
+ else:
34
+ scale = 1.0
35
+ T[:dim, dim] = dst_mean - scale * np.dot(T[:dim, :dim], src_mean.T)
36
+ T[:dim, :dim] *= scale
37
+ return T
38
+
39
+
40
+ arcface_dst = np.array(
41
+ [[38.2946, 51.6963], [73.5318, 51.5014], [56.0252, 71.7366],
42
+ [41.5493, 92.3655], [70.7299, 92.2041]],
43
+ dtype=np.float32)
44
+
45
+
46
+ def estimate_norm(lmk, image_size=112, mode='arcface'):
47
+ assert lmk.shape == (5, 2)
48
+ assert image_size % 112 == 0 or image_size % 128 == 0
49
+ if image_size % 112 == 0:
50
+ ratio = float(image_size) / 112.0
51
+ diff_x = 0
52
+ else:
53
+ ratio = float(image_size) / 128.0
54
+ diff_x = 8.0 * ratio
55
+ dst = arcface_dst * ratio
56
+ dst[:, 0] += diff_x
57
+ M = umeyama(lmk, dst, True)[0:2, :]
58
+ return M
59
+
60
+
61
+ def norm_crop2(img, landmark, image_size=112, mode='arcface'):
62
+ M = estimate_norm(landmark, image_size, mode)
63
+ warped = cv2.warpAffine(img, M, (image_size, image_size), borderValue=0.0, borderMode=cv2.BORDER_REPLICATE)
64
+ return warped, M
65
+
66
+
67
+ def get_cropped_head(img, landmark, scale=1.4):
68
+ # it is ugly but works :D
69
+ center = np.mean(landmark, axis=0)
70
+ landmark = center + (landmark - center) * scale
71
+ M = estimate_norm(landmark, 128, mode='arcface')
72
+ warped = cv2.warpAffine(img, M/0.25, (512, 512), borderValue=0.0)
73
+ return warped
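
These helpers estimate a similarity transform from five landmarks (eyes, nose, mouth corners) to the ArcFace template and warp the face with it. A hedged sketch with made-up landmarks follows; real `kps` come from the RetinaFace detector further down.

```python
# Hedged sketch: align a face to the 112x112 ArcFace template from 5 landmarks.
# The frame and landmark values are made up purely to show the call shape.
import cv2
import numpy as np
from utils.face_alignment import norm_crop2, get_cropped_head

frame = np.zeros((480, 640, 3), dtype=np.uint8)          # stand-in frame
kps = np.array([[250., 210.], [330., 210.], [290., 260.],
                [262., 310.], [318., 310.]], dtype=np.float32)  # eyes, nose, mouth corners

aligned, M = norm_crop2(frame, kps, image_size=112)
head = get_cropped_head(frame, kps, scale=1.4)            # looser 512x512 crop for previews
print(aligned.shape, head.shape)                          # (112, 112, 3) (512, 512, 3)
```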
utils/gender_age.py ADDED
@@ -0,0 +1,25 @@
1
+ import cv2
2
+ import numpy as np
3
+ import onnxruntime
4
+ from .face_alignment import norm_crop2
5
+
6
+ class GenderAge:
7
+ def __init__(self, model_file=None, provider=['CPUExecutionProvider'], session_options=None):
8
+ self.model_file = model_file
9
+ self.session_options = session_options
10
+ if self.session_options is None:
11
+ self.session_options = onnxruntime.SessionOptions()
12
+ self.session = onnxruntime.InferenceSession(self.model_file, sess_options=self.session_options, providers=provider)
13
+
14
+ def predict(self, img, kps):
15
+ aimg, matrix = norm_crop2(img, kps, 128)
16
+
17
+ blob = cv2.resize(aimg, (62,62), interpolation=cv2.INTER_AREA)
18
+ blob = np.expand_dims(blob, axis=0).astype('float32')
19
+
20
+ _prob, _age = self.session.run(None, {'data':blob})
21
+ prob = _prob[0][0][0]
22
+ age = round(_age[0][0][0][0] * 100)
23
+ gender = np.argmax(prob)
24
+
25
+ return gender, age
utils/image.py ADDED
@@ -0,0 +1,252 @@
1
+ import cv2
2
+ import base64
3
+ import numpy as np
4
+
5
+
6
+ def laplacian_blending(A, B, m, num_levels=7):
7
+ assert A.shape == B.shape
8
+ assert B.shape == m.shape
9
+ height = m.shape[0]
10
+ width = m.shape[1]
11
+ size_list = np.array([4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192])
12
+ size = size_list[np.where(size_list > max(height, width))][0]
13
+ GA = np.zeros((size, size, 3), dtype=np.float32)
14
+ GA[:height, :width, :] = A
15
+ GB = np.zeros((size, size, 3), dtype=np.float32)
16
+ GB[:height, :width, :] = B
17
+ GM = np.zeros((size, size, 3), dtype=np.float32)
18
+ GM[:height, :width, :] = m
19
+ gpA = [GA]
20
+ gpB = [GB]
21
+ gpM = [GM]
22
+ for i in range(num_levels):
23
+ GA = cv2.pyrDown(GA)
24
+ GB = cv2.pyrDown(GB)
25
+ GM = cv2.pyrDown(GM)
26
+ gpA.append(np.float32(GA))
27
+ gpB.append(np.float32(GB))
28
+ gpM.append(np.float32(GM))
29
+ lpA = [gpA[num_levels-1]]
30
+ lpB = [gpB[num_levels-1]]
31
+ gpMr = [gpM[num_levels-1]]
32
+ for i in range(num_levels-1,0,-1):
33
+ LA = np.subtract(gpA[i-1], cv2.pyrUp(gpA[i]))
34
+ LB = np.subtract(gpB[i-1], cv2.pyrUp(gpB[i]))
35
+ lpA.append(LA)
36
+ lpB.append(LB)
37
+ gpMr.append(gpM[i-1])
38
+ LS = []
39
+ for la,lb,gm in zip(lpA,lpB,gpMr):
40
+ ls = la * gm + lb * (1.0 - gm)
41
+ LS.append(ls)
42
+ ls_ = LS[0]
43
+ for i in range(1,num_levels):
44
+ ls_ = cv2.pyrUp(ls_)
45
+ ls_ = cv2.add(ls_, LS[i])
46
+ ls_ = ls_[:height, :width, :]
47
+ #ls_ = (ls_ - np.min(ls_)) * (255.0 / (np.max(ls_) - np.min(ls_)))
48
+ return ls_.clip(0, 255)
49
+
50
+
51
+ def mask_crop(mask, crop):
52
+ top, bottom, left, right = crop
53
+ shape = mask.shape
54
+ top = int(top)
55
+ bottom = int(bottom)
56
+ if top + bottom < shape[0]: # top/bottom crop rows, so bound them by the mask height
57
+ if top > 0: mask[:top, :] = 0
58
+ if bottom > 0: mask[-bottom:, :] = 0
59
+
60
+ left = int(left)
61
+ right = int(right)
62
+ if left + right < shape[1]: # left/right crop columns, so bound them by the mask width
63
+ if left > 0: mask[:, :left] = 0
64
+ if right > 0: mask[:, -right:] = 0
65
+
66
+ return mask
67
+
68
+ def create_image_grid(images, size=128):
69
+ num_images = len(images)
70
+ num_cols = int(np.ceil(np.sqrt(num_images)))
71
+ num_rows = int(np.ceil(num_images / num_cols))
72
+ grid = np.zeros((num_rows * size, num_cols * size, 3), dtype=np.uint8)
73
+
74
+ for i, image in enumerate(images):
75
+ row_idx = (i // num_cols) * size
76
+ col_idx = (i % num_cols) * size
77
+ image = cv2.resize(image.copy(), (size,size))
78
+ if image.dtype != np.uint8:
79
+ image = (image.astype('float32') * 255).astype('uint8')
80
+ if image.ndim == 2:
81
+ image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
82
+ grid[row_idx:row_idx + size, col_idx:col_idx + size] = image
83
+
84
+ return grid
85
+
86
+
87
+ def paste_to_whole(foreground, background, matrix, mask=None, crop_mask=(0,0,0,0), blur_amount=0.1, erode_amount = 0.15, blend_method='linear'):
88
+ inv_matrix = cv2.invertAffineTransform(matrix)
89
+ fg_shape = foreground.shape[:2]
90
+ bg_shape = (background.shape[1], background.shape[0])
91
+ foreground = cv2.warpAffine(foreground, inv_matrix, bg_shape, borderValue=0.0, borderMode=cv2.BORDER_REPLICATE)
92
+
93
+ if mask is None:
94
+ mask = np.full(fg_shape, 1., dtype=np.float32)
95
+ mask = mask_crop(mask, crop_mask)
96
+ mask = cv2.warpAffine(mask, inv_matrix, bg_shape, borderValue=0.0)
97
+ else:
98
+ assert fg_shape == mask.shape[:2], "foreground & mask shape mismatch!"
99
+ mask = mask_crop(mask, crop_mask).astype('float32')
100
+ mask = cv2.warpAffine(mask, inv_matrix, (background.shape[1], background.shape[0]), borderValue=0.0)
101
+
102
+ _mask = mask.copy()
103
+ _mask[_mask > 0.05] = 1.
104
+ non_zero_points = cv2.findNonZero(_mask)
105
+ _, _, w, h = cv2.boundingRect(non_zero_points)
106
+ mask_size = int(np.sqrt(w * h))
107
+
108
+ if erode_amount > 0:
109
+ kernel_size = max(int(mask_size * erode_amount), 1)
110
+ structuring_element = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_size, kernel_size))
111
+ mask = cv2.erode(mask, structuring_element)
112
+
113
+ if blur_amount > 0:
114
+ kernel_size = max(int(mask_size * blur_amount), 3)
115
+ if kernel_size % 2 == 0:
116
+ kernel_size += 1
117
+ mask = cv2.GaussianBlur(mask, (kernel_size, kernel_size), 0)
118
+
119
+ mask = np.tile(np.expand_dims(mask, axis=-1), (1, 1, 3))
120
+
121
+ if blend_method == 'laplacian':
122
+ composite_image = laplacian_blending(foreground, background, mask.clip(0,1), num_levels=4)
123
+ else:
124
+ composite_image = mask * foreground + (1 - mask) * background
125
+
126
+ return composite_image.astype("uint8").clip(0, 255)
127
+
128
+
129
+ def image_mask_overlay(img, mask):
130
+ img = img.astype('float32') / 255.
131
+ img *= (mask + 0.25).clip(0, 1)
132
+ img = np.clip(img * 255., 0., 255.).astype('uint8')
133
+ return img
134
+
135
+
136
+ def resize_with_padding(img, expected_size=(640, 360), color=(0, 0, 0), max_flip=False):
137
+ original_height, original_width = img.shape[:2]
138
+
139
+ if max_flip and original_height > original_width:
140
+ expected_size = (expected_size[1], expected_size[0])
141
+
142
+ aspect_ratio = original_width / original_height
143
+ new_width = expected_size[0]
144
+ new_height = int(new_width / aspect_ratio)
145
+
146
+ if new_height > expected_size[1]:
147
+ new_height = expected_size[1]
148
+ new_width = int(new_height * aspect_ratio)
149
+
150
+ resized_img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_AREA)
151
+ canvas = cv2.copyMakeBorder(resized_img,
152
+ top=(expected_size[1] - new_height) // 2,
153
+ bottom=(expected_size[1] - new_height + 1) // 2,
154
+ left=(expected_size[0] - new_width) // 2,
155
+ right=(expected_size[0] - new_width + 1) // 2,
156
+ borderType=cv2.BORDER_CONSTANT, value=color)
157
+ return canvas
158
+
159
+
160
+ def create_image_grid(images, size=128):
161
+ num_images = len(images)
162
+ num_cols = int(np.ceil(np.sqrt(num_images)))
163
+ num_rows = int(np.ceil(num_images / num_cols))
164
+ grid = np.zeros((num_rows * size, num_cols * size, 3), dtype=np.uint8)
165
+
166
+ for i, image in enumerate(images):
167
+ row_idx = (i // num_cols) * size
168
+ col_idx = (i % num_cols) * size
169
+ image = cv2.resize(image.copy(), (size,size))
170
+ if image.dtype != np.uint8:
171
+ image = (image.astype('float32') * 255).astype('uint8')
172
+ if image.ndim == 2:
173
+ image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
174
+ grid[row_idx:row_idx + size, col_idx:col_idx + size] = image
175
+
176
+ return grid
177
+
178
+
179
+ def image_to_html(img, size=(640, 360), extension="jpg"):
180
+ if img is not None:
181
+ img = resize_with_padding(img, expected_size=size)
182
+ buffer = cv2.imencode(f".{extension}", img)[1]
183
+ base64_data = base64.b64encode(buffer.tobytes())
184
+ imgbs64 = f"data:image/{extension};base64," + base64_data.decode("utf-8")
185
+ html = '<div style="display: flex; justify-content: center; align-items: center; width: 100%;">'
186
+ html += f'<img src={imgbs64} alt="No Preview" style="max-width: 100%; max-height: 100%;">'
187
+ html += '</div>'
188
+ return html
189
+ return None
190
+
191
+
192
+ def mix_two_image(a, b, opacity=1.):
193
+ a_dtype = a.dtype
194
+ b_dtype = b.dtype
195
+ a = a.astype('float32')
196
+ b = b.astype('float32')
197
+ a = cv2.resize(a, (b.shape[1], b.shape[0])) # cv2.resize expects (width, height)
198
+ opacity = min(max(opacity, 0.), 1.)
199
+ mixed_img = opacity * b + (1 - opacity) * a
200
+ return mixed_img.astype(a_dtype)
201
+
202
+ resolution_map = {
203
+ "Original": None,
204
+ "240p": (426, 240),
205
+ "360p": (640, 360),
206
+ "480p": (854, 480),
207
+ "720p": (1280, 720),
208
+ "1080p": (1920, 1080),
209
+ "1440p": (2560, 1440),
210
+ "2160p": (3840, 2160),
211
+ }
212
+
213
+ def resize_image_by_resolution(img, quality):
214
+ resolution = resolution_map.get(quality, None)
215
+ if resolution is None:
216
+ return img
217
+
218
+ h, w = img.shape[:2]
219
+ if h > w:
220
+ ratio = resolution[0] / h
221
+ else:
222
+ ratio = resolution[0] / w
223
+
224
+ new_h, new_w = int(h * ratio), int(w * ratio)
225
+ img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA)
226
+ return img
227
+
228
+ def fast_pil_encode(pil_image):
229
+ image_arr = np.asarray(pil_image)[:,:,::-1]
230
+ buffer = cv2.imencode('.jpg', image_arr)[1]
231
+ base64_data = base64.b64encode(buffer.tobytes())
232
+ return "data:image/jpg;base64," + base64_data.decode("utf-8")
233
+
234
+ def fast_numpy_encode(img_array):
235
+ buffer = cv2.imencode('.jpg', img_array)[1]
236
+ base64_data = base64.b64encode(buffer.tobytes())
237
+ return "data:image/jpg;base64," + base64_data.decode("utf-8")
238
+
239
+ crf_quality_by_resolution = {
240
+ 240: {"poor": 45, "low": 35, "medium": 28, "high": 23, "best": 20},
241
+ 360: {"poor": 35, "low": 28, "medium": 23, "high": 20, "best": 18},
242
+ 480: {"poor": 28, "low": 23, "medium": 20, "high": 18, "best": 16},
243
+ 720: {"poor": 23, "low": 20, "medium": 18, "high": 16, "best": 14},
244
+ 1080: {"poor": 20, "low": 18, "medium": 16, "high": 14, "best": 12},
245
+ 1440: {"poor": 18, "low": 16, "medium": 14, "high": 12, "best": 10},
246
+ 2160: {"poor": 16, "low": 14, "medium": 12, "high": 10, "best": 8}
247
+ }
248
+
249
+ def get_crf_for_resolution(resolution, quality):
250
+ available_resolutions = list(crf_quality_by_resolution.keys())
251
+ closest_resolution = min(available_resolutions, key=lambda x: abs(x - resolution))
252
+ return crf_quality_by_resolution[closest_resolution][quality]
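
The last two helpers tie the UI's resolution and quality labels to concrete encode settings: a frame is first downscaled to the chosen preset, then an x264 CRF is looked up for that height. A short sketch using a made-up 4K frame:

```python
# Short sketch: map a resolution label and a VIDEO_QUALITY value to a CRF.
# The 4K stand-in frame is made up; the resulting values follow the tables above.
import numpy as np
from utils.image import resize_image_by_resolution, get_crf_for_resolution

frame = np.zeros((2160, 3840, 3), dtype=np.uint8)   # stand-in 4K frame
small = resize_image_by_resolution(frame, "720p")
crf = get_crf_for_resolution(small.shape[0], "high")
print(small.shape, crf)   # (720, 1280, 3) 16
```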
utils/io.py ADDED
@@ -0,0 +1,194 @@
1
+ import os
2
+ import cv2
3
+ import glob
4
+ import shutil
5
+ import subprocess
6
+ from datetime import datetime
7
+
8
+
9
+ image_extensions = ["jpg", "jpeg", "png", "bmp", "tiff", "ico", "webp"]
10
+
11
+ def get_images_from_directory(directory_path):
12
+ file_paths =[]
13
+ for file_path in glob.glob(os.path.join(directory_path, "*")):
14
+ if any(file_path.lower().endswith(ext) for ext in image_extensions):
15
+ file_paths.append(file_path)
16
+ file_paths.sort()
17
+ return file_paths
18
+
19
+
20
+ def open_directory(path=None):
21
+ if path is None:
22
+ return
23
+ try:
24
+ os.startfile(path)
25
+ except:
26
+ subprocess.Popen(["xdg-open", path])
27
+
28
+
29
+ def copy_files_to_directory(files, destination):
30
+ file_paths = []
31
+ for file_path in files:
32
+ new_file_path = shutil.copy(file_path, destination)
33
+ file_paths.append(new_file_path)
34
+ return file_paths
35
+
36
+
37
+ def create_directory(directory_path, remove_existing=True):
38
+ if os.path.exists(directory_path) and remove_existing:
39
+ shutil.rmtree(directory_path)
40
+
41
+ if not os.path.exists(directory_path):
42
+ os.mkdir(directory_path)
43
+ return directory_path
44
+ else:
45
+ counter = 1
46
+ while True:
47
+ new_directory_path = f"{directory_path}_{counter}"
48
+ if not os.path.exists(new_directory_path):
49
+ os.mkdir(new_directory_path)
50
+ return new_directory_path
51
+ counter += 1
52
+
53
+
54
+ def add_datetime_to_filename(filename):
55
+ current_datetime = datetime.now()
56
+ formatted_datetime = current_datetime.strftime("%Y%m%d_%H%M%S")
57
+ file_name, file_extension = os.path.splitext(filename)
58
+ new_filename = f"{file_name}_{formatted_datetime}{file_extension}"
59
+ return new_filename
60
+
61
+
62
+ def get_single_video_frame(video_path, frame_index):
63
+ cap = cv2.VideoCapture(video_path, cv2.CAP_FFMPEG)
64
+ total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
65
+ frame_index = min(int(frame_index), total_frames-1)
66
+ cap.set(cv2.CAP_PROP_POS_FRAMES, int(frame_index))
67
+ valid_frame, frame = cap.read()
68
+ cap.release()
69
+ if valid_frame:
70
+ frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
71
+ return frame
72
+ return None
73
+
74
+
75
+ def get_video_fps(video_path):
76
+ cap = cv2.VideoCapture(video_path)
77
+ fps = cap.get(cv2.CAP_PROP_FPS)
78
+ cap.release()
79
+ return fps
80
+
81
+
82
+ def ffmpeg_extract_frames(video_path, destination, remove_existing=True, fps=30, name='frame_%d.jpg', ffmpeg_path=None):
83
+ ffmpeg_path = 'ffmpeg' if ffmpeg_path is None else ffmpeg_path
84
+ destination = create_directory(destination, remove_existing=remove_existing)
85
+ cmd = [
86
+ ffmpeg_path,
87
+ '-loglevel', 'info',
88
+ '-hwaccel', 'auto',
89
+ '-i', video_path,
90
+ '-q:v', '3',
91
+ '-pix_fmt', 'rgb24',
92
+ '-vf', 'fps=' + str(fps),
93
+ '-y',
94
+ os.path.join(destination, name)
95
+ ]
96
+ process = subprocess.Popen(cmd)
97
+ process.communicate()
98
+ if process.returncode == 0:
99
+ return True, get_images_from_directory(destination)
100
+ else:
101
+ print(f"Error: Failed to extract video.")
102
+ return False, None
103
+
104
+
105
+ def ffmpeg_merge_frames(sequence_directory, pattern, destination, fps=30, crf=18, ffmpeg_path=None):
106
+ ffmpeg_path = 'ffmpeg' if ffmpeg_path is None else ffmpeg_path
107
+ cmd = [
108
+ ffmpeg_path,
109
+ '-loglevel', 'info',
110
+ '-hwaccel', 'auto',
111
+ '-r', str(fps),
112
+ # '-pattern_type', 'glob',
113
+ '-i', os.path.join(sequence_directory, pattern),
114
+ '-c:v', 'libx264',
115
+ '-crf', str(crf),
116
+ '-pix_fmt', 'yuv420p',
117
+ '-vf', 'colorspace=bt709:iall=bt601-6-625:fast=1',
118
+ '-y', destination
119
+ ]
120
+ process = subprocess.Popen(cmd)
121
+ process.communicate()
122
+ if process.returncode == 0:
123
+ return True, destination
124
+ else:
125
+ print(f"Error: Failed to merge image sequence.")
126
+ return False, None
127
+
128
+
129
+ def ffmpeg_replace_video_segments(main_video_path, sub_clips_info, output_path, ffmpeg_path="ffmpeg"):
130
+ ffmpeg_path = 'ffmpeg' if ffmpeg_path is None else ffmpeg_path
131
+ filter_complex = ""
132
+
133
+ filter_complex += f"[0:v]split=2[v0][main_end]; "
134
+ filter_complex += f"[1:v]split={len(sub_clips_info)}{', '.join([f'[v{index + 1}]' for index in range(len(sub_clips_info))])}; "
135
+
136
+ overlay_exprs = "".join([f"[v{index + 1}]" for index in range(len(sub_clips_info))])
137
+ overlay_filters = f"[main_end][{overlay_exprs}]overlay=eof_action=pass[vout]; "
138
+ filter_complex += overlay_filters
139
+
140
+ cmd = [
141
+ ffmpeg_path, '-i', main_video_path,
142
+ ]
143
+
144
+ for sub_clip_path, _, _ in sub_clips_info:
145
+ cmd.extend(['-i', sub_clip_path])
146
+
147
+ cmd.extend([
148
+ '-filter_complex', filter_complex,
149
+ '-map', '[vout]',
150
+ output_path
151
+ ])
152
+
153
+ subprocess.run(cmd)
154
+
155
+
156
+ def ffmpeg_mux_audio(source, target, output, ffmpeg_path=None):
157
+ ffmpeg_path = 'ffmpeg' if ffmpeg_path is None else ffmpeg_path
158
+ extracted_audio_path = os.path.join(os.path.dirname(output), 'extracted_audio.aac')
159
+ cmd1 = [
160
+ ffmpeg_path,
161
+ '-loglevel', 'info',
162
+ '-i', source,
163
+ '-vn',
164
+ '-c:a', 'aac',
165
+ '-y',
166
+ extracted_audio_path
167
+ ]
168
+ process = subprocess.Popen(cmd1)
169
+ process.communicate()
170
+ if process.returncode != 0:
171
+ print(f"Error: Failed to extract audio.")
172
+ return False, target
173
+
174
+ cmd2 = [
175
+ ffmpeg_path,
176
+ '-loglevel', 'info',
177
+ '-hwaccel', 'auto',
178
+ '-i', target,
179
+ '-i', extracted_audio_path,
180
+ '-c:v', 'copy',
181
+ '-map', '0:v:0',
182
+ '-map', '1:a:0',
183
+ '-y', output
184
+ ]
185
+ process = subprocess.Popen(cmd2)
186
+ process.communicate()
187
+ if process.returncode == 0:
188
+ if os.path.exists(extracted_audio_path):
189
+ os.remove(extracted_audio_path)
190
+ return True, output
191
+ else:
192
+ print(f"Error: Failed to mux audio.")
193
+ return False, None
194
+
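
Together these wrappers form the video path: extract frames with ffmpeg, process them one by one, merge the results back at the right CRF, then copy the original audio over. A hedged sketch of that loop; the paths are placeholders and ffmpeg must be on PATH.

```python
# Hedged sketch of the frame-level video pipeline these helpers support.
# Assumptions: "input.mp4" exists, ffmpeg is on PATH, and extracted frames are
# processed in place before merging.
from utils.io import (ffmpeg_extract_frames, ffmpeg_merge_frames,
                      ffmpeg_mux_audio, get_video_fps)

video = "input.mp4"
fps = get_video_fps(video)

ok, frame_paths = ffmpeg_extract_frames(video, "work_frames", fps=fps)
# ... run the per-frame swap (e.g. SwapMukham.process_frame) over frame_paths here ...
ok, merged = ffmpeg_merge_frames("work_frames", "frame_%d.jpg", "merged.mp4", fps=fps, crf=18)
ok, final = ffmpeg_mux_audio(video, merged, "output.mp4")
```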
utils/retinaface.py ADDED
@@ -0,0 +1,268 @@
1
+ # -*- coding: utf-8 -*-
2
+ # @Organization : insightface.ai
3
+ # @Author : Jia Guo
4
+ # @Time : 2021-09-18
5
+ # @Function :
6
+
7
+ from __future__ import division
8
+ import datetime
9
+ import numpy as np
10
+ import onnx
11
+ import onnxruntime
12
+ import os
13
+ import cv2
14
+ import sys
15
+ import default_paths as dp
16
+
17
+ def softmax(z):
18
+ assert len(z.shape) == 2
19
+ s = np.max(z, axis=1)
20
+ s = s[:, np.newaxis] # necessary step to do broadcasting
21
+ e_x = np.exp(z - s)
22
+ div = np.sum(e_x, axis=1)
23
+ div = div[:, np.newaxis] # ditto
24
+ return e_x / div
25
+
26
+ def distance2bbox(points, distance, max_shape=None):
27
+ """Decode distance prediction to bounding box.
28
+
29
+ Args:
30
+ points (Tensor): Shape (n, 2), [x, y].
31
+ distance (Tensor): Distance from the given point to 4
32
+ boundaries (left, top, right, bottom).
33
+ max_shape (tuple): Shape of the image.
34
+
35
+ Returns:
36
+ Tensor: Decoded bboxes.
37
+ """
38
+ x1 = points[:, 0] - distance[:, 0]
39
+ y1 = points[:, 1] - distance[:, 1]
40
+ x2 = points[:, 0] + distance[:, 2]
41
+ y2 = points[:, 1] + distance[:, 3]
42
+ if max_shape is not None:
43
+ x1 = x1.clamp(min=0, max=max_shape[1])
44
+ y1 = y1.clamp(min=0, max=max_shape[0])
45
+ x2 = x2.clamp(min=0, max=max_shape[1])
46
+ y2 = y2.clamp(min=0, max=max_shape[0])
47
+ return np.stack([x1, y1, x2, y2], axis=-1)
48
+
49
+ def distance2kps(points, distance, max_shape=None):
50
+ """Decode distance prediction to keypoints.
51
+
52
+ Args:
53
+ points (Tensor): Shape (n, 2), [x, y].
54
+ distance (Tensor): Distance from the given point to 4
55
+ boundaries (left, top, right, bottom).
56
+ max_shape (tuple): Shape of the image.
57
+
58
+ Returns:
59
+ Tensor: Decoded keypoints.
60
+ """
61
+ preds = []
62
+ for i in range(0, distance.shape[1], 2):
63
+ px = points[:, i%2] + distance[:, i]
64
+ py = points[:, i%2+1] + distance[:, i+1]
65
+ if max_shape is not None:
66
+ px = px.clamp(min=0, max=max_shape[1])
67
+ py = py.clamp(min=0, max=max_shape[0])
68
+ preds.append(px)
69
+ preds.append(py)
70
+ return np.stack(preds, axis=-1)
71
+
72
+ class RetinaFace:
73
+ def __init__(self, model_file=None, provider=["CPUExecutionProvider"], session_options=None):
74
+ self.model_file = model_file
75
+ self.session_options = session_options
76
+ if self.session_options is None:
77
+ self.session_options = onnxruntime.SessionOptions()
78
+ self.session = onnxruntime.InferenceSession(self.model_file, providers=provider, sess_options=self.session_options)
79
+ self.center_cache = {}
80
+ self.nms_thresh = 0.4
81
+ self.det_thresh = 0.5
82
+ self._init_vars()
83
+
84
+ def _init_vars(self):
85
+ input_cfg = self.session.get_inputs()[0]
86
+ input_shape = input_cfg.shape
87
+ #print(input_shape)
88
+ if isinstance(input_shape[2], str):
89
+ self.input_size = None
90
+ else:
91
+ self.input_size = tuple(input_shape[2:4][::-1])
92
+ #print('image_size:', self.image_size)
93
+ input_name = input_cfg.name
94
+ self.input_shape = input_shape
95
+ outputs = self.session.get_outputs()
96
+ output_names = []
97
+ for o in outputs:
98
+ output_names.append(o.name)
99
+ self.input_name = input_name
100
+ self.output_names = output_names
101
+ self.input_mean = 127.5
102
+ self.input_std = 128.0
103
+ #print(self.output_names)
104
+ #assert len(outputs)==10 or len(outputs)==15
105
+ self.use_kps = False
106
+ self._anchor_ratio = 1.0
107
+ self._num_anchors = 1
108
+ if len(outputs)==6:
109
+ self.fmc = 3
110
+ self._feat_stride_fpn = [8, 16, 32]
111
+ self._num_anchors = 2
112
+ elif len(outputs)==9:
113
+ self.fmc = 3
114
+ self._feat_stride_fpn = [8, 16, 32]
115
+ self._num_anchors = 2
116
+ self.use_kps = True
117
+ elif len(outputs)==10:
118
+ self.fmc = 5
119
+ self._feat_stride_fpn = [8, 16, 32, 64, 128]
120
+ self._num_anchors = 1
121
+ elif len(outputs)==15:
122
+ self.fmc = 5
123
+ self._feat_stride_fpn = [8, 16, 32, 64, 128]
124
+ self._num_anchors = 1
125
+ self.use_kps = True
126
+
127
+ def prepare(self, **kwargs):
128
+ nms_thresh = kwargs.get('nms_thresh', None)
129
+ if nms_thresh is not None:
130
+ self.nms_thresh = nms_thresh
131
+ det_thresh = kwargs.get('det_thresh', None)
132
+ if det_thresh is not None:
133
+ self.det_thresh = det_thresh
134
+ input_size = kwargs.get('input_size', None)
135
+ if input_size is not None:
136
+ if self.input_size is not None:
137
+ print('warning: det_size is already set in detection model, ignore')
138
+ else:
139
+ self.input_size = input_size
140
+
141
+ def forward(self, img, threshold):
142
+ scores_list = []
143
+ bboxes_list = []
144
+ kpss_list = []
145
+ input_size = tuple(img.shape[0:2][::-1])
146
+ blob = cv2.dnn.blobFromImage(img, 1.0/self.input_std, input_size, (self.input_mean, self.input_mean, self.input_mean), swapRB=True)
147
+ net_outs = self.session.run(self.output_names, {self.input_name : blob})
148
+
149
+ input_height = blob.shape[2]
150
+ input_width = blob.shape[3]
151
+ fmc = self.fmc
152
+ for idx, stride in enumerate(self._feat_stride_fpn):
153
+ scores = net_outs[idx]
154
+ bbox_preds = net_outs[idx+fmc]
155
+ bbox_preds = bbox_preds * stride
156
+ if self.use_kps:
157
+ kps_preds = net_outs[idx+fmc*2] * stride
158
+ height = input_height // stride
159
+ width = input_width // stride
160
+ K = height * width
161
+ key = (height, width, stride)
162
+ if key in self.center_cache:
163
+ anchor_centers = self.center_cache[key]
164
+ else:
165
+ anchor_centers = np.stack(np.mgrid[:height, :width][::-1], axis=-1).astype(np.float32)
166
+ anchor_centers = (anchor_centers * stride).reshape( (-1, 2) )
167
+ if self._num_anchors>1:
168
+ anchor_centers = np.stack([anchor_centers]*self._num_anchors, axis=1).reshape( (-1,2) )
169
+ if len(self.center_cache)<100:
170
+ self.center_cache[key] = anchor_centers
171
+
172
+ pos_inds = np.where(scores>=threshold)[0]
173
+ bboxes = distance2bbox(anchor_centers, bbox_preds)
174
+ pos_scores = scores[pos_inds]
175
+ pos_bboxes = bboxes[pos_inds]
176
+ scores_list.append(pos_scores)
177
+ bboxes_list.append(pos_bboxes)
178
+ if self.use_kps:
179
+ kpss = distance2kps(anchor_centers, kps_preds)
180
+ kpss = kpss.reshape( (kpss.shape[0], -1, 2) )
181
+ pos_kpss = kpss[pos_inds]
182
+ kpss_list.append(pos_kpss)
183
+ return scores_list, bboxes_list, kpss_list
184
+
185
+ def detect(self, img, input_size = (640,640), max_num=0, metric='default', det_thresh=0.5):
186
+ assert input_size is not None or self.input_size is not None
187
+ input_size = self.input_size if input_size is None else input_size
188
+
189
+ im_ratio = float(img.shape[0]) / img.shape[1]
190
+ model_ratio = float(input_size[1]) / input_size[0]
191
+ if im_ratio>model_ratio:
192
+ new_height = input_size[1]
193
+ new_width = int(new_height / im_ratio)
194
+ else:
195
+ new_width = input_size[0]
196
+ new_height = int(new_width * im_ratio)
197
+ det_scale = float(new_height) / img.shape[0]
198
+ resized_img = cv2.resize(img, (new_width, new_height))
199
+ det_img = np.zeros( (input_size[1], input_size[0], 3), dtype=np.uint8 )
200
+ det_img[:new_height, :new_width, :] = resized_img
201
+
202
+ scores_list, bboxes_list, kpss_list = self.forward(det_img, det_thresh)
203
+
204
+ scores = np.vstack(scores_list)
205
+ scores_ravel = scores.ravel()
206
+ order = scores_ravel.argsort()[::-1]
207
+ bboxes = np.vstack(bboxes_list) / det_scale
208
+ if self.use_kps:
209
+ kpss = np.vstack(kpss_list) / det_scale
210
+ pre_det = np.hstack((bboxes, scores)).astype(np.float32, copy=False)
211
+ pre_det = pre_det[order, :]
212
+ keep = self.nms(pre_det)
213
+ det = pre_det[keep, :]
214
+ if self.use_kps:
215
+ kpss = kpss[order,:,:]
216
+ kpss = kpss[keep,:,:]
217
+ else:
218
+ kpss = None
219
+ if max_num > 0 and det.shape[0] > max_num:
220
+ area = (det[:, 2] - det[:, 0]) * (det[:, 3] -
221
+ det[:, 1])
222
+ img_center = img.shape[0] // 2, img.shape[1] // 2
223
+ offsets = np.vstack([
224
+ (det[:, 0] + det[:, 2]) / 2 - img_center[1],
225
+ (det[:, 1] + det[:, 3]) / 2 - img_center[0]
226
+ ])
227
+ offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
228
+ if metric=='max':
229
+ values = area
230
+ else:
231
+ values = area - offset_dist_squared * 2.0 # some extra weight on the centering
232
+ bindex = np.argsort(
233
+ values)[::-1] # some extra weight on the centering
234
+ bindex = bindex[0:max_num]
235
+ det = det[bindex, :]
236
+ if kpss is not None:
237
+ kpss = kpss[bindex, :]
238
+ return det, kpss
239
+
240
+ def nms(self, dets):
241
+ thresh = self.nms_thresh
242
+ x1 = dets[:, 0]
243
+ y1 = dets[:, 1]
244
+ x2 = dets[:, 2]
245
+ y2 = dets[:, 3]
246
+ scores = dets[:, 4]
247
+
248
+ areas = (x2 - x1 + 1) * (y2 - y1 + 1)
249
+ order = scores.argsort()[::-1]
250
+
251
+ keep = []
252
+ while order.size > 0:
253
+ i = order[0]
254
+ keep.append(i)
255
+ xx1 = np.maximum(x1[i], x1[order[1:]])
256
+ yy1 = np.maximum(y1[i], y1[order[1:]])
257
+ xx2 = np.minimum(x2[i], x2[order[1:]])
258
+ yy2 = np.minimum(y2[i], y2[order[1:]])
259
+
260
+ w = np.maximum(0.0, xx2 - xx1 + 1)
261
+ h = np.maximum(0.0, yy2 - yy1 + 1)
262
+ inter = w * h
263
+ ovr = inter / (areas[i] + areas[order[1:]] - inter)
264
+
265
+ inds = np.where(ovr <= thresh)[0]
266
+ order = order[inds + 1]
267
+
268
+ return keep
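
Finally, the detector, aligner, and recognizer compose as sketched below. The ONNX file names are assumptions (typical insightface buffalo_l exports); in the app they are resolved through `default_paths`, as in the modules above.

```python
# Hedged end-to-end sketch: detect -> align -> embed for the first face in a photo.
# Model file names are assumptions (standard insightface buffalo_l exports).
import cv2
from utils.retinaface import RetinaFace
from utils.arcface import ArcFace
from utils.face_alignment import norm_crop2

detector = RetinaFace(model_file="assets/pretrained_models/det_10g.onnx",
                      provider=["CPUExecutionProvider"])
recognizer = ArcFace(model_file="assets/pretrained_models/w600k_r50.onnx",
                     provider=["CPUExecutionProvider"])

img = cv2.imread("photo.jpg")
bboxes, kpss = detector.detect(img, input_size=(640, 640), det_thresh=0.5)

if kpss is not None and len(kpss) > 0:
    aligned, _ = norm_crop2(img, kpss[0], image_size=112)   # 112x112 ArcFace crop
    embedding = recognizer.get(img, kpss[0])                # identity vector for the first face
    print(bboxes[0], embedding.shape)
```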