# This script is mostly based on the openpose preprocessor script of
# the sd-webui-controlnet project by Mikubill.
# https://github.com/Mikubill/sd-webui-controlnet/blob/main/annotator/openpose/face.py

import numpy as np
import onnxruntime as ort
import cv2
from PIL import Image
import pathlib
from typing import Tuple, Union, List
from tqdm import tqdm


def smart_resize(image: np.ndarray, shape: Tuple[int, int]) -> np.ndarray:
    """
    Resize an image to a target shape, choosing the interpolation method
    based on whether the image is being down-scaled or up-scaled.

    Parameters
    ----------
    image : np.ndarray
        The input image.
    shape : Tuple[int, int]
        The target shape (height, width).

    Returns
    -------
    np.ndarray
        The resized image.
    """

    Ht, Wt = shape
    if image.ndim == 2:
        Ho, Wo = image.shape
        Co = 1
    else:
        Ho, Wo, Co = image.shape
    if Co == 3 or Co == 1:
        k = float(Ht + Wt) / float(Ho + Wo)
        return cv2.resize(
            image,
            (int(Wt), int(Ht)),
            interpolation=cv2.INTER_AREA if k < 1 else cv2.INTER_LANCZOS4,
        )
    else:
        return np.stack(
            [smart_resize(image[:, :, i], shape) for i in range(Co)], axis=2
        )
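

# A minimal usage sketch for smart_resize (not part of the original script);
# the array below is a synthetic placeholder standing in for a real image:
#
#     dummy = np.zeros((480, 640, 3), dtype=np.uint8)
#     resized = smart_resize(dummy, (384, 384))  # down-scaling, so INTER_AREA is used
#     assert resized.shape == (384, 384, 3)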


class FaceLandmarkDetector:
    """
    The OpenPose face landmark detector, run with ONNX Runtime.

    Parameters
    ----------
    face_model_path : pathlib.Path
        The path to the ONNX model file.
    """

    def __init__(self, face_model_path: pathlib.Path) -> None:
        """
        Initialize the OpenPose face landmark detector model.

        Parameters
        ----------
        face_model_path : pathlib.Path
            The path to the ONNX model file.
        """

        # Initialize the ONNX Runtime session on the CPU; str() keeps older
        # onnxruntime releases happy, as they only accept str/bytes paths.
        self.session = ort.InferenceSession(
            str(face_model_path), providers=["CPUExecutionProvider"]
        )
        self.input_name = self.session.get_inputs()[0].name

    def _inference(self, face_img: np.ndarray) -> np.ndarray:
        """
        Run the OpenPose face landmark detector model on an image.

        Parameters
        ----------
        face_img : np.ndarray
            The input image.

        Returns
        -------
        np.ndarray
            The detected keypoints.
        """

        # face_img should be a numpy array: H x W x C (likely RGB or BGR)
        H, W, C = face_img.shape

        # Preprocessing
        w_size = 384  # ONNX is exported for this size
        # Resize input image
        resized_img = cv2.resize(
            face_img, (w_size, w_size), interpolation=cv2.INTER_LINEAR
        )

        # Normalize: /256.0 - 0.5 (mimicking original code)
        x_data = resized_img.astype(np.float32) / 256.0 - 0.5

        # Convert to channel-first format: (C, H, W)
        x_data = np.transpose(x_data, (2, 0, 1))

        # Add batch dimension: (1, C, H, W)
        x_data = np.expand_dims(x_data, axis=0)

        # Run inference
        outputs = self.session.run(None, {self.input_name: x_data})

        # Assuming the model's last output corresponds to the heatmaps
        # and is shaped like (1, num_parts, h_out, w_out)
        heatmaps_original = outputs[-1]

        # Remove batch dimension: (num_parts, h_out, w_out)
        heatmaps_original = np.squeeze(heatmaps_original, axis=0)

        # Resize the heatmaps back to the original image size
        num_parts = heatmaps_original.shape[0]
        heatmaps = np.zeros((num_parts, H, W), dtype=np.float32)
        for i in range(num_parts):
            heatmaps[i] = cv2.resize(
                heatmaps_original[i], (W, H), interpolation=cv2.INTER_LINEAR
            )

        peaks = self.compute_peaks_from_heatmaps(heatmaps)

        return peaks

    def __call__(
        self,
        face_img: Union[np.ndarray, List[np.ndarray], Image.Image, List[Image.Image]],
    ) -> List[np.ndarray]:
        """
        Run the OpenPose face landmark detector on one or more images.

        Parameters
        ----------
        face_img : Union[np.ndarray, List[np.ndarray], Image.Image, List[Image.Image]]
            The input image or a list of input images.

        Returns
        -------
        List[np.ndarray]
            The detected keypoints for each input image.
        """

        # Normalize the input(s) to a list of numpy arrays (H x W x C).
        if isinstance(face_img, Image.Image):
            image_list = [np.array(face_img)]
        elif isinstance(face_img, list):
            image_list = [
                np.array(img) if isinstance(img, Image.Image) else img
                for img in face_img
            ]
        elif isinstance(face_img, np.ndarray):
            # A 4-D array is treated as a batch; a 3-D array as a single image.
            image_list = [img for img in face_img] if face_img.ndim == 4 else [face_img]
        else:
            raise TypeError(
                "face_img must be a PIL image, a numpy array, or a list of them"
            )

        results = []

        for image in tqdm(image_list):
            keypoints = self._inference(image)
            results.append(keypoints)

        return results

    def compute_peaks_from_heatmaps(self, heatmaps: np.ndarray) -> np.ndarray:
        """
        Compute the peaks from the heatmaps.

        Parameters
        ----------
        heatmaps : np.ndarray
            The heatmaps.

        Returns
        -------
        np.ndarray
            The peak coordinates as a (num_parts, 2) array of [x, y] pairs;
            [-1, -1] marks parts with no peak above the threshold.
        """

        all_peaks = []
        for part in range(heatmaps.shape[0]):
            map_ori = heatmaps[part].copy()
            binary = np.ascontiguousarray(map_ori > 0.05, dtype=np.uint8)

            if np.sum(binary) == 0:
                all_peaks.append([-1, -1])
                continue

            positions = np.where(binary > 0.5)
            intensities = map_ori[positions]
            mi = np.argmax(intensities)
            y, x = positions[0][mi], positions[1][mi]
            all_peaks.append([x, y])

        return np.array(all_peaks)
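

# A minimal end-to-end sketch (not part of the original script). The model
# and image paths below are hypothetical placeholders; the ONNX file is
# assumed to be the OpenPose face network that FaceLandmarkDetector expects.
if __name__ == "__main__":
    model_path = pathlib.Path("models/openpose_face.onnx")  # hypothetical path
    detector = FaceLandmarkDetector(model_path)

    # An already-cropped face image; the detector works on H x W x C arrays.
    face = Image.open("face_crop.png").convert("RGB")  # hypothetical image

    keypoints = detector(face)[0]  # (num_parts, 2) array of [x, y] peaks
    # Parts with no confident detection are returned as [-1, -1].
    print(keypoints)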