Spaces:

FedeFT
/

Head_Pose_Estimation_and_LAEO_computation

Runtime error

App Files Files Community

federico commited on Aug 24, 2023

Commit

9d11120

1 Parent(s): 5beb0bf

Starting commit, requirements missing

Browse files

Files changed (9) hide show

ai/detection.py +293 -0
gradio_demo.py +128 -0
laeo_per_frame/__init__.py +0 -0
laeo_per_frame/interaction_per_frame_uncertainty.py +166 -0
utils/__init__.py +0 -0
utils/hpe.py +86 -0
utils/img_util.py +676 -0
utils/labels.py +333 -0
utils/my_utils.py +1375 -0

ai/detection.py ADDED Viewed

	@@ -0,0 +1,293 @@

+from utils.my_utils import rescale_bb, rescale_key_points, delete_items_from_array_aux, enlarge_bb
+from utils.labels import coco_category_index, face_category_index
+import time
+import numpy as np
+def detect(model, image, min_score_thresh, new_old_shape):
+    """
+    Detect objects in the image running the model
+    Args:
+        :model (tensorflow.python.saved_model): The Tensorflow object detection model
+        :image (numpy.ndarray): The image that is given as input to the object detection model
+        :min_score_threshold (float): The minimum score for the detections (detections with a score lower than this value will be discarded)
+        :new_old_shape (tuple): The first element represents the right padding (applied by resize_preserving_ar() function);
+                                the second element represents the bottom padding (applied by resize_preserving_ar() function) and
+                                the third element is a tuple that is the shape of the image after resizing without the padding (this is useful for
+                                the coordinates changes that we have to do)
+    Returns:
+        :detections (dict): dictionary with detection scores, classes, centroids and bounding box coordinates ordered by score in descending order
+        :inference_time (float): inference time for one image expressed in seconds
+    """
+    image = np.array(image).astype(np.uint8)
+    input_tensor = np.expand_dims(image, axis=0)
+    start_time = time.time()
+    det = model(input_tensor)
+    end_time = time.time()
+    detections = filter_detections(det, min_score_thresh, image.shape, new_old_shape)
+    inference_time = end_time - start_time
+    return detections, inference_time
+def filter_detections(detections, min_score_thresh, shape, new_old_shape=None):
+    """
+    Filter the detections based on a minimum threshold value and modify the bounding box coordinates if the image was resized for the detection
+    Args:
+        :detections (dict): The dictionary that outputs the model
+        :min_score_thresh (float): The minimum score for the detections (detections with a score lower than this value will be discarded)
+        :shape (tuple): The shape of the image
+        :new_old_shape (tuple): The first element represents the right padding (applied by resize_preserving_ar() function);
+                                the second element represents the bottom padding (applied by resize_preserving_ar() function) and
+                                the third element is a tuple that is the shape of the image after resizing without the padding (this is useful for
+                                the coordinates changes that we have to do)
+            (default is None)
+    Returns:
+        :filtered_detections (dict): dictionary with detection scores, classes, centroids and bounding box coordinates ordered by score in descending order
+    """
+    allowed_categories = ["person"]
+    # allowed_categories = ["Face"]  # if ssd face model
+    im_height, im_width, _ = shape
+    center_net = False
+    classes = detections['detection_classes'][0].numpy().astype(np.int32)
+    boxes = detections['detection_boxes'][0].numpy()
+    scores = detections['detection_scores'][0].numpy()
+    key_points_score = None
+    key_points = None
+    if 'detection_keypoint_scores' in detections:
+        key_points_score = detections['detection_keypoint_scores'][0].numpy()
+        key_points = detections['detection_keypoints'][0].numpy()
+        center_net = True
+    sorted_index = np.argsort(scores)[::-1]
+    scores = scores[sorted_index]
+    boxes = boxes[sorted_index]
+    classes = classes[sorted_index]
+    i = 0
+    while i < 10000:
+        if scores[i] < min_score_thresh:  # sorted
+            break
+        if coco_category_index[classes[i]]["name"] in allowed_categories:
+            i += 1
+        else:
+            scores = np.delete(scores, i)
+            boxes = delete_items_from_array_aux(boxes, i)
+            classes = np.delete(classes, i)
+            if center_net:
+                key_points_score = delete_items_from_array_aux(key_points_score, i)
+                key_points = delete_items_from_array_aux(key_points, i)
+    filtered_detections = dict()
+    filtered_detections['detection_classes'] = classes[:i]
+    rescaled_boxes = (boxes[:i])
+    if new_old_shape:
+        rescale_bb(rescaled_boxes, new_old_shape, im_width, im_height)
+        if center_net:
+            rescaled_key_points = key_points[:i]
+            rescale_key_points(rescaled_key_points, new_old_shape, im_width, im_height)
+    filtered_detections['detection_boxes'] = rescaled_boxes
+    filtered_detections['detection_scores'] = scores[:i]
+    if center_net:
+        filtered_detections['detection_keypoint_scores'] = key_points_score[:i]
+        filtered_detections['detection_keypoints'] = rescaled_key_points
+    aux_centroids = []
+    for bb in boxes[:i]:  # y_min, x_min, y_max, x_max
+        centroid_x = (bb[1] + bb[3]) / 2.
+        centroid_y = (bb[0] + bb[2]) / 2.
+        aux_centroids.append([centroid_x, centroid_y])
+    filtered_detections['detection_boxes_centroid'] = np.array(aux_centroids)
+    return filtered_detections
+# def detect_head_pose_ssd_face(image, detections, model, output_image):
+#     """
+#     Detect objects in the image running the model
+#
+#     Args:
+#         :model (tensorflow.python.saved_model): The Tensorflow object detection model
+#         :image (numpy.ndarray): The image that is given as input to the object detection model
+#         :min_score_threshold (float): The minimum score for the detections (detections with a score lower than this value will be discarded)
+#         :new_old_shape (tuple): The first element represents the right padding (applied by resize_preserving_ar() function);
+#                                 the second element represents the bottom padding (applied by resize_preserving_ar() function) and
+#                                 the third element is a tuple that is the shape of the image after resizing without the padding (this is useful for
+#                                 the coordinates changes that we have to do)
+#
+#     Returns:
+#         :detections (dict): dictionary with detection scores, classes, centroids and bounding box coordinates ordered by score in descending order
+#         :inference_time (float): inference time for one image expressed in seconds
+#     """
+#
+#     im_width, im_height = image.shape[1], image.shape[0]
+#     classes = detections['detection_classes']
+#     boxes = detections['detection_boxes']
+#
+#     i = 0
+#     while i < len(classes):  # for each bb (person)
+#         [y_min_perc, x_min_perc, y_max_perc, x_max_perc] = boxes[i]
+#         (x_min, x_max, y_min, y_max) = (int(x_min_perc * im_width), int(x_max_perc * im_width), int(y_min_perc * im_height), int(y_max_perc * im_height))
+#
+#         y_min_face, x_min_face, y_max_face, x_max_face = enlarge_bb(y_min, x_min, y_max, x_max, im_width, im_height)
+#         img_face = image[y_min_face:y_max_face, x_min_face:x_max_face]
+#         img_face = cv2.cvtColor(img_face, cv2.COLOR_BGR2RGB)
+#
+#         # img_face, _ = resize_preserving_ar(img_face, (224, 224))
+#         img_face = cv2.resize(img_face, (224, 224))
+#
+#         img_face = np.expand_dims(img_face, axis=0)
+#         yaw, pitch, roll = model.get_angle(img_face)
+#
+#         cv2.rectangle(output_image, (x_min_face, y_min_face), (x_max_face, y_max_face), (0, 0, 0), 2)
+#         # cv2.imshow("aa", output_image)
+#         # cv2.waitKey(0)
+#         # to original image coordinates
+#         x_min_orig, x_max_orig, y_min_orig, y_max_orig = x_min_face, x_max_face, y_min_face, y_max_face  # x_min_face + x_min, x_max_face + x_min, y_min_face + y_min, y_max_face+y_min
+#         draw_axis(output_image, yaw, pitch, roll, tdx=(x_min_orig + x_max_orig) / 2, tdy=(y_min_orig + y_max_orig) / 2,
+#                   size=abs(x_max_face - x_min_face))
+#
+#         i += 1
+#
+#     return output_image
+#
+#
+# def detect_head_pose(image, detections, model, detector, output_image):
+#     """
+#     Detect the pose of the head given an image and the person detected
+#
+#     Args:
+#         :image (numpy.ndarray): The image that is given as input
+#         :detections (dict):  dictionary with detection scores, classes, centroids and bounding box coordinates ordered by score in descending order
+#         :model (src.ai.whenet.WHENet): model to detect the pose of the head
+#         :detector (_dlib_pybind11.cnn_face_detection_model_v1): model to detect the face
+#         :output_image (numpy.ndarray): The output image where the drawings of the head pose will be done
+#
+#     Returns:
+#         :output_image (numpy.ndarray): The output image with the drawings of the head pose
+#     """
+#
+#     im_width, im_height = image.shape[1], image.shape[0]
+#     classes = detections['detection_classes']
+#     boxes = detections['detection_boxes']
+#
+#     i = 0
+#     while i < len(classes):  # for each bb (person)
+#         [y_min_perc, x_min_perc, y_max_perc, x_max_perc] = boxes[i]
+#         (x_min, x_max, y_min, y_max) = (int(x_min_perc * im_width), int(x_max_perc * im_width), int(y_min_perc * im_height), int(y_max_perc * im_height))
+#
+#         img_person = image[y_min:y_max, x_min:x_max]
+#
+#         # start_time = time.time()
+#         # img_face = img_person[:int(img_person.shape[0]/2), :]
+#         rect_faces = detection_dlib_cnn_face(detector,  img_person)
+#         # # rect_faces = detection_dlib_face(detector,  img_person)
+#         # end_time = time.time()
+#         # # print("Inference time dlib cnn: ", end_time - start_time)
+#
+#         if len(rect_faces) > 0:  # if the detector able to find faces
+#
+#             x_min_face, y_min_face, x_max_face, y_max_face = rect_faces[0][0], rect_faces[0][1], rect_faces[0][2], rect_faces[0][3]  # rect_faces[0][1]
+#             y_min_face, x_min_face, y_max_face, x_max_face = enlarge_bb(y_min_face, x_min_face, y_max_face, x_max_face, im_width, im_height)
+#
+#             img_face = img_person[y_min_face:y_max_face, x_min_face:x_max_face]
+#
+#             img_face = cv2.cvtColor(img_face, cv2.COLOR_BGR2RGB)
+#
+#             # img_face, _ = resize_preserving_ar(img_face, (224, 224))
+#             img_face = cv2.resize(img_face, (224, 224))
+#
+#             img_face = np.expand_dims(img_face, axis=0)
+#             # start_time = time.time()
+#             yaw, pitch, roll = model.get_angle(img_face)
+#             # end_time = time.time()
+#             # print("Inference time whenet: ", end_time - start_time)
+#
+#             cv2.rectangle(output_image, (x_min_face + x_min, y_min_face + y_min), (x_max_face + x_min, y_max_face + y_min), (0, 0, 0), 2)
+#             # to original image coordinates
+#             x_min_orig, x_max_orig, y_min_orig, y_max_orig = x_min_face + x_min, x_max_face + x_min, y_min_face + y_min, y_max_face+y_min
+#             draw_axis(output_image, yaw, pitch, roll, tdx=(x_min_orig + x_max_orig) / 2, tdy=(y_min_orig + y_max_orig) / 2,
+#                       size=abs(x_max_face - x_min_face))
+#             # draw_axis(image, yaw, pitch, roll, tdx=(x_min_face + x_max_face) / 2, tdy=(y_min_face + y_max_face) / 2,
+#             #           size=abs(x_max_face - x_min_face))
+#         else:  # otherwise
+#             # print("SHAPE ", img_person.shape)
+#             # x_min_face, y_min_face, x_max_face, y_max_face = int(img_person.shape[1]/8), 0, int(img_person.shape[1]-img_person.shape[1]/9), int(img_person.shape[0]/3)
+#             # img_face = img_person[y_min_face:y_max_face, x_min_face:x_max_face]
+#             # # img_face = resize_preserving_ar(img_face, (224, 224))
+#             # img_face = cv2.resize(img_face, (224, 224))
+#             # cv2.imshow("face_rsz", img_face)
+#             # cv2.waitKey(0)
+#             # img_face = np.expand_dims(img_face, axis=0)
+#             # # cv2.rectangle(img_face, (x_min_face, y_min_face), (x_max_face, y_max_face), (0, 0, 0), 1)
+#             # yaw, pitch, roll = model.get_angle(img_face)
+#             # print("YPR", yaw, pitch, roll)
+#             # draw_axis(img_person, yaw, pitch, roll, tdx=(x_min_face+x_max_face)/2, tdy=(y_min_face+y_max_face)/2, size=abs(x_max_face-x_min_face))
+#             # cv2.imshow('output', img_person)
+#             # cv2.waitKey(0)
+#             i += 1
+#             continue
+#
+#         i += 1
+#
+#     return output_image
+# def detect_head_pose_whenet(model, person, image):
+#
+#     """
+#     Detect the head pose using the whenet model and draw on image
+#
+#     Args:
+#         :model (): Whenet model
+#         :person ():
+#         :image (numpy.ndarray): The image that is given as input
+#
+#     Returns:
+#         :
+#     """
+#
+#     faces_coordinates = person.get_faces_coordinates()[-1]
+#
+#     y_min, x_min, y_max, x_max = faces_coordinates
+#
+#     image_face = image[y_min:y_max, x_min:x_max]
+#     img_face = cv2.cvtColor(image_face, cv2.COLOR_BGR2RGB)
+#
+#     # img_face, _ = resize_preserving_ar(img_face, (224, 224))
+#     img_face = cv2.resize(img_face, (224, 224))
+#
+#     img_face = np.expand_dims(img_face, axis=0)
+#     # start_time = time.time()
+#     yaw, pitch, roll = model.get_angle(img_face)
+#
+#     # end_time = time.time()
+#     # print("Inference time whenet: ", end_time - start_time)
+#     # cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 0, 0), 2)
+#
+#     # to original image coordinates
+#     x_min_orig, x_max_orig, y_min_orig, y_max_orig = x_min, x_max, y_min, y_max
+#     vector_norm = draw_axis(image, yaw, pitch, roll, tdx=(x_min_orig + x_max_orig) / 2, tdy=(y_min_orig + y_max_orig) / 2,
+#               size=abs(x_max - x_min))
+#
+#
+#     visualize_vector(image, [int((x_min_orig + x_max_orig) / 2), int((y_min_orig + y_max_orig) / 2)], vector_norm)
+#
+#     person.update_poses_ypr([yaw, pitch, roll])
+#     person.update_poses_vector_norm(vector_norm)
+    # cv2.imshow("", image)
+    # cv2.waitKey(0)

gradio_demo.py ADDED Viewed

	@@ -0,0 +1,128 @@

+import gdown
+import gradio as gr
+import logging
+import os
+import cv2
+import numpy as np
+import tensorflow as tf
+from ai.detection import detect
+from laeo_per_frame.interaction_per_frame_uncertainty import LAEO_computation
+from utils.hpe import hpe, project_ypr_in2d
+from utils.img_util import resize_preserving_ar, draw_detections, percentage_to_pixel, draw_key_points_pose, \
+    visualize_vector
+def load_image(camera, ):
+    # Capture the video frame by frame
+    try:
+        ret, frame = camera.read()
+        return True, frame
+    except:
+        logging.Logger('Error reading frame')
+        return False, None
+def demo_play(img, laeo=True, rgb=False):
+    # webcam in use
+    # gpus = tf.config.list_physical_devices('GPU')
+    # img = np.array(frame)
+    if not rgb:
+        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
+    img_resized, new_old_shape = resize_preserving_ar(img, input_shape_od_model)
+    print('inference centernet')
+    detections, elapsed_time = detect(model, img_resized, min_score_thresh,
+                                      new_old_shape)  # detection classes boxes scores
+    # probably to draw on resized
+    img_with_detections = draw_detections(img_resized, detections, max_boxes_to_draw, None, None, None)
+    # cv2.imshow("aa", img_with_detections)
+    det, kpt = percentage_to_pixel(img.shape, detections['detection_boxes'], detections['detection_scores'],
+                                   detections['detection_keypoints'], detections['detection_keypoint_scores'])
+    # center_xy, yaw, pitch, roll = head_pose_estimation(kpt, 'centernet', gaze_model=gaze_model)
+    # _________ extract hpe and print to img
+    people_list = []
+    print('inferece hpe')
+    for j, kpt_person in enumerate(kpt):
+        yaw, pitch, roll, tdx, tdy = hpe(gaze_model, kpt_person, detector='centernet')
+        # img = draw_axis_3d(yaw[0].numpy()[0], pitch[0].numpy()[0], roll[0].numpy()[0], image=img, tdx=tdx, tdy=tdy,
+        #                    size=50)
+        people_list.append({'yaw'      : yaw[0].numpy()[0],
+                            'yaw_u'    : 0,
+                            'pitch'    : pitch[0].numpy()[0],
+                            'pitch_u'  : 0,
+                            'roll'     : roll[0].numpy()[0],
+                            'roll_u'   : 0,
+                            'center_xy': [tdx, tdy]
+                            })
+    for i in range(len(det)):
+        img = draw_key_points_pose(img, kpt[i])
+    # call LAEO
+    clip_uncertainty = 0.5
+    binarize_uncertainty = False
+    if laeo:
+        interaction_matrix = LAEO_computation(people_list, clipping_value=clip_uncertainty,
+                                          clip=binarize_uncertainty)
+    else:
+        interaction_matrix = np.zeros((len(people_list), len(people_list)))
+    # coloured arrow print per person
+    # TODO coloured arrow print per person
+    for index, person in enumerate(people_list):
+        green = round((max(interaction_matrix[index, :])) * 255)
+        colour = (0, green, 0)
+        if green < 40:
+            colour = (0, 0, 255)
+        vector = project_ypr_in2d(person['yaw'], person['pitch'], person['roll'])
+        img = visualize_vector(img, person['center_xy'], vector, title="",
+                               color=colour)
+    return img
+demo = gr.Interface(
+    fn= demo_play,
+    inputs = [gr.Image(source="webcam", streaming=True),
+        gr.Checkbox(value=True, label="LAEO", info="Compute and display LAEO"),
+        gr.Checkbox(value=True, label="rgb", info="Display output on W/B image"),
+        ],
+    outputs="image",
+    live=True
+)
+if __name__ == '__main__':
+        if not os.path.exists("data"):
+        gdown.download_folder("https://drive.google.com/drive/folders/1nQ1Cb_tBEhWxy183t-mIcVH7AhAfa6NO?usp=drive_link",
+                              use_cookies=False)
+    gaze_model_path = 'data/head_pose_estimation'
+    gaze_model = tf.keras.models.load_model(gaze_model_path, custom_objects={"tf": tf})
+    path_to_model = 'data/keypoint_detector/centernet_hg104_512x512_kpts_coco17_tpu-32'
+    model = tf.saved_model.load(os.path.join(path_to_model, 'saved_model'))
+    input_shape_od_model = (512, 512)
+    # params
+    min_score_thresh, max_boxes_to_draw, min_distance = .45, 50, 1.5
+    print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
+    demo.launch()

laeo_per_frame/__init__.py ADDED Viewed

File without changes

laeo_per_frame/interaction_per_frame_uncertainty.py ADDED Viewed

	@@ -0,0 +1,166 @@

+'''Compute the interaction frame by frame, with no temporal consistency.
+    The uncertainty is also used to enlarge the visual cone.'''
+import re
+from math import sin, cos
+import numpy as np
+def project_ypr_in2d(yaw, pitch, roll):
+    """ Project yaw pitch roll on image plane. Result is NOT normalised.
+    :param yaw:
+    :param pitch:
+    :param roll:
+    :return:
+    """
+    pitch = pitch * np.pi / 180
+    yaw = -(yaw * np.pi / 180)
+    roll = roll * np.pi / 180
+    x3 = (sin(yaw))
+    y3 = (-cos(yaw) * sin(pitch))
+    # normalize the components
+    length = np.sqrt(x3 ** 2 + y3 ** 2)
+    # return [x3 / length, y3 / length]
+    return [x3, y3]
+def compute_interaction_cosine(head_position, gaze_direction, uncertainty, target, visual_cone=True):
+    """Computes the interaction between two people using the angle of view.
+    The interaction in measured as the cosine of the angle formed by the line from person A to B
+    and the gaze direction of person A.
+    Reference system of zero degree:
+    :param head_position: position of the head of person A
+    :param gaze_direction: gaze direction of the head of person A
+    :param target: position of head of person B
+    :param yaw:
+    :param pitch:
+    :param roll:
+    :param visual_cone: (default) True, if False gaze is a line, otherwise it is a cone (more like humans)
+    :return: float or double describing the quantity of interaction
+    """
+    if np.array_equal(head_position, target):
+        return 0  # or -1
+    else:
+        cone_aperture = None
+        if 0 <= uncertainty < 0.4:
+            cone_aperture = np.deg2rad(3)
+        elif 0.4 <= uncertainty <= 0.6:
+            cone_aperture = np.deg2rad(6)
+        elif 0.6 < uncertainty <= 1:
+            cone_aperture = np.deg2rad(9)
+        # direction from observer to target
+        _direction_ = np.arctan2((target[1] - head_position[1]), (target[0] - head_position[0]))
+        _direction_gaze_ = np.arctan2(gaze_direction[1], gaze_direction[0])
+        difference = _direction_ - _direction_gaze_  # radians
+        if visual_cone and (0 < difference < cone_aperture):
+            difference = 0
+        # difference of the line joining observer -> target with the gazing direction,
+        val = np.cos(difference)
+        if val < 0:
+            return 0
+        else:
+            return val
+def calculate_uncertainty(yaw_1, pitch_1, roll_1, clipping_value, clip=True):
+    # res_1 = abs((pitch_1 + yaw_1 + roll_1) / 3)
+    res_1 = abs((pitch_1 + yaw_1) / 2)
+    if clip:
+        # it binarize the uncertainty
+        if res_1 > clipping_value:
+            res_1 = clipping_value
+        else:
+            res_1 = 0
+    else:
+        # it leaves uncertainty untouched except for upper bound
+        if res_1 > clipping_value:
+            res_1 = clipping_value
+        elif res_1 < 0:
+            res_1 = 0
+    # normalize
+    res_1 = res_1 / clipping_value
+    # assert res_1 in [0, 1], 'uncertainty not binarized'
+    return res_1
+def atoi(text):
+    return int(text) if text.isdigit() else text
+def natural_keys(text):
+    '''
+           alist.sort(key=natural_keys) sorts in human order
+           http://nedbatchelder.com/blog/200712/human_sorting.html
+           (See Toothy's implementation in the comments)
+           '''
+    return [atoi(c) for c in re.split(r'(\d+)', text)]
+def delete_file_if_exist(*file_path):
+    for f in file_path:
+        if f.is_file():  # if exist already, replace
+            f.unlink(missing_ok=True)
+def LAEO_computation(people_list, clipping_value, clip):
+    #TODO here correct the average because -> 0+0.99-> LAEO, already corrected a bit
+    people_in_frame = len(people_list)
+    # create empty matrix with one entry per person in frame
+    matrix = np.empty((people_in_frame, people_in_frame))
+    interaction_matrix = np.zeros((people_in_frame, people_in_frame))
+    uncertainty_matrix = np.zeros((people_in_frame, people_in_frame))
+    norm_xy_all = []  # it will contains vector for printing
+    for subject in range(people_in_frame):
+        norm_xy = project_ypr_in2d(people_list[subject]['yaw'], people_list[subject]['pitch'],
+                                   people_list[subject]['roll'])
+        norm_xy_all.append(norm_xy)
+        uncertainty_1 = calculate_uncertainty(people_list[subject]['yaw_u'],
+                                              people_list[subject]['pitch_u'],
+                                              people_list[subject]['roll_u'], clipping_value=clipping_value,
+                                              clip=clip)
+        for object in range(people_in_frame):
+            uncertainty_2 = calculate_uncertainty(people_list[object]['yaw_u'],
+                                                  people_list[object]['pitch_u'],
+                                                  people_list[object]['roll_u'], clipping_value=clipping_value,
+                                                  clip=clip)
+            v = compute_interaction_cosine(people_list[subject]['center_xy'], norm_xy, uncertainty_1,
+                                           people_list[object]['center_xy'])
+            matrix[subject][object] = v
+            uncertainty_matrix[subject][object] = uncertainty_1
+            # uncertainty_matrix[object][subject] = uncertainty_2
+    # matrix is completed
+    for subject in range(people_in_frame):
+        for object in range(people_in_frame):
+            # take average of previous matrix
+            if matrix[subject][object] > 0.3 and matrix[object][subject] > 0.3:
+                v = (matrix[subject][object] + matrix[object][subject]) / 2
+                interaction_matrix[subject][object] = v
+            else:
+                interaction_matrix[subject][object] = 0
+    return interaction_matrix
+if __name__ == '__main__':
+    clip_uncertainty = 0
+    binarize_uncertainty = True
+    yaw, pitch, roll, tdx, tdy = 0, 0, 0, 0, 0
+    my_list = [{'yaw': yaw,
+                'pitch': pitch,
+                'roll': roll,
+                'center_xy': [tdx, tdy]}]
+    _ = LAEO_computation(my_list, clipping_value=clip_uncertainty, clip=binarize_uncertainty)

utils/__init__.py ADDED Viewed

File without changes

utils/hpe.py ADDED Viewed

	@@ -0,0 +1,86 @@

+import math
+import os
+import numpy as np
+import tensorflow as tf
+from utils.my_utils import normalize_wrt_maximum_distance_point, retrieve_interest_points
+def head_pose_estimation(kpt, detector, gaze_model, id_list=None):
+    fps, shape = 20, (1280, 720)
+    yaw_list, pitch_list, roll_list, yaw_u_list, pitch_u_list, roll_u_list = [], [], [], [], [], []
+    center_xy = []
+    for j, kpt_person in enumerate(kpt):
+        # TODO here change order if openpose
+        face_kpt = retrieve_interest_points(kpt_person, detector=detector)
+        tdx = np.mean([face_kpt[k] for k in range(0, 15, 3) if face_kpt[k] != 0.0])
+        tdy = np.mean([face_kpt[k + 1] for k in range(0, 15, 3) if face_kpt[k + 1] != 0.0])
+        if math.isnan(tdx) or math.isnan(tdy):
+            tdx = -1
+            tdy = -1
+        center_xy.append([tdx, tdy])
+        face_kpt_normalized = np.array(normalize_wrt_maximum_distance_point(face_kpt))
+        # print(type(face_kpt_normalized), face_kpt_normalized)
+        aux = tf.cast(np.expand_dims(face_kpt_normalized, 0), tf.float32)
+        yaw, pitch, roll = gaze_model(aux, training=False)
+        # print(yaw[0].numpy()[0], pitch, roll)
+        yaw_list.append(yaw[0].numpy()[0])
+        pitch_list.append(pitch[0].numpy()[0])
+        roll_list.append(roll[0].numpy()[0])
+        yaw_u_list.append(yaw[0].numpy()[1])
+        pitch_u_list.append(pitch[0].numpy()[1])
+        roll_u_list.append(roll[0].numpy()[1])
+        # print(id_lists[j])
+        # print('yaw: ', yaw[0].numpy()[0], 'yaw unc: ', yaw[0].numpy()[1], 'pitch: ', pitch[0].numpy()[0],
+        #       'pitch unc: ', pitch[0].numpy()[1], 'roll: ', roll[0].numpy()[0], 'roll unc: ', roll[0].numpy()[1])
+        # draw_axis(yaw.numpy(), pitch.numpy(), roll.numpy(), im_pose, tdx, tdy)
+    return center_xy, yaw_list, pitch_list, roll_list
+def hpe(gaze_model, kpt_person, detector):
+    # TODO here change order if openpose
+    face_kpt = retrieve_interest_points(kpt_person, detector=detector)
+    tdx = np.mean([face_kpt[k] for k in range(0, 15, 3) if face_kpt[k] != 0.0])
+    tdy = np.mean([face_kpt[k + 1] for k in range(0, 15, 3) if face_kpt[k + 1] != 0.0])
+    if math.isnan(tdx) or math.isnan(tdy):
+        tdx = -1
+        tdy = -1
+    # center_xy.append([tdx, tdy])
+    face_kpt_normalized = np.array(normalize_wrt_maximum_distance_point(face_kpt))
+    # print(type(face_kpt_normalized), face_kpt_normalized)
+    aux = tf.cast(np.expand_dims(face_kpt_normalized, 0), tf.float32)
+    yaw, pitch, roll = gaze_model(aux, training=False)
+    return yaw, pitch, roll, tdx, tdy
+def project_ypr_in2d(yaw, pitch, roll):
+    """ Project yaw pitch roll on image plane. Result is NOT normalised.
+    :param yaw:
+    :param pitch:
+    :param roll:
+    :return:
+    """
+    pitch = pitch * np.pi / 180
+    yaw = -(yaw * np.pi / 180)
+    roll = roll * np.pi / 180
+    x3 = (math.sin(yaw))
+    y3 = (-math.cos(yaw) * math.sin(pitch))
+    # normalize the components
+    length = np.sqrt(x3**2 + y3**2)
+    return [x3, y3]

utils/img_util.py ADDED Viewed

	@@ -0,0 +1,676 @@

+import cv2
+import os
+import json
+import numpy as np
+from math import cos, sin, pi
+from utils.labels import coco_category_index, rgb_colors, color_pose, color_pose_normalized, pose_id_part, face_category_index, body_parts_openpose, body_parts, face_points, face_points_openpose, pose_id_part_zedcam, face_points_zedcam, body_parts_zedcam
+# from src.utils.my_utils import fit_plane_least_square  # , retrieve_line_from_two_points
+def percentage_to_pixel(shape, bb_boxes, bb_boxes_scores, key_points=None, key_points_score=None):
+    """
+    Convert the detections from percentage to pixels coordinates; it works both for the bounding boxes and for the key points if passed
+    Args:
+        :img_shape (tuple): the shape of the image
+        :bb_boxes (numpy.ndarray): list of list each one representing the bounding box coordinates expressed in percentage [y_min_perc, x_min_perc, y_max_perc, x_max_perc]
+        :bb_boxes_scores (numpy.ndarray): list of score for each bounding box in range [0, 1]
+        :key_points (numpy.ndarray): list of list of list each one representing the key points coordinates expressed in percentage [y_perc, x_perc]
+        :key_points_score (numpy.ndarray): list of list each one representing the score associated to each key point in range [0, 1]
+    Returns:
+        :det (numpy.ndarray): list of lists each one representing the bounding box coordinates in pixels and the score associated to each bounding box [x_min, y_min, x_max, y_max, score]
+        :kpt (list): list of lists each one representing the key points detected in pixels and the score associated to each point [x, y, score]
+    """
+    im_width, im_height = shape[1], shape[0]
+    det, kpt = [], []
+    if key_points is not None:
+        key_points = key_points
+        key_points_score = key_points_score
+    for i, _ in enumerate(bb_boxes):
+        y_min, x_min, y_max, x_max = bb_boxes[i]
+        x_min_rescaled, x_max_rescaled, y_min_rescaled, y_max_rescaled = x_min * im_width, x_max * im_width, y_min * im_height, y_max * im_height
+        det.append([int(x_min_rescaled), int(y_min_rescaled), int(x_max_rescaled), int(y_max_rescaled), bb_boxes_scores[i]])
+        if key_points is not None:
+            aux_list = []
+            for n, key_point in enumerate(key_points[i]):  # y x
+                aux = [int(key_point[0] * im_height), int(key_point[1] * im_width), key_points_score[i][n]]
+                aux_list.append(aux)
+            kpt.append(aux_list)
+    det = np.array(det)
+    return det, kpt
+def draw_detections(image, detections, max_boxes_to_draw, violate=None, couple_points=None, draw_class_score=False):
+    """
+    Given an image and a dictionary of detections this function return the image with the drawings of the bounding boxes (with violations information if specified)
+    Args:
+        :img (numpy.ndarray): The image that is given as input to the object detection model
+        :detections (dict): The dictionary with the detections information (detection_classes, detection_boxes, detection_scores,
+            detection_keypoint_scores, detection_keypoints, detection_boxes_centroid)
+        :max_boxes_to_draw (int): The maximum number of bounding boxes to draw
+        :violate (set): The indexes of detections (sorted) that violate the minimum distance computed by my_utils.compute_distance function
+            (default is None)
+        :couple_points (list): A list of tuples each one containing the couple of indexes that violate the minimum distance (used to draw lines in
+            between to bounding boxes)
+            (default is None)
+        :draw_class_score (bool): If this value is set to True, in the returned image will be drawn the category and the score over each bounding box
+            (default is False)
+    Returns:
+        :img_with_drawings (numpy.ndarray): The image with the bounding boxes of each detected objects and optionally with the situations of violation
+    """
+    im_width, im_height = image.shape[1], image.shape[0]
+    img_with_drawings = image.copy()
+    classes = detections['detection_classes']
+    boxes = detections['detection_boxes']
+    scores = detections['detection_scores']
+    centroids = detections['detection_boxes_centroid']
+    red = (0, 0, 255)
+    i = 0
+    while i < max_boxes_to_draw and i < len(classes):
+        [y_min, x_min, y_max, x_max] = boxes[i]
+        (x_min_rescaled, x_max_rescaled, y_min_rescaled, y_max_rescaled) = (x_min * im_width, x_max * im_width, y_min * im_height, y_max * im_height)
+        start_point, end_point = (int(x_max_rescaled), int(y_max_rescaled)), (int(x_min_rescaled), int(y_min_rescaled))
+        # [cx, cy] = centroids[i]
+        # (cx_rescaled, cy_rescaled) = (int(cx * im_width), int(cy * im_height))
+        color = rgb_colors[classes[i]]
+        if violate:
+            if i in violate:
+                color = red
+        cv2.rectangle(img_with_drawings, start_point, end_point, color, 2)
+        # cv2.circle(img_with_drawings, (cx_rescaled, cy_rescaled), 2, color, 2)
+        if draw_class_score:
+            cv2.rectangle(img_with_drawings, end_point, (start_point[0], end_point[1] - 25), rgb_colors[classes[i]], -1)
+            text = face_category_index[classes[i]]['name'] + " {:.2f}".format(scores[i])
+            cv2.putText(img_with_drawings, text, end_point, cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 0), 2, cv2.LINE_AA)
+        i += 1
+    if couple_points and len(centroids) > 1:
+        for j in range(len(couple_points)):
+            pt1 = centroids[couple_points[j][0]][0], centroids[couple_points[j][0]][1]
+            pt2 = centroids[couple_points[j][1]][0], centroids[couple_points[j][1]][1]
+            cv2.line(img_with_drawings, pt1, pt2, red, 2)
+    text_location = (int(image.shape[1]-image.shape[1]/4), int(image.shape[0]/17))
+    font_scale = 0.8 * 1 / (640/image.shape[0])
+    thickness = int(2 * (image.shape[0]/640))
+    cv2.putText(img_with_drawings, "# of people : "+str(i), text_location, cv2.FONT_HERSHEY_SIMPLEX, font_scale, red, thickness, cv2.LINE_AA)
+    return img_with_drawings
+def resize_preserving_ar(image, new_shape):
+    """
+    Resize and pad the input image in order to make it usable by an object detection model (e.g. mobilenet 640x640)
+    Args:
+        :image (numpy.ndarray): The image that will be resized and padded
+        :new_shape (tuple): The shape of the image output (height, width)
+    Returns:
+        :res_image (numpy.ndarray): The image modified to have the new shape
+    """
+    (old_height, old_width, _) = image.shape
+    (new_height, new_width) = new_shape
+    if old_height != old_width:  # rectangle
+        ratio_h, ratio_w = new_height / old_height, new_width / old_width
+        if ratio_h > ratio_w:
+            dim = (new_width, int(old_height * ratio_w))
+            img = cv2.resize(image, dim, interpolation=cv2.INTER_CUBIC)
+            bottom_padding = int(new_height - int(old_height * ratio_w)) if int(new_height - int(old_height * ratio_w)) >= 0 else 0
+            img = cv2.copyMakeBorder(img, 0, bottom_padding, 0, 0, cv2.BORDER_CONSTANT)
+            pad = (0, bottom_padding, dim)
+        else:
+            dim = (int(old_width * ratio_h), new_height)
+            img = cv2.resize(image, dim, interpolation=cv2.INTER_CUBIC)
+            right_padding = int(new_width - int(old_width * ratio_h)) if int(new_width - int(old_width * ratio_h)) >= 0 else 0
+            img = cv2.copyMakeBorder(img, 0, 0, 0, right_padding, cv2.BORDER_CONSTANT)
+            pad = (right_padding, 0, dim)
+    else:  # square
+        img = cv2.resize(image, new_shape, new_height, new_width)
+        pad = (0, 0, (new_height, new_width))
+    return img, pad
+def resize_and_padding_preserving_ar(image, new_shape):
+    """ Resize and pad the input image in order to make it usable by a pose model (e.g. mobilenet-posenet takes as input 257x257 images)
+    Args:
+        :image (numpy.ndarray): The image that will be resized and padded
+        :new_shape (tuple): The shape of the image output
+    Returns:
+        :res_image (numpy.ndarray): The image modified to have the new shape
+    """
+    (old_height, old_width, _) = image.shape
+    (new_height, new_width) = new_shape
+    if old_height != old_width:  # rectangle
+        ratio_h, ratio_w = new_height / old_height, new_width / old_width
+        # print(img.shape, "\nRATIO: ", ratio_h, ratio_w)
+        if ratio_h < ratio_w:
+            ratio = new_shape[0] / old_height
+            dim = (int(old_width * ratio), new_width)
+            img = cv2.resize(image, dim)
+            right_padding = int(new_width - img.shape[1]) if int(new_width - img.shape[1]) >= 0 else 0
+            img = cv2.copyMakeBorder(img, 0, 0, 0, right_padding, cv2.BORDER_CONSTANT)
+        else:
+            ratio = new_shape[1] / old_width
+            dim = (new_height, int(old_height * ratio))
+            img = cv2.resize(image, dim)
+            bottom_padding = int(new_height - img.shape[0]) if int(new_width - img.shape[0]) >= 0 else 0
+            img = cv2.copyMakeBorder(img, 0, bottom_padding, 0, 0, cv2.BORDER_CONSTANT)
+    else:  # square
+        img = cv2.resize(image, new_shape)
+    img = img.astype(np.float32) / 255.
+    res_image = np.expand_dims(img, 0)
+    return res_image
+def draw_axis(yaw, pitch, roll, image=None, tdx=None, tdy=None, size=50):
+    """
+    Draw yaw pitch and roll axis on the image if passed as input and returns the vector containing the projection of the vector on the image plane
+    Args:
+        :yaw (float): value that represents the yaw rotation of the face
+        :pitch (float): value that represents the pitch rotation of the face
+        :roll (float): value that represents the roll rotation of the face
+        :image (numpy.ndarray): The image where the three vector will be printed
+            (default is None)
+        :tdx (float64): x coordinate from where the vector drawing start expressed in pixel coordinates
+            (default is None)
+        :tdy (float64): y coordinate from where the vector drawing start expressed in pixel coordinates
+            (default is None)
+        :size (int): value that will be multiplied to each x, y and z value that enlarge the "vector drawing"
+            (default is 50)
+    Returns:
+        :list_projection_xy (list): list containing the unit vector [x, y, z]
+    """
+    pitch = pitch * np.pi / 180
+    yaw = -(yaw * np.pi / 180)
+    roll = roll * np.pi / 180
+    if tdx != None and tdy != None:
+        tdx = tdx
+        tdy = tdy
+    else:
+        height, width = image.shape[:2]
+        tdx = width / 2
+        tdy = height / 2
+    # PROJECT 3D TO 2D XY plane (Z = 0)
+    # X-Axis pointing to right. drawn in red
+    x1 = size * (cos(yaw) * cos(roll)) + tdx
+    y1 = size * (cos(pitch) * sin(roll) + cos(roll) * sin(pitch) * sin(yaw)) + tdy
+    # Y-Axis | drawn in green
+    x2 = size * (-cos(yaw) * sin(roll)) + tdx
+    y2 = size * (cos(pitch) * cos(roll) - sin(pitch) * sin(yaw) * sin(roll)) + tdy
+    # Z-Axis (out of the screen) drawn in yellow #it was blue
+    x3 = size * (sin(yaw)) + tdx
+    y3 = size * (-cos(yaw) * sin(pitch)) + tdy
+    z3 = size * (cos(pitch) * cos(yaw)) + tdy
+    if image is not None:
+        cv2.line(image, (int(tdx), int(tdy)), (int(x1), int(y1)), (0, 0, 255), 2) # BGR->red
+        cv2.line(image, (int(tdx), int(tdy)), (int(x2), int(y2)), (0, 255, 0), 2) # BGR->green
+        cv2.line(image, (int(tdx), int(tdy)), (int(x3), int(y3)), (0, 255, 255), 2) # BGR->blue
+    list_projection_xy = [sin(yaw), -cos(yaw) * sin(pitch)]
+    return list_projection_xy
+def visualize_vector(image, center, unit_vector, title="", color=(0, 0, 255)):
+    """
+    Draw the projected vector on the image plane and return the image
+    Args:
+        :image (numpy.ndarray): The image where the vector will be printed
+        :center (list): x, y coordinates in pixels of the starting point from where the vector is drawn
+        :unit_vector (list): vector of the gaze in the form [gx, gy]
+        :title (string): title displayed in the imshow function
+            (default is "")
+        :color (tuple): color value of the vector drawn on the image
+            (default is (0, 0, 255))
+    Returns:
+        :result (numpy.ndarray): The image with the vectors drawn
+    """
+    unit_vector_draw = [unit_vector[0] * image.shape[0]*0.15, unit_vector[1] * image.shape[0]*0.15]
+    point = [center[0] + unit_vector_draw[0], center[1] + unit_vector_draw[1]]
+    result = cv2.arrowedLine(image, (int(center[0]), int(center[1])), (int(point[0]), int(point[1])), color, thickness=4, tipLength=0.3)
+    return result
+def draw_key_points_pose(image, kpt, openpose=False):
+    """
+    Draw the key points and the lines connecting them; it expects the output of CenterNet (not OpenPose format)
+    Args:
+        :image (numpy.ndarray): The image where the lines connecting the key points will be printed
+        :kpt (list): list of lists of points detected for each person [[x1, y1, c1], [x2, y2, c2],...] where x and y represent the coordinates of each
+            point while c represents the confidence
+    Returns:
+        :img (numpy.ndarray): The image with the drawings of lines and key points
+    """
+    parts = body_parts_openpose if openpose else body_parts
+    kpt_score = None
+    threshold = 0.4
+    overlay = image.copy()
+    face_pts = face_points_openpose if openpose else face_points
+    for j in range(len(kpt)):
+        # 0 nose, 1/2 left/right eye, 3/4 left/right ear
+        color = color_pose["blue"]
+        if j == face_pts[0]:
+            color = color_pose["purple"]# naso
+        if j == face_pts[1]:
+            color = color_pose["green"]#["light_pink"]#Leye
+        if j == face_pts[2]:
+            color = color_pose["dark_pink"]#Reye
+        if j == face_pts[3]:
+            color = color_pose["light_orange"]#LEar
+        if j == face_pts[4]:
+            color = color_pose["yellow"]# REar
+        if openpose:
+            cv2.circle(image, (int(kpt[j][0]), int(kpt[j][1])), 1, color, 2)
+        else:
+            cv2.circle(image, (int(kpt[j][1]), int(kpt[j][0])), 1, color, 2)
+        # cv2.putText(img, pose_id_part[i], (int(kpts[j][i, 1] * img.shape[1]), int(kpts[j][i, 0] * img.shape[0])), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 1, cv2.LINE_AA)
+    for part in parts:
+        if int(kpt[part[0]][1]) != 0 and int(kpt[part[0]][0]) != 0 and int(kpt[part[1]][1]) != 0 and int(
+                kpt[part[1]][0]) != 0:
+            if openpose:
+                cv2.line(overlay, (int(kpt[part[0]][0]), int(kpt[part[0]][1])), (int(kpt[part[1]][0]), int(kpt[part[1]][1])), (255, 255, 255), 2)
+            else:
+                cv2.line(overlay, (int(kpt[part[0]][1]), int(kpt[part[0]][0])),
+                         (int(kpt[part[1]][1]), int(kpt[part[1]][0])), (255, 255, 255), 2)
+    alpha = 0.4
+    image = cv2.addWeighted(overlay, alpha, image, 1 - alpha, 0)
+    return image
+def draw_key_points_pose_zedcam(image, kpt, openpose=True):
+    """
+    Draw the key points and the lines connecting them; it expects the output of CenterNet (not OpenPose format)
+    Args:
+        :image (numpy.ndarray): The image where the lines connecting the key points will be printed
+        :kpt (list): list of lists of points detected for each person [[x1, y1, c1], [x2, y2, c2],...] where x and y represent the coordinates of each
+            point while c represents the confidence
+    Returns:
+        :img (numpy.ndarray): The image with the drawings of lines and key points
+    """
+    parts = body_parts_zedcam
+    kpt_score = None
+    threshold = 0.4
+    overlay = image.copy()
+    face_pts = face_points_zedcam
+    for j in range(len(kpt)):
+        # 0 nose, 1/2 left/right eye, 3/4 left/right ear
+        color = color_pose["blue"]
+        if j == face_pts[0]:  # naso
+            color = color_pose["purple"]
+        if j == face_pts[1]:
+            color = color_pose["light_pink"]
+        if j == face_pts[2]:
+            color = color_pose["dark_pink"]
+        if j == face_pts[3]:
+            color = color_pose["light_orange"]
+        if j == face_pts[4]:
+            color = color_pose["dark_orange"]
+        if openpose:
+            cv2.circle(image, (int(kpt[j][0]), int(kpt[j][1])), 1, color, 2)
+        else:
+            cv2.circle(image, (int(kpt[j][1]), int(kpt[j][0])), 1, color, 2)
+        # cv2.putText(img, pose_id_part[i], (int(kpts[j][i, 1] * img.shape[1]), int(kpts[j][i, 0] * img.shape[0])), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 1, cv2.LINE_AA)
+    for part in parts:
+        if int(kpt[part[0]][1]) != 0 and int(kpt[part[0]][0]) != 0 and int(kpt[part[1]][1]) != 0 and int(
+                kpt[part[1]][0]) != 0:
+            if openpose:
+                cv2.line(overlay, (int(kpt[part[0]][0]), int(kpt[part[0]][1])), (int(kpt[part[1]][0]), int(kpt[part[1]][1])), (255, 255, 255), 2)
+            else:
+                cv2.line(overlay, (int(kpt[part[0]][1]), int(kpt[part[0]][0])),
+                         (int(kpt[part[1]][1]), int(kpt[part[1]][0])), (255, 255, 255), 2)
+    alpha = 0.4
+    image = cv2.addWeighted(overlay, alpha, image, 1 - alpha, 0)
+    return image
+def plot_3d_points(list_points):
+    """
+    Plot points in 3D
+    Args:
+        :list_points: A list of lists representing the points; each point has (x, y, z) coordinates represented by the first, second and third element of each list
+    Returns:
+    """
+    if list_points == []:
+        return
+    import matplotlib.pyplot as plt
+    fig = plt.figure()
+    ax = fig.add_subplot(111, projection='3d')
+    for point in list_points:
+        ax.scatter(point[0], point[1], point[2], c=np.array(0), marker='o')
+    ax.set_xlabel('x')
+    ax.set_ylabel('y')
+    ax.set_zlabel('z')
+    plt.show()
+    return
+def draw_on_img(image, center, id_, res):
+    """
+    Draw arrow illustrating gaze direction on the image
+    Args:
+        :image (numpy.ndarray): The image where the vector will be printed
+        :center (list): x, y coordinates in pixels of the starting point from where the vector is drawn
+        :id_ (string): title displayed in the imshow function
+            (default is "")
+        :res (list): vector of the gaze in the form [gx, gy]
+    Returns:
+        :img_arrow (numpy.ndarray): The image with the vector drawn
+    """
+    res[0] *= image.shape[0]
+    res[1] *= image.shape[1]
+    norm1 = res / np.linalg.norm(res)
+    norm_aux = [norm1[0], norm1[1]]  # normalized vectors
+    norm1[0] *= image.shape[0]*0.15
+    norm1[1] *= image.shape[0]*0.15
+    point = center + norm1
+    img_arrow = cv2.arrowedLine(image.copy(), (int(center[1]), int(center[0])), (int(point[1]), int(point[0])), (0, 0, 255), thickness=2, tipLength=0.2)
+    return img_arrow, [norm_aux, center]
+def confusion_matrix(conf_matrix, target_names=None, title="", cmap=None):
+    """
+    Create the image of the confusion matrix given a matrix as input
+    Args:
+        :conf_matrix (list): list of lists that represent an MxM matrix e.g. [[v11, v12, v13], [v21, v22, v23], [v31, v32, v33]]
+        :target_names (list): list of target name of dimension M e.g. [[label1, label2, label3]]
+            (default is None)
+        :title (string): title string to be printed in the confusion matrix
+            (default is "")
+        :cmap (string): colormap that will be used by the confusion matrix
+            (default is None)
+    Returns:
+        :gbr (numpy.ndarray): The image where the lines connecting the key points will be printed
+    """
+    from laeo_per_frame.interaction_per_frame_uncertainty import LAEO_computation
+    import matplotlib.pyplot as plt
+    if not conf_matrix:
+        return []
+    # if cmap is None:
+    #     cmap = plt.get_cmap('Blues')
+    plt.rcParams['xtick.bottom'] = plt.rcParams['xtick.labelbottom'] = False
+    plt.rcParams['xtick.top'] = plt.rcParams['xtick.labeltop'] = True
+    fig, ax = plt.subplots(figsize=(6, 4))  # 2, 2, figsize=(6, 4))
+    cax = ax.imshow(conf_matrix)
+    for i in range(len(conf_matrix[0])):
+        for j in range(len(conf_matrix[1])):
+            ax.text(j, i, str(np.around(conf_matrix[i][j], 3)), va='center', ha='center', color="black")
+    if target_names is not None:
+        ax.set_xticks(np.arange(len(target_names)))
+        ax.set_yticks(np.arange(len(target_names)))
+        ax.set_xticklabels(target_names)
+        ax.set_yticklabels(target_names)
+    plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")
+    fig.tight_layout()
+    fig.colorbar(cax)
+    # plt.show()
+    fig.canvas.draw()
+    width, height = fig.get_size_inches() * fig.get_dpi()
+    aux_img = np.fromstring(fig.canvas.tostring_rgb(), dtype='uint8').reshape(int(height), int(width), 3)
+    gbr = aux_img[..., [2, 0, 1]].copy()
+    # cv2.imshow("1312", gbr)
+    # cv2.waitKey(0)
+    return gbr
+def join_images(image1, image2):
+    """
+    Join two images vertically into a new image with the height that is the maximum height of the two images passed as input and the width that is
+    the sum of the widths of the two images passed as input
+    Args:
+        :image1 (numpy.ndarray): The image that will be in the left part of the joined images
+        :image2 (numpy.ndarray): The image that will be in the right part of the joined images
+    Returns:
+        :joined_image (numpy.ndarray): The image that is the results of the merge of the two images passed as input
+    """
+    if type(image1) == list or type(image2) == list:
+        return None
+    image1_width, image1_height, image2_width, image2_height = image1.shape[1], image1.shape[0], image2.shape[1], image2.shape[0]
+    new_shape_height = max(image1_height, image2_height)
+    new_shape = (new_shape_height, image1_width + image2_width, 3)
+    joined_image = np.zeros(new_shape, dtype=np.uint8)
+    joined_image[:image1_height, :image1_width, :] = image1
+    joined_image[:image2_height, image1_width:, :] = image2
+    cv2.imshow("", cv2.resize(joined_image, (1200, 500)))
+    cv2.waitKey(0)
+    return joined_image
+def draw_axis_from_json(img, json_file):
+    if os.path.isfile(json_file):
+        cv2.imshow("", img)
+        cv2.waitKey(0)
+        with open(json_file) as f:
+            data = json.load(f)
+            print(data)
+            aux = data['people']
+            for elem in aux:
+                draw_axis(elem['yaw'][0], elem['pitch'][0], elem['roll'][0], img, elem['center_xy'][0], elem['center_xy'][1])
+        cv2.imshow("", img)
+        cv2.waitKey(0)
+    return
+def points_on_circumference(center=(0, 0), r=50, n=100):
+    return [(center[0] + (cos(2 * pi / n * x) * r), center[1] + (sin(2 * pi / n * x) * r)) for x in range(0, n + 1)]
+def draw_cones(yaw, pitch, roll, unc_yaw, unc_pitch, unc_roll, image=None, tdx=None, tdy=None, size=300):
+    """
+    Draw yaw pitch and roll axis on the image if passed as input and returns the vector containing the projection of the vector on the image plane
+    Args:
+        :yaw (float): value that represents the yaw rotation of the face
+        :pitch (float): value that represents the pitch rotation of the face
+        :roll (float): value that represents the roll rotation of the face
+        :image (numpy.ndarray): The image where the three vector will be printed
+            (default is None)
+        :tdx (float64): x coordinate from where the vector drawing start expressed in pixel coordinates
+            (default is None)
+        :tdy (float64): y coordinate from where the vector drawing start expressed in pixel coordinates
+            (default is None)
+        :size (int): value that will be multiplied to each x, y and z value that enlarge the "vector drawing"
+            (default is 50)
+    Returns:
+        :list_projection_xy (list): list containing the unit vector [x, y, z]
+    """
+    pitch = pitch * np.pi / 180
+    yaw = -(yaw * np.pi / 180)
+    roll = roll * np.pi / 180
+    if tdx != None and tdy != None:
+        tdx = tdx
+        tdy = tdy
+    else:
+        height, width = image.shape[:2]
+        tdx = width / 2
+        tdy = height / 2
+    # PROJECT 3D TO 2D XY plane (Z = 0)
+    # X-Axis pointing to right. drawn in red
+    x1 = size * (cos(yaw) * cos(roll)) + tdx
+    y1 = size * (cos(pitch) * sin(roll) + cos(roll) * sin(pitch) * sin(yaw)) + tdy
+    # Y-Axis | drawn in green
+    x2 = size * (-cos(yaw) * sin(roll)) + tdx
+    y2 = size * (cos(pitch) * cos(roll) - sin(pitch) * sin(yaw) * sin(roll)) + tdy
+    # Z-Axis (out of the screen) drawn in blue
+    x3 = size * (sin(yaw)) + tdx
+    y3 = size * (-cos(yaw) * sin(pitch)) + tdy
+    z3 = size * (cos(pitch) * cos(yaw)) + tdy
+    unc_mean = (unc_yaw + unc_pitch + unc_roll) / 3
+    radius = 12 * unc_mean
+    overlay = image.copy()
+    if image is not None:
+        # cv2.line(image, (int(tdx), int(tdy)), (int(x1), int(y1)), (0, 0, 255), 2)
+        # cv2.line(image, (int(tdx), int(tdy)), (int(x2), int(y2)), (0, 255, 0), 2)
+        cv2.line(image, (int(tdx), int(tdy)), (int(x3), int(y3)), (255, 0, 0), 2)
+        points = points_on_circumference((int(x3), int(y3)), radius, 400)
+        for point in points:
+            cv2.line(image, (int(tdx), int(tdy)), (int(point[0]), int(point[1])), (255, 0, 0), 2)
+        # cv2.circle(image, (int(x3), int(y3)), int(radius), (255, 0, 0), 2)
+    alpha = 0.5
+    image = cv2.addWeighted(overlay, alpha, image, 1 - alpha, 0)
+    # cv2.imshow("cc", image)
+    # cv2.waitKey(0)
+    # exit()
+    list_projection_xy = [sin(yaw), -cos(yaw) * sin(pitch)]
+    return list_projection_xy, image
+def draw_axis_3d(yaw, pitch, roll, image=None, tdx=None, tdy=None, size=50, yaw_uncertainty=-1, pitch_uncertainty=-1, roll_uncertainty=-1):
+    """
+    Draw yaw pitch and roll axis on the image if passed as input and returns the vector containing the projection of the vector on the image plane
+    Args:
+        :yaw (float): value that represents the yaw rotation of the face
+        :pitch (float): value that represents the pitch rotation of the face
+        :roll (float): value that represents the roll rotation of the face
+        :image (numpy.ndarray): The image where the three vector will be printed
+            (default is None)
+        :tdx (float64): x coordinate from where the vector drawing start expressed in pixel coordinates
+            (default is None)
+        :tdy (float64): y coordinate from where the vector drawing start expressed in pixel coordinates
+            (default is None)
+        :size (int): value that will be multiplied to each x, y and z value that enlarge the "vector drawing"
+            (default is 50)
+    Returns:
+        :list_projection_xy (list): list containing the unit vector [x, y, z]
+    """
+    pitch = pitch * np.pi / 180
+    yaw = -(yaw * np.pi / 180)
+    roll = roll * np.pi / 180
+    # print(yaw, pitch, roll)
+    if tdx != None and tdy != None:
+        tdx = tdx
+        tdy = tdy
+    else:
+        height, width = image.shape[:2]
+        tdx = width / 2
+        tdy = height / 2
+    # PROJECT 3D TO 2D XY plane (Z = 0)
+    # X-Axis pointing to right. drawn in red
+    x1 = size * (cos(yaw) * cos(roll)) + tdx
+    y1 = size * (cos(pitch) * sin(roll) + cos(roll) * sin(pitch) * sin(yaw)) + tdy
+    # Y-Axis | drawn in green
+    x2 = size * (-cos(yaw) * sin(roll)) + tdx
+    y2 = size * (cos(pitch) * cos(roll) - sin(pitch) * sin(yaw) * sin(roll)) + tdy
+    # Z-Axis (out of the screen) drawn in blue
+    x3 = size * (sin(yaw)) + tdx
+    y3 = size * (-cos(yaw) * sin(pitch)) + tdy
+    z3 = size * (cos(pitch) * cos(yaw)) + tdy
+    if image is not None:
+        cv2.line(image, (int(tdx), int(tdy)), (int(x1), int(y1)), (0, 0, 255), 2)
+        cv2.line(image, (int(tdx), int(tdy)), (int(x2), int(y2)), (0, 255, 0), 2)
+        cv2.line(image, (int(tdx), int(tdy)), (int(x3), int(y3)), (255, 0, 0), 2)
+    return image

utils/labels.py ADDED Viewed

	@@ -0,0 +1,333 @@

+# Category index keyed by class id: every value repeats the id and carries the category name.
+# The ids are not contiguous (12, 26, 29, 30, 45, 66, 68, 69, 71 and 83 are absent), so always look the id up
+# instead of assuming that every value between 1 and 90 is present.
+coco_category_index = {
+    1: {'id': 1, 'name': 'person'},
+    2: {'id': 2, 'name': 'bicycle'},
+    3: {'id': 3, 'name': 'car'},
+    4: {'id': 4, 'name': 'motorcycle'},
+    5: {'id': 5, 'name': 'airplane'},
+    6: {'id': 6, 'name': 'bus'},
+    7: {'id': 7, 'name': 'train'},
+    8: {'id': 8, 'name': 'truck'},
+    9: {'id': 9, 'name': 'boat'},
+    10: {'id': 10, 'name': 'traffic light'},
+    11: {'id': 11, 'name': 'fire hydrant'},
+    13: {'id': 13, 'name': 'stop sign'},
+    14: {'id': 14, 'name': 'parking meter'},
+    15: {'id': 15, 'name': 'bench'},
+    16: {'id': 16, 'name': 'bird'},
+    17: {'id': 17, 'name': 'cat'},
+    18: {'id': 18, 'name': 'dog'},
+    19: {'id': 19, 'name': 'horse'},
+    20: {'id': 20, 'name': 'sheep'},
+    21: {'id': 21, 'name': 'cow'},
+    22: {'id': 22, 'name': 'elephant'},
+    23: {'id': 23, 'name': 'bear'},
+    24: {'id': 24, 'name': 'zebra'},
+    25: {'id': 25, 'name': 'giraffe'},
+    27: {'id': 27, 'name': 'backpack'},
+    28: {'id': 28, 'name': 'umbrella'},
+    31: {'id': 31, 'name': 'handbag'},
+    32: {'id': 32, 'name': 'tie'},
+    33: {'id': 33, 'name': 'suitcase'},
+    34: {'id': 34, 'name': 'frisbee'},
+    35: {'id': 35, 'name': 'skis'},
+    36: {'id': 36, 'name': 'snowboard'},
+    37: {'id': 37, 'name': 'sports ball'},
+    38: {'id': 38, 'name': 'kite'},
+    39: {'id': 39, 'name': 'baseball bat'},
+    40: {'id': 40, 'name': 'baseball glove'},
+    41: {'id': 41, 'name': 'skateboard'},
+    42: {'id': 42, 'name': 'surfboard'},
+    43: {'id': 43, 'name': 'tennis racket'},
+    44: {'id': 44, 'name': 'bottle'},
+    46: {'id': 46, 'name': 'wine glass'},
+    47: {'id': 47, 'name': 'cup'},
+    48: {'id': 48, 'name': 'fork'},
+    49: {'id': 49, 'name': 'knife'},
+    50: {'id': 50, 'name': 'spoon'},
+    51: {'id': 51, 'name': 'bowl'},
+    52: {'id': 52, 'name': 'banana'},
+    53: {'id': 53, 'name': 'apple'},
+    54: {'id': 54, 'name': 'sandwich'},
+    55: {'id': 55, 'name': 'orange'},
+    56: {'id': 56, 'name': 'broccoli'},
+    57: {'id': 57, 'name': 'carrot'},
+    58: {'id': 58, 'name': 'hot dog'},
+    59: {'id': 59, 'name': 'pizza'},
+    60: {'id': 60, 'name': 'donut'},
+    61: {'id': 61, 'name': 'cake'},
+    62: {'id': 62, 'name': 'chair'},
+    63: {'id': 63, 'name': 'couch'},
+    64: {'id': 64, 'name': 'potted plant'},
+    65: {'id': 65, 'name': 'bed'},
+    67: {'id': 67, 'name': 'dining table'},
+    70: {'id': 70, 'name': 'toilet'},
+    72: {'id': 72, 'name': 'tv'},
+    73: {'id': 73, 'name': 'laptop'},
+    74: {'id': 74, 'name': 'mouse'},
+    75: {'id': 75, 'name': 'remote'},
+    76: {'id': 76, 'name': 'keyboard'},
+    77: {'id': 77, 'name': 'cell phone'},
+    78: {'id': 78, 'name': 'microwave'},
+    79: {'id': 79, 'name': 'oven'},
+    80: {'id': 80, 'name': 'toaster'},
+    81: {'id': 81, 'name': 'sink'},
+    82: {'id': 82, 'name': 'refrigerator'},
+    84: {'id': 84, 'name': 'book'},
+    85: {'id': 85, 'name': 'clock'},
+    86: {'id': 86, 'name': 'vase'},
+    87: {'id': 87, 'name': 'scissors'},
+    88: {'id': 88, 'name': 'teddy bear'},
+    89: {'id': 89, 'name': 'hair drier'},
+    90: {'id': 90, 'name': 'toothbrush'},
+}
+# Drawing color for each class id. Unlike coco_category_index the keys are contiguous (1..90), so the ids that have no
+# category still have a color.
+# NOTE(review): the name says RGB, but tuples passed to OpenCV drawing functions are interpreted in the channel order of
+# the image (BGR for images loaded with cv2) - confirm which order is intended.
+rgb_colors = {
+    1: (240, 248, 255),
+    2: (250, 235, 215),
+    3: (0, 255, 255),
+    4: (127, 255, 212),
+    5: (240, 255, 255),
+    6: (245, 245, 220),
+    7: (255, 228, 196),
+    8: (255, 255, 255),
+    9: (255, 235, 205),
+    10: (0, 0, 255),
+    11: (138, 43, 226),
+    12: (165, 42, 42),
+    13: (222, 184, 135),
+    14: (95, 158, 160),
+    15: (127, 255, 0),
+    16: (210, 105, 30),
+    17: (255, 127, 80),
+    18: (100, 149, 237),
+    19: (255, 248, 220),
+    20: (220, 20, 60),
+    21: (0, 255, 255),
+    22: (0, 0, 139),
+    23: (0, 139, 139),
+    24: (184, 134, 11),
+    25: (169, 169, 169),
+    26: (0, 100, 0),
+    27: (169, 169, 169),
+    28: (189, 183, 107),
+    29: (139, 0, 139),
+    30: (85, 107, 47),
+    31: (255, 140, 0),
+    32: (153, 50, 204),
+    33: (139, 0, 0),
+    34: (233, 150, 122),
+    35: (143, 188, 143),
+    36: (72, 61, 139),
+    37: (47, 79, 79),
+    38: (47, 79, 79),
+    39: (0, 206, 209),
+    40: (148, 0, 211),
+    41: (255, 20, 147),
+    42: (0, 191, 255),
+    43: (105, 105, 105),
+    44: (105, 105, 105),
+    45: (30, 144, 255),
+    46: (178, 34, 34),
+    47: (255, 250, 240),
+    48: (34, 139, 34),
+    49: (255, 0, 255),
+    50: (220, 220, 220),
+    51: (248, 248, 255),
+    52: (255, 215, 0),
+    53: (218, 165, 32),
+    54: (128, 128, 128),
+    55: (0, 128, 0),
+    56: (173, 255, 47),
+    57: (128, 128, 128),
+    58: (240, 255, 240),
+    59: (255, 105, 180),
+    60: (205, 92, 92),
+    61: (75, 0, 130),
+    62: (255, 0, 122),
+    63: (240, 230, 140),
+    64: (230, 230, 250),
+    65: (255, 240, 245),
+    66: (124, 252, 0),
+    67: (255, 250, 205),
+    68: (173, 216, 230),
+    69: (240, 128, 128),
+    70: (224, 255, 255),
+    71: (250, 250, 210),
+    72: (211, 211, 211),
+    73: (144, 238, 144),
+    74: (211, 211, 211),
+    75: (255, 182, 193),
+    76: (255, 160, 122),
+    77: (32, 178, 170),
+    78: (135, 206, 250),
+    79: (119, 136, 153),
+    80: (119, 136, 153),
+    81: (176, 196, 222),
+    82: (255, 255, 224),
+    83: (0, 255, 0),
+    84: (50, 205, 50),
+    85: (250, 240, 230),
+    86: (255, 0, 255),
+    87: (128, 0, 0),
+    88: (102, 205, 170),
+    89: (0, 0, 205),
+    90: (186, 85, 211),
+}
+color_pose = {
+    "purple": (255, 0, 100),
+    "light_pink": (80, 0, 255),
+    "dark_pink": (220, 0, 255),
+    "light_orange": (0, 80, 255),
+    "dark_orange": (255, 220, 0.),
+    "yellow": (0, 220, 255),
+    "blue": (255, 0, 0),
+    "green": (0,255,0),
+}
+color_pose_normalized = {
+    "purple": (100/255., 0/255., 255/255.),
+    "light_pink": (255/255., 0/255., 80/255.),
+    "dark_pink": (255/255., 0/255., 220/255.),
+    "light_orange": (255/255., 80/255., 0/255.),
+    "dark_orange": (255/255., 220/255., 0/255.),
+    "blue": (0/255., 0/255., 255/255.)
+}
# 17 key-point layout: position in the tuple is the key-point index.
# The original inline notes tagged indices 0-4 (Nose, LEye, REye, LEar, REar) as purple, light_pink, dark_pink,
# light_orange and yellow respectively -- presumably the colour each one is drawn with.
pose_id_part = dict(enumerate((
    "Nose",
    "LEye",
    "REye",
    "LEar",
    "REar",
    "LShoulder",
    "RShoulder",
    "LElbow",
    "RElbow",
    "LWrist",
    "RWrist",
    "LHip",
    "RHip",
    "LKnee",
    "RKnee",
    "LAnkle",
    "RAnkle",
)))
# Reverse lookup: body part name -> index in pose_id_part.
rev_pose_id_part = dict(zip(pose_id_part.values(), pose_id_part.keys()))

# 25 key points plus a trailing "Background" entry, as named in the OpenPose-format table.
pose_id_part_openpose = dict(enumerate((
    "Nose",
    "Neck",
    "RShoulder",
    "RElbow",
    "RWrist",
    "LShoulder",
    "LElbow",
    "LWrist",
    "MidHip",
    "RHip",
    "RKnee",
    "RAnkle",
    "LHip",
    "LKnee",
    "LAnkle",
    "REye",
    "LEye",
    "REar",
    "LEar",
    "LBigToe",
    "LSmallToe",
    "LHeel",
    "RBigToe",
    "RSmallToe",
    "RHeel",
    "Background",
)))

# 18 key-point layout used for the ZED camera table (no MidHip, no feet).
pose_id_part_zedcam = dict(enumerate((
    "Nose",
    "Neck",
    "RShoulder",
    "RElbow",
    "RWrist",
    "LShoulder",
    "LElbow",
    "LWrist",
    "RHip",
    "RKnee",
    "RAnkle",
    "LHip",
    "LKnee",
    "LAnkle",
    "REye",
    "LEye",
    "REar",
    "LEar",
)))

# The CenterNet table has exactly the same entries as the OpenPose one; it is kept as an independent copy so
# that mutating one table never affects the other.
pose_id_part_centernet = dict(pose_id_part_openpose)

# Reverse lookup: body part name -> index in pose_id_part_openpose.
rev_pose_id_part_openpose = dict(zip(pose_id_part_openpose.values(), pose_id_part_openpose.keys()))
# Category table for the face detector: a single class with id 1.
face_category_index = {1: dict(id=1, name='Face')}

# Colour assigned to a tracked identity; the position in the list is the key.
tracking_colors = dict(enumerate([
    (255, 0, 0),
    (0, 255, 0),
    (0, 0, 255),
    (255, 0, 255),
    (255, 255, 0),
    (0, 255, 255),
    (255, 255, 255),
    (0, 0, 0),
    (128, 128, 128),
    (128, 0, 0),
    (0, 128, 0),
    (0, 0, 128),
    (128, 128, 0),
    (128, 0, 128),
    (0, 128, 128),
]))

# Pairs of key-point indices joined by a segment, one list per key-point layout
# (indices refer to pose_id_part, pose_id_part_openpose and pose_id_part_zedcam respectively).
body_parts = [
    (5, 6), (5, 7), (6, 8), (7, 9), (8, 10), (11, 12),
    (5, 11), (6, 12), (11, 13), (12, 14), (13, 15), (14, 16),
]
body_parts_openpose = [
    (5, 2), (5, 6), (2, 3), (6, 7), (3, 4), (12, 9),
    (5, 12), (2, 9), (12, 13), (9, 10), (13, 14), (10, 11),
]
body_parts_zedcam = [
    (5, 2), (5, 6), (2, 3), (6, 7), (3, 4), (11, 8),
    (5, 11), (2, 8), (11, 12), (8, 9), (12, 13), (9, 10),
]

# Indices of the five head key points (nose, eyes, ears) in each layout.
face_points = list(range(5))
face_points_openpose = [0, 16, 15, 18, 17]
face_points_zedcam = [0, 14, 15, 16, 17]

utils/my_utils.py ADDED Viewed

	@@ -0,0 +1,1375 @@

+import numpy as np
+from scipy.spatial import distance as dist
+from utils.labels import pose_id_part, pose_id_part_openpose, rev_pose_id_part_openpose, rev_pose_id_part
+import cv2
+import os
+import json
def rescale_bb(boxes, pad, im_width, im_height):
    """
    Remap, in place, bounding boxes expressed as fractions of the padded image to fractions of the un-padded area
    Args:
        :boxes (numpy.ndarray): Rows of fractional coordinates in the order [y_min, x_min, y_max, x_max]
        :pad (tuple): pad[0] is the right padding and pad[1] the bottom padding, in pixels (according to the original
                        notes both are produced by resize_preserving_ar()); further elements are not read here
        :im_width (int): Width in pixels of the image the fractions currently refer to (padding included)
        :im_height (int): Height in pixels of the image the fractions currently refer to (padding included)
    Returns:
        None: the rows of boxes are overwritten
    """
    pad_right, pad_bottom = pad[0], pad[1]
    # Vertical axis: only touched when some bottom padding was added
    if pad_bottom != 0:
        for bb in boxes:
            top_px = bb[0] * im_height  # fraction -> pixels
            bottom_px = bb[2] * im_height
            bb[0] = top_px / (im_height - pad_bottom)  # pixels -> fraction of the un-padded height
            bb[2] = bottom_px / (im_height - pad_bottom)
    # Horizontal axis: only touched when some right padding was added
    if pad_right != 0:
        for bb in boxes:
            left_px = bb[1] * im_width
            right_px = bb[3] * im_width
            bb[1] = left_px / (im_width - pad_right)
            bb[3] = right_px / (im_width - pad_right)
def rescale_key_points(key_points, pad, im_width, im_height):
    """
    Remap, in place, key points expressed as fractions of the padded image to fractions of the un-padded area
    Args:
        :key_points (numpy.ndarray): Nested sequence (one entry per detection, one row per key point) whose rows start
                        with the fractional y (index 0) and the fractional x (index 1)
        :pad (tuple): pad[0] is the right padding and pad[1] the bottom padding, in pixels (according to the original
                        notes both are produced by resize_preserving_ar()); further elements are not read here
        :im_width (int): Width in pixels of the image the fractions currently refer to (padding included)
        :im_height (int): Height in pixels of the image the fractions currently refer to (padding included)
    Returns:
        None: the key points are overwritten
    """
    pad_right, pad_bottom = pad[0], pad[1]
    fix_rows = pad_bottom != 0
    fix_cols = pad_right != 0
    if not (fix_rows or fix_cols):
        return  # nothing was padded: the fractions are already correct
    for detection in key_points:
        for kp in detection:
            if fix_rows:
                y_px = kp[0] * im_height  # fraction -> pixels
                kp[0] = y_px / (im_height - pad_bottom)  # pixels -> fraction of the un-padded height
            if fix_cols:
                x_px = kp[1] * im_width
                kp[1] = x_px / (im_width - pad_right)
def change_coordinates_aspect_ratio(aux_key_points_array, img_person, img_person_resized):
    """
    Map key points found on a resized crop back to the pixel grid of the original crop
    Args:
        :aux_key_points_array (iterable): rows of three values; the first is scaled by the height ratio, the second by
                        the width ratio and the third (a flag/score) is only truncated to an integer
        :img_person (numpy.ndarray): original crop, indexed as (height, width, ...)
        :img_person_resized (numpy.ndarray): resized crop with a leading batch axis, indexed as (batch, height, width, ...)
    Returns:
        :aux_key_points_array_ratio (numpy.ndarray): integer array with one [coord_0, coord_1, flag] row per input row
    """
    # axis 0 of the resized tensor is the batch, hence the shift by one
    ratio_h = img_person.shape[0] / img_person_resized.shape[1]
    ratio_w = img_person.shape[1] / img_person_resized.shape[2]
    # The second coordinate used to be multiplied by ratio_h as well, leaving ratio_w unused; that is only
    # correct when both ratios coincide. int() truncates toward zero, exactly as before.
    rescaled = [[int(elem[0] * ratio_h), int(elem[1] * ratio_w), int(elem[2])] for elem in aux_key_points_array]
    return np.array(rescaled, dtype=int)
def parse_output_pose(heatmaps, offsets, threshold):
    """
    Parse the output pose (auxiliary function for tflite models)
    Args:
        :heatmaps (numpy.ndarray): score maps indexed as [row, col, joint] (9x9x17 according to the original notes):
                        the peak of each map locates the corresponding joint
        :offsets (numpy.ndarray): refinement offsets indexed as [row, col, channel]; channel i is added to the first
                        coordinate of joint i and channel i + joint_num to the second one
        :threshold (float): minimum peak score for a joint to be flagged as detected
    Returns:
        :pose_kps (numpy.ndarray): uint32 array of shape (joint_num, 3) with rows [coord_0, coord_1, detected]
    """
    # Constants of the model this helper was written for: 9x9 heat maps (last cell index 8) over a 257 px input
    last_cell = 8
    input_size = 257
    joint_num = heatmaps.shape[-1]
    pose_kps = np.zeros((joint_num, 3), np.uint32)
    for i in range(joint_num):
        joint_heatmap = heatmaps[..., i]
        # argmax returns a single cell even when several cells tie for the maximum (e.g. an all-zero map);
        # the previous argwhere/squeeze combination produced a 2-D index array in that case and crashed
        row, col = np.unravel_index(np.argmax(joint_heatmap), joint_heatmap.shape)
        coord_0 = int(int(row / last_cell * input_size) + offsets[row, col, i])
        coord_1 = int(int(col / last_cell * input_size) + offsets[row, col, i + joint_num])
        # Negative values cannot be stored in the unsigned output (they wrap around or raise, depending on the
        # NumPy version): store 0 instead and never flag such a joint as detected
        pose_kps[i, 0] = max(coord_0, 0)
        pose_kps[i, 1] = max(coord_1, 0)
        inside = 0 <= coord_0 < input_size and 0 <= coord_1 < input_size
        if joint_heatmap[row, col] > threshold and inside:
            pose_kps[i, 2] = 1
    return pose_kps
def retrieve_xyz_from_detection(points_list, point_cloud_img):
    """
    Look up the first three channels of the point cloud at each of the given pixel positions
    Args:
        :points_list (list): pixel positions; element 0 of each point selects the column and element 1 the row
        :point_cloud_img (numpy.ndarray): array indexed as [row, col, channel] (XYZRGBA per the original notes)
    Returns:
        :xyz (list): one [X, Y, Z] list per input point, in the same order
    """
    xyz = []
    for point in points_list:
        col, row = point[0], point[1]
        xyz.append([point_cloud_img[row, col, channel] for channel in range(3)])
    return xyz
def retrieve_xyz_pose_points(point_cloud_image, key_points_score, key_points):
    """Retrieve the key points from the point cloud to get the XYZ position in the 3D space
    Args:
        :point_cloud_image (numpy.ndarray): array indexed as [row, col, channel]; channels 0, 1, 2 are read
        :key_points_score (list): key_points_score[i][j] is the score of key point j of detection i
        :key_points (list): key_points[i][j] starts with the fractional row and the fractional column of that key point
    Returns:
        :xyz_pose: a list (one entry per detection) of lists (one entry per key point, j is the index number of the
                   id pose) of [X, Y, Z, score]
    """
    n_rows, n_cols = point_cloud_image.shape[0], point_cloud_image.shape[1]
    xyz_pose = []
    for i, scores in enumerate(key_points_score):
        xyz_pose_aux = []
        for j, score in enumerate(scores):
            # fraction -> zero-based pixel index. A fraction of 0 (or below) used to give index -1, which NumPy
            # silently resolves to the LAST row/column; clamp to the first one instead.
            row = max(int(key_points[i][j][0] * n_rows) - 1, 0)
            col = max(int(key_points[i][j][1] * n_cols) - 1, 0)
            xyz_pose_aux.append([point_cloud_image[row, col, 0],
                                 point_cloud_image[row, col, 1],
                                 point_cloud_image[row, col, 2],
                                 score])
        xyz_pose.append(xyz_pose_aux)
    return xyz_pose
def compute_distance(points_list, min_distance=1.5):
    """
    Build the pairwise Euclidean distance matrix of the points and report the pairs that are closer than min_distance
    Args:
        :points_list (list): list of points expressed in xyz 3D coordinates (meters according to the original notes)
        :min_distance (float): a pair is a violation when its distance is strictly lower than this value
            (default is 1.5)
    Returns:
        :distance_matrix: square matrix with the distance between every couple of points (diagonal 0)
        :violate: set with the indices of the points involved in at least one violation
        :couple_points: list of (i, j) index tuples, i < j, one per violating pair
        (None, None, None) is returned when points_list is None or holds fewer than two points
    """
    if points_list is None or len(points_list) < 2:
        return None, None, None
    coords = np.array(points_list)
    distance_matrix = dist.cdist(coords, coords, 'euclidean')
    n_rows, n_cols = distance_matrix.shape
    # walk the strict upper triangle so that each pair is examined once
    couple_points = [(first, second)
                     for first in range(n_rows)
                     for second in range(first + 1, n_cols)
                     if distance_matrix[first, second] < min_distance]
    violate = {index for pair in couple_points for index in pair}
    return distance_matrix, violate, couple_points
def initialize_video_recorder(output_path, output_depth_path, fps, shape):
    """Initialize OpenCV video recorders that will be used to write each image/frame to a single video
    Args:
        :output_path (str): The file location where the recorded video will be saved
        :output_depth_path (str): The file location where the recorded video with depth information will be saved;
                        a falsy value (None, "") disables the depth writer
        :fps (int): The frame per seconds of the output videos
        :shape (tuple): The dimension of the output video (width, height)
    Returns:
        :writer (cv2.VideoWriter): The video writer used to save the video
        :writer_depth (cv2.VideoWriter): The video writer used to save the video with depth information, or None
    Raises:
        SystemExit: with code 1 when the folder of one of the two paths does not exist
    """
    # No logger is defined at module level in this file, so the error branches used to die with a NameError
    # before the message could be emitted; resolve one locally instead.
    import logging
    log = logging.getLogger(__name__)

    def _require_parent_folder(path, message):
        # A bare file name has an empty directory part: it refers to the current working directory
        folder = os.path.split(path)[0] or os.curdir
        if not os.path.isdir(folder):
            log.error(message)
            # same effect as the previous exit(1), without depending on the `site` helper being installed
            raise SystemExit(1)

    _require_parent_folder(output_path, "Invalid path for the video writer; folder does not exist")
    fourcc = cv2.VideoWriter_fourcc(*"MJPG")
    writer = cv2.VideoWriter(output_path, fourcc, fps, shape, True)
    writer_depth = None
    if output_depth_path:
        _require_parent_folder(output_depth_path, "Invalid path for the depth video writer; folder does not exist")
        writer_depth = cv2.VideoWriter(output_depth_path, fourcc, fps, shape, True)
    return writer, writer_depth
def delete_items_from_array_aux(arr, i):
    """
    Return a new numpy array equal to arr without its i-th element along the first axis
    Args:
        :arr (numpy.ndarray): Source array (e.g. one row per bounding box); it is not modified
        :i (int): Index of the element to be dropped
    Returns:
        :arr_ret: array rebuilt from the remaining elements (the dtype is re-inferred by numpy from the python values)
    """
    remaining = arr.tolist()
    del remaining[i]
    return np.array(remaining)
def fit_plane_least_square(xyz):
    """
    Fit the plane z = a*x + b*y + c to a set of 3D points with a least-squares solve
    Args:
        :xyz (numpy.ndarray): 2D array with one point per row; columns 0, 1, 2 are read as X, Y, Z
    Returns:
        :d (float): offset of the plane equation normal . p + d = 0
        :normal (numpy.ndarray): unit vector parallel to (a, b, -1)
        :point (numpy.ndarray): the point (0, 0, c), which lies on the fitted plane
    """
    n_points, _ = xyz.shape
    # design matrix [X, Y, 1]: the unknowns are the two slopes and the intercept
    design = np.column_stack((xyz[:, 0], xyz[:, 1], np.ones(n_points)))
    solution, _, _, _ = np.linalg.lstsq(design, xyz[:, 2], rcond=None)
    a, b, c = solution
    direction = np.array([a, b, -1])
    normal = direction / np.linalg.norm(direction)
    point = np.array([0.0, 0.0, c])
    d = -point.dot(normal)
    return d, normal, point
+#
+# def plot_plane(data, normal, d):
+#     from mpl_toolkits.mplot3d import Axes3D
+#     import matplotlib.pyplot as plt
+#
+#     fig = plt.figure()
+#     ax = fig.gca(projection='3d')
+#
+#     # plot fitted plane
+#     maxx = np.max(data[:, 0])
+#     maxy = np.max(data[:, 1])
+#     minx = np.min(data[:, 0])
+#     miny = np.min(data[:, 1])
+#
+#     # compute needed points for plane plotting
+#     xx, yy = np.meshgrid([minx - 10, maxx + 10], [miny - 10, maxy + 10])
+#     z = (-normal[0] * xx - normal[1] * yy - d) * 1. / normal[2]
+#
+#     # plot plane
+#     ax.plot_surface(xx, yy, z, alpha=0.2)
+#
+#     ax.set_xlabel('x')
+#     ax.set_ylabel('y')
+#     ax.set_zlabel('z')
+#     plt.show()
+#
+#     return
def shape_to_np(shape, dtype="int"):
    """
    Function used for the dlib facial detector; it copies the (x, y) coordinates of the 68 facial landmarks of a
    detected face into a NumPy array
    Args:
        :shape (): landmark container exposing part(i), whose result has the attributes x and y
        :dtype (): dtype of the returned array
            (Default is "int")
    Returns:
        :coordinates (numpy.ndarray): array of shape (68, 2); row i holds the x, y coordinates of landmark i
    """
    landmarks = []
    for index in range(68):
        landmark = shape.part(index)
        landmarks.append((landmark.x, landmark.y))
    return np.array(landmarks, dtype=dtype)
def rect_to_bb(rect):
    """
    Function used for the dlib facial detector; it turns a rectangle given by its four edges into the
    (x, y, w, h) form normally used with OpenCV
    Args:
        :rect (dlib.rectangle): object exposing left(), top(), right() and bottom(), i.e. the region of the image
                        where a face is detected
    Returns:
        :res (tuple): x, y, w, h where x and y are the left and top edges and w and h the width and the height
    """
    left, top = rect.left(), rect.top()
    width = rect.right() - left
    height = rect.bottom() - top
    return left, top, width, height
def enlarge_bb(y_min, x_min, y_max, x_max, im_width, im_height):
    """
    Enlarge the bounding box to include more background margin (used for face detection)
    Args:
        :y_min (int): the top y coordinate of the bounding box
        :x_min (int): the left x coordinate of the bounding box
        :y_max (int): the bottom y coordinate of the bounding box
        :x_max (int): the right x coordinate of the bounding box
        :im_width (int): The width of the image
        :im_height (int): The height of the image
    Returns:
        :y_min (int): the top y coordinate of the bounding box after enlarging
        :x_min (int): the left x coordinate of the bounding box after enlarging
        :y_max (int): the bottom y coordinate of the bounding box after enlarging
        :x_max (int): the right x coordinate of the bounding box after enlarging
    """
    # Measure the box ONCE, before moving any edge. The bottom/right margins used to be derived from the
    # already-moved top/left edges, so they were taken from an inflated box and depended on statement order.
    box_height = abs(y_min - y_max)
    box_width = abs(x_min - x_max)
    # margins: 1/10 of the height above and below, 1/5 of the width on the left, 1/4 on the right
    # (the left/right asymmetry is kept from the original code); every edge is clipped to the image
    new_y_min = int(max(0, y_min - box_height / 10))
    new_y_max = int(min(im_height, y_max + box_height / 10))
    new_x_min = int(max(0, x_min - box_width / 5))
    new_x_max = int(min(im_width, x_max + box_width / 4))
    return new_y_min, new_x_min, new_y_max, new_x_max
def linear_assignment(cost_matrix):
    """
    Solve the linear assignment problem for cost_matrix, using the `lap` package when it is installed and
    scipy otherwise
    Args:
        :cost_matrix (numpy.ndarray): 2D matrix of assignment costs
    Returns:
        :matches (numpy.ndarray): integer array of shape (k, 2) with one [row, column] pair per assignment
                        (shape (0, 2) when nothing is assigned)
    """
    # Only the import is guarded: an ImportError raised while the solver itself runs must not be mistaken
    # for "lap is not installed" and silently switch solver.
    try:
        import lap
    except ImportError:
        lap = None
    if lap is not None:
        _, x, y = lap.lapjv(cost_matrix, extend_cost=True)
        pairs = [[y[i], i] for i in x if i >= 0]
    else:
        from scipy.optimize import linear_sum_assignment
        rows, cols = linear_sum_assignment(cost_matrix)
        pairs = list(zip(rows, cols))
    # reshape keeps the result two-dimensional even when there is no pair, so callers can always slice [:, 0]
    return np.asarray(pairs, dtype=int).reshape(-1, 2)
def iou_batch(bb_test, bb_gt):
    """
    From SORT: computes the IOU between every box of bb_test and every box of bb_gt, boxes being in the form
    [x1,y1,x2,y2]
    Args:
        :bb_test (numpy.ndarray): array with one box per row (columns 0-3 are read)
        :bb_gt (numpy.ndarray): array with one box per row (columns 0-3 are read)
    Returns:
        :o (numpy.ndarray): matrix whose element [i, j] is the IOU between bb_test[i] and bb_gt[j]
    """
    # add broadcasting axes: tests run along the rows, ground truths along the columns
    gt = np.expand_dims(bb_gt, 0)
    test = np.expand_dims(bb_test, 1)
    # corners of the intersection rectangle
    left = np.maximum(test[..., 0], gt[..., 0])
    top = np.maximum(test[..., 1], gt[..., 1])
    right = np.minimum(test[..., 2], gt[..., 2])
    bottom = np.minimum(test[..., 3], gt[..., 3])
    # disjoint boxes give a negative extent, clipped to zero
    intersection = np.maximum(0., right - left) * np.maximum(0., bottom - top)
    area_test = (test[..., 2] - test[..., 0]) * (test[..., 3] - test[..., 1])
    area_gt = (gt[..., 2] - gt[..., 0]) * (gt[..., 3] - gt[..., 1])
    return intersection / (area_test + area_gt - intersection)
def convert_bbox_to_z(bbox):
    """
    Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form [x,y,s,r] where x,y is the centre of
    the box, s is the scale/area and r is the aspect ratio (width over height)
    Args:
        :bbox (): sequence whose first four elements are x1, y1, x2, y2
    Returns:
        :z (numpy.ndarray): column vector of shape (4, 1) holding x, y, s, r; when the height is zero r is the width
    """
    width = bbox[2] - bbox[0]
    height = bbox[3] - bbox[1]
    centre_x = bbox[0] + width / 2.
    centre_y = bbox[1] + height / 2.
    area = width * height  # the scale is just the area
    if float(height) != 0:
        ratio = width / float(height)
    else:
        ratio = width  # degenerate box: avoid the division by zero
    return np.array([centre_x, centre_y, area, ratio]).reshape((4, 1))
def convert_x_to_bbox(x, score=None):
    """
    Takes a bounding box in the centre form [x,y,s,r] and returns it in the form
    [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right
    Args:
        :x (): sequence whose first four elements are the centre x, the centre y, the area and the aspect ratio
        :score (): optional value appended after the four corners
            (Default is None)
    Returns:
        :bbox (numpy.ndarray): array of shape (1, 4), or of shape (1, 5) when a score is given
    """
    width = np.sqrt(x[2] * x[3])  # area * (width / height) = width ** 2
    height = x[2] / width
    corners = [x[0] - width / 2., x[1] - height / 2., x[0] + width / 2., x[1] + height / 2.]
    if score is not None:
        corners.append(score)
    return np.array(corners).reshape((1, len(corners)))
def associate_detections_to_trackers(detections, trackers, iou_threshold=0.3):
    """
    Assigns detections to tracked object (both represented as bounding boxes)
    Returns 3 lists of matches, unmatched_detections and unmatched_trackers
    Args:
        :detections (): boxes of the current frame, one per row, in the form accepted by iou_batch
        :trackers (): boxes predicted by the existing trackers, one per row, in the form accepted by iou_batch
        :iou_threshold (): minimum IOU for a detection/tracker pair to be accepted as a match
            (Default is 0.3)
    Returns:
        :matches (numpy.ndarray): array of shape (k, 2) with one [detection index, tracker index] row per match
        :unmatched_detections (numpy.ndarray): indices of the detections left without a tracker
        :unmatched_trackers (numpy.ndarray): indices of the trackers left without a detection
    """
    if len(trackers) == 0:
        return np.empty((0, 2), dtype=int), np.arange(len(detections)), np.empty((0, 5), dtype=int)
    iou_matrix = iou_batch(detections, trackers)
    if min(iou_matrix.shape) > 0:
        a = (iou_matrix > iou_threshold).astype(np.int32)
        if a.sum(1).max() == 1 and a.sum(0).max() == 1:
            # every row and every column has at most one candidate: the thresholded matrix is the assignment
            matched_indices = np.stack(np.where(a), axis=1)
        else:
            # ambiguous case: solve the assignment that maximises the total IOU
            matched_indices = linear_assignment(-iou_matrix)
    else:
        matched_indices = np.empty(shape=(0, 2))
    unmatched_detections = [d for d in range(len(detections)) if d not in matched_indices[:, 0]]
    # This list used to be initialised inside the loop over the detections, so a frame with trackers but
    # without detections ended in a NameError; it is now always defined.
    unmatched_trackers = [t for t in range(len(trackers)) if t not in matched_indices[:, 1]]
    # filter out matched with low IOU
    matches = []
    for m in matched_indices:
        if iou_matrix[m[0], m[1]] < iou_threshold:
            unmatched_detections.append(m[0])
            unmatched_trackers.append(m[1])
        else:
            matches.append(m.reshape(1, 2))
    if len(matches) == 0:
        matches = np.empty((0, 2), dtype=int)
    else:
        matches = np.concatenate(matches, axis=0)
    return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
def find_face_from_key_points(key_points, bboxes, image, person=None, openpose=False, gazefollow=True):
    """
    Crop the face region of one person from the image, starting from the person's key points
    Args:
        key_points: key points of the person; only the first seven are used and each one is read as
            (x, y, confidence). In the 17-point layout these are:
            0 nose, 1/2 left/right eye, 3/4 left/right ear, 5/6 left/right shoulder
        bboxes: not used by the current implementation
        image: image indexed as [row, col, ...] from which the face is cropped
        person: optional object exposing update_faces() and update_faces_coordinates(); when given, the crop and
            its coordinates are stored on it instead of being returned
        openpose: when True the key points with a confidence that is not positive are discarded first
        gazefollow: when True the box is the bounding rectangle of the key points (made taller when it is flat);
            when False it is a fixed-size box around their centroid
    Returns:
        (None, []) when no usable box can be built;
        None when person is given (the result is stored on the person);
        otherwise (face_image, [y_min, x_min, y_max, x_max])
    """
    im_width, im_height = image.shape[1], image.shape[0]
    face_points = key_points[:7]
    if openpose:
        face_points = [point for point in key_points[:7] if point[2] > 0.0]
    if len(face_points) == 0:
        return None, []
    if not gazefollow:
        ct = compute_centroid(face_points)
        if ct[0] is None or ct[1] is None:
            # no key point with a positive confidence: there is no centroid to build the box around
            return None, []
        x_min, y_min = ct[0] - 10, ct[1] - 15
        x_max, y_max = ct[0] + 10, ct[1] + 10
        y_min_bbox = y_min
    else:
        if len(face_points) == 1:
            # a single point has no extent
            return None, []
        x_min, y_min, _ = np.amin(face_points, axis=0)
        x_max, y_max, _ = np.amax(face_points, axis=0)
        # A box flatter than 50 px is stretched vertically by an amount proportional to its width
        # (the original note suggests the threshold could become a ratio of the width instead)
        if y_max - y_min < 50:
            y_max += (x_max - x_min) / 1.4
            y_min -= (x_max - x_min) / 1.2
        y_min_bbox = int(y_min)
    y_min, x_min, y_max, x_max = enlarge_bb(y_min_bbox, x_min, y_max, x_max, im_width, im_height)
    face_image = image[y_min:y_max, x_min:x_max]
    if person is not None:
        person.update_faces(face_image)
        person.update_faces_coordinates([y_min, x_min, y_max, x_max])
        return None
    return face_image, [y_min, x_min, y_max, x_max]
def compute_interaction_cosine(head_position, target_position, gaze_direction):
    """
    Computes the interaction between two people using the angle of view.
    The interaction is the cosine of the angle between the line going from person A to person B and the gaze
    direction of person A; negative cosines are reported as 0.
    Args:
        :head_position (list): list of pixel coordinates [x, y] that represents the position of the head of person A
        :target_position (list): list of pixel coordinates [x, y] that represents the position of head of person B
        :gaze_direction (list): list that represents the gaze direction of the head of person A in the form [gx, gy]
    Returns:
        :val (float): value in [0, 1] that describes the quantity of interaction (0 when the two positions coincide)
    """
    if head_position == target_position:
        return 0  # or -1
    offset_x = target_position[0] - head_position[0]
    offset_y = target_position[1] - head_position[1]
    # angle of the segment observer -> target and angle of the gaze, both measured by arctan2
    angle_to_target = np.arctan2(offset_y, offset_x)
    angle_of_gaze = np.arctan2(gaze_direction[1], gaze_direction[0])
    val = np.cos(angle_to_target - angle_of_gaze)
    return 0 if val < 0 else val
def compute_attention_from_vectors(list_objects):
    """
    Build the matrix that tells how much every person is looking at every other person
    Args:
        :list_objects (): objects exposing get_id(), get_key_points() and get_poses_vector_norm(); the ones
                        without key points are ignored
    Returns:
        :attention_matrix (list): nested list where element [i][j] is the interaction of person id_list[i] towards
                        person id_list[j] as computed by compute_interaction_cosine (the diagonal is 0)
        :id_list (list): ids of the people, in the same order as the rows/columns of the matrix
    """
    id_list = []
    gaze_vectors = []
    head_centres = []
    for obj in list_objects:
        if len(obj.get_key_points()) > 0:
            id_list.append(obj.get_id())
            # the head position is the mean of the five head key points of the most recent pose
            aux = [obj.get_key_points()[-1][j][:2] for j in [0, 2, 1, 4, 3]]
            gaze_vectors.append(obj.get_poses_vector_norm()[-1])
            head_centres.append(np.mean(aux, axis=0).tolist())
    n_people = len(id_list)
    attention_matrix = np.zeros((n_people, n_people), dtype=np.float32)
    # Rows and columns follow the POSITION in id_list. The data used to be stored in a dict keyed by person id
    # and then read with the matrix position as key, which failed (KeyError) or mixed people up whenever the
    # ids were not exactly 0..n-1 in order.
    for i in range(n_people):
        for j in range(n_people):
            if i == j:
                continue
            attention_matrix[i][j] = compute_interaction_cosine(head_centres[i], head_centres[j], gaze_vectors[i])
    return attention_matrix.tolist(), id_list
def compute_attention_ypr(list_objects):
    """
    Print the id and the most recent yaw, pitch and roll of every object that has key points
    Args:
        :list_objects (): objects exposing get_id(), get_key_points() and get_poses_ypr()
    Returns:
        None
    """
    for obj in list_objects:
        if len(obj.get_key_points()) == 0:
            continue  # nothing was detected for this object
        obj_id = obj.get_id()
        latest = obj.get_poses_ypr()[-1]
        print("Object ID: ", obj_id, "yaw: ", latest[0], "pitch: ", latest[1], "roll: ", latest[2])
def save_key_points_to_json(ids, kpts, path_json, openpose=False):
    """
    Save key points to .json format according to Openpose output format
    Args:
        :ids (): ids[j] is the identifier (convertible to int) of the j-th person
        :kpts (): kpts[j] holds the key points of the j-th person, each one read as (y, x, confidence)
        :path_json (): path of the file to write
        :openpose (bool): True when the key points already follow the OpenPose index order, False when they follow
                        the pose_id_part order
            (Default is False)
    Returns:
        None
    """
    # table that names the incoming key-point indices
    source_names = pose_id_part_openpose if openpose else pose_id_part
    people = []
    for j, person_kpts in enumerate(kpts):
        person_id = [int(ids[j])]
        # 25 OpenPose slots; the ones that receive no key point stay at zero
        kpts_openpose = np.zeros((25, 3))
        for i, point in enumerate(person_kpts):
            idx_op = rev_pose_id_part_openpose[source_names[i]]
            kpts_openpose[idx_op] = [point[1], point[0], point[2]]  # x, y, conf
        people.append({
            "person_id": person_id,
            "face_keypoints_2d": [],
            "hand_left_keypoints_2d": [],
            "hand_right_keypoints_2d": [],
            "pose_keypoints_3d": [],
            "face_keypoints_3d": [],
            "hand_left_keypoints_3d": [],
            "hand_right_keypoints_3d": [],
            "pose_keypoints_2d": list(kpts_openpose.ravel()),
        })
    # serialise first, so that nothing is written when the data cannot be encoded
    json_object = json.dumps({"version": 1.3, "people": people}, indent=4)
    with open(path_json, "w") as outfile:
        outfile.write(json_object)
def json_to_poses(json_data):
    """
    Split the content of an OpenPose-style json into poses, confidences and ids
    Args:
        :json_data (dict): dictionary with a "people" list; every person has "person_id" and the flat list
                        "pose_keypoints_2d" made of consecutive (first coordinate, second coordinate, confidence) triples
    Returns:
        :poses (list): for every person, the list of (first coordinate, second coordinate, confidence) tuples
        :confidences (list): for every person, the list of confidences
        :ids (list): for every person, the value of "person_id"
    """
    poses, confidences, ids = [], [], []
    for person in json_data["people"]:
        flat = person["pose_keypoints_2d"]
        ids.append(person["person_id"])
        confidences.append(flat[2::3])
        # the coordinates go through numpy (as before), the confidences keep their python type
        as_array = np.asarray(flat)
        poses.append(list(zip(as_array[0::3], as_array[1::3], flat[2::3])))
    return poses, confidences, ids
def parse_json1(aux):
    """
    Extract key points and ids from the content of an OpenPose-style json
    Args:
        :aux (dict): dictionary with a "people" list; every person has "person_id" and a flat "pose_keypoints_2d"
                        list with at least 75 values (25 triples)
    Returns:
        :list_kpts (list): for every person, 25 triples in which the first two values of each source triple are swapped
        :id_list (list): for every person, the value of "person_id"
    """
    list_kpts, id_list = [], []
    for person in aux['people']:
        flat = person['pose_keypoints_2d']
        # 75 values = 25 key points x 3; positions 0 and 1 of each triple are exchanged
        list_kpts.append([[flat[start + 1], flat[start], flat[start + 2]] for start in range(0, 75, 3)])
        id_list.append(person['person_id'])
    return list_kpts, id_list
def load_poses_from_json1(json_filename):
    """
    Read a json file and parse it with parse_json1
    Args:
        :json_filename (): path of the json file
    Returns:
        :list_kpts, id_list: the two values returned by parse_json1
    """
    with open(json_filename) as data_file:
        return parse_json1(json.load(data_file))
def load_poses_from_json(json_filename):
    """
    Read a json file and parse it with json_to_poses
    Args:
        :json_filename (): path of the json file
    Returns:
        :poses, conf, ids: the three values returned by json_to_poses, or (None, None, None) when the file
                        contains no person
    """
    with open(json_filename) as data_file:
        poses, conf, ids = json_to_poses(json.load(data_file))
    if len(poses) >= 1:
        return poses, conf, ids
    return None, None, None
def compute_head_features(img, pose, conf, open_pose=True):
    """
    Describe the head of a person through its five head key points, expressed relative to their centroid and
    normalised by the largest centroid distance
    Args:
        img: not used by the current implementation
        pose: key points of the person; element 0 and 1 of each key point are the two coordinates
        conf: confidences, indexed like pose
        open_pose: True when pose follows the OpenPose index order (head joints 0, 15, 16, 17, 18), False when it
            follows the 17-point order (head joints 0, 2, 1, 4, 3)
    Returns:
        flat_list: ten values, i.e. the two normalised coordinates of each head joint ([0, 0] for a missing joint)
        conf_list: the five confidences of the head joints
        centroid: centroid of the available head joints, as returned by compute_centroid
        (None, None, None) is returned when no head joint is available
    """
    joints = [0, 15, 16, 17, 18] if open_pose else [0, 2, 1, 4, 3]
    n_joints_set = [pose[joint] for joint in joints if joint_set(pose[joint])]
    # The failure value has three elements like the success value, so that callers can always unpack three
    # names (it used to have two).
    if len(n_joints_set) < 1:
        return None, None, None
    centroid = compute_centroid(n_joints_set)
    if centroid[0] is None or centroid[1] is None:
        # every available joint was rejected by compute_centroid (confidence not positive)
        return None, None, None
    max_dist = max([dist_2D([j[0], j[1]], centroid) for j in n_joints_set])
    # One entry per head joint, in the order of `joints`, for both layouts. The non-OpenPose branch used to
    # read pose[0..k-1] (wrong joints, wrong length, and all three components of each key point).
    new_repr = [np.array([pose[joint][0], pose[joint][1]]) - np.array(centroid) for joint in joints]
    result = []
    for i, joint in enumerate(joints):
        if joint_set(pose[joint]):
            if max_dist != 0.0:
                result.append([new_repr[i][0] / max_dist, new_repr[i][1] / max_dist])
            else:
                # a single available joint coincides with the centroid: nothing to normalise by
                result.append([new_repr[i][0], new_repr[i][1]])
        else:
            result.append([0, 0])
    flat_list = [item for sublist in result for item in sublist]
    conf_list = [conf[joint] for joint in joints]
    return flat_list, conf_list, centroid
def compute_body_features(pose, conf):
    """
    Describe the whole body of a person through its 25 key points, expressed relative to the centroid of the head
    and normalised by the largest distance from that centroid
    Args:
        pose: 25 key points in the OpenPose index order (head joints 0, 15, 16, 17, 18); each one is used as a
            coordinate pair
        conf: 25 confidences, indexed like pose
    Returns:
        flat_list: 50 normalised coordinates ([0, 0] for a missing joint) followed by the 25 confidences
        centroid: centroid of the available head joints, as returned by compute_centroid
        (None, None) is returned when no head joint is available
    """
    joints = [0, 15, 16, 17, 18]
    alljoints = range(0, 25)
    head_joints_set = [pose[joint] for joint in joints if joint_set(pose[joint])]
    if len(head_joints_set) < 1:
        return None, None
    centroid = compute_centroid(head_joints_set)
    if centroid[0] is None or centroid[1] is None:
        return None, None
    body_joints_set = [pose[joint] for joint in alljoints if joint_set(pose[joint])]
    max_dist = max([dist_2D(j, centroid) for j in body_joints_set])
    new_repr = [(np.array(pose[joint]) - np.array(centroid)) for joint in alljoints]
    result = []
    for i in alljoints:
        if joint_set(pose[i]):
            if max_dist != 0.0:
                result.append([new_repr[i][0] / max_dist, new_repr[i][1] / max_dist])
            else:
                # only one joint is available and it coincides with the centroid: dividing would give NaN,
                # so the (zero) offset is kept as it is, like compute_head_features does
                result.append([new_repr[i][0], new_repr[i][1]])
        else:
            result.append([0, 0])
    flat_list = [item for sublist in result for item in sublist]
    for j in alljoints:
        flat_list.append(conf[j])
    return flat_list, centroid
def compute_centroid(points):
    """
    Average the first two coordinates of a set of points
    Args:
        points: iterable of points; a point made of exactly three values is used only when its third value is
            greater than 0, any other point is always used
    Returns:
        [mean of the first coordinates, mean of the second coordinates], or [None, None] when no point is used
    """
    usable = [point for point in points if len(point) != 3 or point[2] > 0.0]
    if not usable:
        return [None, None]
    first_coords = [point[0] for point in usable]
    second_coords = [point[1] for point in usable]
    return [np.mean(first_coords), np.mean(second_coords)]
def joint_set(p):
    """
    Tell whether a joint is available, i.e. whether at least one of its first two coordinates differs from zero
    Args:
        p: joint whose elements 0 and 1 are the coordinates
    Returns:
        True when the joint is available, False when both coordinates are zero
    """
    first_differs = p[0] != 0.0
    # the second coordinate is looked at only when the first one is zero
    return first_differs if first_differs else p[1] != 0.0
def dist_2D(p1, p2):
    """
    Euclidean distance between two points
    Args:
        p1: first point (sequence of coordinates)
        p2: second point (sequence of coordinates)
    Returns:
        square root of the sum, along the first axis, of the squared coordinate differences
    """
    difference = np.array(p1) - np.array(p2)
    return np.sqrt(np.sum(difference ** 2, axis=0))
+def compute_head_centroid(pose):
+    """
+    Args:
+        pose:
+    Returns:
+    """
+    joints = [0, 15, 16, 17, 18]
+    n_joints_set = [pose[joint] for joint in joints if joint_set(pose[joint])]
+    # if len(n_joints_set) < 2:
+    #     return None
+    centroid = compute_centroid(n_joints_set)
+    return centroid
+def head_direction_to_json(path_json, norm_list, unc_list, ids_list, file_name):
+    dict_file = {}
+    list_dict_person = []
+    for k, i in enumerate(norm_list):
+        dict_person = {"id_person": [ids_list[k]],
+                       "norm_xy": [i[0][0].item(), i[0][1].item()],  # from numpy to native python type for json serilization
+                       "center_xy": [int(i[1][0]), int(i[1][1])],
+                       "uncertainty": [unc_list[k].item()]}
+        list_dict_person.append(dict_person)
+    dict_file["people"] = list_dict_person
+    json_object = json.dumps(dict_file, indent=4)
+    with open(path_json, "w") as outfile:
+        outfile.write(json_object)
+def ypr_to_json(path_json, yaw_list, pitch_list, roll_list, yaw_u_list, pitch_u_list, roll_u_list, ids_list, center_xy):
+    dict_file = {}
+    list_dict_person = []
+    for k in range(len(yaw_list)):
+        dict_person = {"id_person": [ids_list[k]],
+                       "yaw": [yaw_list[k].item()],
+                       "yaw_u": [yaw_u_list[k].item()],
+                       "pitch": [pitch_list[k].item()],
+                       "pitch_u": [pitch_u_list[k].item()],
+                       "roll": [roll_list[k].item()],
+                       "roll_u": [roll_u_list[k].item()],
+                       "center_xy": [int(center_xy[k][0]), int(center_xy[k][1])]}
+        list_dict_person.append(dict_person)
+    dict_file["people"] = list_dict_person
+    json_object = json.dumps(dict_file, indent=4)
+    with open(path_json, "w") as outfile:
+        outfile.write(json_object)
+    # exit()
+def save_keypoints_image(img, poses, suffix_, path_save=''):
+    """
+    Save the image with the key points drawn on it
+    Args:
+        img:
+        poses:
+        suffix_:
+    Returns:
+    """
+    aux = img.copy()
+    for point in poses:
+        for i, p in enumerate(point):
+            if i in [0, 15, 16, 17, 18]:
+                cv2.circle(aux, (int(p[0]), int(p[1])), 2, (0, 255, 0), 2)
+    cv2.imwrite(os.path.join(path_save, suffix_ + '.jpg'), aux)
+def unit_vector(vector):
+    """
+    Returns the unit vector of the vector.
+    Args:
+        vector:
+    Returns:
+    """
+    return vector / np.linalg.norm(vector)
+def angle_between(v1, v2):
+    """
+    Returns the angle in radians between vectors 'v1' and 'v2'::
+            angle_between((1, 0, 0), (0, 1, 0))
+            1.5707963267948966
+            angle_between((1, 0, 0), (1, 0, 0))
+            0.0
+            angle_between((1, 0, 0), (-1, 0, 0))
+            3.141592653589793
+    """
+    # if not unit vector
+    v1_u = unit_vector(tuple(v1))
+    v2_u = unit_vector(tuple(v2))
+    angle = np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0))
+    return angle if angle < 1.80 else angle - 1.80
+def centroid_constraint(centroid, centroid_det, gazefollow=False):  # x y
+    """
+    Args:
+        centroid:
+        centroid_det:
+    Returns:
+    """
+    if centroid_det == [None, None]:
+        return False
+    if gazefollow == False:
+        if 0 < centroid_det[0] < 143 and 0 < centroid_det[1] < 24:  # centroid in the overprinted text of hour in the video
+            return False
+        if 0 < centroid_det[1] < 4:
+            return False
+        if centroid[0] - 3 < centroid_det[0] < centroid[0] + 3 and centroid[1] - 3 < centroid_det[1] < centroid[
+            1] + 3:  # detected centroid near the gt centroid
+            return True
+        else:
+            return False
+    else:
+        if int(centroid[0] - 30) < int(centroid_det[0]) < int(centroid[0] + 30) and int(centroid[1] - 30) < int(centroid_det[1]) < int(
+                centroid[1] + 30):  # detected centroid near the gt centroid
+            return True
+        else:
+            return False
+def initialize_video_reader(path_video):
+    """
+    Args:
+        path_video:
+    Returns:
+    """
+    cap = cv2.VideoCapture(path_video)
+    if cap is None or not cap.isOpened():
+        print('Warning: unable to open video source: ', path_video)
+        exit(-1)
+    return cap
+def distance_skeletons(kpts1, kpts2, dst_type):
+    """
+    Function to compute the distance between skeletons
+    #TO DO
+    Args:
+        kpts1:
+        kpts2:
+        dts_type:
+    Returns:
+    """
+    if len(kpts1) != len(kpts2):
+        print("Error: Different notation used for keypoints")
+        exit(-1)
+    print(len(kpts1), len(kpts2))
+    # to openpose notations
+    if len(kpts1) == len(kpts2) == 17:
+        kpts1, kpts2 = kpt_centernet_to_openpose(kpts1), kpt_centernet_to_openpose(kpts2)
+    print(len(kpts1), len(kpts2))
+    if len(kpts1) != 25 or len(kpts2) != 25:
+        print("Error")
+        exit(-1)
+    res_dist = 0
+    if dst_type == 'all_points':
+        for i, _ in enumerate(kpts1):
+            res_dist += dist_2D(kpts1[i][:2], kpts2[i][:2])
+        res_dist /= 25
+        return res_dist
+    elif dst_type == 'head_centroid':
+        top1_c, top2_c = compute_head_centroid(kpts1), compute_head_centroid(kpts2)
+        if top1_c == [None, None] or top2_c == [None, None]:
+            res_dist = 900
+        else:
+            res_dist = dist_2D(top1_c[:2], top2_c[:2])
+        return res_dist
+    elif dst_type == 'three_centroids':
+        #TO DO
+        # top1_c, top2_c = compute_centroid(kpts1[0, 15, 16, 17, 18]), compute_centroid(kpts2[0, 15, 16, 17, 18])
+        # mid1_c, mid2_c = compute_centroid(kpts1[2, 5, 9, 12]), compute_centroid(kpts2[2, 5, 9, 12])
+        # btm1_c, btm2_c = compute_centroid(kpts1[9, 12, 10, 13]), compute_centroid(kpts2[9, 12, 10, 13])
+        # res_dist = dist_2D(top1_c[:2], top2_c[:2]) + dist_2D(mid1_c[:2], mid2_c[:2]) + dist_2D(btm1_c[:2], btm2_c[:2])
+        # res_dist /= 3
+        # return res_dist
+        return None
+    elif dst_type == '':
+        print("dst_typ not valid")
+        exit(-1)
+def kpt_openpose_to_centernet(kpts):
+    """
+    Args:
+        kpts:
+    Returns:
+    """
+    #TO TEST
+    kpts_openpose = np.zeros((16, 3))
+    for i, point in enumerate(kpts):
+        idx_op = rev_pose_id_part[pose_id_part_openpose[i]]
+        kpts_openpose[idx_op] = [point[0], point[1], point[2]]
+    return kpts_openpose
+def kpt_centernet_to_openpose(kpts):
+    """
+    Args:
+        kpts:
+    Returns:
+    """
+    #TO TEST
+    kpts_openpose = np.zeros((25, 3))
+    for i, point in enumerate(kpts):
+        idx_op = rev_pose_id_part_openpose[pose_id_part[i]]
+        kpts_openpose[idx_op] = [point[1], point[0], point[2]]
+    return kpts_openpose
+def non_maxima_aux(det, kpt, threshold=15):  # threshold in pxels
+    # print("A", kpt, "\n", len(kpt))
+    indexes_to_delete = []
+    if len(kpt) == 0 or len(det) == 0:
+        return [], []
+    if len(kpt) == 1 or len(det) == 1:
+        return det, kpt
+    kpt_res = kpt.copy()
+    det_res_aux = det.copy()
+    for i in range(0, len(kpt)):
+        for j in range(i, len(kpt)):
+            if i == j:
+                continue
+            dist = distance_skeletons(kpt[i], kpt[j], 'head_centroid')
+            # print("DIST", i, j, dist)
+            if dist < threshold:
+                if j not in indexes_to_delete:
+                    indexes_to_delete.append(j)
+                # kpt_res.pop(j)
+    det_res = []
+    # print(indexes_to_delete)
+    indexes_to_delete = sorted(indexes_to_delete, reverse=True)
+    # print(len(kpt_res))
+    for index in indexes_to_delete:
+        kpt_res.pop(index)
+    det_res_aux = list(np.delete(det_res_aux, indexes_to_delete, axis=0))
+    det_res = np.array(det_res_aux)
+    return det_res, kpt_res
+def compute_centroid_list(points):
+    """
+    Args:
+        points:
+    Returns:
+    """
+    x, y = [], []
+    for i in range(0, len(points), 3):
+        if points[i + 2] > 0.0:  # confidence openpose
+            x.append(points[i])
+            y.append(points[i + 1])
+    if x == [] or y == []:
+        return [None, None]
+    mean_x = np.mean(x)
+    mean_y = np.mean(y)
+    return [mean_x, mean_y]
+def normalize_wrt_maximum_distance_point(points, file_name=''):
+    centroid = compute_centroid_list(points)
+    # centroid = [points[0], points[1]]
+    # print(centroid)
+    # exit()
+    max_dist_x, max_dist_y = 0, 0
+    for i in range(0, len(points), 3):
+        if points[i + 2] > 0.0:  # confidence openpose take only valid keypoints (if not detected (0, 0, 0)
+            distance_x = abs(points[i] - centroid[0])
+            distance_y = abs(points[i+1] - centroid[1])
+            # dist_aux.append(distance)
+            if distance_x > max_dist_x:
+                max_dist_x = distance_x
+            if distance_y > max_dist_y:
+                max_dist_y = distance_y
+        elif points[i + 2] == 0.0: # check for centernet people on borders with confidence 0
+            points[i] = 0
+            points[i+1] = 0
+    for i in range(0, len(points), 3):
+        if points[i + 2] > 0.0:
+            if max_dist_x != 0.0:
+                points[i] = (points[i] - centroid[0]) / max_dist_x
+            if max_dist_y != 0.0:
+                points[i + 1] = (points[i + 1] - centroid[1]) / max_dist_y
+            if max_dist_x == 0.0:  # only one point valid with some confidence value so it become (0,0, confidence)
+                points[i] = 0.0
+            if max_dist_y == 0.0:
+                points[i + 1] = 0.0
+    return points
+def retrieve_interest_points(kpts, detector):
+    """
+    :param kpts:
+    :return:
+    """
+    res_kpts = []
+    if detector == 'centernet':
+        face_points = [0, 1, 2, 3, 4]
+        for index in face_points:
+            res_kpts.append(kpts[index][1])
+            res_kpts.append(kpts[index][0])
+            res_kpts.append(kpts[index][2])
+    elif detector== 'zedcam':
+        face_points = [0, 14, 15, 16, 17]
+        for index in face_points:
+            res_kpts.append(kpts[index][0])
+            res_kpts.append(kpts[index][1])
+            res_kpts.append(kpts[index][2])
+    else:
+        # take only interest points (5 points of face)
+        face_points = [0, 16, 15, 18, 17]
+        for index in face_points:
+            res_kpts.append(kpts[index][0])
+            res_kpts.append(kpts[index][1])
+            res_kpts.append(kpts[index][2])
+    return res_kpts
+def create_bbox_from_openpose_keypoints(data):
+    # from labels import pose_id_part_openpose
+    bbox = list()
+    ids = list()
+    kpt = list()
+    kpt_scores = list()
+    for person in data['people']:
+        ids.append(person['person_id'][0])
+        kpt_temp = list()
+        kpt_score_temp = list()
+        # create bbox with min max each dimension
+        x, y = [], []
+        for i in pose_id_part_openpose:
+            if i < 25:
+                # kpt and kpts scores
+                kpt_temp.append([int(person['pose_keypoints_2d'][i * 3]), int(person['pose_keypoints_2d'][(i * 3) + 1]),
+                                 person['pose_keypoints_2d'][(i * 3) + 2]])
+                kpt_score_temp.append(person['pose_keypoints_2d'][(i * 3) + 2])
+                # check confidence != 0
+                if person['pose_keypoints_2d'][(3 * i) + 2]!=0:
+                    x.append(int(person['pose_keypoints_2d'][3 * i]))
+                    y.append(int(person['pose_keypoints_2d'][(3 * i) + 1]))
+        kpt_scores.append(kpt_score_temp)
+        kpt.append(kpt_temp)
+        xmax = max(x)
+        xmin = min(x)
+        ymax = max(y)
+        ymin = min(y)
+        bbox.append([xmin, ymin, xmax, ymax, 1])  # last value is for compatibility of centernet
+    return bbox, kpt, kpt_scores  # not to use scores
+def atoi(text):
+    return int(text) if text.isdigit() else text
+def natural_keys(text):
+    """
+           alist.sort(key=natural_keys) sorts in human order
+           http://nedbatchelder.com/blog/200712/human_sorting.html
+           (See Toothy's implementation in the comments)
+           """
+    import re
+    return [atoi(c) for c in re.split(r'(\d+)', text)]