diff --git a/README.md b/README.md
deleted file mode 100644
index ad5cb58c756fd70913b30639641c8159330f2ed1..0000000000000000000000000000000000000000
--- a/README.md
+++ /dev/null
@@ -1,13 +0,0 @@
----
-title: CatVTON
-emoji: 馃憖
-colorFrom: gray
-colorTo: blue
-sdk: gradio
-sdk_version: 4.40.0
-app_file: app.py
-pinned: false
-license: cc-by-nc-sa-4.0
----
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
diff --git a/__pycache__/utils.cpython-39.pyc b/__pycache__/utils.cpython-39.pyc
index c74e08e5c75101b25900fe823a294cb9569b7643..6e064fcdc30055bbb94cc9af4caf1803b711c741 100644
Binary files a/__pycache__/utils.cpython-39.pyc and b/__pycache__/utils.cpython-39.pyc differ
diff --git a/app.py b/app.py
index fb3ad3a66902a58a9ffc54fac423b635abe089be..eee21897bf47c655c65c95094e673e94da499abc 100644
--- a/app.py
+++ b/app.py
@@ -1,10 +1,7 @@
import argparse
import os
-os.environ['CUDA_HOME'] = '/usr/local/cuda'
-os.environ['PATH'] = os.environ['PATH'] + ':/usr/local/cuda/bin'
-
from datetime import datetime
-import spaces
+
import gradio as gr
import numpy as np
import torch
@@ -12,7 +9,7 @@ from diffusers.image_processor import VaeImageProcessor
from huggingface_hub import snapshot_download
from PIL import Image
-from model.cloth_masker import AutoMaskerSeg, vis_mask
+from model.cloth_masker import AutoMasker, vis_mask
from model.pipeline import CatVTONPipeline
from utils import init_weight_dtype, resize_and_crop, resize_and_padding
@@ -85,12 +82,6 @@ def parse_args():
" flag passed with the `accelerate.launch` command. Use this argument to override the accelerate config."
),
)
- # parser.add_argument(
- # "--enable_condition_noise",
- # action="store_true",
- # default=True,
- # help="Whether or not to enable condition noise.",
- # )
args = parser.parse_args()
env_local_rank = int(os.environ.get("LOCAL_RANK", -1))
@@ -123,13 +114,13 @@ pipeline = CatVTONPipeline(
)
# AutoMasker
mask_processor = VaeImageProcessor(vae_scale_factor=8, do_normalize=False, do_binarize=True, do_convert_grayscale=True)
-automasker = AutoMaskerSeg(
+automasker = AutoMasker(
densepose_ckpt=os.path.join(repo_path, "DensePose"),
- segformer_ckpt="mattmdjaga/segformer_b2_clothes",
+ schp_ckpt=os.path.join(repo_path, "SCHP"),
device='cuda',
)
-@spaces.GPU
+
def submit_function(
person_image,
cloth_image,
@@ -238,12 +229,9 @@ HEADER = """
-
-· Thanks to ZeroGPU for providing A100 for this demo.
-· To adapt to ZeroGPU, we replace SCHP with SegFormer which may result in differences from our own demo.
· This demo and our weights are only open for **Non-commercial Use**.
-· SafetyChecker is set to filter NSFW content, but it may block normal results too. Please adjust the `seed` for normal outcomes.
-
+· SafetyChecker is set to filter NSFW content, but it may block normal results too. Please adjust the `seed` for normal outcomes.
+· Thanks to ZeroGPU for providing the GPU for our HuggingFace Space.
"""
def app_gradio():
diff --git a/densepose/__init__.py b/densepose/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..0fc9e977ed3174e244414378dd85d48ea02e635e
--- /dev/null
+++ b/densepose/__init__.py
@@ -0,0 +1,22 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+from .data.datasets import builtin # just to register data
+from .converters import builtin as builtin_converters # register converters
+from .config import (
+ add_densepose_config,
+ add_densepose_head_config,
+ add_hrnet_config,
+ add_dataset_category_config,
+ add_bootstrap_config,
+ load_bootstrap_config,
+)
+from .structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData
+from .evaluation import DensePoseCOCOEvaluator
+from .modeling.roi_heads import DensePoseROIHeads
+from .modeling.test_time_augmentation import (
+ DensePoseGeneralizedRCNNWithTTA,
+ DensePoseDatasetMapperTTA,
+)
+from .utils.transform import load_from_cfg
+from .modeling.hrfpn import build_hrfpn_backbone
diff --git a/densepose/__pycache__/__init__.cpython-39.pyc b/densepose/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a15ca8241664ac683ccd91f487b8ecdcf3a3e7bf
Binary files /dev/null and b/densepose/__pycache__/__init__.cpython-39.pyc differ
diff --git a/densepose/__pycache__/config.cpython-39.pyc b/densepose/__pycache__/config.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b4a11d27b90c0a05aa04569af23e8ce3a0fab50d
Binary files /dev/null and b/densepose/__pycache__/config.cpython-39.pyc differ
diff --git a/densepose/config.py b/densepose/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..2a06a09c80865ab987773511b2acc71e232b26ac
--- /dev/null
+++ b/densepose/config.py
@@ -0,0 +1,277 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates.
+# pyre-ignore-all-errors
+
+from detectron2.config import CfgNode as CN
+
+
+def add_dataset_category_config(cfg: CN) -> None:
+ """
+ Add config for additional category-related dataset options
+ - category whitelisting
+ - category mapping
+ """
+ _C = cfg
+ _C.DATASETS.CATEGORY_MAPS = CN(new_allowed=True)
+ _C.DATASETS.WHITELISTED_CATEGORIES = CN(new_allowed=True)
+ # class to mesh mapping
+ _C.DATASETS.CLASS_TO_MESH_NAME_MAPPING = CN(new_allowed=True)
+
+
+def add_evaluation_config(cfg: CN) -> None:
+ _C = cfg
+ _C.DENSEPOSE_EVALUATION = CN()
+ # evaluator type, possible values:
+ # - "iou": evaluator for models that produce iou data
+ # - "cse": evaluator for models that produce cse data
+ _C.DENSEPOSE_EVALUATION.TYPE = "iou"
+ # storage for DensePose results, possible values:
+ # - "none": no explicit storage, all the results are stored in the
+ # dictionary with predictions, memory intensive;
+ # historically the default storage type
+ # - "ram": RAM storage, uses per-process RAM storage, which is
+ # reduced to a single process storage on later stages,
+ # less memory intensive
+ # - "file": file storage, uses per-process file-based storage,
+ # the least memory intensive, but may create bottlenecks
+ # on file system accesses
+ _C.DENSEPOSE_EVALUATION.STORAGE = "none"
+    # minimum threshold for IOU values: the lower its value is,
+ # the more matches are produced (and the higher the AP score)
+ _C.DENSEPOSE_EVALUATION.MIN_IOU_THRESHOLD = 0.5
+ # Non-distributed inference is slower (at inference time) but can avoid RAM OOM
+ _C.DENSEPOSE_EVALUATION.DISTRIBUTED_INFERENCE = True
+ # evaluate mesh alignment based on vertex embeddings, only makes sense in CSE context
+ _C.DENSEPOSE_EVALUATION.EVALUATE_MESH_ALIGNMENT = False
+ # meshes to compute mesh alignment for
+ _C.DENSEPOSE_EVALUATION.MESH_ALIGNMENT_MESH_NAMES = []
+
+
+def add_bootstrap_config(cfg: CN) -> None:
+ """ """
+ _C = cfg
+ _C.BOOTSTRAP_DATASETS = []
+ _C.BOOTSTRAP_MODEL = CN()
+ _C.BOOTSTRAP_MODEL.WEIGHTS = ""
+ _C.BOOTSTRAP_MODEL.DEVICE = "cuda"
+
+
+def get_bootstrap_dataset_config() -> CN:
+ _C = CN()
+ _C.DATASET = ""
+ # ratio used to mix data loaders
+ _C.RATIO = 0.1
+ # image loader
+ _C.IMAGE_LOADER = CN(new_allowed=True)
+ _C.IMAGE_LOADER.TYPE = ""
+ _C.IMAGE_LOADER.BATCH_SIZE = 4
+ _C.IMAGE_LOADER.NUM_WORKERS = 4
+ _C.IMAGE_LOADER.CATEGORIES = []
+ _C.IMAGE_LOADER.MAX_COUNT_PER_CATEGORY = 1_000_000
+ _C.IMAGE_LOADER.CATEGORY_TO_CLASS_MAPPING = CN(new_allowed=True)
+ # inference
+ _C.INFERENCE = CN()
+ # batch size for model inputs
+ _C.INFERENCE.INPUT_BATCH_SIZE = 4
+ # batch size to group model outputs
+ _C.INFERENCE.OUTPUT_BATCH_SIZE = 2
+ # sampled data
+ _C.DATA_SAMPLER = CN(new_allowed=True)
+ _C.DATA_SAMPLER.TYPE = ""
+ _C.DATA_SAMPLER.USE_GROUND_TRUTH_CATEGORIES = False
+ # filter
+ _C.FILTER = CN(new_allowed=True)
+ _C.FILTER.TYPE = ""
+ return _C
+
+
+def load_bootstrap_config(cfg: CN) -> None:
+ """
+ Bootstrap datasets are given as a list of `dict` that are not automatically
+ converted into CfgNode. This method processes all bootstrap dataset entries
+ and ensures that they are in CfgNode format and comply with the specification
+ """
+ if not cfg.BOOTSTRAP_DATASETS:
+ return
+
+ bootstrap_datasets_cfgnodes = []
+ for dataset_cfg in cfg.BOOTSTRAP_DATASETS:
+ _C = get_bootstrap_dataset_config().clone()
+ _C.merge_from_other_cfg(CN(dataset_cfg))
+ bootstrap_datasets_cfgnodes.append(_C)
+ cfg.BOOTSTRAP_DATASETS = bootstrap_datasets_cfgnodes
+
+
+def add_densepose_head_cse_config(cfg: CN) -> None:
+ """
+ Add configuration options for Continuous Surface Embeddings (CSE)
+ """
+ _C = cfg
+ _C.MODEL.ROI_DENSEPOSE_HEAD.CSE = CN()
+ # Dimensionality D of the embedding space
+ _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE = 16
+ # Embedder specifications for various mesh IDs
+ _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDERS = CN(new_allowed=True)
+ # normalization coefficient for embedding distances
+ _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDING_DIST_GAUSS_SIGMA = 0.01
+ # normalization coefficient for geodesic distances
+ _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.GEODESIC_DIST_GAUSS_SIGMA = 0.01
+ # embedding loss weight
+ _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_LOSS_WEIGHT = 0.6
+ # embedding loss name, currently the following options are supported:
+ # - EmbeddingLoss: cross-entropy on vertex labels
+ # - SoftEmbeddingLoss: cross-entropy on vertex label combined with
+ # Gaussian penalty on distance between vertices
+ _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_LOSS_NAME = "EmbeddingLoss"
+ # optimizer hyperparameters
+ _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.FEATURES_LR_FACTOR = 1.0
+ _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDING_LR_FACTOR = 1.0
+ # Shape to shape cycle consistency loss parameters:
+ _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS = CN({"ENABLED": False})
+ # shape to shape cycle consistency loss weight
+ _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.WEIGHT = 0.025
+ # norm type used for loss computation
+ _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.NORM_P = 2
+ # normalization term for embedding similarity matrices
+ _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.TEMPERATURE = 0.05
+ # maximum number of vertices to include into shape to shape cycle loss
+ # if negative or zero, all vertices are considered
+ # if positive, random subset of vertices of given size is considered
+ _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.MAX_NUM_VERTICES = 4936
+ # Pixel to shape cycle consistency loss parameters:
+ _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS = CN({"ENABLED": False})
+ # pixel to shape cycle consistency loss weight
+ _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.WEIGHT = 0.0001
+ # norm type used for loss computation
+ _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.NORM_P = 2
+ # map images to all meshes and back (if false, use only gt meshes from the batch)
+ _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.USE_ALL_MESHES_NOT_GT_ONLY = False
+ # Randomly select at most this number of pixels from every instance
+ # if negative or zero, all vertices are considered
+ _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.NUM_PIXELS_TO_SAMPLE = 100
+ # normalization factor for pixel to pixel distances (higher value = smoother distribution)
+ _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.PIXEL_SIGMA = 5.0
+ _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.TEMPERATURE_PIXEL_TO_VERTEX = 0.05
+ _C.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.TEMPERATURE_VERTEX_TO_PIXEL = 0.05
+
+
+def add_densepose_head_config(cfg: CN) -> None:
+ """
+ Add config for densepose head.
+ """
+ _C = cfg
+
+ _C.MODEL.DENSEPOSE_ON = True
+
+ _C.MODEL.ROI_DENSEPOSE_HEAD = CN()
+ _C.MODEL.ROI_DENSEPOSE_HEAD.NAME = ""
+ _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS = 8
+ # Number of parts used for point labels
+ _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES = 24
+ _C.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL = 4
+ _C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM = 512
+ _C.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL = 3
+ _C.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE = 2
+ _C.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE = 112
+ _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE = "ROIAlignV2"
+ _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION = 28
+ _C.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO = 2
+ _C.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS = 2 # 15 or 2
+ # Overlap threshold for an RoI to be considered foreground (if >= FG_IOU_THRESHOLD)
+ _C.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD = 0.7
+    # Loss weights for annotation masks (14 parts)
+    _C.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS = 5.0
+    # Loss weights for surface parts (24 parts)
+ _C.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS = 1.0
+ # Loss weights for UV regression.
+ _C.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS = 0.01
+ # Coarse segmentation is trained using instance segmentation task data
+ _C.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS = False
+ # For Decoder
+ _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON = True
+ _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES = 256
+ _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS = 256
+ _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM = ""
+ _C.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE = 4
+ # For DeepLab head
+ _C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB = CN()
+ _C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NORM = "GN"
+ _C.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NONLOCAL_ON = 0
+ # Predictor class name, must be registered in DENSEPOSE_PREDICTOR_REGISTRY
+ # Some registered predictors:
+ # "DensePoseChartPredictor": predicts segmentation and UV coordinates for predefined charts
+ # "DensePoseChartWithConfidencePredictor": predicts segmentation, UV coordinates
+ # and associated confidences for predefined charts (default)
+ # "DensePoseEmbeddingWithConfidencePredictor": predicts segmentation, embeddings
+ # and associated confidences for CSE
+ _C.MODEL.ROI_DENSEPOSE_HEAD.PREDICTOR_NAME = "DensePoseChartWithConfidencePredictor"
+ # Loss class name, must be registered in DENSEPOSE_LOSS_REGISTRY
+ # Some registered losses:
+ # "DensePoseChartLoss": loss for chart-based models that estimate
+ # segmentation and UV coordinates
+ # "DensePoseChartWithConfidenceLoss": loss for chart-based models that estimate
+ # segmentation, UV coordinates and the corresponding confidences (default)
+ _C.MODEL.ROI_DENSEPOSE_HEAD.LOSS_NAME = "DensePoseChartWithConfidenceLoss"
+ # Confidences
+ # Enable learning UV confidences (variances) along with the actual values
+ _C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE = CN({"ENABLED": False})
+ # UV confidence lower bound
+ _C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.EPSILON = 0.01
+ # Enable learning segmentation confidences (variances) along with the actual values
+ _C.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE = CN({"ENABLED": False})
+ # Segmentation confidence lower bound
+ _C.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE.EPSILON = 0.01
+ # Statistical model type for confidence learning, possible values:
+ # - "iid_iso": statistically independent identically distributed residuals
+ # with isotropic covariance
+ # - "indep_aniso": statistically independent residuals with anisotropic
+ # covariances
+ _C.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.TYPE = "iid_iso"
+ # List of angles for rotation in data augmentation during training
+ _C.INPUT.ROTATION_ANGLES = [0]
+ _C.TEST.AUG.ROTATION_ANGLES = () # Rotation TTA
+
+ add_densepose_head_cse_config(cfg)
+
+
+def add_hrnet_config(cfg: CN) -> None:
+ """
+ Add config for HRNet backbone.
+ """
+ _C = cfg
+
+ # For HigherHRNet w32
+ _C.MODEL.HRNET = CN()
+ _C.MODEL.HRNET.STEM_INPLANES = 64
+ _C.MODEL.HRNET.STAGE2 = CN()
+ _C.MODEL.HRNET.STAGE2.NUM_MODULES = 1
+ _C.MODEL.HRNET.STAGE2.NUM_BRANCHES = 2
+ _C.MODEL.HRNET.STAGE2.BLOCK = "BASIC"
+ _C.MODEL.HRNET.STAGE2.NUM_BLOCKS = [4, 4]
+ _C.MODEL.HRNET.STAGE2.NUM_CHANNELS = [32, 64]
+ _C.MODEL.HRNET.STAGE2.FUSE_METHOD = "SUM"
+ _C.MODEL.HRNET.STAGE3 = CN()
+ _C.MODEL.HRNET.STAGE3.NUM_MODULES = 4
+ _C.MODEL.HRNET.STAGE3.NUM_BRANCHES = 3
+ _C.MODEL.HRNET.STAGE3.BLOCK = "BASIC"
+ _C.MODEL.HRNET.STAGE3.NUM_BLOCKS = [4, 4, 4]
+ _C.MODEL.HRNET.STAGE3.NUM_CHANNELS = [32, 64, 128]
+ _C.MODEL.HRNET.STAGE3.FUSE_METHOD = "SUM"
+ _C.MODEL.HRNET.STAGE4 = CN()
+ _C.MODEL.HRNET.STAGE4.NUM_MODULES = 3
+ _C.MODEL.HRNET.STAGE4.NUM_BRANCHES = 4
+ _C.MODEL.HRNET.STAGE4.BLOCK = "BASIC"
+ _C.MODEL.HRNET.STAGE4.NUM_BLOCKS = [4, 4, 4, 4]
+ _C.MODEL.HRNET.STAGE4.NUM_CHANNELS = [32, 64, 128, 256]
+ _C.MODEL.HRNET.STAGE4.FUSE_METHOD = "SUM"
+
+ _C.MODEL.HRNET.HRFPN = CN()
+ _C.MODEL.HRNET.HRFPN.OUT_CHANNELS = 256
+
+
+def add_densepose_config(cfg: CN) -> None:
+ add_densepose_head_config(cfg)
+ add_hrnet_config(cfg)
+ add_bootstrap_config(cfg)
+ add_dataset_category_config(cfg)
+ add_evaluation_config(cfg)
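
A minimal usage sketch of the config helpers added above, assuming detectron2 and the vendored densepose package from this diff are importable; the printed values are just the defaults registered in this file:

# Sketch only: apply the DensePose config extensions to a stock detectron2 config.
from detectron2.config import get_cfg
from densepose.config import add_densepose_config

cfg = get_cfg()            # base detectron2 config
add_densepose_config(cfg)  # chains the head, HRNet, bootstrap, dataset-category and evaluation helpers

# Defaults registered above are now available on the config node:
print(cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE)    # 112
print(cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE)  # 16
print(cfg.DENSEPOSE_EVALUATION.TYPE)                # "iou"
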
diff --git a/densepose/converters/__init__.py b/densepose/converters/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..9b700f44437bd4e68be358ed5aae62a22df8d88a
--- /dev/null
+++ b/densepose/converters/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from .hflip import HFlipConverter
+from .to_mask import ToMaskConverter
+from .to_chart_result import ToChartResultConverter, ToChartResultConverterWithConfidences
+from .segm_to_mask import (
+ predictor_output_with_fine_and_coarse_segm_to_mask,
+ predictor_output_with_coarse_segm_to_mask,
+ resample_fine_and_coarse_segm_to_bbox,
+)
+from .chart_output_to_chart_result import (
+ densepose_chart_predictor_output_to_result,
+ densepose_chart_predictor_output_to_result_with_confidences,
+)
+from .chart_output_hflip import densepose_chart_predictor_output_hflip
diff --git a/densepose/converters/__pycache__/__init__.cpython-39.pyc b/densepose/converters/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f82189374d4076b34e739cb25d2895ce7d3fb7eb
Binary files /dev/null and b/densepose/converters/__pycache__/__init__.cpython-39.pyc differ
diff --git a/densepose/converters/__pycache__/base.cpython-39.pyc b/densepose/converters/__pycache__/base.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..755a65623042931dd2e8761050ee09fa1ad5281b
Binary files /dev/null and b/densepose/converters/__pycache__/base.cpython-39.pyc differ
diff --git a/densepose/converters/__pycache__/builtin.cpython-39.pyc b/densepose/converters/__pycache__/builtin.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..acd400757cff745028bb009e49bd17d8dd9a260a
Binary files /dev/null and b/densepose/converters/__pycache__/builtin.cpython-39.pyc differ
diff --git a/densepose/converters/__pycache__/chart_output_hflip.cpython-39.pyc b/densepose/converters/__pycache__/chart_output_hflip.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b104343045eee9289ed2dc395b7be55fecf09187
Binary files /dev/null and b/densepose/converters/__pycache__/chart_output_hflip.cpython-39.pyc differ
diff --git a/densepose/converters/__pycache__/chart_output_to_chart_result.cpython-39.pyc b/densepose/converters/__pycache__/chart_output_to_chart_result.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0e76bffc24ce9be5576b4ac8bd8d1430e7645235
Binary files /dev/null and b/densepose/converters/__pycache__/chart_output_to_chart_result.cpython-39.pyc differ
diff --git a/densepose/converters/__pycache__/hflip.cpython-39.pyc b/densepose/converters/__pycache__/hflip.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..74c7c523c2b4f63298187a67949f2d6b1afbcdf0
Binary files /dev/null and b/densepose/converters/__pycache__/hflip.cpython-39.pyc differ
diff --git a/densepose/converters/__pycache__/segm_to_mask.cpython-39.pyc b/densepose/converters/__pycache__/segm_to_mask.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3c992f1b0dedf2987ae482a6aaf0536923b4e238
Binary files /dev/null and b/densepose/converters/__pycache__/segm_to_mask.cpython-39.pyc differ
diff --git a/densepose/converters/__pycache__/to_chart_result.cpython-39.pyc b/densepose/converters/__pycache__/to_chart_result.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..87ba159e6bf5632712b2d8702b59f5ca2bb9c56f
Binary files /dev/null and b/densepose/converters/__pycache__/to_chart_result.cpython-39.pyc differ
diff --git a/densepose/converters/__pycache__/to_mask.cpython-39.pyc b/densepose/converters/__pycache__/to_mask.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b7b79181961e4f9b4960ed9e6a6f1c4258f05304
Binary files /dev/null and b/densepose/converters/__pycache__/to_mask.cpython-39.pyc differ
diff --git a/densepose/converters/base.py b/densepose/converters/base.py
new file mode 100644
index 0000000000000000000000000000000000000000..25e3155a87b819fe526b7b2735e006aeb3a56dda
--- /dev/null
+++ b/densepose/converters/base.py
@@ -0,0 +1,95 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from typing import Any, Tuple, Type
+import torch
+
+
+class BaseConverter:
+ """
+ Converter base class to be reused by various converters.
+ Converter allows one to convert data from various source types to a particular
+ destination type. Each source type needs to register its converter. The
+ registration for each source type is valid for all descendants of that type.
+ """
+
+ @classmethod
+ def register(cls, from_type: Type, converter: Any = None):
+ """
+ Registers a converter for the specified type.
+ Can be used as a decorator (if converter is None), or called as a method.
+
+ Args:
+ from_type (type): type to register the converter for;
+ all instances of this type will use the same converter
+ converter (callable): converter to be registered for the given
+ type; if None, this method is assumed to be a decorator for the converter
+ """
+
+ if converter is not None:
+ cls._do_register(from_type, converter)
+
+ def wrapper(converter: Any) -> Any:
+ cls._do_register(from_type, converter)
+ return converter
+
+ return wrapper
+
+ @classmethod
+ def _do_register(cls, from_type: Type, converter: Any):
+ cls.registry[from_type] = converter # pyre-ignore[16]
+
+ @classmethod
+ def _lookup_converter(cls, from_type: Type) -> Any:
+ """
+ Perform recursive lookup for the given type
+ to find registered converter. If a converter was found for some base
+ class, it gets registered for this class to save on further lookups.
+
+ Args:
+ from_type: type for which to find a converter
+ Return:
+ callable or None - registered converter or None
+ if no suitable entry was found in the registry
+ """
+ if from_type in cls.registry: # pyre-ignore[16]
+ return cls.registry[from_type]
+ for base in from_type.__bases__:
+ converter = cls._lookup_converter(base)
+ if converter is not None:
+ cls._do_register(from_type, converter)
+ return converter
+ return None
+
+ @classmethod
+ def convert(cls, instance: Any, *args, **kwargs):
+ """
+ Convert an instance to the destination type using some registered
+ converter. Does recursive lookup for base classes, so there's no need
+ for explicit registration for derived classes.
+
+ Args:
+ instance: source instance to convert to the destination type
+ Return:
+ An instance of the destination type obtained from the source instance
+ Raises KeyError, if no suitable converter found
+ """
+ instance_type = type(instance)
+ converter = cls._lookup_converter(instance_type)
+ if converter is None:
+ if cls.dst_type is None: # pyre-ignore[16]
+ output_type_str = "itself"
+ else:
+ output_type_str = cls.dst_type
+ raise KeyError(f"Could not find converter from {instance_type} to {output_type_str}")
+ return converter(instance, *args, **kwargs)
+
+
+IntTupleBox = Tuple[int, int, int, int]
+
+
+def make_int_box(box: torch.Tensor) -> IntTupleBox:
+ int_box = [0, 0, 0, 0]
+ int_box[0], int_box[1], int_box[2], int_box[3] = tuple(box.long().tolist())
+ return int_box[0], int_box[1], int_box[2], int_box[3]
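
BaseConverter implements a small type-keyed registry: each concrete converter class keeps its own `registry` dict, converters are registered per source type (optionally via decorator), and lookup walks base classes recursively. A toy sketch of that pattern, assuming the package above is importable; the Meters/feet names are made up purely for illustration:

# Toy sketch of the BaseConverter registry pattern defined in base.py.
from densepose.converters.base import BaseConverter

class Meters(float):
    """Made-up source type for the example."""

class ToFeetConverter(BaseConverter):
    registry = {}       # each concrete converter keeps its own registry
    dst_type = "feet"   # only used in the KeyError message

@ToFeetConverter.register(Meters)   # decorator form; register(Meters, fn) works too
def meters_to_feet(value, *args, **kwargs):
    return float(value) * 3.28084

print(ToFeetConverter.convert(Meters(2.0)))  # 6.56168; subclasses of Meters resolve via base-class lookup
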
diff --git a/densepose/converters/builtin.py b/densepose/converters/builtin.py
new file mode 100644
index 0000000000000000000000000000000000000000..5234410307d7bfff932da982ca44926afb729c23
--- /dev/null
+++ b/densepose/converters/builtin.py
@@ -0,0 +1,33 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from ..structures import DensePoseChartPredictorOutput, DensePoseEmbeddingPredictorOutput
+from . import (
+ HFlipConverter,
+ ToChartResultConverter,
+ ToChartResultConverterWithConfidences,
+ ToMaskConverter,
+ densepose_chart_predictor_output_hflip,
+ densepose_chart_predictor_output_to_result,
+ densepose_chart_predictor_output_to_result_with_confidences,
+ predictor_output_with_coarse_segm_to_mask,
+ predictor_output_with_fine_and_coarse_segm_to_mask,
+)
+
+ToMaskConverter.register(
+ DensePoseChartPredictorOutput, predictor_output_with_fine_and_coarse_segm_to_mask
+)
+ToMaskConverter.register(
+ DensePoseEmbeddingPredictorOutput, predictor_output_with_coarse_segm_to_mask
+)
+
+ToChartResultConverter.register(
+ DensePoseChartPredictorOutput, densepose_chart_predictor_output_to_result
+)
+
+ToChartResultConverterWithConfidences.register(
+ DensePoseChartPredictorOutput, densepose_chart_predictor_output_to_result_with_confidences
+)
+
+HFlipConverter.register(DensePoseChartPredictorOutput, densepose_chart_predictor_output_hflip)
diff --git a/densepose/converters/chart_output_hflip.py b/densepose/converters/chart_output_hflip.py
new file mode 100644
index 0000000000000000000000000000000000000000..f7f0061c858c80b083d40807c0bdfb4dfcc5d86b
--- /dev/null
+++ b/densepose/converters/chart_output_hflip.py
@@ -0,0 +1,73 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+from dataclasses import fields
+import torch
+
+from densepose.structures import DensePoseChartPredictorOutput, DensePoseTransformData
+
+
+def densepose_chart_predictor_output_hflip(
+ densepose_predictor_output: DensePoseChartPredictorOutput,
+ transform_data: DensePoseTransformData,
+) -> DensePoseChartPredictorOutput:
+ """
+    Adjust a DensePose chart predictor output to account for a horizontal flip.
+ """
+ if len(densepose_predictor_output) > 0:
+
+ PredictorOutput = type(densepose_predictor_output)
+ output_dict = {}
+
+ for field in fields(densepose_predictor_output):
+ field_value = getattr(densepose_predictor_output, field.name)
+ # flip tensors
+ if isinstance(field_value, torch.Tensor):
+ setattr(densepose_predictor_output, field.name, torch.flip(field_value, [3]))
+
+ densepose_predictor_output = _flip_iuv_semantics_tensor(
+ densepose_predictor_output, transform_data
+ )
+ densepose_predictor_output = _flip_segm_semantics_tensor(
+ densepose_predictor_output, transform_data
+ )
+
+ for field in fields(densepose_predictor_output):
+ output_dict[field.name] = getattr(densepose_predictor_output, field.name)
+
+ return PredictorOutput(**output_dict)
+ else:
+ return densepose_predictor_output
+
+
+def _flip_iuv_semantics_tensor(
+ densepose_predictor_output: DensePoseChartPredictorOutput,
+ dp_transform_data: DensePoseTransformData,
+) -> DensePoseChartPredictorOutput:
+ point_label_symmetries = dp_transform_data.point_label_symmetries
+ uv_symmetries = dp_transform_data.uv_symmetries
+
+ N, C, H, W = densepose_predictor_output.u.shape
+ u_loc = (densepose_predictor_output.u[:, 1:, :, :].clamp(0, 1) * 255).long()
+ v_loc = (densepose_predictor_output.v[:, 1:, :, :].clamp(0, 1) * 255).long()
+ Iindex = torch.arange(C - 1, device=densepose_predictor_output.u.device)[
+ None, :, None, None
+ ].expand(N, C - 1, H, W)
+ densepose_predictor_output.u[:, 1:, :, :] = uv_symmetries["U_transforms"][Iindex, v_loc, u_loc]
+ densepose_predictor_output.v[:, 1:, :, :] = uv_symmetries["V_transforms"][Iindex, v_loc, u_loc]
+
+ for el in ["fine_segm", "u", "v"]:
+ densepose_predictor_output.__dict__[el] = densepose_predictor_output.__dict__[el][
+ :, point_label_symmetries, :, :
+ ]
+ return densepose_predictor_output
+
+
+def _flip_segm_semantics_tensor(
+ densepose_predictor_output: DensePoseChartPredictorOutput, dp_transform_data
+):
+ if densepose_predictor_output.coarse_segm.shape[1] > 2:
+ densepose_predictor_output.coarse_segm = densepose_predictor_output.coarse_segm[
+ :, dp_transform_data.mask_label_symmetries, :, :
+ ]
+ return densepose_predictor_output
diff --git a/densepose/converters/chart_output_to_chart_result.py b/densepose/converters/chart_output_to_chart_result.py
new file mode 100644
index 0000000000000000000000000000000000000000..b2e9c2280a60f80d2e32861a392fc78b3148cac8
--- /dev/null
+++ b/densepose/converters/chart_output_to_chart_result.py
@@ -0,0 +1,190 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from typing import Dict
+import torch
+from torch.nn import functional as F
+
+from detectron2.structures.boxes import Boxes, BoxMode
+
+from ..structures import (
+ DensePoseChartPredictorOutput,
+ DensePoseChartResult,
+ DensePoseChartResultWithConfidences,
+)
+from . import resample_fine_and_coarse_segm_to_bbox
+from .base import IntTupleBox, make_int_box
+
+
+def resample_uv_tensors_to_bbox(
+ u: torch.Tensor,
+ v: torch.Tensor,
+ labels: torch.Tensor,
+ box_xywh_abs: IntTupleBox,
+) -> torch.Tensor:
+ """
+ Resamples U and V coordinate estimates for the given bounding box
+
+ Args:
+ u (tensor [1, C, H, W] of float): U coordinates
+ v (tensor [1, C, H, W] of float): V coordinates
+ labels (tensor [H, W] of long): labels obtained by resampling segmentation
+ outputs for the given bounding box
+ box_xywh_abs (tuple of 4 int): bounding box that corresponds to predictor outputs
+ Return:
+ Resampled U and V coordinates - a tensor [2, H, W] of float
+ """
+ x, y, w, h = box_xywh_abs
+ w = max(int(w), 1)
+ h = max(int(h), 1)
+ u_bbox = F.interpolate(u, (h, w), mode="bilinear", align_corners=False)
+ v_bbox = F.interpolate(v, (h, w), mode="bilinear", align_corners=False)
+ uv = torch.zeros([2, h, w], dtype=torch.float32, device=u.device)
+ for part_id in range(1, u_bbox.size(1)):
+ uv[0][labels == part_id] = u_bbox[0, part_id][labels == part_id]
+ uv[1][labels == part_id] = v_bbox[0, part_id][labels == part_id]
+ return uv
+
+
+def resample_uv_to_bbox(
+ predictor_output: DensePoseChartPredictorOutput,
+ labels: torch.Tensor,
+ box_xywh_abs: IntTupleBox,
+) -> torch.Tensor:
+ """
+ Resamples U and V coordinate estimates for the given bounding box
+
+ Args:
+ predictor_output (DensePoseChartPredictorOutput): DensePose predictor
+ output to be resampled
+ labels (tensor [H, W] of long): labels obtained by resampling segmentation
+ outputs for the given bounding box
+ box_xywh_abs (tuple of 4 int): bounding box that corresponds to predictor outputs
+ Return:
+ Resampled U and V coordinates - a tensor [2, H, W] of float
+ """
+ return resample_uv_tensors_to_bbox(
+ predictor_output.u,
+ predictor_output.v,
+ labels,
+ box_xywh_abs,
+ )
+
+
+def densepose_chart_predictor_output_to_result(
+ predictor_output: DensePoseChartPredictorOutput, boxes: Boxes
+) -> DensePoseChartResult:
+ """
+ Convert densepose chart predictor outputs to results
+
+ Args:
+ predictor_output (DensePoseChartPredictorOutput): DensePose predictor
+ output to be converted to results, must contain only 1 output
+ boxes (Boxes): bounding box that corresponds to the predictor output,
+ must contain only 1 bounding box
+ Return:
+ DensePose chart-based result (DensePoseChartResult)
+ """
+ assert len(predictor_output) == 1 and len(boxes) == 1, (
+ f"Predictor output to result conversion can operate only single outputs"
+ f", got {len(predictor_output)} predictor outputs and {len(boxes)} boxes"
+ )
+
+ boxes_xyxy_abs = boxes.tensor.clone()
+ boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
+ box_xywh = make_int_box(boxes_xywh_abs[0])
+
+ labels = resample_fine_and_coarse_segm_to_bbox(predictor_output, box_xywh).squeeze(0)
+ uv = resample_uv_to_bbox(predictor_output, labels, box_xywh)
+ return DensePoseChartResult(labels=labels, uv=uv)
+
+
+def resample_confidences_to_bbox(
+ predictor_output: DensePoseChartPredictorOutput,
+ labels: torch.Tensor,
+ box_xywh_abs: IntTupleBox,
+) -> Dict[str, torch.Tensor]:
+ """
+ Resamples confidences for the given bounding box
+
+ Args:
+ predictor_output (DensePoseChartPredictorOutput): DensePose predictor
+ output to be resampled
+ labels (tensor [H, W] of long): labels obtained by resampling segmentation
+ outputs for the given bounding box
+ box_xywh_abs (tuple of 4 int): bounding box that corresponds to predictor outputs
+ Return:
+ Resampled confidences - a dict of [H, W] tensors of float
+ """
+
+ x, y, w, h = box_xywh_abs
+ w = max(int(w), 1)
+ h = max(int(h), 1)
+
+ confidence_names = [
+ "sigma_1",
+ "sigma_2",
+ "kappa_u",
+ "kappa_v",
+ "fine_segm_confidence",
+ "coarse_segm_confidence",
+ ]
+ confidence_results = {key: None for key in confidence_names}
+ confidence_names = [
+ key for key in confidence_names if getattr(predictor_output, key) is not None
+ ]
+ confidence_base = torch.zeros([h, w], dtype=torch.float32, device=predictor_output.u.device)
+
+ # assign data from channels that correspond to the labels
+ for key in confidence_names:
+ resampled_confidence = F.interpolate(
+ getattr(predictor_output, key),
+ (h, w),
+ mode="bilinear",
+ align_corners=False,
+ )
+ result = confidence_base.clone()
+ for part_id in range(1, predictor_output.u.size(1)):
+ if resampled_confidence.size(1) != predictor_output.u.size(1):
+ # confidence is not part-based, don't try to fill it part by part
+ continue
+ result[labels == part_id] = resampled_confidence[0, part_id][labels == part_id]
+
+ if resampled_confidence.size(1) != predictor_output.u.size(1):
+ # confidence is not part-based, fill the data with the first channel
+ # (targeted for segmentation confidences that have only 1 channel)
+ result = resampled_confidence[0, 0]
+
+ confidence_results[key] = result
+
+ return confidence_results # pyre-ignore[7]
+
+
+def densepose_chart_predictor_output_to_result_with_confidences(
+ predictor_output: DensePoseChartPredictorOutput, boxes: Boxes
+) -> DensePoseChartResultWithConfidences:
+ """
+ Convert densepose chart predictor outputs to results
+
+ Args:
+ predictor_output (DensePoseChartPredictorOutput): DensePose predictor
+ output with confidences to be converted to results, must contain only 1 output
+ boxes (Boxes): bounding box that corresponds to the predictor output,
+ must contain only 1 bounding box
+ Return:
+ DensePose chart-based result with confidences (DensePoseChartResultWithConfidences)
+ """
+ assert len(predictor_output) == 1 and len(boxes) == 1, (
+ f"Predictor output to result conversion can operate only single outputs"
+ f", got {len(predictor_output)} predictor outputs and {len(boxes)} boxes"
+ )
+
+ boxes_xyxy_abs = boxes.tensor.clone()
+ boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
+ box_xywh = make_int_box(boxes_xywh_abs[0])
+
+ labels = resample_fine_and_coarse_segm_to_bbox(predictor_output, box_xywh).squeeze(0)
+ uv = resample_uv_to_bbox(predictor_output, labels, box_xywh)
+ confidences = resample_confidences_to_bbox(predictor_output, labels, box_xywh)
+ return DensePoseChartResultWithConfidences(labels=labels, uv=uv, **confidences)
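
As a self-contained sketch of the core resampling step above (dummy random tensors; shapes follow the docstrings, with 25 channels = 24 parts plus background and 112 matching the default HEATMAP_SIZE; assumes the densepose package and detectron2 are importable):

# Sketch: resample per-part U/V maps into a bounding box with dummy data.
import torch
from densepose.converters.chart_output_to_chart_result import resample_uv_tensors_to_bbox

C, H_out, W_out = 25, 112, 112
u = torch.rand(1, C, H_out, W_out)          # per-part U estimates
v = torch.rand(1, C, H_out, W_out)          # per-part V estimates

box_xywh = (10, 20, 64, 128)                # x, y, w, h in absolute pixels
labels = torch.randint(0, C, (128, 64))     # per-pixel part labels for the box, shape (h, w)

uv = resample_uv_tensors_to_bbox(u, v, labels, box_xywh)
print(uv.shape)                             # torch.Size([2, 128, 64])
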
diff --git a/densepose/converters/hflip.py b/densepose/converters/hflip.py
new file mode 100644
index 0000000000000000000000000000000000000000..711b73b3701adfd0217132519aea46f30f9ed74a
--- /dev/null
+++ b/densepose/converters/hflip.py
@@ -0,0 +1,36 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from typing import Any
+
+from .base import BaseConverter
+
+
+class HFlipConverter(BaseConverter):
+ """
+    Applies a horizontal flip to various DensePose predictor outputs.
+    Each DensePose predictor output type has to register its conversion strategy.
+ """
+
+ registry = {}
+ dst_type = None
+
+ @classmethod
+ # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
+ # inconsistently.
+ def convert(cls, predictor_outputs: Any, transform_data: Any, *args, **kwargs):
+ """
+        Performs a horizontal flip on DensePose predictor outputs.
+ Does recursive lookup for base classes, so there's no need
+ for explicit registration for derived classes.
+
+ Args:
+            predictor_outputs: DensePose predictor output to be flipped
+ transform_data: Anything useful for the flip
+ Return:
+ An instance of the same type as predictor_outputs
+ """
+ return super(HFlipConverter, cls).convert(
+ predictor_outputs, transform_data, *args, **kwargs
+ )
diff --git a/densepose/converters/segm_to_mask.py b/densepose/converters/segm_to_mask.py
new file mode 100644
index 0000000000000000000000000000000000000000..e5843a2186f441aa9cb48b680fd67051aa1236f6
--- /dev/null
+++ b/densepose/converters/segm_to_mask.py
@@ -0,0 +1,152 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from typing import Any
+import torch
+from torch.nn import functional as F
+
+from detectron2.structures import BitMasks, Boxes, BoxMode
+
+from .base import IntTupleBox, make_int_box
+from .to_mask import ImageSizeType
+
+
+def resample_coarse_segm_tensor_to_bbox(coarse_segm: torch.Tensor, box_xywh_abs: IntTupleBox):
+ """
+ Resample coarse segmentation tensor to the given
+ bounding box and derive labels for each pixel of the bounding box
+
+ Args:
+ coarse_segm: float tensor of shape [1, K, Hout, Wout]
+ box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
+ corner coordinates, width (W) and height (H)
+ Return:
+ Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
+ """
+ x, y, w, h = box_xywh_abs
+ w = max(int(w), 1)
+ h = max(int(h), 1)
+ labels = F.interpolate(coarse_segm, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
+ return labels
+
+
+def resample_fine_and_coarse_segm_tensors_to_bbox(
+ fine_segm: torch.Tensor, coarse_segm: torch.Tensor, box_xywh_abs: IntTupleBox
+):
+ """
+ Resample fine and coarse segmentation tensors to the given
+ bounding box and derive labels for each pixel of the bounding box
+
+ Args:
+ fine_segm: float tensor of shape [1, C, Hout, Wout]
+ coarse_segm: float tensor of shape [1, K, Hout, Wout]
+ box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
+ corner coordinates, width (W) and height (H)
+ Return:
+ Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
+ """
+ x, y, w, h = box_xywh_abs
+ w = max(int(w), 1)
+ h = max(int(h), 1)
+ # coarse segmentation
+ coarse_segm_bbox = F.interpolate(
+ coarse_segm,
+ (h, w),
+ mode="bilinear",
+ align_corners=False,
+ ).argmax(dim=1)
+ # combined coarse and fine segmentation
+ labels = (
+ F.interpolate(fine_segm, (h, w), mode="bilinear", align_corners=False).argmax(dim=1)
+ * (coarse_segm_bbox > 0).long()
+ )
+ return labels
+
+
+def resample_fine_and_coarse_segm_to_bbox(predictor_output: Any, box_xywh_abs: IntTupleBox):
+ """
+ Resample fine and coarse segmentation outputs from a predictor to the given
+ bounding box and derive labels for each pixel of the bounding box
+
+ Args:
+ predictor_output: DensePose predictor output that contains segmentation
+ results to be resampled
+ box_xywh_abs (tuple of 4 int): bounding box given by its upper-left
+ corner coordinates, width (W) and height (H)
+ Return:
+ Labels for each pixel of the bounding box, a long tensor of size [1, H, W]
+ """
+ return resample_fine_and_coarse_segm_tensors_to_bbox(
+ predictor_output.fine_segm,
+ predictor_output.coarse_segm,
+ box_xywh_abs,
+ )
+
+
+def predictor_output_with_coarse_segm_to_mask(
+ predictor_output: Any, boxes: Boxes, image_size_hw: ImageSizeType
+) -> BitMasks:
+ """
+ Convert predictor output with coarse and fine segmentation to a mask.
+ Assumes that predictor output has the following attributes:
+ - coarse_segm (tensor of size [N, D, H, W]): coarse segmentation
+ unnormalized scores for N instances; D is the number of coarse
+ segmentation labels, H and W is the resolution of the estimate
+
+ Args:
+ predictor_output: DensePose predictor output to be converted to mask
+ boxes (Boxes): bounding boxes that correspond to the DensePose
+ predictor outputs
+ image_size_hw (tuple [int, int]): image height Himg and width Wimg
+ Return:
+ BitMasks that contain a bool tensor of size [N, Himg, Wimg] with
+ a mask of the size of the image for each instance
+ """
+ H, W = image_size_hw
+ boxes_xyxy_abs = boxes.tensor.clone()
+ boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
+ N = len(boxes_xywh_abs)
+ masks = torch.zeros((N, H, W), dtype=torch.bool, device=boxes.tensor.device)
+ for i in range(len(boxes_xywh_abs)):
+ box_xywh = make_int_box(boxes_xywh_abs[i])
+ box_mask = resample_coarse_segm_tensor_to_bbox(predictor_output[i].coarse_segm, box_xywh)
+ x, y, w, h = box_xywh
+ masks[i, y : y + h, x : x + w] = box_mask
+
+ return BitMasks(masks)
+
+
+def predictor_output_with_fine_and_coarse_segm_to_mask(
+ predictor_output: Any, boxes: Boxes, image_size_hw: ImageSizeType
+) -> BitMasks:
+ """
+ Convert predictor output with coarse and fine segmentation to a mask.
+ Assumes that predictor output has the following attributes:
+ - coarse_segm (tensor of size [N, D, H, W]): coarse segmentation
+ unnormalized scores for N instances; D is the number of coarse
+ segmentation labels, H and W is the resolution of the estimate
+ - fine_segm (tensor of size [N, C, H, W]): fine segmentation
+ unnormalized scores for N instances; C is the number of fine
+ segmentation labels, H and W is the resolution of the estimate
+
+ Args:
+ predictor_output: DensePose predictor output to be converted to mask
+ boxes (Boxes): bounding boxes that correspond to the DensePose
+ predictor outputs
+ image_size_hw (tuple [int, int]): image height Himg and width Wimg
+ Return:
+ BitMasks that contain a bool tensor of size [N, Himg, Wimg] with
+ a mask of the size of the image for each instance
+ """
+ H, W = image_size_hw
+ boxes_xyxy_abs = boxes.tensor.clone()
+ boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
+ N = len(boxes_xywh_abs)
+ masks = torch.zeros((N, H, W), dtype=torch.bool, device=boxes.tensor.device)
+ for i in range(len(boxes_xywh_abs)):
+ box_xywh = make_int_box(boxes_xywh_abs[i])
+ labels_i = resample_fine_and_coarse_segm_to_bbox(predictor_output[i], box_xywh)
+ x, y, w, h = box_xywh
+ masks[i, y : y + h, x : x + w] = labels_i > 0
+ return BitMasks(masks)
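
And a similar sketch for the segmentation resampling above, with dummy tensors (K = 2 coarse channels and C = 25 fine channels, matching the config defaults registered earlier in this diff; again assumes the densepose package is importable):

# Sketch: derive per-pixel labels inside a box from dummy coarse + fine segmentation scores.
import torch
from densepose.converters.segm_to_mask import resample_fine_and_coarse_segm_tensors_to_bbox

fine_segm = torch.rand(1, 25, 112, 112)     # unnormalized fine-segmentation scores
coarse_segm = torch.rand(1, 2, 112, 112)    # unnormalized foreground/background scores
box_xywh = (0, 0, 50, 80)                   # x, y, w, h

labels = resample_fine_and_coarse_segm_tensors_to_bbox(fine_segm, coarse_segm, box_xywh)
print(labels.shape, labels.dtype)           # torch.Size([1, 80, 50]) torch.int64
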
diff --git a/densepose/converters/to_chart_result.py b/densepose/converters/to_chart_result.py
new file mode 100644
index 0000000000000000000000000000000000000000..82e126a922ff8ac4d8ebc3008f67d3928b982c25
--- /dev/null
+++ b/densepose/converters/to_chart_result.py
@@ -0,0 +1,72 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from typing import Any
+
+from detectron2.structures import Boxes
+
+from ..structures import DensePoseChartResult, DensePoseChartResultWithConfidences
+from .base import BaseConverter
+
+
+class ToChartResultConverter(BaseConverter):
+ """
+ Converts various DensePose predictor outputs to DensePose results.
+    Each DensePose predictor output type has to register its conversion strategy.
+ """
+
+ registry = {}
+ dst_type = DensePoseChartResult
+
+ @classmethod
+ # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
+ # inconsistently.
+ def convert(cls, predictor_outputs: Any, boxes: Boxes, *args, **kwargs) -> DensePoseChartResult:
+ """
+ Convert DensePose predictor outputs to DensePoseResult using some registered
+ converter. Does recursive lookup for base classes, so there's no need
+ for explicit registration for derived classes.
+
+ Args:
+            predictor_outputs: DensePose predictor output to be
+                converted to DensePose chart results
+ boxes (Boxes): bounding boxes that correspond to the DensePose
+ predictor outputs
+ Return:
+ An instance of DensePoseResult. If no suitable converter was found, raises KeyError
+ """
+ return super(ToChartResultConverter, cls).convert(predictor_outputs, boxes, *args, **kwargs)
+
+
+class ToChartResultConverterWithConfidences(BaseConverter):
+ """
+ Converts various DensePose predictor outputs to DensePose results.
+    Each DensePose predictor output type has to register its conversion strategy.
+ """
+
+ registry = {}
+ dst_type = DensePoseChartResultWithConfidences
+
+ @classmethod
+ # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
+ # inconsistently.
+ def convert(
+ cls, predictor_outputs: Any, boxes: Boxes, *args, **kwargs
+ ) -> DensePoseChartResultWithConfidences:
+ """
+ Convert DensePose predictor outputs to DensePoseResult with confidences
+ using some registered converter. Does recursive lookup for base classes,
+ so there's no need for explicit registration for derived classes.
+
+ Args:
+            predictor_outputs: DensePose predictor output with confidences
+                to be converted to DensePose chart results
+ boxes (Boxes): bounding boxes that correspond to the DensePose
+ predictor outputs
+ Return:
+ An instance of DensePoseResult. If no suitable converter was found, raises KeyError
+ """
+ return super(ToChartResultConverterWithConfidences, cls).convert(
+ predictor_outputs, boxes, *args, **kwargs
+ )
diff --git a/densepose/converters/to_mask.py b/densepose/converters/to_mask.py
new file mode 100644
index 0000000000000000000000000000000000000000..7a47e2a7d7aa5f0d9c41ab46a4f1806184b7b4ba
--- /dev/null
+++ b/densepose/converters/to_mask.py
@@ -0,0 +1,51 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from typing import Any, Tuple
+
+from detectron2.structures import BitMasks, Boxes
+
+from .base import BaseConverter
+
+ImageSizeType = Tuple[int, int]
+
+
+class ToMaskConverter(BaseConverter):
+ """
+ Converts various DensePose predictor outputs to masks
+ in bit mask format (see `BitMasks`). Each DensePose predictor output type
+    has to register its conversion strategy.
+ """
+
+ registry = {}
+ dst_type = BitMasks
+
+ @classmethod
+ # pyre-fixme[14]: `convert` overrides method defined in `BaseConverter`
+ # inconsistently.
+ def convert(
+ cls,
+ densepose_predictor_outputs: Any,
+ boxes: Boxes,
+ image_size_hw: ImageSizeType,
+ *args,
+ **kwargs
+ ) -> BitMasks:
+ """
+ Convert DensePose predictor outputs to BitMasks using some registered
+ converter. Does recursive lookup for base classes, so there's no need
+ for explicit registration for derived classes.
+
+ Args:
+ densepose_predictor_outputs: DensePose predictor output to be
+ converted to BitMasks
+ boxes (Boxes): bounding boxes that correspond to the DensePose
+ predictor outputs
+ image_size_hw (tuple [int, int]): image height and width
+ Return:
+ An instance of `BitMasks`. If no suitable converter was found, raises KeyError
+ """
+ return super(ToMaskConverter, cls).convert(
+ densepose_predictor_outputs, boxes, image_size_hw, *args, **kwargs
+ )
diff --git a/densepose/data/__init__.py b/densepose/data/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..5278887bd723f1606debd3de09b7e3e0ff5b3a03
--- /dev/null
+++ b/densepose/data/__init__.py
@@ -0,0 +1,27 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from .meshes import builtin
+from .build import (
+ build_detection_test_loader,
+ build_detection_train_loader,
+ build_combined_loader,
+ build_frame_selector,
+ build_inference_based_loaders,
+ has_inference_based_loaders,
+ BootstrapDatasetFactoryCatalog,
+)
+from .combined_loader import CombinedDataLoader
+from .dataset_mapper import DatasetMapper
+from .inference_based_loader import InferenceBasedLoader, ScoreBasedFilter
+from .image_list_dataset import ImageListDataset
+from .utils import is_relative_local_path, maybe_prepend_base_path
+
+# ensure the builtin datasets are registered
+from . import datasets
+
+# ensure the bootstrap datasets builders are registered
+from . import build
+
+__all__ = [k for k in globals().keys() if not k.startswith("_")]
diff --git a/densepose/data/__pycache__/__init__.cpython-39.pyc b/densepose/data/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8685f36c53fcc1a31a2cbf7251fc343032cf7492
Binary files /dev/null and b/densepose/data/__pycache__/__init__.cpython-39.pyc differ
diff --git a/densepose/data/__pycache__/build.cpython-39.pyc b/densepose/data/__pycache__/build.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..426b867d001b02f7701d82979f2df747fde869d5
Binary files /dev/null and b/densepose/data/__pycache__/build.cpython-39.pyc differ
diff --git a/densepose/data/__pycache__/combined_loader.cpython-39.pyc b/densepose/data/__pycache__/combined_loader.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b31a974d8aee7630c6eb9a24761cc26d08206ff9
Binary files /dev/null and b/densepose/data/__pycache__/combined_loader.cpython-39.pyc differ
diff --git a/densepose/data/__pycache__/dataset_mapper.cpython-39.pyc b/densepose/data/__pycache__/dataset_mapper.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4c3e18273902fa94162b5660580a09df5205714a
Binary files /dev/null and b/densepose/data/__pycache__/dataset_mapper.cpython-39.pyc differ
diff --git a/densepose/data/__pycache__/image_list_dataset.cpython-39.pyc b/densepose/data/__pycache__/image_list_dataset.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9869b599bb055ea352748613007bc6021d79e6aa
Binary files /dev/null and b/densepose/data/__pycache__/image_list_dataset.cpython-39.pyc differ
diff --git a/densepose/data/__pycache__/inference_based_loader.cpython-39.pyc b/densepose/data/__pycache__/inference_based_loader.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..735a797223587514aea5d3a8163e5df8e9b14e72
Binary files /dev/null and b/densepose/data/__pycache__/inference_based_loader.cpython-39.pyc differ
diff --git a/densepose/data/__pycache__/utils.cpython-39.pyc b/densepose/data/__pycache__/utils.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ed14f9d1c32c4fa5db57c0f8d5b995ce3bea129d
Binary files /dev/null and b/densepose/data/__pycache__/utils.cpython-39.pyc differ
diff --git a/densepose/data/build.py b/densepose/data/build.py
new file mode 100644
index 0000000000000000000000000000000000000000..06e8e8f782e75b27b8bb1ec387dd49ccdae8dbb3
--- /dev/null
+++ b/densepose/data/build.py
@@ -0,0 +1,738 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+import itertools
+import logging
+import numpy as np
+from collections import UserDict, defaultdict
+from dataclasses import dataclass
+from typing import Any, Callable, Collection, Dict, Iterable, List, Optional, Sequence, Tuple
+import torch
+from torch.utils.data.dataset import Dataset
+
+from detectron2.config import CfgNode
+from detectron2.data.build import build_detection_test_loader as d2_build_detection_test_loader
+from detectron2.data.build import build_detection_train_loader as d2_build_detection_train_loader
+from detectron2.data.build import (
+ load_proposals_into_dataset,
+ print_instances_class_histogram,
+ trivial_batch_collator,
+ worker_init_reset_seed,
+)
+from detectron2.data.catalog import DatasetCatalog, Metadata, MetadataCatalog
+from detectron2.data.samplers import TrainingSampler
+from detectron2.utils.comm import get_world_size
+
+from densepose.config import get_bootstrap_dataset_config
+from densepose.modeling import build_densepose_embedder
+
+from .combined_loader import CombinedDataLoader, Loader
+from .dataset_mapper import DatasetMapper
+from .datasets.coco import DENSEPOSE_CSE_KEYS_WITHOUT_MASK, DENSEPOSE_IUV_KEYS_WITHOUT_MASK
+from .datasets.dataset_type import DatasetType
+from .inference_based_loader import InferenceBasedLoader, ScoreBasedFilter
+from .samplers import (
+ DensePoseConfidenceBasedSampler,
+ DensePoseCSEConfidenceBasedSampler,
+ DensePoseCSEUniformSampler,
+ DensePoseUniformSampler,
+ MaskFromDensePoseSampler,
+ PredictionToGroundTruthSampler,
+)
+from .transform import ImageResizeTransform
+from .utils import get_category_to_class_mapping, get_class_to_mesh_name_mapping
+from .video import (
+ FirstKFramesSelector,
+ FrameSelectionStrategy,
+ LastKFramesSelector,
+ RandomKFramesSelector,
+ VideoKeyframeDataset,
+ video_list_from_file,
+)
+
+__all__ = ["build_detection_train_loader", "build_detection_test_loader"]
+
+
+Instance = Dict[str, Any]
+InstancePredicate = Callable[[Instance], bool]
+
+
+def _compute_num_images_per_worker(cfg: CfgNode) -> int:
+ num_workers = get_world_size()
+ images_per_batch = cfg.SOLVER.IMS_PER_BATCH
+ assert (
+ images_per_batch % num_workers == 0
+ ), "SOLVER.IMS_PER_BATCH ({}) must be divisible by the number of workers ({}).".format(
+ images_per_batch, num_workers
+ )
+ assert (
+ images_per_batch >= num_workers
+ ), "SOLVER.IMS_PER_BATCH ({}) must be larger than the number of workers ({}).".format(
+ images_per_batch, num_workers
+ )
+ images_per_worker = images_per_batch // num_workers
+ return images_per_worker
+
+
+def _map_category_id_to_contiguous_id(dataset_name: str, dataset_dicts: Iterable[Instance]) -> None:
+ meta = MetadataCatalog.get(dataset_name)
+ for dataset_dict in dataset_dicts:
+ for ann in dataset_dict["annotations"]:
+ ann["category_id"] = meta.thing_dataset_id_to_contiguous_id[ann["category_id"]]
+
+
+@dataclass
+class _DatasetCategory:
+ """
+ Class representing category data in a dataset:
+ - id: category ID, as specified in the dataset annotations file
+ - name: category name, as specified in the dataset annotations file
+ - mapped_id: category ID after applying category maps (DATASETS.CATEGORY_MAPS config option)
+ - mapped_name: category name after applying category maps
+ - dataset_name: dataset in which the category is defined
+
+ For example, when training models in a class-agnostic manner, one could take LVIS 1.0
+ dataset and map the animal categories to the same category as human data from COCO:
+ id = 225
+ name = "cat"
+ mapped_id = 1
+ mapped_name = "person"
+ dataset_name = "lvis_v1_animals_dp_train"
+ """
+
+ id: int
+ name: str
+ mapped_id: int
+ mapped_name: str
+ dataset_name: str
+
+
+_MergedCategoriesT = Dict[int, List[_DatasetCategory]]
+
+
+def _add_category_id_to_contiguous_id_maps_to_metadata(
+ merged_categories: _MergedCategoriesT,
+) -> None:
+ merged_categories_per_dataset = {}
+ for contiguous_cat_id, cat_id in enumerate(sorted(merged_categories.keys())):
+ for cat in merged_categories[cat_id]:
+ if cat.dataset_name not in merged_categories_per_dataset:
+ merged_categories_per_dataset[cat.dataset_name] = defaultdict(list)
+ merged_categories_per_dataset[cat.dataset_name][cat_id].append(
+ (
+ contiguous_cat_id,
+ cat,
+ )
+ )
+
+ logger = logging.getLogger(__name__)
+ for dataset_name, merged_categories in merged_categories_per_dataset.items():
+ meta = MetadataCatalog.get(dataset_name)
+ if not hasattr(meta, "thing_classes"):
+ meta.thing_classes = []
+ meta.thing_dataset_id_to_contiguous_id = {}
+ meta.thing_dataset_id_to_merged_id = {}
+ else:
+ meta.thing_classes.clear()
+ meta.thing_dataset_id_to_contiguous_id.clear()
+ meta.thing_dataset_id_to_merged_id.clear()
+ logger.info(f"Dataset {dataset_name}: category ID to contiguous ID mapping:")
+ for _cat_id, categories in sorted(merged_categories.items()):
+ added_to_thing_classes = False
+ for contiguous_cat_id, cat in categories:
+ if not added_to_thing_classes:
+ meta.thing_classes.append(cat.mapped_name)
+ added_to_thing_classes = True
+ meta.thing_dataset_id_to_contiguous_id[cat.id] = contiguous_cat_id
+ meta.thing_dataset_id_to_merged_id[cat.id] = cat.mapped_id
+ logger.info(f"{cat.id} ({cat.name}) -> {contiguous_cat_id}")
+
+
+def _maybe_create_general_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
+ def has_annotations(instance: Instance) -> bool:
+ return "annotations" in instance
+
+    def has_only_crowd_annotations(instance: Instance) -> bool:
+ for ann in instance["annotations"]:
+ if ann.get("is_crowd", 0) == 0:
+ return False
+ return True
+
+ def general_keep_instance_predicate(instance: Instance) -> bool:
+        return has_annotations(instance) and not has_only_crowd_annotations(instance)
+
+ if not cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS:
+ return None
+ return general_keep_instance_predicate
+
+
+def _maybe_create_keypoints_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
+
+ min_num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE
+
+ def has_sufficient_num_keypoints(instance: Instance) -> bool:
+ num_kpts = sum(
+ (np.array(ann["keypoints"][2::3]) > 0).sum()
+ for ann in instance["annotations"]
+ if "keypoints" in ann
+ )
+ return num_kpts >= min_num_keypoints
+
+ if cfg.MODEL.KEYPOINT_ON and (min_num_keypoints > 0):
+ return has_sufficient_num_keypoints
+ return None
+
+
+def _maybe_create_mask_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
+ if not cfg.MODEL.MASK_ON:
+ return None
+
+ def has_mask_annotations(instance: Instance) -> bool:
+ return any("segmentation" in ann for ann in instance["annotations"])
+
+ return has_mask_annotations
+
+
+def _maybe_create_densepose_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
+ if not cfg.MODEL.DENSEPOSE_ON:
+ return None
+
+ use_masks = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS
+
+ def has_densepose_annotations(instance: Instance) -> bool:
+ for ann in instance["annotations"]:
+ if all(key in ann for key in DENSEPOSE_IUV_KEYS_WITHOUT_MASK) or all(
+ key in ann for key in DENSEPOSE_CSE_KEYS_WITHOUT_MASK
+ ):
+ return True
+ if use_masks and "segmentation" in ann:
+ return True
+ return False
+
+ return has_densepose_annotations
+
+
+def _maybe_create_specific_keep_instance_predicate(cfg: CfgNode) -> Optional[InstancePredicate]:
+ specific_predicate_creators = [
+ _maybe_create_keypoints_keep_instance_predicate,
+ _maybe_create_mask_keep_instance_predicate,
+ _maybe_create_densepose_keep_instance_predicate,
+ ]
+ predicates = [creator(cfg) for creator in specific_predicate_creators]
+ predicates = [p for p in predicates if p is not None]
+ if not predicates:
+ return None
+
+ def combined_predicate(instance: Instance) -> bool:
+ return any(p(instance) for p in predicates)
+
+ return combined_predicate
+
+
+def _get_train_keep_instance_predicate(cfg: CfgNode):
+ general_keep_predicate = _maybe_create_general_keep_instance_predicate(cfg)
+ combined_specific_keep_predicate = _maybe_create_specific_keep_instance_predicate(cfg)
+
+ def combined_general_specific_keep_predicate(instance: Instance) -> bool:
+ return general_keep_predicate(instance) and combined_specific_keep_predicate(instance)
+
+ if (general_keep_predicate is None) and (combined_specific_keep_predicate is None):
+ return None
+ if general_keep_predicate is None:
+ return combined_specific_keep_predicate
+ if combined_specific_keep_predicate is None:
+ return general_keep_predicate
+ return combined_general_specific_keep_predicate
+
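+# Usage sketch (illustrative only): with a hypothetical cfg where only MODEL.MASK_ON is enabled
+# (and DATALOADER.FILTER_EMPTY_ANNOTATIONS left at its default True), the train predicate keeps
+# an instance only if it has at least one non-crowd annotation AND at least one mask annotation.
+#
+#     keep = _get_train_keep_instance_predicate(cfg)
+#     keep({"annotations": [{"iscrowd": 0, "segmentation": [[0, 0, 10, 0, 10, 10]]}]})   # True
+#     keep({"annotations": [{"iscrowd": 0}]})                                            # False
+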
+
+def _get_test_keep_instance_predicate(cfg: CfgNode):
+ general_keep_predicate = _maybe_create_general_keep_instance_predicate(cfg)
+ return general_keep_predicate
+
+
+def _maybe_filter_and_map_categories(
+ dataset_name: str, dataset_dicts: List[Instance]
+) -> List[Instance]:
+ meta = MetadataCatalog.get(dataset_name)
+ category_id_map = meta.thing_dataset_id_to_contiguous_id
+ filtered_dataset_dicts = []
+ for dataset_dict in dataset_dicts:
+ anns = []
+ for ann in dataset_dict["annotations"]:
+ cat_id = ann["category_id"]
+ if cat_id not in category_id_map:
+ continue
+ ann["category_id"] = category_id_map[cat_id]
+ anns.append(ann)
+ dataset_dict["annotations"] = anns
+ filtered_dataset_dicts.append(dataset_dict)
+ return filtered_dataset_dicts
+
+
+def _add_category_whitelists_to_metadata(cfg: CfgNode) -> None:
+ for dataset_name, whitelisted_cat_ids in cfg.DATASETS.WHITELISTED_CATEGORIES.items():
+ meta = MetadataCatalog.get(dataset_name)
+ meta.whitelisted_categories = whitelisted_cat_ids
+ logger = logging.getLogger(__name__)
+ logger.info(
+ "Whitelisted categories for dataset {}: {}".format(
+ dataset_name, meta.whitelisted_categories
+ )
+ )
+
+
+def _add_category_maps_to_metadata(cfg: CfgNode) -> None:
+ for dataset_name, category_map in cfg.DATASETS.CATEGORY_MAPS.items():
+ category_map = {
+ int(cat_id_src): int(cat_id_dst) for cat_id_src, cat_id_dst in category_map.items()
+ }
+ meta = MetadataCatalog.get(dataset_name)
+ meta.category_map = category_map
+ logger = logging.getLogger(__name__)
+ logger.info("Category maps for dataset {}: {}".format(dataset_name, meta.category_map))
+
+
+def _add_category_info_to_bootstrapping_metadata(dataset_name: str, dataset_cfg: CfgNode) -> None:
+ meta = MetadataCatalog.get(dataset_name)
+ meta.category_to_class_mapping = get_category_to_class_mapping(dataset_cfg)
+ meta.categories = dataset_cfg.CATEGORIES
+ meta.max_count_per_category = dataset_cfg.MAX_COUNT_PER_CATEGORY
+ logger = logging.getLogger(__name__)
+ logger.info(
+ "Category to class mapping for dataset {}: {}".format(
+ dataset_name, meta.category_to_class_mapping
+ )
+ )
+
+
+def _maybe_add_class_to_mesh_name_map_to_metadata(dataset_names: List[str], cfg: CfgNode) -> None:
+ for dataset_name in dataset_names:
+ meta = MetadataCatalog.get(dataset_name)
+ if not hasattr(meta, "class_to_mesh_name"):
+ meta.class_to_mesh_name = get_class_to_mesh_name_mapping(cfg)
+
+
+def _merge_categories(dataset_names: Collection[str]) -> _MergedCategoriesT:
+ merged_categories = defaultdict(list)
+ category_names = {}
+ for dataset_name in dataset_names:
+ meta = MetadataCatalog.get(dataset_name)
+ whitelisted_categories = meta.get("whitelisted_categories")
+ category_map = meta.get("category_map", {})
+ cat_ids = (
+ whitelisted_categories if whitelisted_categories is not None else meta.categories.keys()
+ )
+ for cat_id in cat_ids:
+ cat_name = meta.categories[cat_id]
+ cat_id_mapped = category_map.get(cat_id, cat_id)
+ if cat_id_mapped == cat_id or cat_id_mapped in cat_ids:
+ category_names[cat_id] = cat_name
+ else:
+ category_names[cat_id] = str(cat_id_mapped)
+ # assign temporary mapped category name, this name can be changed
+ # during the second pass, since mapped ID can correspond to a category
+ # from a different dataset
+ cat_name_mapped = meta.categories[cat_id_mapped]
+ merged_categories[cat_id_mapped].append(
+ _DatasetCategory(
+ id=cat_id,
+ name=cat_name,
+ mapped_id=cat_id_mapped,
+ mapped_name=cat_name_mapped,
+ dataset_name=dataset_name,
+ )
+ )
+ # second pass to assign proper mapped category names
+ for cat_id, categories in merged_categories.items():
+ for cat in categories:
+ if cat_id in category_names and cat.mapped_name != category_names[cat_id]:
+ cat.mapped_name = category_names[cat_id]
+
+ return merged_categories
+
+
+def _warn_if_merged_different_categories(merged_categories: _MergedCategoriesT) -> None:
+ logger = logging.getLogger(__name__)
+ for cat_id in merged_categories:
+ merged_categories_i = merged_categories[cat_id]
+ first_cat_name = merged_categories_i[0].name
+ if len(merged_categories_i) > 1 and not all(
+ cat.name == first_cat_name for cat in merged_categories_i[1:]
+ ):
+ cat_summary_str = ", ".join(
+ [f"{cat.id} ({cat.name}) from {cat.dataset_name}" for cat in merged_categories_i]
+ )
+ logger.warning(
+ f"Merged category {cat_id} corresponds to the following categories: "
+ f"{cat_summary_str}"
+ )
+
+
+def combine_detection_dataset_dicts(
+ dataset_names: Collection[str],
+ keep_instance_predicate: Optional[InstancePredicate] = None,
+ proposal_files: Optional[Collection[str]] = None,
+) -> List[Instance]:
+ """
+ Load and prepare dataset dicts for training / testing
+
+ Args:
+ dataset_names (Collection[str]): a list of dataset names
+ keep_instance_predicate (Callable: Dict[str, Any] -> bool): predicate
+ applied to instance dicts which defines whether to keep the instance
+ proposal_files (Collection[str]): if given, a list of object proposal files
+ that match each dataset in `dataset_names`.
+ """
+ assert len(dataset_names)
+ if proposal_files is None:
+ proposal_files = [None] * len(dataset_names)
+ assert len(dataset_names) == len(proposal_files)
+ # load datasets and metadata
+ dataset_name_to_dicts = {}
+ for dataset_name in dataset_names:
+ dataset_name_to_dicts[dataset_name] = DatasetCatalog.get(dataset_name)
+        assert len(dataset_name_to_dicts[dataset_name]), f"Dataset '{dataset_name}' is empty!"
+ # merge categories, requires category metadata to be loaded
+ # cat_id -> [(orig_cat_id, cat_name, dataset_name)]
+ merged_categories = _merge_categories(dataset_names)
+ _warn_if_merged_different_categories(merged_categories)
+ merged_category_names = [
+ merged_categories[cat_id][0].mapped_name for cat_id in sorted(merged_categories)
+ ]
+ # map to contiguous category IDs
+ _add_category_id_to_contiguous_id_maps_to_metadata(merged_categories)
+ # load annotations and dataset metadata
+ for dataset_name, proposal_file in zip(dataset_names, proposal_files):
+ dataset_dicts = dataset_name_to_dicts[dataset_name]
+ assert len(dataset_dicts), f"Dataset '{dataset_name}' is empty!"
+ if proposal_file is not None:
+ dataset_dicts = load_proposals_into_dataset(dataset_dicts, proposal_file)
+ dataset_dicts = _maybe_filter_and_map_categories(dataset_name, dataset_dicts)
+ print_instances_class_histogram(dataset_dicts, merged_category_names)
+ dataset_name_to_dicts[dataset_name] = dataset_dicts
+
+ if keep_instance_predicate is not None:
+ all_datasets_dicts_plain = [
+ d
+ for d in itertools.chain.from_iterable(dataset_name_to_dicts.values())
+ if keep_instance_predicate(d)
+ ]
+ else:
+ all_datasets_dicts_plain = list(
+ itertools.chain.from_iterable(dataset_name_to_dicts.values())
+ )
+ return all_datasets_dicts_plain
+
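+# Usage sketch (illustrative only): flattening two registered datasets into a single list of
+# dicts with contiguous category IDs; `cfg` is assumed to be a DensePose config node.
+#
+#     dataset_dicts = combine_detection_dataset_dicts(
+#         ["densepose_coco_2014_train", "densepose_lvis_v1_ds2_train_v1"],
+#         keep_instance_predicate=_get_train_keep_instance_predicate(cfg),
+#     )
+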
+
+def build_detection_train_loader(cfg: CfgNode, mapper=None):
+ """
+ A data loader is created in a way similar to that of Detectron2.
+    The main difference is:
+    - it allows combining datasets with different but compatible object category sets
+
+ The data loader is created by the following steps:
+ 1. Use the dataset names in config to query :class:`DatasetCatalog`, and obtain a list of dicts.
+ 2. Start workers to work on the dicts. Each worker will:
+ * Map each metadata dict into another format to be consumed by the model.
+ * Batch them by simply putting dicts into a list.
+ The batched ``list[mapped_dict]`` is what this dataloader will return.
+
+ Args:
+ cfg (CfgNode): the config
+ mapper (callable): a callable which takes a sample (dict) from dataset and
+ returns the format to be consumed by the model.
+ By default it will be `DatasetMapper(cfg, True)`.
+
+ Returns:
+ an infinite iterator of training data
+ """
+
+ _add_category_whitelists_to_metadata(cfg)
+ _add_category_maps_to_metadata(cfg)
+ _maybe_add_class_to_mesh_name_map_to_metadata(cfg.DATASETS.TRAIN, cfg)
+ dataset_dicts = combine_detection_dataset_dicts(
+ cfg.DATASETS.TRAIN,
+ keep_instance_predicate=_get_train_keep_instance_predicate(cfg),
+ proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None,
+ )
+ if mapper is None:
+ mapper = DatasetMapper(cfg, True)
+ return d2_build_detection_train_loader(cfg, dataset=dataset_dicts, mapper=mapper)
+
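+# Usage sketch (illustrative only; the config path is hypothetical):
+#
+#     from detectron2.config import get_cfg
+#     from densepose import add_densepose_config
+#
+#     cfg = get_cfg()
+#     add_densepose_config(cfg)   # registers the DensePose-specific config keys
+#     cfg.merge_from_file("configs/densepose_rcnn_R_50_FPN_s1x.yaml")
+#     train_loader = build_detection_train_loader(cfg)   # infinite iterator over list[dict] batches
+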
+
+def build_detection_test_loader(cfg, dataset_name, mapper=None):
+ """
+ Similar to `build_detection_train_loader`.
+ But this function uses the given `dataset_name` argument (instead of the names in cfg),
+ and uses batch size 1.
+
+ Args:
+ cfg: a detectron2 CfgNode
+ dataset_name (str): a name of the dataset that's available in the DatasetCatalog
+ mapper (callable): a callable which takes a sample (dict) from dataset
+ and returns the format to be consumed by the model.
+ By default it will be `DatasetMapper(cfg, False)`.
+
+ Returns:
+ DataLoader: a torch DataLoader, that loads the given detection
+ dataset, with test-time transformation and batching.
+ """
+ _add_category_whitelists_to_metadata(cfg)
+ _add_category_maps_to_metadata(cfg)
+ _maybe_add_class_to_mesh_name_map_to_metadata([dataset_name], cfg)
+ dataset_dicts = combine_detection_dataset_dicts(
+ [dataset_name],
+ keep_instance_predicate=_get_test_keep_instance_predicate(cfg),
+ proposal_files=(
+ [cfg.DATASETS.PROPOSAL_FILES_TEST[list(cfg.DATASETS.TEST).index(dataset_name)]]
+ if cfg.MODEL.LOAD_PROPOSALS
+ else None
+ ),
+ )
+ sampler = None
+ if not cfg.DENSEPOSE_EVALUATION.DISTRIBUTED_INFERENCE:
+ sampler = torch.utils.data.SequentialSampler(dataset_dicts)
+ if mapper is None:
+ mapper = DatasetMapper(cfg, False)
+ return d2_build_detection_test_loader(
+ dataset_dicts, mapper=mapper, num_workers=cfg.DATALOADER.NUM_WORKERS, sampler=sampler
+ )
+
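+# Usage sketch (illustrative only): evaluation uses one dataset at a time with batch size 1.
+#
+#     test_loader = build_detection_test_loader(cfg, "densepose_coco_2014_minival")
+#     for inputs in test_loader:
+#         outputs = model(inputs)
+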
+
+def build_frame_selector(cfg: CfgNode):
+ strategy = FrameSelectionStrategy(cfg.STRATEGY)
+ if strategy == FrameSelectionStrategy.RANDOM_K:
+ frame_selector = RandomKFramesSelector(cfg.NUM_IMAGES)
+ elif strategy == FrameSelectionStrategy.FIRST_K:
+ frame_selector = FirstKFramesSelector(cfg.NUM_IMAGES)
+ elif strategy == FrameSelectionStrategy.LAST_K:
+ frame_selector = LastKFramesSelector(cfg.NUM_IMAGES)
+ elif strategy == FrameSelectionStrategy.ALL:
+ frame_selector = None
+ # pyre-fixme[61]: `frame_selector` may not be initialized here.
+ return frame_selector
+
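+# Usage sketch (illustrative only; `select_cfg` mirrors the SELECT sub-config of a bootstrap
+# dataset entry, with made-up values):
+#
+#     select_cfg = CfgNode({"STRATEGY": "random_k", "NUM_IMAGES": 4})
+#     selector = build_frame_selector(select_cfg)   # RandomKFramesSelector(4)
+#     selector(list(range(100)))                    # e.g. [3, 17, 42, 81] (4 randomly chosen frames)
+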
+
+def build_transform(cfg: CfgNode, data_type: str):
+ if cfg.TYPE == "resize":
+ if data_type == "image":
+ return ImageResizeTransform(cfg.MIN_SIZE, cfg.MAX_SIZE)
+ raise ValueError(f"Unknown transform {cfg.TYPE} for data type {data_type}")
+
+
+def build_combined_loader(cfg: CfgNode, loaders: Collection[Loader], ratios: Sequence[float]):
+ images_per_worker = _compute_num_images_per_worker(cfg)
+ return CombinedDataLoader(loaders, images_per_worker, ratios)
+
+
+def build_bootstrap_dataset(dataset_name: str, cfg: CfgNode) -> Sequence[torch.Tensor]:
+ """
+ Build dataset that provides data to bootstrap on
+
+ Args:
+ dataset_name (str): Name of the dataset, needs to have associated metadata
+ to load the data
+ cfg (CfgNode): bootstrapping config
+ Returns:
+ Sequence[Tensor] - dataset that provides image batches, Tensors of size
+ [N, C, H, W] of type float32
+ """
+ logger = logging.getLogger(__name__)
+ _add_category_info_to_bootstrapping_metadata(dataset_name, cfg)
+ meta = MetadataCatalog.get(dataset_name)
+ factory = BootstrapDatasetFactoryCatalog.get(meta.dataset_type)
+ dataset = None
+ if factory is not None:
+ dataset = factory(meta, cfg)
+ if dataset is None:
+ logger.warning(f"Failed to create dataset {dataset_name} of type {meta.dataset_type}")
+ return dataset
+
+
+def build_data_sampler(cfg: CfgNode, sampler_cfg: CfgNode, embedder: Optional[torch.nn.Module]):
+ if sampler_cfg.TYPE == "densepose_uniform":
+ data_sampler = PredictionToGroundTruthSampler()
+ # transform densepose pred -> gt
+ data_sampler.register_sampler(
+ "pred_densepose",
+ "gt_densepose",
+ DensePoseUniformSampler(count_per_class=sampler_cfg.COUNT_PER_CLASS),
+ )
+ data_sampler.register_sampler("pred_densepose", "gt_masks", MaskFromDensePoseSampler())
+ return data_sampler
+ elif sampler_cfg.TYPE == "densepose_UV_confidence":
+ data_sampler = PredictionToGroundTruthSampler()
+ # transform densepose pred -> gt
+ data_sampler.register_sampler(
+ "pred_densepose",
+ "gt_densepose",
+ DensePoseConfidenceBasedSampler(
+ confidence_channel="sigma_2",
+ count_per_class=sampler_cfg.COUNT_PER_CLASS,
+ search_proportion=0.5,
+ ),
+ )
+ data_sampler.register_sampler("pred_densepose", "gt_masks", MaskFromDensePoseSampler())
+ return data_sampler
+ elif sampler_cfg.TYPE == "densepose_fine_segm_confidence":
+ data_sampler = PredictionToGroundTruthSampler()
+ # transform densepose pred -> gt
+ data_sampler.register_sampler(
+ "pred_densepose",
+ "gt_densepose",
+ DensePoseConfidenceBasedSampler(
+ confidence_channel="fine_segm_confidence",
+ count_per_class=sampler_cfg.COUNT_PER_CLASS,
+ search_proportion=0.5,
+ ),
+ )
+ data_sampler.register_sampler("pred_densepose", "gt_masks", MaskFromDensePoseSampler())
+ return data_sampler
+ elif sampler_cfg.TYPE == "densepose_coarse_segm_confidence":
+ data_sampler = PredictionToGroundTruthSampler()
+ # transform densepose pred -> gt
+ data_sampler.register_sampler(
+ "pred_densepose",
+ "gt_densepose",
+ DensePoseConfidenceBasedSampler(
+ confidence_channel="coarse_segm_confidence",
+ count_per_class=sampler_cfg.COUNT_PER_CLASS,
+ search_proportion=0.5,
+ ),
+ )
+ data_sampler.register_sampler("pred_densepose", "gt_masks", MaskFromDensePoseSampler())
+ return data_sampler
+ elif sampler_cfg.TYPE == "densepose_cse_uniform":
+ assert embedder is not None
+ data_sampler = PredictionToGroundTruthSampler()
+ # transform densepose pred -> gt
+ data_sampler.register_sampler(
+ "pred_densepose",
+ "gt_densepose",
+ DensePoseCSEUniformSampler(
+ cfg=cfg,
+ use_gt_categories=sampler_cfg.USE_GROUND_TRUTH_CATEGORIES,
+ embedder=embedder,
+ count_per_class=sampler_cfg.COUNT_PER_CLASS,
+ ),
+ )
+ data_sampler.register_sampler("pred_densepose", "gt_masks", MaskFromDensePoseSampler())
+ return data_sampler
+ elif sampler_cfg.TYPE == "densepose_cse_coarse_segm_confidence":
+ assert embedder is not None
+ data_sampler = PredictionToGroundTruthSampler()
+ # transform densepose pred -> gt
+ data_sampler.register_sampler(
+ "pred_densepose",
+ "gt_densepose",
+ DensePoseCSEConfidenceBasedSampler(
+ cfg=cfg,
+ use_gt_categories=sampler_cfg.USE_GROUND_TRUTH_CATEGORIES,
+ embedder=embedder,
+ confidence_channel="coarse_segm_confidence",
+ count_per_class=sampler_cfg.COUNT_PER_CLASS,
+ search_proportion=0.5,
+ ),
+ )
+ data_sampler.register_sampler("pred_densepose", "gt_masks", MaskFromDensePoseSampler())
+ return data_sampler
+
+ raise ValueError(f"Unknown data sampler type {sampler_cfg.TYPE}")
+
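+# Usage sketch (illustrative only; the values are made up): the sampler config corresponds to
+# the DATA_SAMPLER entry of one bootstrap dataset specification.
+#
+#     sampler_cfg = CfgNode({"TYPE": "densepose_uniform", "COUNT_PER_CLASS": 8})
+#     sampler = build_data_sampler(cfg, sampler_cfg, embedder=None)
+#     # sampler(model_output) then fills "gt_densepose" / "gt_masks" from the model predictions
+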
+
+def build_data_filter(cfg: CfgNode):
+ if cfg.TYPE == "detection_score":
+ min_score = cfg.MIN_VALUE
+ return ScoreBasedFilter(min_score=min_score)
+ raise ValueError(f"Unknown data filter type {cfg.TYPE}")
+
+
+def build_inference_based_loader(
+ cfg: CfgNode,
+ dataset_cfg: CfgNode,
+ model: torch.nn.Module,
+ embedder: Optional[torch.nn.Module] = None,
+) -> InferenceBasedLoader:
+ """
+ Constructs data loader based on inference results of a model.
+ """
+ dataset = build_bootstrap_dataset(dataset_cfg.DATASET, dataset_cfg.IMAGE_LOADER)
+ meta = MetadataCatalog.get(dataset_cfg.DATASET)
+ training_sampler = TrainingSampler(len(dataset))
+ data_loader = torch.utils.data.DataLoader(
+ dataset, # pyre-ignore[6]
+ batch_size=dataset_cfg.IMAGE_LOADER.BATCH_SIZE,
+ sampler=training_sampler,
+ num_workers=dataset_cfg.IMAGE_LOADER.NUM_WORKERS,
+ collate_fn=trivial_batch_collator,
+ worker_init_fn=worker_init_reset_seed,
+ )
+ return InferenceBasedLoader(
+ model,
+ data_loader=data_loader,
+ data_sampler=build_data_sampler(cfg, dataset_cfg.DATA_SAMPLER, embedder),
+ data_filter=build_data_filter(dataset_cfg.FILTER),
+ shuffle=True,
+ batch_size=dataset_cfg.INFERENCE.OUTPUT_BATCH_SIZE,
+ inference_batch_size=dataset_cfg.INFERENCE.INPUT_BATCH_SIZE,
+ category_to_class_mapping=meta.category_to_class_mapping,
+ )
+
+
+def has_inference_based_loaders(cfg: CfgNode) -> bool:
+ """
+    Returns True if at least one inference-based loader must
+    be instantiated for training
+ """
+ return len(cfg.BOOTSTRAP_DATASETS) > 0
+
+
+def build_inference_based_loaders(
+ cfg: CfgNode, model: torch.nn.Module
+) -> Tuple[List[InferenceBasedLoader], List[float]]:
+ loaders = []
+ ratios = []
+ embedder = build_densepose_embedder(cfg).to(device=model.device) # pyre-ignore[16]
+ for dataset_spec in cfg.BOOTSTRAP_DATASETS:
+ dataset_cfg = get_bootstrap_dataset_config().clone()
+ dataset_cfg.merge_from_other_cfg(CfgNode(dataset_spec))
+ loader = build_inference_based_loader(cfg, dataset_cfg, model, embedder)
+ loaders.append(loader)
+ ratios.append(dataset_cfg.RATIO)
+ return loaders, ratios
+
+
+def build_video_list_dataset(meta: Metadata, cfg: CfgNode):
+ video_list_fpath = meta.video_list_fpath
+ video_base_path = meta.video_base_path
+ category = meta.category
+ if cfg.TYPE == "video_keyframe":
+ frame_selector = build_frame_selector(cfg.SELECT)
+ transform = build_transform(cfg.TRANSFORM, data_type="image")
+ video_list = video_list_from_file(video_list_fpath, video_base_path)
+ keyframe_helper_fpath = getattr(cfg, "KEYFRAME_HELPER", None)
+ return VideoKeyframeDataset(
+ video_list, category, frame_selector, transform, keyframe_helper_fpath
+ )
+
+
+class _BootstrapDatasetFactoryCatalog(UserDict):
+ """
+    A global registry that maps each DatasetType to a factory function which creates
+    the corresponding bootstrap dataset from dataset metadata and config
+ """
+
+ def register(self, dataset_type: DatasetType, factory: Callable[[Metadata, CfgNode], Dataset]):
+ """
+ Args:
+ dataset_type (DatasetType): a DatasetType e.g. DatasetType.VIDEO_LIST
+ factory (Callable[Metadata, CfgNode]): a callable which takes Metadata and cfg
+ arguments and returns a dataset object.
+ """
+        assert dataset_type not in self, "Dataset type '{}' is already registered!".format(dataset_type)
+ self[dataset_type] = factory
+
+
+BootstrapDatasetFactoryCatalog = _BootstrapDatasetFactoryCatalog()
+BootstrapDatasetFactoryCatalog.register(DatasetType.VIDEO_LIST, build_video_list_dataset)
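+
+
+# Extension sketch (illustrative only): to bootstrap from another data source, one would add a
+# new member to `DatasetType` (e.g. a hypothetical IMAGE_LIST) and register a factory for it:
+#
+#     def build_image_list_dataset(meta: Metadata, cfg: CfgNode):
+#         transform = build_transform(cfg.TRANSFORM, data_type="image")
+#         return ImageListDataset(meta.image_list, meta.category, transform)
+#
+#     BootstrapDatasetFactoryCatalog.register(DatasetType.IMAGE_LIST, build_image_list_dataset)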
diff --git a/densepose/data/combined_loader.py b/densepose/data/combined_loader.py
new file mode 100644
index 0000000000000000000000000000000000000000..c038c23a3b436b1cc6c29427c8dbf940f56250c9
--- /dev/null
+++ b/densepose/data/combined_loader.py
@@ -0,0 +1,46 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+import random
+from collections import deque
+from typing import Any, Collection, Deque, Iterable, Iterator, List, Sequence
+
+Loader = Iterable[Any]
+
+
+def _pooled_next(iterator: Iterator[Any], pool: Deque[Any]):
+ if not pool:
+ pool.extend(next(iterator))
+ return pool.popleft()
+
+
+class CombinedDataLoader:
+ """
+ Combines data loaders using the provided sampling ratios
+ """
+
+ BATCH_COUNT = 100
+
+ def __init__(self, loaders: Collection[Loader], batch_size: int, ratios: Sequence[float]):
+ self.loaders = loaders
+ self.batch_size = batch_size
+ self.ratios = ratios
+
+ def __iter__(self) -> Iterator[List[Any]]:
+ iters = [iter(loader) for loader in self.loaders]
+ indices = []
+        pool = [deque() for _ in iters]  # independent buffer per loader; a shared deque would mix loaders
+ # infinite iterator, as in D2
+ while True:
+ if not indices:
+ # just a buffer of indices, its size doesn't matter
+ # as long as it's a multiple of batch_size
+ k = self.batch_size * self.BATCH_COUNT
+ indices = random.choices(range(len(self.loaders)), self.ratios, k=k)
+ try:
+ batch = [_pooled_next(iters[i], pool[i]) for i in indices[: self.batch_size]]
+ except StopIteration:
+ break
+ indices = indices[self.batch_size :]
+ yield batch
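+
+
+# Usage sketch (illustrative only): any iterables of batches can be combined; here two toy
+# "loaders" are mixed with a 3:1 sampling ratio.
+#
+#     loader_a = [["a1", "a2"], ["a3", "a4"]] * 100
+#     loader_b = [["b1", "b2"], ["b3", "b4"]] * 100
+#     combined = CombinedDataLoader([loader_a, loader_b], batch_size=4, ratios=[0.75, 0.25])
+#     next(iter(combined))   # e.g. ["a1", "a2", "b1", "a3"]: ~75% of samples come from loader_a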
diff --git a/densepose/data/dataset_mapper.py b/densepose/data/dataset_mapper.py
new file mode 100644
index 0000000000000000000000000000000000000000..5537a94c0811f7f6849f534612222e8dc154b59d
--- /dev/null
+++ b/densepose/data/dataset_mapper.py
@@ -0,0 +1,170 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+import copy
+import logging
+from typing import Any, Dict, List, Tuple
+import torch
+
+from detectron2.data import MetadataCatalog
+from detectron2.data import detection_utils as utils
+from detectron2.data import transforms as T
+from detectron2.layers import ROIAlign
+from detectron2.structures import BoxMode
+from detectron2.utils.file_io import PathManager
+
+from densepose.structures import DensePoseDataRelative, DensePoseList, DensePoseTransformData
+
+
+def build_augmentation(cfg, is_train):
+ logger = logging.getLogger(__name__)
+ result = utils.build_augmentation(cfg, is_train)
+ if is_train:
+ random_rotation = T.RandomRotation(
+ cfg.INPUT.ROTATION_ANGLES, expand=False, sample_style="choice"
+ )
+ result.append(random_rotation)
+ logger.info("DensePose-specific augmentation used in training: " + str(random_rotation))
+ return result
+
+
+class DatasetMapper:
+ """
+ A customized version of `detectron2.data.DatasetMapper`
+ """
+
+ def __init__(self, cfg, is_train=True):
+ self.augmentation = build_augmentation(cfg, is_train)
+
+ # fmt: off
+ self.img_format = cfg.INPUT.FORMAT
+ self.mask_on = (
+ cfg.MODEL.MASK_ON or (
+ cfg.MODEL.DENSEPOSE_ON
+ and cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS)
+ )
+ self.keypoint_on = cfg.MODEL.KEYPOINT_ON
+ self.densepose_on = cfg.MODEL.DENSEPOSE_ON
+ assert not cfg.MODEL.LOAD_PROPOSALS, "not supported yet"
+ # fmt: on
+ if self.keypoint_on and is_train:
+ # Flip only makes sense in training
+ self.keypoint_hflip_indices = utils.create_keypoint_hflip_indices(cfg.DATASETS.TRAIN)
+ else:
+ self.keypoint_hflip_indices = None
+
+ if self.densepose_on:
+ densepose_transform_srcs = [
+ MetadataCatalog.get(ds).densepose_transform_src
+ for ds in cfg.DATASETS.TRAIN + cfg.DATASETS.TEST
+ ]
+ assert len(densepose_transform_srcs) > 0
+ # TODO: check that DensePose transformation data is the same for
+ # all the datasets. Otherwise one would have to pass DB ID with
+ # each entry to select proper transformation data. For now, since
+ # all DensePose annotated data uses the same data semantics, we
+ # omit this check.
+ densepose_transform_data_fpath = PathManager.get_local_path(densepose_transform_srcs[0])
+ self.densepose_transform_data = DensePoseTransformData.load(
+ densepose_transform_data_fpath
+ )
+
+ self.is_train = is_train
+
+ def __call__(self, dataset_dict):
+ """
+ Args:
+ dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
+
+ Returns:
+ dict: a format that builtin models in detectron2 accept
+ """
+ dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below
+ image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
+ utils.check_image_size(dataset_dict, image)
+
+ image, transforms = T.apply_transform_gens(self.augmentation, image)
+ image_shape = image.shape[:2] # h, w
+ dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
+
+ if not self.is_train:
+ dataset_dict.pop("annotations", None)
+ return dataset_dict
+
+ for anno in dataset_dict["annotations"]:
+ if not self.mask_on:
+ anno.pop("segmentation", None)
+ if not self.keypoint_on:
+ anno.pop("keypoints", None)
+
+ # USER: Implement additional transformations if you have other types of data
+ # USER: Don't call transpose_densepose if you don't need
+ annos = [
+ self._transform_densepose(
+ utils.transform_instance_annotations(
+ obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
+ ),
+ transforms,
+ )
+ for obj in dataset_dict.pop("annotations")
+ if obj.get("iscrowd", 0) == 0
+ ]
+
+ if self.mask_on:
+ self._add_densepose_masks_as_segmentation(annos, image_shape)
+
+ instances = utils.annotations_to_instances(annos, image_shape, mask_format="bitmask")
+ densepose_annotations = [obj.get("densepose") for obj in annos]
+ if densepose_annotations and not all(v is None for v in densepose_annotations):
+ instances.gt_densepose = DensePoseList(
+ densepose_annotations, instances.gt_boxes, image_shape
+ )
+
+ dataset_dict["instances"] = instances[instances.gt_boxes.nonempty()]
+ return dataset_dict
+
+ def _transform_densepose(self, annotation, transforms):
+ if not self.densepose_on:
+ return annotation
+
+ # Handle densepose annotations
+ is_valid, reason_not_valid = DensePoseDataRelative.validate_annotation(annotation)
+ if is_valid:
+ densepose_data = DensePoseDataRelative(annotation, cleanup=True)
+ densepose_data.apply_transform(transforms, self.densepose_transform_data)
+ annotation["densepose"] = densepose_data
+ else:
+ # logger = logging.getLogger(__name__)
+ # logger.debug("Could not load DensePose annotation: {}".format(reason_not_valid))
+ DensePoseDataRelative.cleanup_annotation(annotation)
+ # NOTE: annotations for certain instances may be unavailable.
+            # 'None' is accepted by the DensePoseList data structure.
+ annotation["densepose"] = None
+ return annotation
+
+ def _add_densepose_masks_as_segmentation(
+ self, annotations: List[Dict[str, Any]], image_shape_hw: Tuple[int, int]
+ ):
+ for obj in annotations:
+ if ("densepose" not in obj) or ("segmentation" in obj):
+ continue
+ # DP segmentation: torch.Tensor [S, S] of float32, S=256
+ segm_dp = torch.zeros_like(obj["densepose"].segm)
+ segm_dp[obj["densepose"].segm > 0] = 1
+ segm_h, segm_w = segm_dp.shape
+ bbox_segm_dp = torch.tensor((0, 0, segm_h - 1, segm_w - 1), dtype=torch.float32)
+ # image bbox
+ x0, y0, x1, y1 = (
+ v.item() for v in BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)
+ )
+ segm_aligned = (
+ ROIAlign((y1 - y0, x1 - x0), 1.0, 0, aligned=True)
+ .forward(segm_dp.view(1, 1, *segm_dp.shape), bbox_segm_dp)
+ .squeeze()
+ )
+ image_mask = torch.zeros(*image_shape_hw, dtype=torch.float32)
+ image_mask[y0:y1, x0:x1] = segm_aligned
+ # segmentation for BitMask: np.array [H, W] of bool
+ obj["segmentation"] = image_mask >= 0.5
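+
+
+# Usage sketch (illustrative only): the mapper converts one dataset dict (as produced by the
+# dataset registration code) into the per-image format consumed by the model; it is normally
+# passed to the loader builders in densepose.data.build.
+#
+#     mapper = DatasetMapper(cfg, is_train=True)
+#     model_input = mapper(dataset_dict)   # dict with "image" (CHW float32 tensor) and "instances"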
diff --git a/densepose/data/datasets/__init__.py b/densepose/data/datasets/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ccf0cf1c1dd2e21e096bd7c849150d9c261b9b4f
--- /dev/null
+++ b/densepose/data/datasets/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from . import builtin # ensure the builtin datasets are registered
+
+__all__ = [k for k in globals().keys() if "builtin" not in k and not k.startswith("_")]
diff --git a/densepose/data/datasets/__pycache__/__init__.cpython-39.pyc b/densepose/data/datasets/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..61935bbf49810ed1060457b39f303040bedf3995
Binary files /dev/null and b/densepose/data/datasets/__pycache__/__init__.cpython-39.pyc differ
diff --git a/densepose/data/datasets/__pycache__/builtin.cpython-39.pyc b/densepose/data/datasets/__pycache__/builtin.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..63f05943d99425298d42f2d18e65d6b707edce98
Binary files /dev/null and b/densepose/data/datasets/__pycache__/builtin.cpython-39.pyc differ
diff --git a/densepose/data/datasets/__pycache__/chimpnsee.cpython-39.pyc b/densepose/data/datasets/__pycache__/chimpnsee.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0fa4c5f0e50fd5dc7e21d849d5c115ea94f54394
Binary files /dev/null and b/densepose/data/datasets/__pycache__/chimpnsee.cpython-39.pyc differ
diff --git a/densepose/data/datasets/__pycache__/coco.cpython-39.pyc b/densepose/data/datasets/__pycache__/coco.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1ffa9197dfb155499693b54226ac1f856481c4e8
Binary files /dev/null and b/densepose/data/datasets/__pycache__/coco.cpython-39.pyc differ
diff --git a/densepose/data/datasets/__pycache__/dataset_type.cpython-39.pyc b/densepose/data/datasets/__pycache__/dataset_type.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..355cf766241d344aaa9d0b0def6bc2a01a2c583b
Binary files /dev/null and b/densepose/data/datasets/__pycache__/dataset_type.cpython-39.pyc differ
diff --git a/densepose/data/datasets/__pycache__/lvis.cpython-39.pyc b/densepose/data/datasets/__pycache__/lvis.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9599f29ee346020a1fab05b3b4d3a4943e447493
Binary files /dev/null and b/densepose/data/datasets/__pycache__/lvis.cpython-39.pyc differ
diff --git a/densepose/data/datasets/builtin.py b/densepose/data/datasets/builtin.py
new file mode 100644
index 0000000000000000000000000000000000000000..759c295e064b29c7968ec7db5e78d3d4de033578
--- /dev/null
+++ b/densepose/data/datasets/builtin.py
@@ -0,0 +1,18 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+from .chimpnsee import register_dataset as register_chimpnsee_dataset
+from .coco import BASE_DATASETS as BASE_COCO_DATASETS
+from .coco import DATASETS as COCO_DATASETS
+from .coco import register_datasets as register_coco_datasets
+from .lvis import DATASETS as LVIS_DATASETS
+from .lvis import register_datasets as register_lvis_datasets
+
+DEFAULT_DATASETS_ROOT = "datasets"
+
+
+register_coco_datasets(COCO_DATASETS, DEFAULT_DATASETS_ROOT)
+register_coco_datasets(BASE_COCO_DATASETS, DEFAULT_DATASETS_ROOT)
+register_lvis_datasets(LVIS_DATASETS, DEFAULT_DATASETS_ROOT)
+
+register_chimpnsee_dataset(DEFAULT_DATASETS_ROOT) # pyre-ignore[19]
diff --git a/densepose/data/datasets/chimpnsee.py b/densepose/data/datasets/chimpnsee.py
new file mode 100644
index 0000000000000000000000000000000000000000..7a0ee3768597f730f8230f52807a953148350f16
--- /dev/null
+++ b/densepose/data/datasets/chimpnsee.py
@@ -0,0 +1,31 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from typing import Optional
+
+from detectron2.data import DatasetCatalog, MetadataCatalog
+
+from ..utils import maybe_prepend_base_path
+from .dataset_type import DatasetType
+
+CHIMPNSEE_DATASET_NAME = "chimpnsee"
+
+
+def register_dataset(datasets_root: Optional[str] = None) -> None:
+ def empty_load_callback():
+ pass
+
+ video_list_fpath = maybe_prepend_base_path(
+ datasets_root,
+ "chimpnsee/cdna.eva.mpg.de/video_list.txt",
+ )
+ video_base_path = maybe_prepend_base_path(datasets_root, "chimpnsee/cdna.eva.mpg.de")
+
+ DatasetCatalog.register(CHIMPNSEE_DATASET_NAME, empty_load_callback)
+ MetadataCatalog.get(CHIMPNSEE_DATASET_NAME).set(
+ dataset_type=DatasetType.VIDEO_LIST,
+ video_list_fpath=video_list_fpath,
+ video_base_path=video_base_path,
+ category="chimpanzee",
+ )
diff --git a/densepose/data/datasets/coco.py b/densepose/data/datasets/coco.py
new file mode 100644
index 0000000000000000000000000000000000000000..47c9a5e1dc7422a970b7804277f9ba07841bc714
--- /dev/null
+++ b/densepose/data/datasets/coco.py
@@ -0,0 +1,434 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+import contextlib
+import io
+import logging
+import os
+from collections import defaultdict
+from dataclasses import dataclass
+from typing import Any, Dict, Iterable, List, Optional
+from fvcore.common.timer import Timer
+
+from detectron2.data import DatasetCatalog, MetadataCatalog
+from detectron2.structures import BoxMode
+from detectron2.utils.file_io import PathManager
+
+from ..utils import maybe_prepend_base_path
+
+DENSEPOSE_MASK_KEY = "dp_masks"
+DENSEPOSE_IUV_KEYS_WITHOUT_MASK = ["dp_x", "dp_y", "dp_I", "dp_U", "dp_V"]
+DENSEPOSE_CSE_KEYS_WITHOUT_MASK = ["dp_x", "dp_y", "dp_vertex", "ref_model"]
+DENSEPOSE_ALL_POSSIBLE_KEYS = set(
+ DENSEPOSE_IUV_KEYS_WITHOUT_MASK + DENSEPOSE_CSE_KEYS_WITHOUT_MASK + [DENSEPOSE_MASK_KEY]
+)
+DENSEPOSE_METADATA_URL_PREFIX = "https://dl.fbaipublicfiles.com/densepose/data/"
+
+
+@dataclass
+class CocoDatasetInfo:
+ name: str
+ images_root: str
+ annotations_fpath: str
+
+
+DATASETS = [
+ CocoDatasetInfo(
+ name="densepose_coco_2014_train",
+ images_root="coco/train2014",
+ annotations_fpath="coco/annotations/densepose_train2014.json",
+ ),
+ CocoDatasetInfo(
+ name="densepose_coco_2014_minival",
+ images_root="coco/val2014",
+ annotations_fpath="coco/annotations/densepose_minival2014.json",
+ ),
+ CocoDatasetInfo(
+ name="densepose_coco_2014_minival_100",
+ images_root="coco/val2014",
+ annotations_fpath="coco/annotations/densepose_minival2014_100.json",
+ ),
+ CocoDatasetInfo(
+ name="densepose_coco_2014_valminusminival",
+ images_root="coco/val2014",
+ annotations_fpath="coco/annotations/densepose_valminusminival2014.json",
+ ),
+ CocoDatasetInfo(
+ name="densepose_coco_2014_train_cse",
+ images_root="coco/train2014",
+ annotations_fpath="coco_cse/densepose_train2014_cse.json",
+ ),
+ CocoDatasetInfo(
+ name="densepose_coco_2014_minival_cse",
+ images_root="coco/val2014",
+ annotations_fpath="coco_cse/densepose_minival2014_cse.json",
+ ),
+ CocoDatasetInfo(
+ name="densepose_coco_2014_minival_100_cse",
+ images_root="coco/val2014",
+ annotations_fpath="coco_cse/densepose_minival2014_100_cse.json",
+ ),
+ CocoDatasetInfo(
+ name="densepose_coco_2014_valminusminival_cse",
+ images_root="coco/val2014",
+ annotations_fpath="coco_cse/densepose_valminusminival2014_cse.json",
+ ),
+ CocoDatasetInfo(
+ name="densepose_chimps",
+ images_root="densepose_chimps/images",
+ annotations_fpath="densepose_chimps/densepose_chimps_densepose.json",
+ ),
+ CocoDatasetInfo(
+ name="densepose_chimps_cse_train",
+ images_root="densepose_chimps/images",
+ annotations_fpath="densepose_chimps/densepose_chimps_cse_train.json",
+ ),
+ CocoDatasetInfo(
+ name="densepose_chimps_cse_val",
+ images_root="densepose_chimps/images",
+ annotations_fpath="densepose_chimps/densepose_chimps_cse_val.json",
+ ),
+ CocoDatasetInfo(
+ name="posetrack2017_train",
+ images_root="posetrack2017/posetrack_data_2017",
+ annotations_fpath="posetrack2017/densepose_posetrack_train2017.json",
+ ),
+ CocoDatasetInfo(
+ name="posetrack2017_val",
+ images_root="posetrack2017/posetrack_data_2017",
+ annotations_fpath="posetrack2017/densepose_posetrack_val2017.json",
+ ),
+ CocoDatasetInfo(
+ name="lvis_v05_train",
+ images_root="coco/train2017",
+ annotations_fpath="lvis/lvis_v0.5_plus_dp_train.json",
+ ),
+ CocoDatasetInfo(
+ name="lvis_v05_val",
+ images_root="coco/val2017",
+ annotations_fpath="lvis/lvis_v0.5_plus_dp_val.json",
+ ),
+]
+
+
+BASE_DATASETS = [
+ CocoDatasetInfo(
+ name="base_coco_2017_train",
+ images_root="coco/train2017",
+ annotations_fpath="coco/annotations/instances_train2017.json",
+ ),
+ CocoDatasetInfo(
+ name="base_coco_2017_val",
+ images_root="coco/val2017",
+ annotations_fpath="coco/annotations/instances_val2017.json",
+ ),
+ CocoDatasetInfo(
+ name="base_coco_2017_val_100",
+ images_root="coco/val2017",
+ annotations_fpath="coco/annotations/instances_val2017_100.json",
+ ),
+]
+
+
+def get_metadata(base_path: Optional[str]) -> Dict[str, Any]:
+ """
+ Returns metadata associated with COCO DensePose datasets
+
+ Args:
+ base_path: Optional[str]
+ Base path used to load metadata from
+
+ Returns:
+ Dict[str, Any]
+ Metadata in the form of a dictionary
+ """
+ meta = {
+ "densepose_transform_src": maybe_prepend_base_path(base_path, "UV_symmetry_transforms.mat"),
+ "densepose_smpl_subdiv": maybe_prepend_base_path(base_path, "SMPL_subdiv.mat"),
+ "densepose_smpl_subdiv_transform": maybe_prepend_base_path(
+ base_path,
+ "SMPL_SUBDIV_TRANSFORM.mat",
+ ),
+ }
+ return meta
+
+
+def _load_coco_annotations(json_file: str):
+ """
+ Load COCO annotations from a JSON file
+
+ Args:
+ json_file: str
+ Path to the file to load annotations from
+ Returns:
+ Instance of `pycocotools.coco.COCO` that provides access to annotations
+ data
+ """
+ from pycocotools.coco import COCO
+
+ logger = logging.getLogger(__name__)
+ timer = Timer()
+ with contextlib.redirect_stdout(io.StringIO()):
+ coco_api = COCO(json_file)
+ if timer.seconds() > 1:
+ logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))
+ return coco_api
+
+
+def _add_categories_metadata(dataset_name: str, categories: List[Dict[str, Any]]):
+ meta = MetadataCatalog.get(dataset_name)
+ meta.categories = {c["id"]: c["name"] for c in categories}
+ logger = logging.getLogger(__name__)
+ logger.info("Dataset {} categories: {}".format(dataset_name, meta.categories))
+
+
+def _verify_annotations_have_unique_ids(json_file: str, anns: List[List[Dict[str, Any]]]):
+ if "minival" in json_file:
+ # Skip validation on COCO2014 valminusminival and minival annotations
+ # The ratio of buggy annotations there is tiny and does not affect accuracy
+ # Therefore we explicitly white-list them
+ return
+ ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
+ assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format(
+ json_file
+ )
+
+
+def _maybe_add_bbox(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
+ if "bbox" not in ann_dict:
+ return
+ obj["bbox"] = ann_dict["bbox"]
+ obj["bbox_mode"] = BoxMode.XYWH_ABS
+
+
+def _maybe_add_segm(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
+ if "segmentation" not in ann_dict:
+ return
+ segm = ann_dict["segmentation"]
+ if not isinstance(segm, dict):
+ # filter out invalid polygons (< 3 points)
+ segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6]
+ if len(segm) == 0:
+ return
+ obj["segmentation"] = segm
+
+
+def _maybe_add_keypoints(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
+ if "keypoints" not in ann_dict:
+ return
+ keypts = ann_dict["keypoints"] # list[int]
+ for idx, v in enumerate(keypts):
+ if idx % 3 != 2:
+ # COCO's segmentation coordinates are floating points in [0, H or W],
+ # but keypoint coordinates are integers in [0, H-1 or W-1]
+ # Therefore we assume the coordinates are "pixel indices" and
+ # add 0.5 to convert to floating point coordinates.
+ keypts[idx] = v + 0.5
+ obj["keypoints"] = keypts
+
+
+def _maybe_add_densepose(obj: Dict[str, Any], ann_dict: Dict[str, Any]):
+ for key in DENSEPOSE_ALL_POSSIBLE_KEYS:
+ if key in ann_dict:
+ obj[key] = ann_dict[key]
+
+
+def _combine_images_with_annotations(
+ dataset_name: str,
+ image_root: str,
+ img_datas: Iterable[Dict[str, Any]],
+ ann_datas: Iterable[Iterable[Dict[str, Any]]],
+):
+
+ ann_keys = ["iscrowd", "category_id"]
+ dataset_dicts = []
+ contains_video_frame_info = False
+
+ for img_dict, ann_dicts in zip(img_datas, ann_datas):
+ record = {}
+ record["file_name"] = os.path.join(image_root, img_dict["file_name"])
+ record["height"] = img_dict["height"]
+ record["width"] = img_dict["width"]
+ record["image_id"] = img_dict["id"]
+ record["dataset"] = dataset_name
+ if "frame_id" in img_dict:
+ record["frame_id"] = img_dict["frame_id"]
+ record["video_id"] = img_dict.get("vid_id", None)
+ contains_video_frame_info = True
+ objs = []
+ for ann_dict in ann_dicts:
+ assert ann_dict["image_id"] == record["image_id"]
+ assert ann_dict.get("ignore", 0) == 0
+ obj = {key: ann_dict[key] for key in ann_keys if key in ann_dict}
+ _maybe_add_bbox(obj, ann_dict)
+ _maybe_add_segm(obj, ann_dict)
+ _maybe_add_keypoints(obj, ann_dict)
+ _maybe_add_densepose(obj, ann_dict)
+ objs.append(obj)
+ record["annotations"] = objs
+ dataset_dicts.append(record)
+ if contains_video_frame_info:
+ create_video_frame_mapping(dataset_name, dataset_dicts)
+ return dataset_dicts
+
+
+def get_contiguous_id_to_category_id_map(metadata):
+ cat_id_2_cont_id = metadata.thing_dataset_id_to_contiguous_id
+ cont_id_2_cat_id = {}
+ for cat_id, cont_id in cat_id_2_cont_id.items():
+ if cont_id in cont_id_2_cat_id:
+ continue
+ cont_id_2_cat_id[cont_id] = cat_id
+ return cont_id_2_cat_id
+
+
+def maybe_filter_categories_cocoapi(dataset_name, coco_api):
+ meta = MetadataCatalog.get(dataset_name)
+ cont_id_2_cat_id = get_contiguous_id_to_category_id_map(meta)
+ cat_id_2_cont_id = meta.thing_dataset_id_to_contiguous_id
+ # filter categories
+ cats = []
+ for cat in coco_api.dataset["categories"]:
+ cat_id = cat["id"]
+ if cat_id not in cat_id_2_cont_id:
+ continue
+ cont_id = cat_id_2_cont_id[cat_id]
+ if (cont_id in cont_id_2_cat_id) and (cont_id_2_cat_id[cont_id] == cat_id):
+ cats.append(cat)
+ coco_api.dataset["categories"] = cats
+ # filter annotations, if multiple categories are mapped to a single
+ # contiguous ID, use only one category ID and map all annotations to that category ID
+ anns = []
+ for ann in coco_api.dataset["annotations"]:
+ cat_id = ann["category_id"]
+ if cat_id not in cat_id_2_cont_id:
+ continue
+ cont_id = cat_id_2_cont_id[cat_id]
+ ann["category_id"] = cont_id_2_cat_id[cont_id]
+ anns.append(ann)
+ coco_api.dataset["annotations"] = anns
+ # recreate index
+ coco_api.createIndex()
+
+
+def maybe_filter_and_map_categories_cocoapi(dataset_name, coco_api):
+ meta = MetadataCatalog.get(dataset_name)
+ category_id_map = meta.thing_dataset_id_to_contiguous_id
+ # map categories
+ cats = []
+ for cat in coco_api.dataset["categories"]:
+ cat_id = cat["id"]
+ if cat_id not in category_id_map:
+ continue
+ cat["id"] = category_id_map[cat_id]
+ cats.append(cat)
+ coco_api.dataset["categories"] = cats
+ # map annotation categories
+ anns = []
+ for ann in coco_api.dataset["annotations"]:
+ cat_id = ann["category_id"]
+ if cat_id not in category_id_map:
+ continue
+ ann["category_id"] = category_id_map[cat_id]
+ anns.append(ann)
+ coco_api.dataset["annotations"] = anns
+ # recreate index
+ coco_api.createIndex()
+
+
+def create_video_frame_mapping(dataset_name, dataset_dicts):
+ mapping = defaultdict(dict)
+ for d in dataset_dicts:
+ video_id = d.get("video_id")
+ if video_id is None:
+ continue
+ mapping[video_id].update({d["frame_id"]: d["file_name"]})
+ MetadataCatalog.get(dataset_name).set(video_frame_mapping=mapping)
+
+
+def load_coco_json(annotations_json_file: str, image_root: str, dataset_name: str):
+ """
+ Loads a JSON file with annotations in COCO instances format.
+ Replaces `detectron2.data.datasets.coco.load_coco_json` to handle metadata
+ in a more flexible way. Postpones category mapping to a later stage to be
+ able to combine several datasets with different (but coherent) sets of
+ categories.
+
+ Args:
+
+ annotations_json_file: str
+ Path to the JSON file with annotations in COCO instances format.
+ image_root: str
+ directory that contains all the images
+ dataset_name: str
+ the name that identifies a dataset, e.g. "densepose_coco_2014_train"
+ """
+ coco_api = _load_coco_annotations(PathManager.get_local_path(annotations_json_file))
+ _add_categories_metadata(dataset_name, coco_api.loadCats(coco_api.getCatIds()))
+ # sort indices for reproducible results
+ img_ids = sorted(coco_api.imgs.keys())
+ # imgs is a list of dicts, each looks something like:
+ # {'license': 4,
+ # 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
+ # 'file_name': 'COCO_val2014_000000001268.jpg',
+ # 'height': 427,
+ # 'width': 640,
+ # 'date_captured': '2013-11-17 05:57:24',
+ # 'id': 1268}
+ imgs = coco_api.loadImgs(img_ids)
+ logger = logging.getLogger(__name__)
+ logger.info("Loaded {} images in COCO format from {}".format(len(imgs), annotations_json_file))
+ # anns is a list[list[dict]], where each dict is an annotation
+ # record for an object. The inner list enumerates the objects in an image
+ # and the outer list enumerates over images.
+ anns = [coco_api.imgToAnns[img_id] for img_id in img_ids]
+ _verify_annotations_have_unique_ids(annotations_json_file, anns)
+ dataset_records = _combine_images_with_annotations(dataset_name, image_root, imgs, anns)
+ return dataset_records
+
+
+def register_dataset(dataset_data: CocoDatasetInfo, datasets_root: Optional[str] = None):
+ """
+ Registers provided COCO DensePose dataset
+
+ Args:
+ dataset_data: CocoDatasetInfo
+ Dataset data
+ datasets_root: Optional[str]
+ Datasets root folder (default: None)
+ """
+ annotations_fpath = maybe_prepend_base_path(datasets_root, dataset_data.annotations_fpath)
+ images_root = maybe_prepend_base_path(datasets_root, dataset_data.images_root)
+
+ def load_annotations():
+ return load_coco_json(
+ annotations_json_file=annotations_fpath,
+ image_root=images_root,
+ dataset_name=dataset_data.name,
+ )
+
+ DatasetCatalog.register(dataset_data.name, load_annotations)
+ MetadataCatalog.get(dataset_data.name).set(
+ json_file=annotations_fpath,
+ image_root=images_root,
+ **get_metadata(DENSEPOSE_METADATA_URL_PREFIX)
+ )
+
+
+def register_datasets(
+ datasets_data: Iterable[CocoDatasetInfo], datasets_root: Optional[str] = None
+):
+ """
+ Registers provided COCO DensePose datasets
+
+ Args:
+ datasets_data: Iterable[CocoDatasetInfo]
+            An iterable of dataset descriptors
+ datasets_root: Optional[str]
+ Datasets root folder (default: None)
+ """
+ for dataset_data in datasets_data:
+ register_dataset(dataset_data, datasets_root)
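+
+
+# Usage sketch (illustrative only; the dataset name and paths are made up): registering a
+# custom COCO-format DensePose dataset in addition to the builtin ones above.
+#
+#     my_dataset = CocoDatasetInfo(
+#         name="densepose_my_data_train",
+#         images_root="my_data/images",
+#         annotations_fpath="my_data/annotations/densepose_train.json",
+#     )
+#     register_dataset(my_dataset, datasets_root="datasets")
+#     dataset_dicts = DatasetCatalog.get("densepose_my_data_train")   # triggers the lazy load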
diff --git a/densepose/data/datasets/dataset_type.py b/densepose/data/datasets/dataset_type.py
new file mode 100644
index 0000000000000000000000000000000000000000..4e546f2aa74b4586d97618d41c69432ed01e21e9
--- /dev/null
+++ b/densepose/data/datasets/dataset_type.py
@@ -0,0 +1,13 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from enum import Enum
+
+
+class DatasetType(Enum):
+ """
+ Dataset type, mostly used for datasets that contain data to bootstrap models on
+ """
+
+ VIDEO_LIST = "video_list"
diff --git a/densepose/data/datasets/lvis.py b/densepose/data/datasets/lvis.py
new file mode 100644
index 0000000000000000000000000000000000000000..e90caac4bb429f9500a98998df18d238254a709e
--- /dev/null
+++ b/densepose/data/datasets/lvis.py
@@ -0,0 +1,259 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+import logging
+import os
+from typing import Any, Dict, Iterable, List, Optional
+from fvcore.common.timer import Timer
+
+from detectron2.data import DatasetCatalog, MetadataCatalog
+from detectron2.data.datasets.lvis import get_lvis_instances_meta
+from detectron2.structures import BoxMode
+from detectron2.utils.file_io import PathManager
+
+from ..utils import maybe_prepend_base_path
+from .coco import (
+ DENSEPOSE_ALL_POSSIBLE_KEYS,
+ DENSEPOSE_METADATA_URL_PREFIX,
+ CocoDatasetInfo,
+ get_metadata,
+)
+
+DATASETS = [
+ CocoDatasetInfo(
+ name="densepose_lvis_v1_ds1_train_v1",
+ images_root="coco_",
+ annotations_fpath="lvis/densepose_lvis_v1_ds1_train_v1.json",
+ ),
+ CocoDatasetInfo(
+ name="densepose_lvis_v1_ds1_val_v1",
+ images_root="coco_",
+ annotations_fpath="lvis/densepose_lvis_v1_ds1_val_v1.json",
+ ),
+ CocoDatasetInfo(
+ name="densepose_lvis_v1_ds2_train_v1",
+ images_root="coco_",
+ annotations_fpath="lvis/densepose_lvis_v1_ds2_train_v1.json",
+ ),
+ CocoDatasetInfo(
+ name="densepose_lvis_v1_ds2_val_v1",
+ images_root="coco_",
+ annotations_fpath="lvis/densepose_lvis_v1_ds2_val_v1.json",
+ ),
+ CocoDatasetInfo(
+ name="densepose_lvis_v1_ds1_val_animals_100",
+ images_root="coco_",
+ annotations_fpath="lvis/densepose_lvis_v1_val_animals_100_v2.json",
+ ),
+]
+
+
+def _load_lvis_annotations(json_file: str):
+ """
+ Load COCO annotations from a JSON file
+
+ Args:
+ json_file: str
+ Path to the file to load annotations from
+ Returns:
+ Instance of `pycocotools.coco.COCO` that provides access to annotations
+ data
+ """
+ from lvis import LVIS
+
+ json_file = PathManager.get_local_path(json_file)
+ logger = logging.getLogger(__name__)
+ timer = Timer()
+ lvis_api = LVIS(json_file)
+ if timer.seconds() > 1:
+ logger.info("Loading {} takes {:.2f} seconds.".format(json_file, timer.seconds()))
+ return lvis_api
+
+
+def _add_categories_metadata(dataset_name: str) -> None:
+ metadict = get_lvis_instances_meta(dataset_name)
+ categories = metadict["thing_classes"]
+ metadata = MetadataCatalog.get(dataset_name)
+ metadata.categories = {i + 1: categories[i] for i in range(len(categories))}
+ logger = logging.getLogger(__name__)
+ logger.info(f"Dataset {dataset_name} has {len(categories)} categories")
+
+
+def _verify_annotations_have_unique_ids(json_file: str, anns: List[List[Dict[str, Any]]]) -> None:
+ ann_ids = [ann["id"] for anns_per_image in anns for ann in anns_per_image]
+ assert len(set(ann_ids)) == len(ann_ids), "Annotation ids in '{}' are not unique!".format(
+ json_file
+ )
+
+
+def _maybe_add_bbox(obj: Dict[str, Any], ann_dict: Dict[str, Any]) -> None:
+ if "bbox" not in ann_dict:
+ return
+ obj["bbox"] = ann_dict["bbox"]
+ obj["bbox_mode"] = BoxMode.XYWH_ABS
+
+
+def _maybe_add_segm(obj: Dict[str, Any], ann_dict: Dict[str, Any]) -> None:
+ if "segmentation" not in ann_dict:
+ return
+ segm = ann_dict["segmentation"]
+ if not isinstance(segm, dict):
+ # filter out invalid polygons (< 3 points)
+ segm = [poly for poly in segm if len(poly) % 2 == 0 and len(poly) >= 6]
+ if len(segm) == 0:
+ return
+ obj["segmentation"] = segm
+
+
+def _maybe_add_keypoints(obj: Dict[str, Any], ann_dict: Dict[str, Any]) -> None:
+ if "keypoints" not in ann_dict:
+ return
+ keypts = ann_dict["keypoints"] # list[int]
+ for idx, v in enumerate(keypts):
+ if idx % 3 != 2:
+ # COCO's segmentation coordinates are floating points in [0, H or W],
+ # but keypoint coordinates are integers in [0, H-1 or W-1]
+ # Therefore we assume the coordinates are "pixel indices" and
+ # add 0.5 to convert to floating point coordinates.
+ keypts[idx] = v + 0.5
+ obj["keypoints"] = keypts
+
+
+def _maybe_add_densepose(obj: Dict[str, Any], ann_dict: Dict[str, Any]) -> None:
+ for key in DENSEPOSE_ALL_POSSIBLE_KEYS:
+ if key in ann_dict:
+ obj[key] = ann_dict[key]
+
+
+def _combine_images_with_annotations(
+ dataset_name: str,
+ image_root: str,
+ img_datas: Iterable[Dict[str, Any]],
+ ann_datas: Iterable[Iterable[Dict[str, Any]]],
+):
+
+ dataset_dicts = []
+
+ def get_file_name(img_root, img_dict):
+ # Determine the path including the split folder ("train2017", "val2017", "test2017") from
+ # the coco_url field. Example:
+ # 'coco_url': 'http://images.cocodataset.org/train2017/000000155379.jpg'
+ split_folder, file_name = img_dict["coco_url"].split("/")[-2:]
+ return os.path.join(img_root + split_folder, file_name)
+
+ for img_dict, ann_dicts in zip(img_datas, ann_datas):
+ record = {}
+ record["file_name"] = get_file_name(image_root, img_dict)
+ record["height"] = img_dict["height"]
+ record["width"] = img_dict["width"]
+ record["not_exhaustive_category_ids"] = img_dict.get("not_exhaustive_category_ids", [])
+ record["neg_category_ids"] = img_dict.get("neg_category_ids", [])
+ record["image_id"] = img_dict["id"]
+ record["dataset"] = dataset_name
+
+ objs = []
+ for ann_dict in ann_dicts:
+ assert ann_dict["image_id"] == record["image_id"]
+ obj = {}
+ _maybe_add_bbox(obj, ann_dict)
+ obj["iscrowd"] = ann_dict.get("iscrowd", 0)
+ obj["category_id"] = ann_dict["category_id"]
+ _maybe_add_segm(obj, ann_dict)
+ _maybe_add_keypoints(obj, ann_dict)
+ _maybe_add_densepose(obj, ann_dict)
+ objs.append(obj)
+ record["annotations"] = objs
+ dataset_dicts.append(record)
+ return dataset_dicts
+
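+# Note (illustrative example, not part of the original code): LVIS reuses the COCO 2017 image
+# folders, so file paths are derived from the `coco_url` field; with images_root "coco_" (as in
+# DATASETS above), a record like
+#
+#     {"coco_url": "http://images.cocodataset.org/train2017/000000155379.jpg", ...}
+#
+# resolves (relative to the datasets root) to file_name == "coco_train2017/000000155379.jpg".
+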
+
+def load_lvis_json(annotations_json_file: str, image_root: str, dataset_name: str):
+ """
+ Loads a JSON file with annotations in LVIS instances format.
+    Replaces `detectron2.data.datasets.lvis.load_lvis_json` to handle metadata
+ in a more flexible way. Postpones category mapping to a later stage to be
+ able to combine several datasets with different (but coherent) sets of
+ categories.
+
+ Args:
+
+        annotations_json_file: str
+            Path to the JSON file with annotations in LVIS instances format.
+        image_root: str
+            directory that contains all the images
+        dataset_name: str
+            the name that identifies a dataset, e.g. "densepose_lvis_v1_ds1_train_v1"
+ """
+ lvis_api = _load_lvis_annotations(PathManager.get_local_path(annotations_json_file))
+
+ _add_categories_metadata(dataset_name)
+
+ # sort indices for reproducible results
+ img_ids = sorted(lvis_api.imgs.keys())
+ # imgs is a list of dicts, each looks something like:
+ # {'license': 4,
+ # 'url': 'http://farm6.staticflickr.com/5454/9413846304_881d5e5c3b_z.jpg',
+ # 'file_name': 'COCO_val2014_000000001268.jpg',
+ # 'height': 427,
+ # 'width': 640,
+ # 'date_captured': '2013-11-17 05:57:24',
+ # 'id': 1268}
+ imgs = lvis_api.load_imgs(img_ids)
+ logger = logging.getLogger(__name__)
+ logger.info("Loaded {} images in LVIS format from {}".format(len(imgs), annotations_json_file))
+ # anns is a list[list[dict]], where each dict is an annotation
+ # record for an object. The inner list enumerates the objects in an image
+ # and the outer list enumerates over images.
+ anns = [lvis_api.img_ann_map[img_id] for img_id in img_ids]
+
+ _verify_annotations_have_unique_ids(annotations_json_file, anns)
+ dataset_records = _combine_images_with_annotations(dataset_name, image_root, imgs, anns)
+ return dataset_records
+
+
+def register_dataset(dataset_data: CocoDatasetInfo, datasets_root: Optional[str] = None) -> None:
+ """
+ Registers provided LVIS DensePose dataset
+
+ Args:
+ dataset_data: CocoDatasetInfo
+ Dataset data
+ datasets_root: Optional[str]
+ Datasets root folder (default: None)
+ """
+ annotations_fpath = maybe_prepend_base_path(datasets_root, dataset_data.annotations_fpath)
+ images_root = maybe_prepend_base_path(datasets_root, dataset_data.images_root)
+
+ def load_annotations():
+ return load_lvis_json(
+ annotations_json_file=annotations_fpath,
+ image_root=images_root,
+ dataset_name=dataset_data.name,
+ )
+
+ DatasetCatalog.register(dataset_data.name, load_annotations)
+ MetadataCatalog.get(dataset_data.name).set(
+ json_file=annotations_fpath,
+ image_root=images_root,
+ evaluator_type="lvis",
+ **get_metadata(DENSEPOSE_METADATA_URL_PREFIX),
+ )
+
+
+def register_datasets(
+ datasets_data: Iterable[CocoDatasetInfo], datasets_root: Optional[str] = None
+) -> None:
+ """
+ Registers provided LVIS DensePose datasets
+
+ Args:
+ datasets_data: Iterable[CocoDatasetInfo]
+            An iterable of dataset descriptors
+ datasets_root: Optional[str]
+ Datasets root folder (default: None)
+ """
+ for dataset_data in datasets_data:
+ register_dataset(dataset_data, datasets_root)
diff --git a/densepose/data/image_list_dataset.py b/densepose/data/image_list_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..503bf647d7810f4b45cb3a442370ddbbf8e7f2a3
--- /dev/null
+++ b/densepose/data/image_list_dataset.py
@@ -0,0 +1,74 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+import logging
+import numpy as np
+from typing import Any, Callable, Dict, List, Optional, Union
+import torch
+from torch.utils.data.dataset import Dataset
+
+from detectron2.data.detection_utils import read_image
+
+ImageTransform = Callable[[torch.Tensor], torch.Tensor]
+
+
+class ImageListDataset(Dataset):
+ """
+ Dataset that provides images from a list.
+ """
+
+ _EMPTY_IMAGE = torch.empty((0, 3, 1, 1))
+
+ def __init__(
+ self,
+ image_list: List[str],
+ category_list: Union[str, List[str], None] = None,
+ transform: Optional[ImageTransform] = None,
+ ):
+ """
+ Args:
+ image_list (List[str]): list of paths to image files
+ category_list (Union[str, List[str], None]): list of animal categories for
+ each image. If it is a string, or None, this applies to all images
+ """
+ if type(category_list) is list:
+ self.category_list = category_list
+ else:
+ self.category_list = [category_list] * len(image_list)
+ assert len(image_list) == len(
+ self.category_list
+ ), "length of image and category lists must be equal"
+ self.image_list = image_list
+ self.transform = transform
+
+ def __getitem__(self, idx: int) -> Dict[str, Any]:
+ """
+ Gets selected images from the list
+
+ Args:
+ idx (int): image index in the image list

+ Returns:
+ A dictionary containing two keys:
+ images (torch.Tensor): tensor of size [N, 3, H, W] (N = 1, or 0 for _EMPTY_IMAGE)
+ categories (List[str]): categories of the image
+ """
+ categories = [self.category_list[idx]]
+ fpath = self.image_list[idx]
+ transform = self.transform
+
+ try:
+ image = torch.from_numpy(np.ascontiguousarray(read_image(fpath, format="BGR")))
+ image = image.permute(2, 0, 1).unsqueeze(0).float() # HWC -> NCHW
+ if transform is not None:
+ image = transform(image)
+ return {"images": image, "categories": categories}
+ except (OSError, RuntimeError) as e:
+ logger = logging.getLogger(__name__)
+ logger.warning(f"Error opening image file {fpath}: {e}")
+
+ return {"images": self._EMPTY_IMAGE, "categories": []}
+
+ def __len__(self):
+ return len(self.image_list)
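Editor's note: a small usage sketch. The file paths are hypothetical; the dataset reads each listed image lazily and falls back to the empty tensor on I/O errors.

    from densepose.data.image_list_dataset import ImageListDataset  # assumed module path

    dataset = ImageListDataset(
        image_list=["images/person_0001.jpg", "images/person_0002.jpg"],  # hypothetical paths
        category_list=None,   # one shared (None) category applied to all images
    )
    entry = dataset[0]
    # entry["images"]     -> float tensor of shape [1, 3, H, W] (or [0, 3, 1, 1] on read failure)
    # entry["categories"] -> [None]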
diff --git a/densepose/data/inference_based_loader.py b/densepose/data/inference_based_loader.py
new file mode 100644
index 0000000000000000000000000000000000000000..b643935cb7cbcaa06f66ca1c459ef25c5753cffd
--- /dev/null
+++ b/densepose/data/inference_based_loader.py
@@ -0,0 +1,174 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+import random
+from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple
+import torch
+from torch import nn
+
+SampledData = Any
+ModelOutput = Any
+
+
+def _grouper(iterable: Iterable[Any], n: int, fillvalue=None) -> Iterator[Tuple[Any]]:
+ """
+ Group elements of an iterable by chunks of size `n`, e.g.
+ grouper(range(9), 4) ->
+ (0, 1, 2, 3), (4, 5, 6, 7), (8, None, None, None)
+ """
+ it = iter(iterable)
+ while True:
+ values = []
+ for _ in range(n):
+ try:
+ value = next(it)
+ except StopIteration:
+ if values:
+ values.extend([fillvalue] * (n - len(values)))
+ yield tuple(values)
+ return
+ values.append(value)
+ yield tuple(values)
+
+
+class ScoreBasedFilter:
+ """
+ Filters entries in model output based on their scores
+ Discards all entries with score less than the specified minimum
+ """
+
+ def __init__(self, min_score: float = 0.8):
+ self.min_score = min_score
+
+ def __call__(self, model_output: ModelOutput) -> ModelOutput:
+ for model_output_i in model_output:
+ instances = model_output_i["instances"]
+ if not instances.has("scores"):
+ continue
+ instances_filtered = instances[instances.scores >= self.min_score]
+ model_output_i["instances"] = instances_filtered
+ return model_output
+
+
+class InferenceBasedLoader:
+ """
+ Data loader based on results inferred by a model. Consists of:
+ - a data loader that provides batches of images
+ - a model that is used to infer the results
+ - a data sampler that converts inferred results to annotations
+ """
+
+ def __init__(
+ self,
+ model: nn.Module,
+ data_loader: Iterable[List[Dict[str, Any]]],
+ data_sampler: Optional[Callable[[ModelOutput], List[SampledData]]] = None,
+ data_filter: Optional[Callable[[ModelOutput], ModelOutput]] = None,
+ shuffle: bool = True,
+ batch_size: int = 4,
+ inference_batch_size: int = 4,
+ drop_last: bool = False,
+ category_to_class_mapping: Optional[dict] = None,
+ ):
+ """
+ Constructor
+
+ Args:
+ model (torch.nn.Module): model used to produce data
+ data_loader (Iterable[List[Dict[str, Any]]]): iterable that provides
+ dictionaries with "images" and "categories" fields to perform inference on
+ data_sampler (Callable: ModelOutput -> SampledData): functor
+ that produces annotation data from inference results;
+ (optional, default: None)
+ data_filter (Callable: ModelOutput -> ModelOutput): filter
+ that selects model outputs for further processing
+ (optional, default: None)
+ shuffle (bool): if True, the input images get shuffled
+ batch_size (int): batch size for the produced annotation data
+ inference_batch_size (int): batch size for input images
+ drop_last (bool): if True, drop the last batch if it is undersized
+ category_to_class_mapping (dict): category to class mapping
+ """
+ self.model = model
+ self.model.eval()
+ self.data_loader = data_loader
+ self.data_sampler = data_sampler
+ self.data_filter = data_filter
+ self.shuffle = shuffle
+ self.batch_size = batch_size
+ self.inference_batch_size = inference_batch_size
+ self.drop_last = drop_last
+ if category_to_class_mapping is not None:
+ self.category_to_class_mapping = category_to_class_mapping
+ else:
+ self.category_to_class_mapping = {}
+
+ def __iter__(self) -> Iterator[List[SampledData]]:
+ for batch in self.data_loader:
+ # batch : List[Dict[str: Tensor[N, C, H, W], str: Optional[str]]]
+ # images_batch : Tensor[N, C, H, W]
+ # image : Tensor[C, H, W]
+ images_and_categories = [
+ {"image": image, "category": category}
+ for element in batch
+ for image, category in zip(element["images"], element["categories"])
+ ]
+ if not images_and_categories:
+ continue
+ if self.shuffle:
+ random.shuffle(images_and_categories)
+ yield from self._produce_data(images_and_categories) # pyre-ignore[6]
+
+ def _produce_data(
+ self, images_and_categories: List[Tuple[torch.Tensor, Optional[str]]]
+ ) -> Iterator[List[SampledData]]:
+ """
+ Produce batches of data from images
+
+ Args:
+ images_and_categories (List[Tuple[torch.Tensor, Optional[str]]]):
+ list of images and corresponding categories to process
+
+ Returns:
+ Iterator over batches of data sampled from model outputs
+ """
+ data_batches: List[SampledData] = []
+ category_to_class_mapping = self.category_to_class_mapping
+ batched_images_and_categories = _grouper(images_and_categories, self.inference_batch_size)
+ for batch in batched_images_and_categories:
+ batch = [
+ {
+ "image": image_and_category["image"].to(self.model.device),
+ "category": image_and_category["category"],
+ }
+ for image_and_category in batch
+ if image_and_category is not None
+ ]
+ if not batch:
+ continue
+ with torch.no_grad():
+ model_output = self.model(batch)
+ for model_output_i, batch_i in zip(model_output, batch):
+ assert len(batch_i["image"].shape) == 3
+ model_output_i["image"] = batch_i["image"]
+ instance_class = category_to_class_mapping.get(batch_i["category"], 0)
+ model_output_i["instances"].dataset_classes = torch.tensor(
+ [instance_class] * len(model_output_i["instances"])
+ )
+ model_output_filtered = (
+ model_output if self.data_filter is None else self.data_filter(model_output)
+ )
+ data = (
+ model_output_filtered
+ if self.data_sampler is None
+ else self.data_sampler(model_output_filtered)
+ )
+ for data_i in data:
+ if len(data_i["instances"]):
+ data_batches.append(data_i)
+ if len(data_batches) >= self.batch_size:
+ yield data_batches[: self.batch_size]
+ data_batches = data_batches[self.batch_size :]
+ if not self.drop_last and data_batches:
+ yield data_batches
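Editor's note: a sketch of how this loader could be assembled. `model` and `image_dataset` are assumed to exist in the caller's scope (a detectron2 model exposing a `.device` attribute and an image dataset such as `ImageListDataset`).

    from torch.utils.data import DataLoader
    from densepose.data.inference_based_loader import InferenceBasedLoader, ScoreBasedFilter

    image_loader = DataLoader(image_dataset, batch_size=4, collate_fn=lambda batch: batch)
    loader = InferenceBasedLoader(
        model,                                   # frozen model, used only for inference
        data_loader=image_loader,
        data_filter=ScoreBasedFilter(min_score=0.8),
        batch_size=4,
    )
    for sampled_batch in loader:                 # batches of per-image dicts with filtered instances
        pass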
diff --git a/densepose/data/meshes/__init__.py b/densepose/data/meshes/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..7552c251b2225af62212aae69d4ce273608f7a67
--- /dev/null
+++ b/densepose/data/meshes/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+# pyre-unsafe
+
+from . import builtin
+
+__all__ = [k for k in globals().keys() if "builtin" not in k and not k.startswith("_")]
diff --git a/densepose/data/meshes/__pycache__/__init__.cpython-39.pyc b/densepose/data/meshes/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b73877f82c9da09bbc6452e13ef1ad70f0334acc
Binary files /dev/null and b/densepose/data/meshes/__pycache__/__init__.cpython-39.pyc differ
diff --git a/densepose/data/meshes/__pycache__/builtin.cpython-39.pyc b/densepose/data/meshes/__pycache__/builtin.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8d5ea957980c9c3cb9e510ecd14334dfedd26bbc
Binary files /dev/null and b/densepose/data/meshes/__pycache__/builtin.cpython-39.pyc differ
diff --git a/densepose/data/meshes/__pycache__/catalog.cpython-39.pyc b/densepose/data/meshes/__pycache__/catalog.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7e7d310612b2685e0400e70419d39f3016093880
Binary files /dev/null and b/densepose/data/meshes/__pycache__/catalog.cpython-39.pyc differ
diff --git a/densepose/data/meshes/builtin.py b/densepose/data/meshes/builtin.py
new file mode 100644
index 0000000000000000000000000000000000000000..fc8ec8418852dc344d7c4bd9f6c5fdd049b30a6d
--- /dev/null
+++ b/densepose/data/meshes/builtin.py
@@ -0,0 +1,103 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+# pyre-unsafe
+
+from .catalog import MeshInfo, register_meshes
+
+DENSEPOSE_MESHES_DIR = "https://dl.fbaipublicfiles.com/densepose/meshes/"
+
+MESHES = [
+ MeshInfo(
+ name="smpl_27554",
+ data="smpl_27554.pkl",
+ geodists="geodists/geodists_smpl_27554.pkl",
+ symmetry="symmetry/symmetry_smpl_27554.pkl",
+ texcoords="texcoords/texcoords_smpl_27554.pkl",
+ ),
+ MeshInfo(
+ name="chimp_5029",
+ data="chimp_5029.pkl",
+ geodists="geodists/geodists_chimp_5029.pkl",
+ symmetry="symmetry/symmetry_chimp_5029.pkl",
+ texcoords="texcoords/texcoords_chimp_5029.pkl",
+ ),
+ MeshInfo(
+ name="cat_5001",
+ data="cat_5001.pkl",
+ geodists="geodists/geodists_cat_5001.pkl",
+ symmetry="symmetry/symmetry_cat_5001.pkl",
+ texcoords="texcoords/texcoords_cat_5001.pkl",
+ ),
+ MeshInfo(
+ name="cat_7466",
+ data="cat_7466.pkl",
+ geodists="geodists/geodists_cat_7466.pkl",
+ symmetry="symmetry/symmetry_cat_7466.pkl",
+ texcoords="texcoords/texcoords_cat_7466.pkl",
+ ),
+ MeshInfo(
+ name="sheep_5004",
+ data="sheep_5004.pkl",
+ geodists="geodists/geodists_sheep_5004.pkl",
+ symmetry="symmetry/symmetry_sheep_5004.pkl",
+ texcoords="texcoords/texcoords_sheep_5004.pkl",
+ ),
+ MeshInfo(
+ name="zebra_5002",
+ data="zebra_5002.pkl",
+ geodists="geodists/geodists_zebra_5002.pkl",
+ symmetry="symmetry/symmetry_zebra_5002.pkl",
+ texcoords="texcoords/texcoords_zebra_5002.pkl",
+ ),
+ MeshInfo(
+ name="horse_5004",
+ data="horse_5004.pkl",
+ geodists="geodists/geodists_horse_5004.pkl",
+ symmetry="symmetry/symmetry_horse_5004.pkl",
+ texcoords="texcoords/texcoords_zebra_5002.pkl",
+ ),
+ MeshInfo(
+ name="giraffe_5002",
+ data="giraffe_5002.pkl",
+ geodists="geodists/geodists_giraffe_5002.pkl",
+ symmetry="symmetry/symmetry_giraffe_5002.pkl",
+ texcoords="texcoords/texcoords_giraffe_5002.pkl",
+ ),
+ MeshInfo(
+ name="elephant_5002",
+ data="elephant_5002.pkl",
+ geodists="geodists/geodists_elephant_5002.pkl",
+ symmetry="symmetry/symmetry_elephant_5002.pkl",
+ texcoords="texcoords/texcoords_elephant_5002.pkl",
+ ),
+ MeshInfo(
+ name="dog_5002",
+ data="dog_5002.pkl",
+ geodists="geodists/geodists_dog_5002.pkl",
+ symmetry="symmetry/symmetry_dog_5002.pkl",
+ texcoords="texcoords/texcoords_dog_5002.pkl",
+ ),
+ MeshInfo(
+ name="dog_7466",
+ data="dog_7466.pkl",
+ geodists="geodists/geodists_dog_7466.pkl",
+ symmetry="symmetry/symmetry_dog_7466.pkl",
+ texcoords="texcoords/texcoords_dog_7466.pkl",
+ ),
+ MeshInfo(
+ name="cow_5002",
+ data="cow_5002.pkl",
+ geodists="geodists/geodists_cow_5002.pkl",
+ symmetry="symmetry/symmetry_cow_5002.pkl",
+ texcoords="texcoords/texcoords_cow_5002.pkl",
+ ),
+ MeshInfo(
+ name="bear_4936",
+ data="bear_4936.pkl",
+ geodists="geodists/geodists_bear_4936.pkl",
+ symmetry="symmetry/symmetry_bear_4936.pkl",
+ texcoords="texcoords/texcoords_bear_4936.pkl",
+ ),
+]
+
+register_meshes(MESHES, DENSEPOSE_MESHES_DIR)
diff --git a/densepose/data/meshes/catalog.py b/densepose/data/meshes/catalog.py
new file mode 100644
index 0000000000000000000000000000000000000000..ae624a8aa21fb11cc3c3f7ee467f28b896959781
--- /dev/null
+++ b/densepose/data/meshes/catalog.py
@@ -0,0 +1,73 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+# pyre-unsafe
+
+import logging
+from collections import UserDict
+from dataclasses import dataclass
+from typing import Iterable, Optional
+
+from ..utils import maybe_prepend_base_path
+
+
+@dataclass
+class MeshInfo:
+ name: str
+ data: str
+ geodists: Optional[str] = None
+ symmetry: Optional[str] = None
+ texcoords: Optional[str] = None
+
+
+class _MeshCatalog(UserDict):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ self.mesh_ids = {}
+ self.mesh_names = {}
+ self.max_mesh_id = -1
+
+ def __setitem__(self, key, value):
+ if key in self:
+ logger = logging.getLogger(__name__)
+ logger.warning(
+ f"Overwriting mesh catalog entry '{key}': old value {self[key]}"
+ f", new value {value}"
+ )
+ mesh_id = self.mesh_ids[key]
+ else:
+ self.max_mesh_id += 1
+ mesh_id = self.max_mesh_id
+ super().__setitem__(key, value)
+ self.mesh_ids[key] = mesh_id
+ self.mesh_names[mesh_id] = key
+
+ def get_mesh_id(self, shape_name: str) -> int:
+ return self.mesh_ids[shape_name]
+
+ def get_mesh_name(self, mesh_id: int) -> str:
+ return self.mesh_names[mesh_id]
+
+
+MeshCatalog = _MeshCatalog()
+
+
+def register_mesh(mesh_info: MeshInfo, base_path: Optional[str]) -> None:
+ geodists, symmetry, texcoords = mesh_info.geodists, mesh_info.symmetry, mesh_info.texcoords
+ if geodists:
+ geodists = maybe_prepend_base_path(base_path, geodists)
+ if symmetry:
+ symmetry = maybe_prepend_base_path(base_path, symmetry)
+ if texcoords:
+ texcoords = maybe_prepend_base_path(base_path, texcoords)
+ MeshCatalog[mesh_info.name] = MeshInfo(
+ name=mesh_info.name,
+ data=maybe_prepend_base_path(base_path, mesh_info.data),
+ geodists=geodists,
+ symmetry=symmetry,
+ texcoords=texcoords,
+ )
+
+
+def register_meshes(mesh_infos: Iterable[MeshInfo], base_path: Optional[str]) -> None:
+ for mesh_info in mesh_infos:
+ register_mesh(mesh_info, base_path)
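Editor's note: for illustration, registering a hypothetical mesh and querying the catalog (the mesh name and base path below are made up).

    from densepose.data.meshes.catalog import MeshCatalog, MeshInfo, register_mesh

    register_mesh(
        MeshInfo(name="toy_mesh", data="toy_mesh.pkl"),     # hypothetical mesh
        base_path="https://example.com/densepose/meshes/",  # hypothetical location
    )
    mesh_id = MeshCatalog.get_mesh_id("toy_mesh")
    assert MeshCatalog.get_mesh_name(mesh_id) == "toy_mesh"
    print(MeshCatalog["toy_mesh"].data)   # -> https://example.com/densepose/meshes/toy_mesh.pkl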
diff --git a/densepose/data/samplers/__init__.py b/densepose/data/samplers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..7bf28288d8929c1b250720a2c6decfc9978dd903
--- /dev/null
+++ b/densepose/data/samplers/__init__.py
@@ -0,0 +1,10 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from .densepose_uniform import DensePoseUniformSampler
+from .densepose_confidence_based import DensePoseConfidenceBasedSampler
+from .densepose_cse_uniform import DensePoseCSEUniformSampler
+from .densepose_cse_confidence_based import DensePoseCSEConfidenceBasedSampler
+from .mask_from_densepose import MaskFromDensePoseSampler
+from .prediction_to_gt import PredictionToGroundTruthSampler
diff --git a/densepose/data/samplers/__pycache__/__init__.cpython-39.pyc b/densepose/data/samplers/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e579fdd88793d74e11c1b7b42d66414a54583596
Binary files /dev/null and b/densepose/data/samplers/__pycache__/__init__.cpython-39.pyc differ
diff --git a/densepose/data/samplers/__pycache__/densepose_base.cpython-39.pyc b/densepose/data/samplers/__pycache__/densepose_base.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ba5378faf8a96cd7e354c6b01ac2b20218ee1ae9
Binary files /dev/null and b/densepose/data/samplers/__pycache__/densepose_base.cpython-39.pyc differ
diff --git a/densepose/data/samplers/__pycache__/densepose_confidence_based.cpython-39.pyc b/densepose/data/samplers/__pycache__/densepose_confidence_based.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0ed3c6dd119f5f5618d46c8dd1c9bf5964e76abd
Binary files /dev/null and b/densepose/data/samplers/__pycache__/densepose_confidence_based.cpython-39.pyc differ
diff --git a/densepose/data/samplers/__pycache__/densepose_cse_base.cpython-39.pyc b/densepose/data/samplers/__pycache__/densepose_cse_base.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..59fa080b6e017af44015e1465931a67d316a8e5c
Binary files /dev/null and b/densepose/data/samplers/__pycache__/densepose_cse_base.cpython-39.pyc differ
diff --git a/densepose/data/samplers/__pycache__/densepose_cse_confidence_based.cpython-39.pyc b/densepose/data/samplers/__pycache__/densepose_cse_confidence_based.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..53e87d53d9e478af918a4f4a6a7545dd86ccb5b4
Binary files /dev/null and b/densepose/data/samplers/__pycache__/densepose_cse_confidence_based.cpython-39.pyc differ
diff --git a/densepose/data/samplers/__pycache__/densepose_cse_uniform.cpython-39.pyc b/densepose/data/samplers/__pycache__/densepose_cse_uniform.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..37adef4fbcb2361b9d6e007e89fad0b2771ade7f
Binary files /dev/null and b/densepose/data/samplers/__pycache__/densepose_cse_uniform.cpython-39.pyc differ
diff --git a/densepose/data/samplers/__pycache__/densepose_uniform.cpython-39.pyc b/densepose/data/samplers/__pycache__/densepose_uniform.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4e38dc210286789848ac9d3370014c81b67f60ae
Binary files /dev/null and b/densepose/data/samplers/__pycache__/densepose_uniform.cpython-39.pyc differ
diff --git a/densepose/data/samplers/__pycache__/mask_from_densepose.cpython-39.pyc b/densepose/data/samplers/__pycache__/mask_from_densepose.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..390b44b34d532f4110706819d021ec8f3f5640cb
Binary files /dev/null and b/densepose/data/samplers/__pycache__/mask_from_densepose.cpython-39.pyc differ
diff --git a/densepose/data/samplers/__pycache__/prediction_to_gt.cpython-39.pyc b/densepose/data/samplers/__pycache__/prediction_to_gt.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0e99aa2aae13552c1b2940d07829153b7585c3c7
Binary files /dev/null and b/densepose/data/samplers/__pycache__/prediction_to_gt.cpython-39.pyc differ
diff --git a/densepose/data/samplers/densepose_base.py b/densepose/data/samplers/densepose_base.py
new file mode 100644
index 0000000000000000000000000000000000000000..260413a5b65853d12b4cdb1bcff906f02ed7d63c
--- /dev/null
+++ b/densepose/data/samplers/densepose_base.py
@@ -0,0 +1,205 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from typing import Any, Dict, List, Tuple
+import torch
+from torch.nn import functional as F
+
+from detectron2.structures import BoxMode, Instances
+
+from densepose.converters import ToChartResultConverter
+from densepose.converters.base import IntTupleBox, make_int_box
+from densepose.structures import DensePoseDataRelative, DensePoseList
+
+
+class DensePoseBaseSampler:
+ """
+ Base DensePose sampler to produce DensePose data from DensePose predictions.
+ Samples for each class are drawn according to some distribution over all pixels estimated
+ to belong to that class.
+ """
+
+ def __init__(self, count_per_class: int = 8):
+ """
+ Constructor
+
+ Args:
+ count_per_class (int): the sampler produces at most `count_per_class`
+ samples for each category
+ """
+ self.count_per_class = count_per_class
+
+ def __call__(self, instances: Instances) -> DensePoseList:
+ """
+ Convert DensePose predictions (an instance of `DensePoseChartPredictorOutput`)
+ into DensePose annotations data (an instance of `DensePoseList`)
+ """
+ boxes_xyxy_abs = instances.pred_boxes.tensor.clone().cpu()
+ boxes_xywh_abs = BoxMode.convert(boxes_xyxy_abs, BoxMode.XYXY_ABS, BoxMode.XYWH_ABS)
+ dp_datas = []
+ for i in range(len(boxes_xywh_abs)):
+ annotation_i = self._sample(instances[i], make_int_box(boxes_xywh_abs[i]))
+ annotation_i[DensePoseDataRelative.S_KEY] = self._resample_mask( # pyre-ignore[6]
+ instances[i].pred_densepose
+ )
+ dp_datas.append(DensePoseDataRelative(annotation_i))
+ # create densepose annotations on CPU
+ dp_list = DensePoseList(dp_datas, boxes_xyxy_abs, instances.image_size)
+ return dp_list
+
+ def _sample(self, instance: Instances, bbox_xywh: IntTupleBox) -> Dict[str, List[Any]]:
+ """
+ Sample DensePoseDataRelative from estimation results
+ """
+ labels, dp_result = self._produce_labels_and_results(instance)
+ annotation = {
+ DensePoseDataRelative.X_KEY: [],
+ DensePoseDataRelative.Y_KEY: [],
+ DensePoseDataRelative.U_KEY: [],
+ DensePoseDataRelative.V_KEY: [],
+ DensePoseDataRelative.I_KEY: [],
+ }
+ n, h, w = dp_result.shape
+ for part_id in range(1, DensePoseDataRelative.N_PART_LABELS + 1):
+ # indices - tuple of 3 1D tensors of size k
+ # 0: index along the first dimension N
+ # 1: index along H dimension
+ # 2: index along W dimension
+ indices = torch.nonzero(labels.expand(n, h, w) == part_id, as_tuple=True)
+ # values - an array of size [n, k]
+ # n: number of channels (U, V, confidences)
+ # k: number of points labeled with part_id
+ values = dp_result[indices].view(n, -1)
+ k = values.shape[1]
+ count = min(self.count_per_class, k)
+ if count <= 0:
+ continue
+ index_sample = self._produce_index_sample(values, count)
+ sampled_values = values[:, index_sample]
+ sampled_y = indices[1][index_sample] + 0.5
+ sampled_x = indices[2][index_sample] + 0.5
+ # prepare / normalize data
+ x = (sampled_x / w * 256.0).cpu().tolist()
+ y = (sampled_y / h * 256.0).cpu().tolist()
+ u = sampled_values[0].clamp(0, 1).cpu().tolist()
+ v = sampled_values[1].clamp(0, 1).cpu().tolist()
+ fine_segm_labels = [part_id] * count
+ # extend annotations
+ annotation[DensePoseDataRelative.X_KEY].extend(x)
+ annotation[DensePoseDataRelative.Y_KEY].extend(y)
+ annotation[DensePoseDataRelative.U_KEY].extend(u)
+ annotation[DensePoseDataRelative.V_KEY].extend(v)
+ annotation[DensePoseDataRelative.I_KEY].extend(fine_segm_labels)
+ return annotation
+
+ def _produce_index_sample(self, values: torch.Tensor, count: int):
+ """
+ Abstract method to produce a sample of indices to select data
+ To be implemented in descendants
+
+ Args:
+ values (torch.Tensor): an array of size [n, k] that contains
+ estimated values (U, V, confidences);
+ n: number of channels (U, V, confidences)
+ k: number of points labeled with part_id
+ count (int): number of samples to produce, should be positive and <= k
+
+ Return:
+ list(int): indices of values (along axis 1) selected as a sample
+ """
+ raise NotImplementedError
+
+ def _produce_labels_and_results(self, instance: Instances) -> Tuple[torch.Tensor, torch.Tensor]:
+ """
+ Method to get labels and DensePose results from an instance
+
+ Args:
+ instance (Instances): an instance of `DensePoseChartPredictorOutput`
+
+ Return:
+ labels (torch.Tensor): shape [H, W], DensePose segmentation labels
+ dp_result (torch.Tensor): shape [2, H, W], stacked DensePose results u and v
+ """
+ converter = ToChartResultConverter
+ chart_result = converter.convert(instance.pred_densepose, instance.pred_boxes)
+ labels, dp_result = chart_result.labels.cpu(), chart_result.uv.cpu()
+ return labels, dp_result
+
+ def _resample_mask(self, output: Any) -> torch.Tensor:
+ """
+ Convert DensePose predictor output to segmentation annotation - tensors of size
+ (256, 256) and type `int64`.
+
+ Args:
+ output: DensePose predictor output with the following attributes:
+ - coarse_segm: tensor of size [N, D, H, W] with unnormalized coarse
+ segmentation scores
+ - fine_segm: tensor of size [N, C, H, W] with unnormalized fine
+ segmentation scores
+ Return:
+ Tensor of size (S, S) and type `int64` with coarse segmentation annotations,
+ where S = DensePoseDataRelative.MASK_SIZE
+ """
+ sz = DensePoseDataRelative.MASK_SIZE
+ S = (
+ F.interpolate(output.coarse_segm, (sz, sz), mode="bilinear", align_corners=False)
+ .argmax(dim=1)
+ .long()
+ )
+ I = (
+ (
+ F.interpolate(
+ output.fine_segm,
+ (sz, sz),
+ mode="bilinear",
+ align_corners=False,
+ ).argmax(dim=1)
+ * (S > 0).long()
+ )
+ .squeeze()
+ .cpu()
+ )
+ # Map fine segmentation results to coarse segmentation ground truth
+ # TODO: extract this into separate classes
+ # coarse segmentation: 1 = Torso, 2 = Right Hand, 3 = Left Hand,
+ # 4 = Left Foot, 5 = Right Foot, 6 = Upper Leg Right, 7 = Upper Leg Left,
+ # 8 = Lower Leg Right, 9 = Lower Leg Left, 10 = Upper Arm Left,
+ # 11 = Upper Arm Right, 12 = Lower Arm Left, 13 = Lower Arm Right,
+ # 14 = Head
+ # fine segmentation: 1, 2 = Torso, 3 = Right Hand, 4 = Left Hand,
+ # 5 = Left Foot, 6 = Right Foot, 7, 9 = Upper Leg Right,
+ # 8, 10 = Upper Leg Left, 11, 13 = Lower Leg Right,
+ # 12, 14 = Lower Leg Left, 15, 17 = Upper Arm Left,
+ # 16, 18 = Upper Arm Right, 19, 21 = Lower Arm Left,
+ # 20, 22 = Lower Arm Right, 23, 24 = Head
+ FINE_TO_COARSE_SEGMENTATION = {
+ 1: 1,
+ 2: 1,
+ 3: 2,
+ 4: 3,
+ 5: 4,
+ 6: 5,
+ 7: 6,
+ 8: 7,
+ 9: 6,
+ 10: 7,
+ 11: 8,
+ 12: 9,
+ 13: 8,
+ 14: 9,
+ 15: 10,
+ 16: 11,
+ 17: 10,
+ 18: 11,
+ 19: 12,
+ 20: 13,
+ 21: 12,
+ 22: 13,
+ 23: 14,
+ 24: 14,
+ }
+ mask = torch.zeros((sz, sz), dtype=torch.int64, device=torch.device("cpu"))
+ for i in range(DensePoseDataRelative.N_PART_LABELS):
+ mask[I == i + 1] = FINE_TO_COARSE_SEGMENTATION[i + 1]
+ return mask
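Editor's note: concrete samplers only need to supply `_produce_index_sample`; a toy subclass (not part of this PR) could look like the sketch below.

    import torch
    from densepose.data.samplers.densepose_base import DensePoseBaseSampler  # assumed module path

    class TopUValueSampler(DensePoseBaseSampler):
        """Toy sampler: keep the `count` points with the largest U values."""

        def _produce_index_sample(self, values: torch.Tensor, count: int):
            # values has shape [n, k]; row 0 holds the U channel
            return torch.argsort(values[0], descending=True)[:count].tolist()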
diff --git a/densepose/data/samplers/densepose_confidence_based.py b/densepose/data/samplers/densepose_confidence_based.py
new file mode 100644
index 0000000000000000000000000000000000000000..5a9a637e214cbd584773a9fb6031368b5d32417b
--- /dev/null
+++ b/densepose/data/samplers/densepose_confidence_based.py
@@ -0,0 +1,110 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+import random
+from typing import Optional, Tuple
+import torch
+
+from densepose.converters import ToChartResultConverterWithConfidences
+
+from .densepose_base import DensePoseBaseSampler
+
+
+class DensePoseConfidenceBasedSampler(DensePoseBaseSampler):
+ """
+ Samples DensePose data from DensePose predictions.
+ Samples for each class are drawn using confidence value estimates.
+ """
+
+ def __init__(
+ self,
+ confidence_channel: str,
+ count_per_class: int = 8,
+ search_count_multiplier: Optional[float] = None,
+ search_proportion: Optional[float] = None,
+ ):
+ """
+ Constructor
+
+ Args:
+ confidence_channel (str): confidence channel to use for sampling;
+ possible values:
+ "sigma_2": confidences for UV values
+ "fine_segm_confidence": confidences for fine segmentation
+ "coarse_segm_confidence": confidences for coarse segmentation
+ (default: "sigma_2")
+ count_per_class (int): the sampler produces at most `count_per_class`
+ samples for each category (default: 8)
+ search_count_multiplier (float or None): if not None, the total number
+ of the most confident estimates of a given class to consider is
+ defined as `min(search_count_multiplier * count_per_class, N)`,
+ where `N` is the total number of estimates of the class; cannot be
+ specified together with `search_proportion` (default: None)
+ search_proportion (float or None): if not None, the total number of
+ the most confident estimates of a given class to consider is
+ defined as `min(max(search_proportion * N, count_per_class), N)`,
+ where `N` is the total number of estimates of the class; cannot be
+ specified together with `search_count_multiplier` (default: None)
+ """
+ super().__init__(count_per_class)
+ self.confidence_channel = confidence_channel
+ self.search_count_multiplier = search_count_multiplier
+ self.search_proportion = search_proportion
+ assert (search_count_multiplier is None) or (search_proportion is None), (
+ f"Cannot specify both search_count_multiplier (={search_count_multiplier})"
+ f" and search_proportion (={search_proportion})"
+ )
+
+ def _produce_index_sample(self, values: torch.Tensor, count: int):
+ """
+ Produce a sample of indices to select data based on confidences
+
+ Args:
+ values (torch.Tensor): an array of size [n, k] that contains
+ estimated values (U, V, confidences);
+ n: number of channels (U, V, confidences)
+ k: number of points labeled with part_id
+ count (int): number of samples to produce, should be positive and <= k
+
+ Return:
+ list(int): indices of values (along axis 1) selected as a sample
+ """
+ k = values.shape[1]
+ if k == count:
+ index_sample = list(range(k))
+ else:
+ # take the best count * search_count_multiplier pixels,
+ # sample from them uniformly
+ # (here best = smallest variance)
+ _, sorted_confidence_indices = torch.sort(values[2])
+ if self.search_count_multiplier is not None:
+ search_count = min(int(count * self.search_count_multiplier), k)
+ elif self.search_proportion is not None:
+ search_count = min(max(int(k * self.search_proportion), count), k)
+ else:
+ search_count = min(count, k)
+ sample_from_top = random.sample(range(search_count), count)
+ index_sample = sorted_confidence_indices[:search_count][sample_from_top]
+ return index_sample
+
+ def _produce_labels_and_results(self, instance) -> Tuple[torch.Tensor, torch.Tensor]:
+ """
+ Method to get labels and DensePose results from an instance, with confidences
+
+ Args:
+ instance (Instances): an instance of `DensePoseChartPredictorOutputWithConfidences`
+
+ Return:
+ labels (torch.Tensor): shape [H, W], DensePose segmentation labels
+ dp_result (torch.Tensor): shape [3, H, W], DensePose results u and v
+ stacked with the confidence channel
+ """
+ converter = ToChartResultConverterWithConfidences
+ chart_result = converter.convert(instance.pred_densepose, instance.pred_boxes)
+ labels, dp_result = chart_result.labels.cpu(), chart_result.uv.cpu()
+ dp_result = torch.cat(
+ (dp_result, getattr(chart_result, self.confidence_channel)[None].cpu())
+ )
+
+ return labels, dp_result
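Editor's note: typical construction, assuming `instances` is a detectron2 `Instances` object carrying DensePose predictions with confidence estimates.

    from densepose.data.samplers import DensePoseConfidenceBasedSampler

    sampler = DensePoseConfidenceBasedSampler(
        confidence_channel="sigma_2",   # sample according to UV confidences
        count_per_class=8,
        search_proportion=0.5,          # draw from the most confident half of each part
    )
    # densepose_list = sampler(instances)   # instances must expose pred_densepose / pred_boxes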
diff --git a/densepose/data/samplers/densepose_cse_base.py b/densepose/data/samplers/densepose_cse_base.py
new file mode 100644
index 0000000000000000000000000000000000000000..593f1339f29308ff93ba98ed1426ee1dbd47be27
--- /dev/null
+++ b/densepose/data/samplers/densepose_cse_base.py
@@ -0,0 +1,141 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from typing import Any, Dict, List, Tuple
+import torch
+from torch.nn import functional as F
+
+from detectron2.config import CfgNode
+from detectron2.structures import Instances
+
+from densepose.converters.base import IntTupleBox
+from densepose.data.utils import get_class_to_mesh_name_mapping
+from densepose.modeling.cse.utils import squared_euclidean_distance_matrix
+from densepose.structures import DensePoseDataRelative
+
+from .densepose_base import DensePoseBaseSampler
+
+
+class DensePoseCSEBaseSampler(DensePoseBaseSampler):
+ """
+ Base DensePose sampler to produce DensePose data from DensePose predictions.
+ Samples for each class are drawn according to some distribution over all pixels estimated
+ to belong to that class.
+ """
+
+ def __init__(
+ self,
+ cfg: CfgNode,
+ use_gt_categories: bool,
+ embedder: torch.nn.Module,
+ count_per_class: int = 8,
+ ):
+ """
+ Constructor
+
+ Args:
+ cfg (CfgNode): the config of the model
+ embedder (torch.nn.Module): necessary to compute mesh vertex embeddings
+ count_per_class (int): the sampler produces at most `count_per_class`
+ samples for each category
+ """
+ super().__init__(count_per_class)
+ self.embedder = embedder
+ self.class_to_mesh_name = get_class_to_mesh_name_mapping(cfg)
+ self.use_gt_categories = use_gt_categories
+
+ def _sample(self, instance: Instances, bbox_xywh: IntTupleBox) -> Dict[str, List[Any]]:
+ """
+ Sample DensePoseDataRelative from estimation results
+ """
+ if self.use_gt_categories:
+ instance_class = instance.dataset_classes.tolist()[0]
+ else:
+ instance_class = instance.pred_classes.tolist()[0]
+ mesh_name = self.class_to_mesh_name[instance_class]
+
+ annotation = {
+ DensePoseDataRelative.X_KEY: [],
+ DensePoseDataRelative.Y_KEY: [],
+ DensePoseDataRelative.VERTEX_IDS_KEY: [],
+ DensePoseDataRelative.MESH_NAME_KEY: mesh_name,
+ }
+
+ mask, embeddings, other_values = self._produce_mask_and_results(instance, bbox_xywh)
+ indices = torch.nonzero(mask, as_tuple=True)
+ selected_embeddings = embeddings.permute(1, 2, 0)[indices].cpu()
+ values = other_values[:, indices[0], indices[1]]
+ k = values.shape[1]
+
+ count = min(self.count_per_class, k)
+ if count <= 0:
+ return annotation
+
+ index_sample = self._produce_index_sample(values, count)
+ closest_vertices = squared_euclidean_distance_matrix(
+ selected_embeddings[index_sample], self.embedder(mesh_name)
+ )
+ closest_vertices = torch.argmin(closest_vertices, dim=1)
+
+ sampled_y = indices[0][index_sample] + 0.5
+ sampled_x = indices[1][index_sample] + 0.5
+ # prepare / normalize data
+ _, _, w, h = bbox_xywh
+ x = (sampled_x / w * 256.0).cpu().tolist()
+ y = (sampled_y / h * 256.0).cpu().tolist()
+ # extend annotations
+ annotation[DensePoseDataRelative.X_KEY].extend(x)
+ annotation[DensePoseDataRelative.Y_KEY].extend(y)
+ annotation[DensePoseDataRelative.VERTEX_IDS_KEY].extend(closest_vertices.cpu().tolist())
+ return annotation
+
+ def _produce_mask_and_results(
+ self, instance: Instances, bbox_xywh: IntTupleBox
+ ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+ """
+ Method to get labels and DensePose results from an instance
+
+ Args:
+ instance (Instances): an instance of `DensePoseEmbeddingPredictorOutput`
+ bbox_xywh (IntTupleBox): the corresponding bounding box
+
+ Return:
+ mask (torch.Tensor): shape [H, W], DensePose segmentation mask
+ embeddings (torch.Tensor): a tensor of shape [D, H, W],
+ DensePose CSE Embeddings
+ other_values (torch.Tensor): a tensor of shape [0, H, W],
+ for potential other values
+ """
+ densepose_output = instance.pred_densepose
+ S = densepose_output.coarse_segm
+ E = densepose_output.embedding
+ _, _, w, h = bbox_xywh
+ embeddings = F.interpolate(E, size=(h, w), mode="bilinear")[0]
+ coarse_segm_resized = F.interpolate(S, size=(h, w), mode="bilinear")[0]
+ mask = coarse_segm_resized.argmax(0) > 0
+ other_values = torch.empty((0, h, w), device=E.device)
+ return mask, embeddings, other_values
+
+ def _resample_mask(self, output: Any) -> torch.Tensor:
+ """
+ Convert DensePose predictor output to segmentation annotation - tensors of size
+ (256, 256) and type `int64`.
+
+ Args:
+ output: DensePose predictor output with the following attributes:
+ - coarse_segm: tensor of size [N, D, H, W] with unnormalized coarse
+ segmentation scores
+ Return:
+ Tensor of size (S, S) and type `int64` with coarse segmentation annotations,
+ where S = DensePoseDataRelative.MASK_SIZE
+ """
+ sz = DensePoseDataRelative.MASK_SIZE
+ mask = (
+ F.interpolate(output.coarse_segm, (sz, sz), mode="bilinear", align_corners=False)
+ .argmax(dim=1)
+ .long()
+ .squeeze()
+ .cpu()
+ )
+ return mask
diff --git a/densepose/data/samplers/densepose_cse_confidence_based.py b/densepose/data/samplers/densepose_cse_confidence_based.py
new file mode 100644
index 0000000000000000000000000000000000000000..d656a5ab853152c65d8f4c88fe7210cf68ee8df7
--- /dev/null
+++ b/densepose/data/samplers/densepose_cse_confidence_based.py
@@ -0,0 +1,121 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+import random
+from typing import Optional, Tuple
+import torch
+from torch.nn import functional as F
+
+from detectron2.config import CfgNode
+from detectron2.structures import Instances
+
+from densepose.converters.base import IntTupleBox
+
+from .densepose_cse_base import DensePoseCSEBaseSampler
+
+
+class DensePoseCSEConfidenceBasedSampler(DensePoseCSEBaseSampler):
+ """
+ Samples DensePose data from DensePose predictions.
+ Samples for each class are drawn using confidence value estimates.
+ """
+
+ def __init__(
+ self,
+ cfg: CfgNode,
+ use_gt_categories: bool,
+ embedder: torch.nn.Module,
+ confidence_channel: str,
+ count_per_class: int = 8,
+ search_count_multiplier: Optional[float] = None,
+ search_proportion: Optional[float] = None,
+ ):
+ """
+ Constructor
+
+ Args:
+ cfg (CfgNode): the config of the model
+ embedder (torch.nn.Module): necessary to compute mesh vertex embeddings
+ confidence_channel (str): confidence channel to use for sampling;
+ possible values:
+ "coarse_segm_confidence": confidences for coarse segmentation
+ (default: "coarse_segm_confidence")
+ count_per_class (int): the sampler produces at most `count_per_class`
+ samples for each category (default: 8)
+ search_count_multiplier (float or None): if not None, the total number
+ of the most confident estimates of a given class to consider is
+ defined as `min(search_count_multiplier * count_per_class, N)`,
+ where `N` is the total number of estimates of the class; cannot be
+ specified together with `search_proportion` (default: None)
+ search_proportion (float or None): if not None, the total number of
+ the most confident estimates of a given class to consider is
+ defined as `min(max(search_proportion * N, count_per_class), N)`,
+ where `N` is the total number of estimates of the class; cannot be
+ specified together with `search_count_multiplier` (default: None)
+ """
+ super().__init__(cfg, use_gt_categories, embedder, count_per_class)
+ self.confidence_channel = confidence_channel
+ self.search_count_multiplier = search_count_multiplier
+ self.search_proportion = search_proportion
+ assert (search_count_multiplier is None) or (search_proportion is None), (
+ f"Cannot specify both search_count_multiplier (={search_count_multiplier})"
+ f" and search_proportion (={search_proportion})"
+ )
+
+ def _produce_index_sample(self, values: torch.Tensor, count: int):
+ """
+ Produce a sample of indices to select data based on confidences
+
+ Args:
+ values (torch.Tensor): a tensor of size [1, k] that contains confidences
+ k: number of points labeled with part_id
+ count (int): number of samples to produce, should be positive and <= k
+
+ Return:
+ list(int): indices of values (along axis 1) selected as a sample
+ """
+ k = values.shape[1]
+ if k == count:
+ index_sample = list(range(k))
+ else:
+ # take the best count * search_count_multiplier pixels,
+ # sample from them uniformly
+ # (here best = highest confidence)
+ _, sorted_confidence_indices = torch.sort(values[0])
+ if self.search_count_multiplier is not None:
+ search_count = min(int(count * self.search_count_multiplier), k)
+ elif self.search_proportion is not None:
+ search_count = min(max(int(k * self.search_proportion), count), k)
+ else:
+ search_count = min(count, k)
+ sample_from_top = random.sample(range(search_count), count)
+ index_sample = sorted_confidence_indices[-search_count:][sample_from_top]
+ return index_sample
+
+ def _produce_mask_and_results(
+ self, instance: Instances, bbox_xywh: IntTupleBox
+ ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+ """
+ Method to get labels and DensePose results from an instance
+
+ Args:
+ instance (Instances): an instance of
+ `DensePoseEmbeddingPredictorOutputWithConfidences`
+ bbox_xywh (IntTupleBox): the corresponding bounding box
+
+ Return:
+ mask (torch.Tensor): shape [H, W], DensePose segmentation mask
+ embeddings (torch.Tensor): a tensor of shape [D, H, W]
+ DensePose CSE Embeddings
+ other_values: a tensor of shape [1, H, W], DensePose CSE confidence
+ """
+ _, _, w, h = bbox_xywh
+ densepose_output = instance.pred_densepose
+ mask, embeddings, _ = super()._produce_mask_and_results(instance, bbox_xywh)
+ other_values = F.interpolate(
+ getattr(densepose_output, self.confidence_channel),
+ size=(h, w),
+ mode="bilinear",
+ )[0].cpu()
+ return mask, embeddings, other_values
diff --git a/densepose/data/samplers/densepose_cse_uniform.py b/densepose/data/samplers/densepose_cse_uniform.py
new file mode 100644
index 0000000000000000000000000000000000000000..482c650caf404bfe96dd28c5092d2508b17a1dbf
--- /dev/null
+++ b/densepose/data/samplers/densepose_cse_uniform.py
@@ -0,0 +1,14 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from .densepose_cse_base import DensePoseCSEBaseSampler
+from .densepose_uniform import DensePoseUniformSampler
+
+
+class DensePoseCSEUniformSampler(DensePoseCSEBaseSampler, DensePoseUniformSampler):
+ """
+ Uniform Sampler for CSE
+ """
+
+ pass
diff --git a/densepose/data/samplers/densepose_uniform.py b/densepose/data/samplers/densepose_uniform.py
new file mode 100644
index 0000000000000000000000000000000000000000..af0e35b667047674a498433e4c153475a5b5a1fc
--- /dev/null
+++ b/densepose/data/samplers/densepose_uniform.py
@@ -0,0 +1,43 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+import random
+import torch
+
+from .densepose_base import DensePoseBaseSampler
+
+
+class DensePoseUniformSampler(DensePoseBaseSampler):
+ """
+ Samples DensePose data from DensePose predictions.
+ Samples for each class are drawn uniformly over all pixels estimated
+ to belong to that class.
+ """
+
+ def __init__(self, count_per_class: int = 8):
+ """
+ Constructor
+
+ Args:
+ count_per_class (int): the sampler produces at most `count_per_class`
+ samples for each category
+ """
+ super().__init__(count_per_class)
+
+ def _produce_index_sample(self, values: torch.Tensor, count: int):
+ """
+ Produce a uniform sample of indices to select data
+
+ Args:
+ values (torch.Tensor): an array of size [n, k] that contains
+ estimated values (U, V, confidences);
+ n: number of channels (U, V, confidences)
+ k: number of points labeled with part_id
+ count (int): number of samples to produce, should be positive and <= k
+
+ Return:
+ list(int): indices of values (along axis 1) selected as a sample
+ """
+ k = values.shape[1]
+ return random.sample(range(k), count)
diff --git a/densepose/data/samplers/mask_from_densepose.py b/densepose/data/samplers/mask_from_densepose.py
new file mode 100644
index 0000000000000000000000000000000000000000..9d631dff320bbec264675e6772c565cd06fc6b9f
--- /dev/null
+++ b/densepose/data/samplers/mask_from_densepose.py
@@ -0,0 +1,30 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from detectron2.structures import BitMasks, Instances
+
+from densepose.converters import ToMaskConverter
+
+
+class MaskFromDensePoseSampler:
+ """
+ Produce mask GT from DensePose predictions
+ This sampler simply converts DensePose predictions to BitMasks
+ that contain a bool tensor of the size of the input image
+ """
+
+ def __call__(self, instances: Instances) -> BitMasks:
+ """
+ Converts predicted data from `instances` into the GT mask data
+
+ Args:
+ instances (Instances): predicted results, expected to have `pred_densepose` field
+
+ Returns:
+ Boolean Tensor of the size of the input image that has non-zero
+ values at pixels that are estimated to belong to the detected object
+ """
+ return ToMaskConverter.convert(
+ instances.pred_densepose, instances.pred_boxes, instances.image_size
+ )
diff --git a/densepose/data/samplers/prediction_to_gt.py b/densepose/data/samplers/prediction_to_gt.py
new file mode 100644
index 0000000000000000000000000000000000000000..42a28ff4f19012e96fdf3fb4923500839429a999
--- /dev/null
+++ b/densepose/data/samplers/prediction_to_gt.py
@@ -0,0 +1,100 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from dataclasses import dataclass
+from typing import Any, Callable, Dict, List, Optional
+
+from detectron2.structures import Instances
+
+ModelOutput = Dict[str, Any]
+SampledData = Dict[str, Any]
+
+
+@dataclass
+class _Sampler:
+ """
+ Sampler registry entry that contains:
+ - src (str): source field to sample from (deleted after sampling)
+ - dst (Optional[str]): destination field to sample to, if not None
+ - func (Optional[Callable: Any -> Any]): function that performs sampling,
+ if None, reference copy is performed
+ """
+
+ src: str
+ dst: Optional[str]
+ func: Optional[Callable[[Any], Any]]
+
+
+class PredictionToGroundTruthSampler:
+ """
+ Sampler implementation that converts predictions to GT using registered
+ samplers for different fields of `Instances`.
+ """
+
+ def __init__(self, dataset_name: str = ""):
+ self.dataset_name = dataset_name
+ self._samplers = {}
+ self.register_sampler("pred_boxes", "gt_boxes", None)
+ self.register_sampler("pred_classes", "gt_classes", None)
+ # delete scores
+ self.register_sampler("scores")
+
+ def __call__(self, model_output: List[ModelOutput]) -> List[SampledData]:
+ """
+ Transform model output into ground truth data through sampling
+
+ Args:
+ model_output (Dict[str, Any]): model output
+ Returns:
+ Dict[str, Any]: sampled data
+ """
+ for model_output_i in model_output:
+ instances: Instances = model_output_i["instances"]
+ # transform data in each field
+ for _, sampler in self._samplers.items():
+ if not instances.has(sampler.src) or sampler.dst is None:
+ continue
+ if sampler.func is None:
+ instances.set(sampler.dst, instances.get(sampler.src))
+ else:
+ instances.set(sampler.dst, sampler.func(instances))
+ # delete model output data that was transformed
+ for _, sampler in self._samplers.items():
+ if sampler.src != sampler.dst and instances.has(sampler.src):
+ instances.remove(sampler.src)
+ model_output_i["dataset"] = self.dataset_name
+ return model_output
+
+ def register_sampler(
+ self,
+ prediction_attr: str,
+ gt_attr: Optional[str] = None,
+ func: Optional[Callable[[Any], Any]] = None,
+ ):
+ """
+ Register sampler for a field
+
+ Args:
+ prediction_attr (str): field to replace with a sampled value
+ gt_attr (Optional[str]): field to store the sampled value to, if not None
+ func (Optional[Callable: Any -> Any]): sampler function
+ """
+ self._samplers[(prediction_attr, gt_attr)] = _Sampler(
+ src=prediction_attr, dst=gt_attr, func=func
+ )
+
+ def remove_sampler(
+ self,
+ prediction_attr: str,
+ gt_attr: Optional[str] = None,
+ ):
+ """
+ Remove sampler for a field
+
+ Args:
+ prediction_attr (str): field to replace with a sampled value
+ gt_attr (Optional[str]): field to store the sampled value to, if not None
+ """
+ assert (prediction_attr, gt_attr) in self._samplers
+ del self._samplers[(prediction_attr, gt_attr)]
diff --git a/densepose/data/transform/__init__.py b/densepose/data/transform/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..147671e198475ce4a82b17e8f81a688d697207d8
--- /dev/null
+++ b/densepose/data/transform/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from .image import ImageResizeTransform
diff --git a/densepose/data/transform/__pycache__/__init__.cpython-39.pyc b/densepose/data/transform/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e2c4b18f933a8a7ccf9fd415e5669d6418377aab
Binary files /dev/null and b/densepose/data/transform/__pycache__/__init__.cpython-39.pyc differ
diff --git a/densepose/data/transform/__pycache__/image.cpython-39.pyc b/densepose/data/transform/__pycache__/image.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4ca923dca82946775e637795ac51b8cce341d007
Binary files /dev/null and b/densepose/data/transform/__pycache__/image.cpython-39.pyc differ
diff --git a/densepose/data/transform/image.py b/densepose/data/transform/image.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f35b3ab1de3b1b58e9d7f9763c73eb1236f67d2
--- /dev/null
+++ b/densepose/data/transform/image.py
@@ -0,0 +1,41 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+import torch
+
+
+class ImageResizeTransform:
+ """
+ Transform that resizes images loaded from a dataset
+ (BGR data in NCHW channel order, typically uint8) to a format ready to be
+ consumed by DensePose training (BGR float32 data in NCHW channel order)
+ """
+
+ def __init__(self, min_size: int = 800, max_size: int = 1333):
+ self.min_size = min_size
+ self.max_size = max_size
+
+ def __call__(self, images: torch.Tensor) -> torch.Tensor:
+ """
+ Args:
+ images (torch.Tensor): tensor of size [N, 3, H, W] that contains
+ BGR data (typically in uint8)
+ Returns:
+ images (torch.Tensor): tensor of size [N, 3, H1, W1] where
+ H1 and W1 are chosen to respect the specified min and max sizes
+ and preserve the original aspect ratio, the data channels
+ follow BGR order and the data type is `torch.float32`
+ """
+ # resize with min size
+ images = images.float()
+ min_size = min(images.shape[-2:])
+ max_size = max(images.shape[-2:])
+ scale = min(self.min_size / min_size, self.max_size / max_size)
+ images = torch.nn.functional.interpolate(
+ images,
+ scale_factor=scale,
+ mode="bilinear",
+ align_corners=False,
+ )
+ return images
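Editor's note: a quick check of the resizing behaviour on a fake batch.

    import torch
    from densepose.data.transform import ImageResizeTransform

    transform = ImageResizeTransform(min_size=800, max_size=1333)
    batch = torch.randint(0, 256, (1, 3, 480, 640), dtype=torch.uint8)  # fake BGR batch
    resized = transform(batch)
    print(resized.dtype, resized.shape)   # torch.float32, scaled so the shorter side is ~800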
diff --git a/densepose/data/utils.py b/densepose/data/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..7625f3d5f7894d2d1519e8672d6fb2e6411e07ba
--- /dev/null
+++ b/densepose/data/utils.py
@@ -0,0 +1,40 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+import os
+from typing import Dict, Optional
+
+from detectron2.config import CfgNode
+
+
+def is_relative_local_path(path: str) -> bool:
+ path_str = os.fsdecode(path)
+ return ("://" not in path_str) and not os.path.isabs(path)
+
+
+def maybe_prepend_base_path(base_path: Optional[str], path: str):
+ """
+ Prepends the provided path with a base path prefix if:
+ 1) base path is not None;
+ 2) path is a local path
+ """
+ if base_path is None:
+ return path
+ if is_relative_local_path(path):
+ return os.path.join(base_path, path)
+ return path
+
+
+def get_class_to_mesh_name_mapping(cfg: CfgNode) -> Dict[int, str]:
+ return {
+ int(class_id): mesh_name
+ for class_id, mesh_name in cfg.DATASETS.CLASS_TO_MESH_NAME_MAPPING.items()
+ }
+
+
+def get_category_to_class_mapping(dataset_cfg: CfgNode) -> Dict[str, int]:
+ return {
+ category: int(class_id)
+ for category, class_id in dataset_cfg.CATEGORY_TO_CLASS_MAPPING.items()
+ }
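Editor's note: behaviour of the path helper, for reference (paths are made up).

    from densepose.data.utils import maybe_prepend_base_path

    maybe_prepend_base_path("datasets", "coco/train2017")    # -> "datasets/coco/train2017"
    maybe_prepend_base_path("datasets", "/abs/train2017")    # absolute path returned unchanged
    maybe_prepend_base_path(None, "coco/train2017")          # no base path -> returned unchanged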
diff --git a/densepose/data/video/__init__.py b/densepose/data/video/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..dbd83443be8d6fff40b35a13758c31984f3d89be
--- /dev/null
+++ b/densepose/data/video/__init__.py
@@ -0,0 +1,19 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from .frame_selector import (
+ FrameSelectionStrategy,
+ RandomKFramesSelector,
+ FirstKFramesSelector,
+ LastKFramesSelector,
+ FrameTsList,
+ FrameSelector,
+)
+
+from .video_keyframe_dataset import (
+ VideoKeyframeDataset,
+ video_list_from_file,
+ list_keyframes,
+ read_keyframes,
+)
diff --git a/densepose/data/video/__pycache__/__init__.cpython-39.pyc b/densepose/data/video/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..79d16cb81c94ad299c124828b29cb8a0bfa9e358
Binary files /dev/null and b/densepose/data/video/__pycache__/__init__.cpython-39.pyc differ
diff --git a/densepose/data/video/__pycache__/frame_selector.cpython-39.pyc b/densepose/data/video/__pycache__/frame_selector.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3336d45f920a5e106d92a8da89ef0e8373b843da
Binary files /dev/null and b/densepose/data/video/__pycache__/frame_selector.cpython-39.pyc differ
diff --git a/densepose/data/video/__pycache__/video_keyframe_dataset.cpython-39.pyc b/densepose/data/video/__pycache__/video_keyframe_dataset.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7e46b9af3c5a5a1bb2bdaa5af921fe941ed7a271
Binary files /dev/null and b/densepose/data/video/__pycache__/video_keyframe_dataset.cpython-39.pyc differ
diff --git a/densepose/data/video/frame_selector.py b/densepose/data/video/frame_selector.py
new file mode 100644
index 0000000000000000000000000000000000000000..77a97a82f7c7bb95b2023df946b246f9de71a7d2
--- /dev/null
+++ b/densepose/data/video/frame_selector.py
@@ -0,0 +1,89 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+import random
+from collections.abc import Callable
+from enum import Enum
+from typing import Callable as TCallable
+from typing import List
+
+FrameTsList = List[int]
+FrameSelector = TCallable[[FrameTsList], FrameTsList]
+
+
+class FrameSelectionStrategy(Enum):
+ """
+ Frame selection strategy used with videos:
+ - "random_k": select k random frames
+ - "first_k": select k first frames
+ - "last_k": select k last frames
+ - "all": select all frames
+ """
+
+ # fmt: off
+ RANDOM_K = "random_k"
+ FIRST_K = "first_k"
+ LAST_K = "last_k"
+ ALL = "all"
+ # fmt: on
+
+
+class RandomKFramesSelector(Callable): # pyre-ignore[39]
+ """
+ Selector that retains at most `k` random frames
+ """
+
+ def __init__(self, k: int):
+ self.k = k
+
+ def __call__(self, frame_tss: FrameTsList) -> FrameTsList:
+ """
+ Select `k` random frames
+
+ Args:
+ frame_tss (List[int]): timestamps of input frames
+ Returns:
+ List[int]: timestamps of selected frames
+ """
+ return random.sample(frame_tss, min(self.k, len(frame_tss)))
+
+
+class FirstKFramesSelector(Callable): # pyre-ignore[39]
+ """
+ Selector that retains at most `k` first frames
+ """
+
+ def __init__(self, k: int):
+ self.k = k
+
+ def __call__(self, frame_tss: FrameTsList) -> FrameTsList:
+ """
+ Select `k` first frames
+
+ Args:
+ frame_tss (List[int]): timestamps of input frames
+ Returns:
+ List[int]: timestamps of selected frames
+ """
+ return frame_tss[: self.k]
+
+
+class LastKFramesSelector(Callable): # pyre-ignore[39]
+ """
+ Selector that retains at most `k` last frames from video data
+ """
+
+ def __init__(self, k: int):
+ self.k = k
+
+ def __call__(self, frame_tss: FrameTsList) -> FrameTsList:
+ """
+ Select `k` last frames
+
+ Args:
+ frame_tss (List[int]): timestamps of input frames
+ Returns:
+ List[int]: timestamps of selected frames
+ """
+ return frame_tss[-self.k :]
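Editor's note: the selectors are plain callables over keyframe timestamp lists; for example (timestamps below are made up).

    from densepose.data.video import FirstKFramesSelector, LastKFramesSelector, RandomKFramesSelector

    keyframe_ts = [0, 250, 500, 750, 1000]          # hypothetical keyframe timestamps
    FirstKFramesSelector(k=2)(keyframe_ts)          # -> [0, 250]
    LastKFramesSelector(k=2)(keyframe_ts)           # -> [750, 1000]
    len(RandomKFramesSelector(k=3)(keyframe_ts))    # -> 3, chosen at random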
diff --git a/densepose/data/video/video_keyframe_dataset.py b/densepose/data/video/video_keyframe_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..d68857f095a1224313b1dfddc9d75981b04ffa34
--- /dev/null
+++ b/densepose/data/video/video_keyframe_dataset.py
@@ -0,0 +1,304 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+import csv
+import logging
+import numpy as np
+from typing import Any, Callable, Dict, List, Optional, Union
+import av
+import torch
+from torch.utils.data.dataset import Dataset
+
+from detectron2.utils.file_io import PathManager
+
+from ..utils import maybe_prepend_base_path
+from .frame_selector import FrameSelector, FrameTsList
+
+FrameList = List[av.frame.Frame] # pyre-ignore[16]
+FrameTransform = Callable[[torch.Tensor], torch.Tensor]
+
+
+def list_keyframes(video_fpath: str, video_stream_idx: int = 0) -> FrameTsList:
+ """
+ Traverses all keyframes of a video file. Returns a list of keyframe
+ timestamps. Timestamps are counts in timebase units.
+
+ Args:
+ video_fpath (str): Video file path
+ video_stream_idx (int): Video stream index (default: 0)
+ Returns:
+ List[int]: list of keyframe timestamps (timestamp is a count in timebase
+ units)
+ """
+ try:
+ with PathManager.open(video_fpath, "rb") as io:
+ # pyre-fixme[16]: Module `av` has no attribute `open`.
+ container = av.open(io, mode="r")
+ stream = container.streams.video[video_stream_idx]
+ keyframes = []
+ pts = -1
+ # Note: even though we request forward seeks for keyframes, sometimes
+ # a keyframe in backwards direction is returned. We introduce tolerance
+ # as a max count of ignored backward seeks
+ tolerance_backward_seeks = 2
+ while True:
+ try:
+ container.seek(pts + 1, backward=False, any_frame=False, stream=stream)
+ except av.AVError as e:
+ # the exception occurs when the video length is exceeded,
+ # we then return whatever data we've already collected
+ logger = logging.getLogger(__name__)
+ logger.debug(
+ f"List keyframes: Error seeking video file {video_fpath}, "
+ f"video stream {video_stream_idx}, pts {pts + 1}, AV error: {e}"
+ )
+ return keyframes
+ except OSError as e:
+ logger = logging.getLogger(__name__)
+ logger.warning(
+ f"List keyframes: Error seeking video file {video_fpath}, "
+ f"video stream {video_stream_idx}, pts {pts + 1}, OS error: {e}"
+ )
+ return []
+ packet = next(container.demux(video=video_stream_idx))
+ if packet.pts is not None and packet.pts <= pts:
+ logger = logging.getLogger(__name__)
+ logger.warning(
+ f"Video file {video_fpath}, stream {video_stream_idx}: "
+ f"bad seek for packet {pts + 1} (got packet {packet.pts}), "
+ f"tolerance {tolerance_backward_seeks}."
+ )
+ tolerance_backward_seeks -= 1
+ if tolerance_backward_seeks == 0:
+ return []
+ pts += 1
+ continue
+ tolerance_backward_seeks = 2
+ pts = packet.pts
+ if pts is None:
+ return keyframes
+ if packet.is_keyframe:
+ keyframes.append(pts)
+ return keyframes
+ except OSError as e:
+ logger = logging.getLogger(__name__)
+ logger.warning(
+ f"List keyframes: Error opening video file container {video_fpath}, " f"OS error: {e}"
+ )
+ except RuntimeError as e:
+ logger = logging.getLogger(__name__)
+ logger.warning(
+ f"List keyframes: Error opening video file container {video_fpath}, "
+ f"Runtime error: {e}"
+ )
+ return []
+
+
+def read_keyframes(
+ video_fpath: str, keyframes: FrameTsList, video_stream_idx: int = 0
+) -> FrameList: # pyre-ignore[11]
+ """
+ Reads keyframe data from a video file.
+
+ Args:
+ video_fpath (str): Video file path
+ keyframes (List[int]): List of keyframe timestamps (as counts in
+ timebase units to be used in container seek operations)
+ video_stream_idx (int): Video stream index (default: 0)
+ Returns:
+ List[Frame]: list of frames that correspond to the specified timestamps
+ """
+ try:
+ with PathManager.open(video_fpath, "rb") as io:
+ # pyre-fixme[16]: Module `av` has no attribute `open`.
+ container = av.open(io)
+ stream = container.streams.video[video_stream_idx]
+ frames = []
+ for pts in keyframes:
+ try:
+ container.seek(pts, any_frame=False, stream=stream)
+ frame = next(container.decode(video=0))
+ frames.append(frame)
+ except av.AVError as e:
+ logger = logging.getLogger(__name__)
+ logger.warning(
+ f"Read keyframes: Error seeking video file {video_fpath}, "
+ f"video stream {video_stream_idx}, pts {pts}, AV error: {e}"
+ )
+ container.close()
+ return frames
+ except OSError as e:
+ logger = logging.getLogger(__name__)
+ logger.warning(
+ f"Read keyframes: Error seeking video file {video_fpath}, "
+ f"video stream {video_stream_idx}, pts {pts}, OS error: {e}"
+ )
+ container.close()
+ return frames
+ except StopIteration:
+ logger = logging.getLogger(__name__)
+ logger.warning(
+ f"Read keyframes: Error decoding frame from {video_fpath}, "
+ f"video stream {video_stream_idx}, pts {pts}"
+ )
+ container.close()
+ return frames
+
+ container.close()
+ return frames
+ except OSError as e:
+ logger = logging.getLogger(__name__)
+ logger.warning(
+ f"Read keyframes: Error opening video file container {video_fpath}, OS error: {e}"
+ )
+ except RuntimeError as e:
+ logger = logging.getLogger(__name__)
+ logger.warning(
+ f"Read keyframes: Error opening video file container {video_fpath}, Runtime error: {e}"
+ )
+ return []
+
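+# Illustrative flow (a sketch of intended use, not part of the upstream module):
+# keyframe timestamps are listed once, optionally thinned by a selector from
+# .frame_selector, and only the surviving timestamps are decoded:
+#
+#   tss = list_keyframes("/path/to/video.mp4")              # hypothetical path
+#   tss = LastKFramesSelector(10)(tss)
+#   frames = read_keyframes("/path/to/video.mp4", tss)
+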
+
+def video_list_from_file(video_list_fpath: str, base_path: Optional[str] = None):
+ """
+ Create a list of paths to video files from a text file.
+
+ Args:
+ video_list_fpath (str): path to a plain text file with the list of videos
+ base_path (str): base path for entries from the video list (default: None)
+ """
+ video_list = []
+ with PathManager.open(video_list_fpath, "r") as io:
+ for line in io:
+ video_list.append(maybe_prepend_base_path(base_path, str(line.strip())))
+ return video_list
+
+
+def read_keyframe_helper_data(fpath: str):
+ """
+ Read keyframe data from a file in CSV format: the header should contain
+ "video_id" and "keyframes" fields. Value specifications are:
+ video_id: int
+ keyframes: list(int)
+ Example of contents:
+ video_id,keyframes
+ 2,"[1,11,21,31,41,51,61,71,81]"
+
+ Args:
+ fpath (str): File containing keyframe data
+
+ Return:
+ video_id_to_keyframes (dict: int -> list(int)): for a given video ID it
+ contains a list of keyframes for that video
+ """
+ video_id_to_keyframes = {}
+ try:
+ with PathManager.open(fpath, "r") as io:
+ csv_reader = csv.reader(io)
+ header = next(csv_reader)
+ video_id_idx = header.index("video_id")
+ keyframes_idx = header.index("keyframes")
+ for row in csv_reader:
+ video_id = int(row[video_id_idx])
+ assert (
+ video_id not in video_id_to_keyframes
+ ), f"Duplicate keyframes entry for video {fpath}"
+ video_id_to_keyframes[video_id] = (
+ [int(v) for v in row[keyframes_idx][1:-1].split(",")]
+ if len(row[keyframes_idx]) > 2
+ else []
+ )
+ except Exception as e:
+ logger = logging.getLogger(__name__)
+ logger.warning(f"Error reading keyframe helper data from {fpath}: {e}")
+ return video_id_to_keyframes
+
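+# For the CSV example in the docstring above, and assuming the file is readable,
+# the parsed result is {2: [1, 11, 21, 31, 41, 51, 61, 71, 81]}.
+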
+
+class VideoKeyframeDataset(Dataset):
+ """
+ Dataset that provides keyframes for a set of videos.
+ """
+
+ _EMPTY_FRAMES = torch.empty((0, 3, 1, 1))
+
+ def __init__(
+ self,
+ video_list: List[str],
+ category_list: Union[str, List[str], None] = None,
+ frame_selector: Optional[FrameSelector] = None,
+ transform: Optional[FrameTransform] = None,
+ keyframe_helper_fpath: Optional[str] = None,
+ ):
+ """
+ Dataset constructor
+
+ Args:
+ video_list (List[str]): list of paths to video files
+ category_list (Union[str, List[str], None]): list of animal categories for each
+ video file. If it is a string, or None, this applies to all videos
+ frame_selector (Callable: KeyFrameList -> KeyFrameList):
+ selects keyframes to process, keyframes are given by
+ packet timestamps in timebase counts. If None, all keyframes
+ are selected (default: None)
+ transform (Callable: torch.Tensor -> torch.Tensor):
+ transforms a batch of RGB images (tensors of size [B, 3, H, W]),
+ returns a tensor of the same size. If None, no transform is
+                applied (default: None)
+            keyframe_helper_fpath (str): path to a CSV file with precomputed
+                keyframes (see `read_keyframe_helper_data`); if None, keyframes
+                are extracted from the video files directly (default: None)
+        """
+ if type(category_list) is list:
+ self.category_list = category_list
+ else:
+ self.category_list = [category_list] * len(video_list)
+ assert len(video_list) == len(
+ self.category_list
+ ), "length of video and category lists must be equal"
+ self.video_list = video_list
+ self.frame_selector = frame_selector
+ self.transform = transform
+ self.keyframe_helper_data = (
+ read_keyframe_helper_data(keyframe_helper_fpath)
+ if keyframe_helper_fpath is not None
+ else None
+ )
+
+ def __getitem__(self, idx: int) -> Dict[str, Any]:
+ """
+ Gets selected keyframes from a given video
+
+ Args:
+ idx (int): video index in the video list file
+ Returns:
+            A dictionary containing two keys:
+                images (torch.Tensor): float BGR tensor of size [N, 3, H, W], or of
+                    the size produced by the transform, containing keyframe data
+                categories (List[str]): categories of the frames
+ """
+ categories = [self.category_list[idx]]
+ fpath = self.video_list[idx]
+ keyframes = (
+ list_keyframes(fpath)
+ if self.keyframe_helper_data is None or idx not in self.keyframe_helper_data
+ else self.keyframe_helper_data[idx]
+ )
+ transform = self.transform
+ frame_selector = self.frame_selector
+ if not keyframes:
+ return {"images": self._EMPTY_FRAMES, "categories": []}
+ if frame_selector is not None:
+ keyframes = frame_selector(keyframes)
+ frames = read_keyframes(fpath, keyframes)
+ if not frames:
+ return {"images": self._EMPTY_FRAMES, "categories": []}
+ frames = np.stack([frame.to_rgb().to_ndarray() for frame in frames])
+ frames = torch.as_tensor(frames, device=torch.device("cpu"))
+ frames = frames[..., [2, 1, 0]] # RGB -> BGR
+ frames = frames.permute(0, 3, 1, 2).float() # NHWC -> NCHW
+ if transform is not None:
+ frames = transform(frames)
+ return {"images": frames, "categories": categories}
+
+ def __len__(self):
+ return len(self.video_list)
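+
+
+if __name__ == "__main__":
+    # Smoke-test sketch (not part of the upstream dataset); "video_list.txt" is
+    # an assumed file with one video path per line. Videos that cannot be read
+    # simply yield empty frame tensors.
+    from densepose.data.video.frame_selector import FirstKFramesSelector
+
+    videos = video_list_from_file("video_list.txt")
+    dataset = VideoKeyframeDataset(videos, frame_selector=FirstKFramesSelector(4))
+    if len(dataset) > 0:
+        sample = dataset[0]
+        print(sample["images"].shape, sample["categories"])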
diff --git a/densepose/engine/__init__.py b/densepose/engine/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..4709c1b2d87e3c578d98aaa083e41323e4047ac9
--- /dev/null
+++ b/densepose/engine/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from .trainer import Trainer
diff --git a/densepose/engine/trainer.py b/densepose/engine/trainer.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c9046312244dc2381ea722413986010f4ba75e7
--- /dev/null
+++ b/densepose/engine/trainer.py
@@ -0,0 +1,260 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+# pyre-unsafe
+
+import logging
+import os
+from collections import OrderedDict
+from typing import List, Optional, Union
+import torch
+from torch import nn
+
+from detectron2.checkpoint import DetectionCheckpointer
+from detectron2.config import CfgNode
+from detectron2.engine import DefaultTrainer
+from detectron2.evaluation import (
+ DatasetEvaluator,
+ DatasetEvaluators,
+ inference_on_dataset,
+ print_csv_format,
+)
+from detectron2.solver.build import get_default_optimizer_params, maybe_add_gradient_clipping
+from detectron2.utils import comm
+from detectron2.utils.events import EventWriter, get_event_storage
+
+from densepose import DensePoseDatasetMapperTTA, DensePoseGeneralizedRCNNWithTTA, load_from_cfg
+from densepose.data import (
+ DatasetMapper,
+ build_combined_loader,
+ build_detection_test_loader,
+ build_detection_train_loader,
+ build_inference_based_loaders,
+ has_inference_based_loaders,
+)
+from densepose.evaluation.d2_evaluator_adapter import Detectron2COCOEvaluatorAdapter
+from densepose.evaluation.evaluator import DensePoseCOCOEvaluator, build_densepose_evaluator_storage
+from densepose.modeling.cse import Embedder
+
+
+class SampleCountingLoader:
+ def __init__(self, loader):
+ self.loader = loader
+
+ def __iter__(self):
+ it = iter(self.loader)
+ storage = get_event_storage()
+ while True:
+ try:
+ batch = next(it)
+ num_inst_per_dataset = {}
+ for data in batch:
+ dataset_name = data["dataset"]
+ if dataset_name not in num_inst_per_dataset:
+ num_inst_per_dataset[dataset_name] = 0
+ num_inst = len(data["instances"])
+ num_inst_per_dataset[dataset_name] += num_inst
+ for dataset_name in num_inst_per_dataset:
+ storage.put_scalar(f"batch/{dataset_name}", num_inst_per_dataset[dataset_name])
+ yield batch
+ except StopIteration:
+ break
+
+
+class SampleCountMetricPrinter(EventWriter):
+ def __init__(self):
+ self.logger = logging.getLogger(__name__)
+
+ def write(self):
+ storage = get_event_storage()
+ batch_stats_strs = []
+ for key, buf in storage.histories().items():
+ if key.startswith("batch/"):
+ batch_stats_strs.append(f"{key} {buf.avg(20)}")
+ self.logger.info(", ".join(batch_stats_strs))
+
+
+class Trainer(DefaultTrainer):
+ @classmethod
+ def extract_embedder_from_model(cls, model: nn.Module) -> Optional[Embedder]:
+ if isinstance(model, nn.parallel.DistributedDataParallel):
+ model = model.module
+ if hasattr(model, "roi_heads") and hasattr(model.roi_heads, "embedder"):
+ return model.roi_heads.embedder
+ return None
+
+ # TODO: the only reason to copy the base class code here is to pass the embedder from
+ # the model to the evaluator; that should be refactored to avoid unnecessary copy-pasting
+ @classmethod
+ def test(
+ cls,
+ cfg: CfgNode,
+ model: nn.Module,
+ evaluators: Optional[Union[DatasetEvaluator, List[DatasetEvaluator]]] = None,
+ ):
+ """
+ Args:
+ cfg (CfgNode):
+ model (nn.Module):
+ evaluators (DatasetEvaluator, list[DatasetEvaluator] or None): if None, will call
+ :meth:`build_evaluator`. Otherwise, must have the same length as
+ ``cfg.DATASETS.TEST``.
+
+ Returns:
+ dict: a dict of result metrics
+ """
+ logger = logging.getLogger(__name__)
+ if isinstance(evaluators, DatasetEvaluator):
+ evaluators = [evaluators]
+ if evaluators is not None:
+ assert len(cfg.DATASETS.TEST) == len(evaluators), "{} != {}".format(
+ len(cfg.DATASETS.TEST), len(evaluators)
+ )
+
+ results = OrderedDict()
+ for idx, dataset_name in enumerate(cfg.DATASETS.TEST):
+ data_loader = cls.build_test_loader(cfg, dataset_name)
+ # When evaluators are passed in as arguments,
+ # implicitly assume that evaluators can be created before data_loader.
+ if evaluators is not None:
+ evaluator = evaluators[idx]
+ else:
+ try:
+ embedder = cls.extract_embedder_from_model(model)
+ evaluator = cls.build_evaluator(cfg, dataset_name, embedder=embedder)
+ except NotImplementedError:
+                    logger.warning(
+ "No evaluator found. Use `DefaultTrainer.test(evaluators=)`, "
+ "or implement its `build_evaluator` method."
+ )
+ results[dataset_name] = {}
+ continue
+ if cfg.DENSEPOSE_EVALUATION.DISTRIBUTED_INFERENCE or comm.is_main_process():
+ results_i = inference_on_dataset(model, data_loader, evaluator)
+ else:
+ results_i = {}
+ results[dataset_name] = results_i
+ if comm.is_main_process():
+ assert isinstance(
+ results_i, dict
+ ), "Evaluator must return a dict on the main process. Got {} instead.".format(
+ results_i
+ )
+ logger.info("Evaluation results for {} in csv format:".format(dataset_name))
+ print_csv_format(results_i)
+
+ if len(results) == 1:
+ results = list(results.values())[0]
+ return results
+
+ @classmethod
+ def build_evaluator(
+ cls,
+ cfg: CfgNode,
+ dataset_name: str,
+ output_folder: Optional[str] = None,
+ embedder: Optional[Embedder] = None,
+ ) -> DatasetEvaluators:
+ if output_folder is None:
+ output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
+ evaluators = []
+ distributed = cfg.DENSEPOSE_EVALUATION.DISTRIBUTED_INFERENCE
+ # Note: we currently use COCO evaluator for both COCO and LVIS datasets
+ # to have compatible metrics. LVIS bbox evaluator could also be used
+ # with an adapter to properly handle filtered / mapped categories
+ # evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type
+ # if evaluator_type == "coco":
+ # evaluators.append(COCOEvaluator(dataset_name, output_dir=output_folder))
+ # elif evaluator_type == "lvis":
+ # evaluators.append(LVISEvaluator(dataset_name, output_dir=output_folder))
+ evaluators.append(
+ Detectron2COCOEvaluatorAdapter(
+ dataset_name, output_dir=output_folder, distributed=distributed
+ )
+ )
+ if cfg.MODEL.DENSEPOSE_ON:
+ storage = build_densepose_evaluator_storage(cfg, output_folder)
+ evaluators.append(
+ DensePoseCOCOEvaluator(
+ dataset_name,
+ distributed,
+ output_folder,
+ evaluator_type=cfg.DENSEPOSE_EVALUATION.TYPE,
+ min_iou_threshold=cfg.DENSEPOSE_EVALUATION.MIN_IOU_THRESHOLD,
+ storage=storage,
+ embedder=embedder,
+ should_evaluate_mesh_alignment=cfg.DENSEPOSE_EVALUATION.EVALUATE_MESH_ALIGNMENT,
+ mesh_alignment_mesh_names=cfg.DENSEPOSE_EVALUATION.MESH_ALIGNMENT_MESH_NAMES,
+ )
+ )
+ return DatasetEvaluators(evaluators)
+
+ @classmethod
+ def build_optimizer(cls, cfg: CfgNode, model: nn.Module):
+ params = get_default_optimizer_params(
+ model,
+ base_lr=cfg.SOLVER.BASE_LR,
+ weight_decay_norm=cfg.SOLVER.WEIGHT_DECAY_NORM,
+ bias_lr_factor=cfg.SOLVER.BIAS_LR_FACTOR,
+ weight_decay_bias=cfg.SOLVER.WEIGHT_DECAY_BIAS,
+ overrides={
+ "features": {
+ "lr": cfg.SOLVER.BASE_LR * cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.FEATURES_LR_FACTOR,
+ },
+ "embeddings": {
+ "lr": cfg.SOLVER.BASE_LR * cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDING_LR_FACTOR,
+ },
+ },
+ )
+ optimizer = torch.optim.SGD(
+ params,
+ cfg.SOLVER.BASE_LR,
+ momentum=cfg.SOLVER.MOMENTUM,
+ nesterov=cfg.SOLVER.NESTEROV,
+ weight_decay=cfg.SOLVER.WEIGHT_DECAY,
+ )
+ # pyre-fixme[6]: For 2nd param expected `Type[Optimizer]` but got `SGD`.
+ return maybe_add_gradient_clipping(cfg, optimizer)
+
+ @classmethod
+ def build_test_loader(cls, cfg: CfgNode, dataset_name):
+ return build_detection_test_loader(cfg, dataset_name, mapper=DatasetMapper(cfg, False))
+
+ @classmethod
+ def build_train_loader(cls, cfg: CfgNode):
+ data_loader = build_detection_train_loader(cfg, mapper=DatasetMapper(cfg, True))
+ if not has_inference_based_loaders(cfg):
+ return data_loader
+ model = cls.build_model(cfg)
+ model.to(cfg.BOOTSTRAP_MODEL.DEVICE)
+ DetectionCheckpointer(model).resume_or_load(cfg.BOOTSTRAP_MODEL.WEIGHTS, resume=False)
+ inference_based_loaders, ratios = build_inference_based_loaders(cfg, model)
+ loaders = [data_loader] + inference_based_loaders
+ ratios = [1.0] + ratios
+ combined_data_loader = build_combined_loader(cfg, loaders, ratios)
+ sample_counting_loader = SampleCountingLoader(combined_data_loader)
+ return sample_counting_loader
+
+ def build_writers(self):
+ writers = super().build_writers()
+ writers.append(SampleCountMetricPrinter())
+ return writers
+
+ @classmethod
+ def test_with_TTA(cls, cfg: CfgNode, model):
+ logger = logging.getLogger("detectron2.trainer")
+ # In the end of training, run an evaluation with TTA
+ # Only support some R-CNN models.
+ logger.info("Running inference with test-time augmentation ...")
+ transform_data = load_from_cfg(cfg)
+ model = DensePoseGeneralizedRCNNWithTTA(
+ cfg, model, transform_data, DensePoseDatasetMapperTTA(cfg)
+ )
+ evaluators = [
+ cls.build_evaluator(
+ cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA")
+ )
+ for name in cfg.DATASETS.TEST
+ ]
+ res = cls.test(cfg, model, evaluators) # pyre-ignore[6]
+ res = OrderedDict({k + "_TTA": v for k, v in res.items()})
+ return res
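+
+
+# Typical driver flow for this Trainer (a sketch; `cfg` is assumed to be a
+# detectron2 CfgNode assembled elsewhere, e.g. in a train_net-style script):
+#
+#   trainer = Trainer(cfg)
+#   trainer.resume_or_load(resume=False)
+#   trainer.train()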
diff --git a/densepose/evaluation/__init__.py b/densepose/evaluation/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..cffabf0808c913a309b791ba8869c80db52a0ac8
--- /dev/null
+++ b/densepose/evaluation/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from .evaluator import DensePoseCOCOEvaluator
diff --git a/densepose/evaluation/__pycache__/__init__.cpython-39.pyc b/densepose/evaluation/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..dbc342d643af7faa280175d272cef37e20cfdadb
Binary files /dev/null and b/densepose/evaluation/__pycache__/__init__.cpython-39.pyc differ
diff --git a/densepose/evaluation/__pycache__/densepose_coco_evaluation.cpython-39.pyc b/densepose/evaluation/__pycache__/densepose_coco_evaluation.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3e2e5dae772b724f21b4bfb17d1c6ce06c3edf62
Binary files /dev/null and b/densepose/evaluation/__pycache__/densepose_coco_evaluation.cpython-39.pyc differ
diff --git a/densepose/evaluation/__pycache__/evaluator.cpython-39.pyc b/densepose/evaluation/__pycache__/evaluator.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..83c5d730167a55cf162a0aadecb4d155f21a5632
Binary files /dev/null and b/densepose/evaluation/__pycache__/evaluator.cpython-39.pyc differ
diff --git a/densepose/evaluation/__pycache__/mesh_alignment_evaluator.cpython-39.pyc b/densepose/evaluation/__pycache__/mesh_alignment_evaluator.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..155670b616bf38fd96745c5688279965f1666115
Binary files /dev/null and b/densepose/evaluation/__pycache__/mesh_alignment_evaluator.cpython-39.pyc differ
diff --git a/densepose/evaluation/__pycache__/tensor_storage.cpython-39.pyc b/densepose/evaluation/__pycache__/tensor_storage.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7e50821f5e2ab3af814c0abeffdf2667c4609e8d
Binary files /dev/null and b/densepose/evaluation/__pycache__/tensor_storage.cpython-39.pyc differ
diff --git a/densepose/evaluation/d2_evaluator_adapter.py b/densepose/evaluation/d2_evaluator_adapter.py
new file mode 100644
index 0000000000000000000000000000000000000000..a7fbb9e34f42bce02c71eab9efad742491c6b4aa
--- /dev/null
+++ b/densepose/evaluation/d2_evaluator_adapter.py
@@ -0,0 +1,52 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from detectron2.data.catalog import Metadata
+from detectron2.evaluation import COCOEvaluator
+
+from densepose.data.datasets.coco import (
+ get_contiguous_id_to_category_id_map,
+ maybe_filter_categories_cocoapi,
+)
+
+
+def _maybe_add_iscrowd_annotations(cocoapi) -> None:
+ for ann in cocoapi.dataset["annotations"]:
+ if "iscrowd" not in ann:
+ ann["iscrowd"] = 0
+
+
+class Detectron2COCOEvaluatorAdapter(COCOEvaluator):
+ def __init__(
+ self,
+ dataset_name,
+ output_dir=None,
+ distributed=True,
+ ):
+ super().__init__(dataset_name, output_dir=output_dir, distributed=distributed)
+ maybe_filter_categories_cocoapi(dataset_name, self._coco_api)
+ _maybe_add_iscrowd_annotations(self._coco_api)
+ # substitute category metadata to account for categories
+ # that are mapped to the same contiguous id
+ if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"):
+ self._maybe_substitute_metadata()
+
+ def _maybe_substitute_metadata(self):
+ cont_id_2_cat_id = get_contiguous_id_to_category_id_map(self._metadata)
+ cat_id_2_cont_id = self._metadata.thing_dataset_id_to_contiguous_id
+ if len(cont_id_2_cat_id) == len(cat_id_2_cont_id):
+ return
+
+ cat_id_2_cont_id_injective = {}
+ for cat_id, cont_id in cat_id_2_cont_id.items():
+ if (cont_id in cont_id_2_cat_id) and (cont_id_2_cat_id[cont_id] == cat_id):
+ cat_id_2_cont_id_injective[cat_id] = cont_id
+
+ metadata_new = Metadata(name=self._metadata.name)
+ for key, value in self._metadata.__dict__.items():
+ if key == "thing_dataset_id_to_contiguous_id":
+ setattr(metadata_new, key, cat_id_2_cont_id_injective)
+ else:
+ setattr(metadata_new, key, value)
+ self._metadata = metadata_new
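+
+
+# Illustrative example (made-up values): if thing_dataset_id_to_contiguous_id
+# maps two dataset categories onto one contiguous id, e.g. {1: 0, 3: 0}, while
+# the reverse map resolves 0 -> 1, only the consistent pair {1: 0} is kept, so
+# COCO evaluation is reported against dataset category 1.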
diff --git a/densepose/evaluation/densepose_coco_evaluation.py b/densepose/evaluation/densepose_coco_evaluation.py
new file mode 100644
index 0000000000000000000000000000000000000000..16bcec6a08921eb62f22ece337821d7ce9e7e591
--- /dev/null
+++ b/densepose/evaluation/densepose_coco_evaluation.py
@@ -0,0 +1,1305 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+# This is a modified version of cocoeval.py where we also have the densepose evaluation.
+
+# pyre-unsafe
+
+__author__ = "tsungyi"
+
+import copy
+import datetime
+import logging
+import numpy as np
+import pickle
+import time
+from collections import defaultdict
+from enum import Enum
+from typing import Any, Dict, Tuple
+import scipy.spatial.distance as ssd
+import torch
+import torch.nn.functional as F
+from pycocotools import mask as maskUtils
+from scipy.io import loadmat
+from scipy.ndimage import zoom as spzoom
+
+from detectron2.utils.file_io import PathManager
+
+from densepose.converters.chart_output_to_chart_result import resample_uv_tensors_to_bbox
+from densepose.converters.segm_to_mask import (
+ resample_coarse_segm_tensor_to_bbox,
+ resample_fine_and_coarse_segm_tensors_to_bbox,
+)
+from densepose.modeling.cse.utils import squared_euclidean_distance_matrix
+from densepose.structures import DensePoseDataRelative
+from densepose.structures.mesh import create_mesh
+
+logger = logging.getLogger(__name__)
+
+
+class DensePoseEvalMode(str, Enum):
+ # use both masks and geodesic distances (GPS * IOU) to compute scores
+ GPSM = "gpsm"
+ # use only geodesic distances (GPS) to compute scores
+ GPS = "gps"
+ # use only masks (IOU) to compute scores
+ IOU = "iou"
+
+
+class DensePoseDataMode(str, Enum):
+ # use estimated IUV data (default mode)
+ IUV_DT = "iuvdt"
+ # use ground truth IUV data
+ IUV_GT = "iuvgt"
+ # use ground truth labels I and set UV to 0
+ I_GT_UV_0 = "igtuv0"
+ # use ground truth labels I and estimated UV coordinates
+ I_GT_UV_DT = "igtuvdt"
+ # use estimated labels I and set UV to 0
+ I_DT_UV_0 = "idtuv0"
+
+
+class DensePoseCocoEval:
+ # Interface for evaluating detection on the Microsoft COCO dataset.
+ #
+ # The usage for CocoEval is as follows:
+ # cocoGt=..., cocoDt=... # load dataset and results
+ # E = CocoEval(cocoGt,cocoDt); # initialize CocoEval object
+ # E.params.recThrs = ...; # set parameters as desired
+ # E.evaluate(); # run per image evaluation
+ # E.accumulate(); # accumulate per image results
+ # E.summarize(); # display summary metrics of results
+ # For example usage see evalDemo.m and http://mscoco.org/.
+ #
+ # The evaluation parameters are as follows (defaults in brackets):
+ # imgIds - [all] N img ids to use for evaluation
+ # catIds - [all] K cat ids to use for evaluation
+ # iouThrs - [.5:.05:.95] T=10 IoU thresholds for evaluation
+ # recThrs - [0:.01:1] R=101 recall thresholds for evaluation
+ # areaRng - [...] A=4 object area ranges for evaluation
+ # maxDets - [1 10 100] M=3 thresholds on max detections per image
+ # iouType - ['segm'] set iouType to 'segm', 'bbox', 'keypoints' or 'densepose'
+ # iouType replaced the now DEPRECATED useSegm parameter.
+ # useCats - [1] if true use category labels for evaluation
+ # Note: if useCats=0 category labels are ignored as in proposal scoring.
+ # Note: multiple areaRngs [Ax2] and maxDets [Mx1] can be specified.
+ #
+ # evaluate(): evaluates detections on every image and every category and
+ # concats the results into the "evalImgs" with fields:
+ # dtIds - [1xD] id for each of the D detections (dt)
+ # gtIds - [1xG] id for each of the G ground truths (gt)
+ # dtMatches - [TxD] matching gt id at each IoU or 0
+ # gtMatches - [TxG] matching dt id at each IoU or 0
+ # dtScores - [1xD] confidence of each dt
+ # gtIgnore - [1xG] ignore flag for each gt
+ # dtIgnore - [TxD] ignore flag for each dt at each IoU
+ #
+ # accumulate(): accumulates the per-image, per-category evaluation
+ # results in "evalImgs" into the dictionary "eval" with fields:
+ # params - parameters used for evaluation
+ # date - date evaluation was performed
+ # counts - [T,R,K,A,M] parameter dimensions (see above)
+ # precision - [TxRxKxAxM] precision for every evaluation setting
+ # recall - [TxKxAxM] max recall for every evaluation setting
+ # Note: precision and recall==-1 for settings with no gt objects.
+ #
+ # See also coco, mask, pycocoDemo, pycocoEvalDemo
+ #
+ # Microsoft COCO Toolbox. version 2.0
+ # Data, paper, and tutorials available at: http://mscoco.org/
+ # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
+ # Licensed under the Simplified BSD License [see coco/license.txt]
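+    #
+    # A Python sketch of the same flow for this DensePose variant (cocoGt and
+    # cocoDt are assumed to be pycocotools COCO objects carrying DensePose
+    # annotations / results):
+    #   E = DensePoseCocoEval(cocoGt, cocoDt, iouType="densepose")
+    #   E.evaluate()
+    #   E.accumulate()
+    #   E.summarize()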
+ def __init__(
+ self,
+ cocoGt=None,
+ cocoDt=None,
+ iouType: str = "densepose",
+ multi_storage=None,
+ embedder=None,
+ dpEvalMode: DensePoseEvalMode = DensePoseEvalMode.GPS,
+ dpDataMode: DensePoseDataMode = DensePoseDataMode.IUV_DT,
+ ):
+ """
+ Initialize CocoEval using coco APIs for gt and dt
+ :param cocoGt: coco object with ground truth annotations
+ :param cocoDt: coco object with detection results
+ :return: None
+ """
+ self.cocoGt = cocoGt # ground truth COCO API
+ self.cocoDt = cocoDt # detections COCO API
+ self.multi_storage = multi_storage
+ self.embedder = embedder
+ self._dpEvalMode = dpEvalMode
+ self._dpDataMode = dpDataMode
+ self.evalImgs = defaultdict(list) # per-image per-category eval results [KxAxI]
+ self.eval = {} # accumulated evaluation results
+ self._gts = defaultdict(list) # gt for evaluation
+ self._dts = defaultdict(list) # dt for evaluation
+ self.params = Params(iouType=iouType) # parameters
+ self._paramsEval = {} # parameters for evaluation
+ self.stats = [] # result summarization
+ self.ious = {} # ious between all gts and dts
+ if cocoGt is not None:
+ self.params.imgIds = sorted(cocoGt.getImgIds())
+ self.params.catIds = sorted(cocoGt.getCatIds())
+ self.ignoreThrBB = 0.7
+ self.ignoreThrUV = 0.9
+
+ def _loadGEval(self):
+ smpl_subdiv_fpath = PathManager.get_local_path(
+ "https://dl.fbaipublicfiles.com/densepose/data/SMPL_subdiv.mat"
+ )
+ pdist_transform_fpath = PathManager.get_local_path(
+ "https://dl.fbaipublicfiles.com/densepose/data/SMPL_SUBDIV_TRANSFORM.mat"
+ )
+ pdist_matrix_fpath = PathManager.get_local_path(
+ "https://dl.fbaipublicfiles.com/densepose/data/Pdist_matrix.pkl", timeout_sec=120
+ )
+ SMPL_subdiv = loadmat(smpl_subdiv_fpath)
+ self.PDIST_transform = loadmat(pdist_transform_fpath)
+ self.PDIST_transform = self.PDIST_transform["index"].squeeze()
+ UV = np.array([SMPL_subdiv["U_subdiv"], SMPL_subdiv["V_subdiv"]]).squeeze()
+ ClosestVertInds = np.arange(UV.shape[1]) + 1
+ self.Part_UVs = []
+ self.Part_ClosestVertInds = []
+ for i in np.arange(24):
+ self.Part_UVs.append(UV[:, SMPL_subdiv["Part_ID_subdiv"].squeeze() == (i + 1)])
+ self.Part_ClosestVertInds.append(
+ ClosestVertInds[SMPL_subdiv["Part_ID_subdiv"].squeeze() == (i + 1)]
+ )
+
+ with open(pdist_matrix_fpath, "rb") as hFile:
+ arrays = pickle.load(hFile, encoding="latin1")
+ self.Pdist_matrix = arrays["Pdist_matrix"]
+ self.Part_ids = np.array(SMPL_subdiv["Part_ID_subdiv"].squeeze())
+ # Mean geodesic distances for parts.
+ self.Mean_Distances = np.array([0, 0.351, 0.107, 0.126, 0.237, 0.173, 0.142, 0.128, 0.150])
+ # Coarse Part labels.
+ self.CoarseParts = np.array(
+ [0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8]
+ )
+
+ def _prepare(self):
+ """
+ Prepare ._gts and ._dts for evaluation based on params
+ :return: None
+ """
+
+ def _toMask(anns, coco):
+ # modify ann['segmentation'] by reference
+ for ann in anns:
+ # safeguard for invalid segmentation annotation;
+ # annotations containing empty lists exist in the posetrack
+ # dataset. This is not a correct segmentation annotation
+ # in terms of COCO format; we need to deal with it somehow
+ segm = ann["segmentation"]
+ if type(segm) is list and len(segm) == 0:
+ ann["segmentation"] = None
+ continue
+ rle = coco.annToRLE(ann)
+ ann["segmentation"] = rle
+
+ def _getIgnoreRegion(iid, coco):
+ img = coco.imgs[iid]
+
+ if "ignore_regions_x" not in img.keys():
+ return None
+
+ if len(img["ignore_regions_x"]) == 0:
+ return None
+
+ rgns_merged = [
+ [v for xy in zip(region_x, region_y) for v in xy]
+ for region_x, region_y in zip(img["ignore_regions_x"], img["ignore_regions_y"])
+ ]
+ rles = maskUtils.frPyObjects(rgns_merged, img["height"], img["width"])
+ rle = maskUtils.merge(rles)
+ return maskUtils.decode(rle)
+
+ def _checkIgnore(dt, iregion):
+ if iregion is None:
+ return True
+
+ bb = np.array(dt["bbox"]).astype(int)
+ x1, y1, x2, y2 = bb[0], bb[1], bb[0] + bb[2], bb[1] + bb[3]
+ x2 = min([x2, iregion.shape[1]])
+ y2 = min([y2, iregion.shape[0]])
+
+ if bb[2] * bb[3] == 0:
+ return False
+
+ crop_iregion = iregion[y1:y2, x1:x2]
+
+ if crop_iregion.sum() == 0:
+ return True
+
+ if "densepose" not in dt.keys(): # filtering boxes
+ return crop_iregion.sum() / bb[2] / bb[3] < self.ignoreThrBB
+
+ # filtering UVs
+ ignoremask = np.require(crop_iregion, requirements=["F"])
+ mask = self._extract_mask(dt)
+ uvmask = np.require(np.asarray(mask > 0), dtype=np.uint8, requirements=["F"])
+ uvmask_ = maskUtils.encode(uvmask)
+ ignoremask_ = maskUtils.encode(ignoremask)
+ uviou = maskUtils.iou([uvmask_], [ignoremask_], [1])[0]
+ return uviou < self.ignoreThrUV
+
+ p = self.params
+
+ if p.useCats:
+ gts = self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds))
+ dts = self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds))
+ else:
+ gts = self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds))
+ dts = self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds))
+
+ imns = self.cocoGt.loadImgs(p.imgIds)
+ self.size_mapping = {}
+ for im in imns:
+ self.size_mapping[im["id"]] = [im["height"], im["width"]]
+
+        # if iouType == 'densepose', add point gt annotations
+ if p.iouType == "densepose":
+ self._loadGEval()
+
+ # convert ground truth to mask if iouType == 'segm'
+ if p.iouType == "segm":
+ _toMask(gts, self.cocoGt)
+ _toMask(dts, self.cocoDt)
+
+ # set ignore flag
+ for gt in gts:
+ gt["ignore"] = gt["ignore"] if "ignore" in gt else 0
+ gt["ignore"] = "iscrowd" in gt and gt["iscrowd"]
+ if p.iouType == "keypoints":
+ gt["ignore"] = (gt["num_keypoints"] == 0) or gt["ignore"]
+ if p.iouType == "densepose":
+ gt["ignore"] = ("dp_x" in gt) == 0
+ if p.iouType == "segm":
+ gt["ignore"] = gt["segmentation"] is None
+
+ self._gts = defaultdict(list) # gt for evaluation
+ self._dts = defaultdict(list) # dt for evaluation
+ self._igrgns = defaultdict(list)
+
+ for gt in gts:
+ iid = gt["image_id"]
+ if iid not in self._igrgns.keys():
+ self._igrgns[iid] = _getIgnoreRegion(iid, self.cocoGt)
+ if _checkIgnore(gt, self._igrgns[iid]):
+ self._gts[iid, gt["category_id"]].append(gt)
+ for dt in dts:
+ iid = dt["image_id"]
+ if (iid not in self._igrgns) or _checkIgnore(dt, self._igrgns[iid]):
+ self._dts[iid, dt["category_id"]].append(dt)
+
+ self.evalImgs = defaultdict(list) # per-image per-category evaluation results
+ self.eval = {} # accumulated evaluation results
+
+ def evaluate(self):
+ """
+ Run per image evaluation on given images and store results (a list of dict) in self.evalImgs
+ :return: None
+ """
+ tic = time.time()
+ logger.info("Running per image DensePose evaluation... {}".format(self.params.iouType))
+ p = self.params
+ # add backward compatibility if useSegm is specified in params
+ if p.useSegm is not None:
+ p.iouType = "segm" if p.useSegm == 1 else "bbox"
+ logger.info("useSegm (deprecated) is not None. Running DensePose evaluation")
+ p.imgIds = list(np.unique(p.imgIds))
+ if p.useCats:
+ p.catIds = list(np.unique(p.catIds))
+ p.maxDets = sorted(p.maxDets)
+ self.params = p
+
+ self._prepare()
+ # loop through images, area range, max detection number
+ catIds = p.catIds if p.useCats else [-1]
+
+ if p.iouType in ["segm", "bbox"]:
+ computeIoU = self.computeIoU
+ elif p.iouType == "keypoints":
+ computeIoU = self.computeOks
+ elif p.iouType == "densepose":
+ computeIoU = self.computeOgps
+ if self._dpEvalMode in {DensePoseEvalMode.GPSM, DensePoseEvalMode.IOU}:
+ self.real_ious = {
+ (imgId, catId): self.computeDPIoU(imgId, catId)
+ for imgId in p.imgIds
+ for catId in catIds
+ }
+
+ self.ious = {
+ (imgId, catId): computeIoU(imgId, catId) for imgId in p.imgIds for catId in catIds
+ }
+
+ evaluateImg = self.evaluateImg
+ maxDet = p.maxDets[-1]
+ self.evalImgs = [
+ evaluateImg(imgId, catId, areaRng, maxDet)
+ for catId in catIds
+ for areaRng in p.areaRng
+ for imgId in p.imgIds
+ ]
+ self._paramsEval = copy.deepcopy(self.params)
+ toc = time.time()
+ logger.info("DensePose evaluation DONE (t={:0.2f}s).".format(toc - tic))
+
+ def getDensePoseMask(self, polys):
+ maskGen = np.zeros([256, 256])
+ stop = min(len(polys) + 1, 15)
+ for i in range(1, stop):
+ if polys[i - 1]:
+ currentMask = maskUtils.decode(polys[i - 1])
+ maskGen[currentMask > 0] = i
+ return maskGen
+
+ def _generate_rlemask_on_image(self, mask, imgId, data):
+ bbox_xywh = np.array(data["bbox"])
+ x, y, w, h = bbox_xywh
+ im_h, im_w = self.size_mapping[imgId]
+ im_mask = np.zeros((im_h, im_w), dtype=np.uint8)
+ if mask is not None:
+ x0 = max(int(x), 0)
+ x1 = min(int(x + w), im_w, int(x) + mask.shape[1])
+ y0 = max(int(y), 0)
+ y1 = min(int(y + h), im_h, int(y) + mask.shape[0])
+ y = int(y)
+ x = int(x)
+ im_mask[y0:y1, x0:x1] = mask[y0 - y : y1 - y, x0 - x : x1 - x]
+ im_mask = np.require(np.asarray(im_mask > 0), dtype=np.uint8, requirements=["F"])
+ rle_mask = maskUtils.encode(np.array(im_mask[:, :, np.newaxis], order="F"))[0]
+ return rle_mask
+
+ def computeDPIoU(self, imgId, catId):
+ p = self.params
+ if p.useCats:
+ gt = self._gts[imgId, catId]
+ dt = self._dts[imgId, catId]
+ else:
+ gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
+ dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
+ if len(gt) == 0 and len(dt) == 0:
+ return []
+ inds = np.argsort([-d["score"] for d in dt], kind="mergesort")
+ dt = [dt[i] for i in inds]
+ if len(dt) > p.maxDets[-1]:
+ dt = dt[0 : p.maxDets[-1]]
+
+ gtmasks = []
+ for g in gt:
+ if DensePoseDataRelative.S_KEY in g:
+ # convert DensePose mask to a binary mask
+ mask = np.minimum(self.getDensePoseMask(g[DensePoseDataRelative.S_KEY]), 1.0)
+ _, _, w, h = g["bbox"]
+ scale_x = float(max(w, 1)) / mask.shape[1]
+ scale_y = float(max(h, 1)) / mask.shape[0]
+ mask = spzoom(mask, (scale_y, scale_x), order=1, prefilter=False)
+ mask = np.array(mask > 0.5, dtype=np.uint8)
+ rle_mask = self._generate_rlemask_on_image(mask, imgId, g)
+ elif "segmentation" in g:
+ segmentation = g["segmentation"]
+ if isinstance(segmentation, list) and segmentation:
+ # polygons
+ im_h, im_w = self.size_mapping[imgId]
+ rles = maskUtils.frPyObjects(segmentation, im_h, im_w)
+ rle_mask = maskUtils.merge(rles)
+ elif isinstance(segmentation, dict):
+ if isinstance(segmentation["counts"], list):
+ # uncompressed RLE
+ im_h, im_w = self.size_mapping[imgId]
+ rle_mask = maskUtils.frPyObjects(segmentation, im_h, im_w)
+ else:
+ # compressed RLE
+ rle_mask = segmentation
+ else:
+ rle_mask = self._generate_rlemask_on_image(None, imgId, g)
+ else:
+ rle_mask = self._generate_rlemask_on_image(None, imgId, g)
+ gtmasks.append(rle_mask)
+
+ dtmasks = []
+ for d in dt:
+ mask = self._extract_mask(d)
+ mask = np.require(np.asarray(mask > 0), dtype=np.uint8, requirements=["F"])
+ rle_mask = self._generate_rlemask_on_image(mask, imgId, d)
+ dtmasks.append(rle_mask)
+
+ # compute iou between each dt and gt region
+ iscrowd = [int(o.get("iscrowd", 0)) for o in gt]
+ iousDP = maskUtils.iou(dtmasks, gtmasks, iscrowd)
+ return iousDP
+
+ def computeIoU(self, imgId, catId):
+ p = self.params
+ if p.useCats:
+ gt = self._gts[imgId, catId]
+ dt = self._dts[imgId, catId]
+ else:
+ gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
+ dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
+ if len(gt) == 0 and len(dt) == 0:
+ return []
+ inds = np.argsort([-d["score"] for d in dt], kind="mergesort")
+ dt = [dt[i] for i in inds]
+ if len(dt) > p.maxDets[-1]:
+ dt = dt[0 : p.maxDets[-1]]
+
+ if p.iouType == "segm":
+ g = [g["segmentation"] for g in gt if g["segmentation"] is not None]
+ d = [d["segmentation"] for d in dt if d["segmentation"] is not None]
+ elif p.iouType == "bbox":
+ g = [g["bbox"] for g in gt]
+ d = [d["bbox"] for d in dt]
+ else:
+ raise Exception("unknown iouType for iou computation")
+
+ # compute iou between each dt and gt region
+ iscrowd = [int(o.get("iscrowd", 0)) for o in gt]
+ ious = maskUtils.iou(d, g, iscrowd)
+ return ious
+
+ def computeOks(self, imgId, catId):
+ p = self.params
+ # dimension here should be Nxm
+ gts = self._gts[imgId, catId]
+ dts = self._dts[imgId, catId]
+ inds = np.argsort([-d["score"] for d in dts], kind="mergesort")
+ dts = [dts[i] for i in inds]
+ if len(dts) > p.maxDets[-1]:
+ dts = dts[0 : p.maxDets[-1]]
+ # if len(gts) == 0 and len(dts) == 0:
+ if len(gts) == 0 or len(dts) == 0:
+ return []
+ ious = np.zeros((len(dts), len(gts)))
+        sigmas = np.array(
+            [0.26, 0.25, 0.25, 0.35, 0.35, 0.79, 0.79, 0.72, 0.72,
+             0.62, 0.62, 1.07, 1.07, 0.87, 0.87, 0.89, 0.89]
+        ) / 10.0
+ vars = (sigmas * 2) ** 2
+ k = len(sigmas)
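+        # OKS for a (dt, gt) pair is the mean over annotated keypoints of
+        # exp(-d_i^2 / (2 * s^2 * kappa_i^2)), with s^2 the gt area and
+        # kappa_i = 2 * sigma_i, hence vars = (sigmas * 2) ** 2 above.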
+ # compute oks between each detection and ground truth object
+ for j, gt in enumerate(gts):
+ # create bounds for ignore regions(double the gt bbox)
+ g = np.array(gt["keypoints"])
+ xg = g[0::3]
+ yg = g[1::3]
+ vg = g[2::3]
+ k1 = np.count_nonzero(vg > 0)
+ bb = gt["bbox"]
+ x0 = bb[0] - bb[2]
+ x1 = bb[0] + bb[2] * 2
+ y0 = bb[1] - bb[3]
+ y1 = bb[1] + bb[3] * 2
+ for i, dt in enumerate(dts):
+ d = np.array(dt["keypoints"])
+ xd = d[0::3]
+ yd = d[1::3]
+ if k1 > 0:
+ # measure the per-keypoint distance if keypoints visible
+ dx = xd - xg
+ dy = yd - yg
+ else:
+ # measure minimum distance to keypoints in (x0,y0) & (x1,y1)
+ z = np.zeros(k)
+ dx = np.max((z, x0 - xd), axis=0) + np.max((z, xd - x1), axis=0)
+ dy = np.max((z, y0 - yd), axis=0) + np.max((z, yd - y1), axis=0)
+ e = (dx**2 + dy**2) / vars / (gt["area"] + np.spacing(1)) / 2
+ if k1 > 0:
+ e = e[vg > 0]
+ ious[i, j] = np.sum(np.exp(-e)) / e.shape[0]
+ return ious
+
+ def _extract_mask(self, dt: Dict[str, Any]) -> np.ndarray:
+ if "densepose" in dt:
+ densepose_results_quantized = dt["densepose"]
+ return densepose_results_quantized.labels_uv_uint8[0].numpy()
+ elif "cse_mask" in dt:
+ return dt["cse_mask"]
+ elif "coarse_segm" in dt:
+ dy = max(int(dt["bbox"][3]), 1)
+ dx = max(int(dt["bbox"][2]), 1)
+ return (
+ F.interpolate(
+ dt["coarse_segm"].unsqueeze(0),
+ (dy, dx),
+ mode="bilinear",
+ align_corners=False,
+ )
+ .squeeze(0)
+ .argmax(0)
+ .numpy()
+ .astype(np.uint8)
+ )
+ elif "record_id" in dt:
+ assert (
+ self.multi_storage is not None
+ ), f"Storage record id encountered in a detection {dt}, but no storage provided!"
+ record = self.multi_storage.get(dt["rank"], dt["record_id"])
+ coarse_segm = record["coarse_segm"]
+ dy = max(int(dt["bbox"][3]), 1)
+ dx = max(int(dt["bbox"][2]), 1)
+ return (
+ F.interpolate(
+ coarse_segm.unsqueeze(0),
+ (dy, dx),
+ mode="bilinear",
+ align_corners=False,
+ )
+ .squeeze(0)
+ .argmax(0)
+ .numpy()
+ .astype(np.uint8)
+ )
+ else:
+ raise Exception(f"No mask data in the detection: {dt}")
+ raise ValueError('The prediction dict needs to contain either "densepose" or "cse_mask"')
+
+ def _extract_iuv(
+ self, densepose_data: np.ndarray, py: np.ndarray, px: np.ndarray, gt: Dict[str, Any]
+ ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+ """
+ Extract arrays of I, U and V values at given points as numpy arrays
+ given the data mode stored in self._dpDataMode
+ """
+ if self._dpDataMode == DensePoseDataMode.IUV_DT:
+ # estimated labels and UV (default)
+ ipoints = densepose_data[0, py, px]
+ upoints = densepose_data[1, py, px] / 255.0 # convert from uint8 by /255.
+ vpoints = densepose_data[2, py, px] / 255.0
+ elif self._dpDataMode == DensePoseDataMode.IUV_GT:
+ # ground truth
+ ipoints = np.array(gt["dp_I"])
+ upoints = np.array(gt["dp_U"])
+ vpoints = np.array(gt["dp_V"])
+ elif self._dpDataMode == DensePoseDataMode.I_GT_UV_0:
+ # ground truth labels, UV = 0
+ ipoints = np.array(gt["dp_I"])
+            upoints = np.zeros_like(ipoints, dtype=float)
+            vpoints = np.zeros_like(ipoints, dtype=float)
+ elif self._dpDataMode == DensePoseDataMode.I_GT_UV_DT:
+ # ground truth labels, estimated UV
+ ipoints = np.array(gt["dp_I"])
+ upoints = densepose_data[1, py, px] / 255.0 # convert from uint8 by /255.
+ vpoints = densepose_data[2, py, px] / 255.0
+ elif self._dpDataMode == DensePoseDataMode.I_DT_UV_0:
+ # estimated labels, UV = 0
+ ipoints = densepose_data[0, py, px]
+            upoints = np.zeros_like(ipoints, dtype=float)
+            vpoints = np.zeros_like(ipoints, dtype=float)
+ else:
+ raise ValueError(f"Unknown data mode: {self._dpDataMode}")
+ return ipoints, upoints, vpoints
+
+ def computeOgps_single_pair(self, dt, gt, py, px, pt_mask):
+ if "densepose" in dt:
+ ipoints, upoints, vpoints = self.extract_iuv_from_quantized(dt, gt, py, px, pt_mask)
+ return self.computeOgps_single_pair_iuv(dt, gt, ipoints, upoints, vpoints)
+ elif "u" in dt:
+ ipoints, upoints, vpoints = self.extract_iuv_from_raw(dt, gt, py, px, pt_mask)
+ return self.computeOgps_single_pair_iuv(dt, gt, ipoints, upoints, vpoints)
+ elif "record_id" in dt:
+ assert (
+ self.multi_storage is not None
+ ), f"Storage record id encountered in detection {dt}, but no storage provided!"
+ record = self.multi_storage.get(dt["rank"], dt["record_id"])
+ record["bbox"] = dt["bbox"]
+ if "u" in record:
+ ipoints, upoints, vpoints = self.extract_iuv_from_raw(record, gt, py, px, pt_mask)
+ return self.computeOgps_single_pair_iuv(dt, gt, ipoints, upoints, vpoints)
+ elif "embedding" in record:
+ return self.computeOgps_single_pair_cse(
+ dt,
+ gt,
+ py,
+ px,
+ pt_mask,
+ record["coarse_segm"],
+ record["embedding"],
+ record["bbox"],
+ )
+ else:
+ raise Exception(f"Unknown record format: {record}")
+ elif "embedding" in dt:
+ return self.computeOgps_single_pair_cse(
+ dt, gt, py, px, pt_mask, dt["coarse_segm"], dt["embedding"], dt["bbox"]
+ )
+ raise Exception(f"Unknown detection format: {dt}")
+
+ def extract_iuv_from_quantized(self, dt, gt, py, px, pt_mask):
+ densepose_results_quantized = dt["densepose"]
+ ipoints, upoints, vpoints = self._extract_iuv(
+ densepose_results_quantized.labels_uv_uint8.numpy(), py, px, gt
+ )
+ ipoints[pt_mask == -1] = 0
+ return ipoints, upoints, vpoints
+
+ def extract_iuv_from_raw(self, dt, gt, py, px, pt_mask):
+ labels_dt = resample_fine_and_coarse_segm_tensors_to_bbox(
+ dt["fine_segm"].unsqueeze(0),
+ dt["coarse_segm"].unsqueeze(0),
+ dt["bbox"],
+ )
+ uv = resample_uv_tensors_to_bbox(
+ dt["u"].unsqueeze(0), dt["v"].unsqueeze(0), labels_dt.squeeze(0), dt["bbox"]
+ )
+ labels_uv_uint8 = torch.cat((labels_dt.byte(), (uv * 255).clamp(0, 255).byte()))
+ ipoints, upoints, vpoints = self._extract_iuv(labels_uv_uint8.numpy(), py, px, gt)
+ ipoints[pt_mask == -1] = 0
+ return ipoints, upoints, vpoints
+
+ def computeOgps_single_pair_iuv(self, dt, gt, ipoints, upoints, vpoints):
+ cVertsGT, ClosestVertsGTTransformed = self.findAllClosestVertsGT(gt)
+ cVerts = self.findAllClosestVertsUV(upoints, vpoints, ipoints)
+ # Get pairwise geodesic distances between gt and estimated mesh points.
+ dist = self.getDistancesUV(ClosestVertsGTTransformed, cVerts)
+ # Compute the Ogps measure.
+ # Find the mean geodesic normalization distance for
+ # each GT point, based on which part it is on.
+ Current_Mean_Distances = self.Mean_Distances[
+ self.CoarseParts[self.Part_ids[cVertsGT[cVertsGT > 0].astype(int) - 1]]
+ ]
+ return dist, Current_Mean_Distances
+
+ def computeOgps_single_pair_cse(
+ self, dt, gt, py, px, pt_mask, coarse_segm, embedding, bbox_xywh_abs
+ ):
+ # 0-based mesh vertex indices
+ cVertsGT = torch.as_tensor(gt["dp_vertex"], dtype=torch.int64)
+ # label for each pixel of the bbox, [H, W] tensor of long
+ labels_dt = resample_coarse_segm_tensor_to_bbox(
+ coarse_segm.unsqueeze(0), bbox_xywh_abs
+ ).squeeze(0)
+ x, y, w, h = bbox_xywh_abs
+ # embedding for each pixel of the bbox, [D, H, W] tensor of float32
+ embedding = F.interpolate(
+ embedding.unsqueeze(0), (int(h), int(w)), mode="bilinear", align_corners=False
+ ).squeeze(0)
+ # valid locations py, px
+ py_pt = torch.from_numpy(py[pt_mask > -1])
+ px_pt = torch.from_numpy(px[pt_mask > -1])
+ cVerts = torch.ones_like(cVertsGT) * -1
+ cVerts[pt_mask > -1] = self.findClosestVertsCse(
+ embedding, py_pt, px_pt, labels_dt, gt["ref_model"]
+ )
+ # Get pairwise geodesic distances between gt and estimated mesh points.
+ dist = self.getDistancesCse(cVertsGT, cVerts, gt["ref_model"])
+ # normalize distances
+ if (gt["ref_model"] == "smpl_27554") and ("dp_I" in gt):
+ Current_Mean_Distances = self.Mean_Distances[
+ self.CoarseParts[np.array(gt["dp_I"], dtype=int)]
+ ]
+ else:
+ Current_Mean_Distances = 0.255
+ return dist, Current_Mean_Distances
+
+ def computeOgps(self, imgId, catId):
+ p = self.params
+ # dimension here should be Nxm
+ g = self._gts[imgId, catId]
+ d = self._dts[imgId, catId]
+ inds = np.argsort([-d_["score"] for d_ in d], kind="mergesort")
+ d = [d[i] for i in inds]
+ if len(d) > p.maxDets[-1]:
+ d = d[0 : p.maxDets[-1]]
+ # if len(gts) == 0 and len(dts) == 0:
+ if len(g) == 0 or len(d) == 0:
+ return []
+ ious = np.zeros((len(d), len(g)))
+        # compute ogps between each detection and ground truth object
+ # sigma = self.sigma #0.255 # dist = 0.3m corresponds to ogps = 0.5
+ # 1 # dist = 0.3m corresponds to ogps = 0.96
+ # 1.45 # dist = 1.7m (person height) corresponds to ogps = 0.5)
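+        # GPS for a (dt, gt) pair is computed below as the mean over annotated
+        # gt points of exp(-g_i^2 / (2 * kappa_i^2)), where g_i is the geodesic
+        # distance between matched mesh vertices and kappa_i the per-part
+        # normalization distance returned by computeOgps_single_pair.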
+ for j, gt in enumerate(g):
+ if not gt["ignore"]:
+ g_ = gt["bbox"]
+ for i, dt in enumerate(d):
+ #
+ dy = int(dt["bbox"][3])
+ dx = int(dt["bbox"][2])
+ dp_x = np.array(gt["dp_x"]) * g_[2] / 255.0
+ dp_y = np.array(gt["dp_y"]) * g_[3] / 255.0
+ py = (dp_y + g_[1] - dt["bbox"][1]).astype(int)
+ px = (dp_x + g_[0] - dt["bbox"][0]).astype(int)
+ #
+ pts = np.zeros(len(px))
+ pts[px >= dx] = -1
+ pts[py >= dy] = -1
+ pts[px < 0] = -1
+ pts[py < 0] = -1
+ if len(pts) < 1:
+ ogps = 0.0
+ elif np.max(pts) == -1:
+ ogps = 0.0
+ else:
+ px[pts == -1] = 0
+ py[pts == -1] = 0
+ dists_between_matches, dist_norm_coeffs = self.computeOgps_single_pair(
+ dt, gt, py, px, pts
+ )
+ # Compute gps
+ ogps_values = np.exp(
+ -(dists_between_matches**2) / (2 * (dist_norm_coeffs**2))
+ )
+ #
+ ogps = np.mean(ogps_values) if len(ogps_values) > 0 else 0.0
+ ious[i, j] = ogps
+
+ gbb = [gt["bbox"] for gt in g]
+ dbb = [dt["bbox"] for dt in d]
+
+ # compute iou between each dt and gt region
+ iscrowd = [int(o.get("iscrowd", 0)) for o in g]
+ ious_bb = maskUtils.iou(dbb, gbb, iscrowd)
+ return ious, ious_bb
+
+ def evaluateImg(self, imgId, catId, aRng, maxDet):
+ """
+ perform evaluation for single category and image
+ :return: dict (single image results)
+ """
+
+ p = self.params
+ if p.useCats:
+ gt = self._gts[imgId, catId]
+ dt = self._dts[imgId, catId]
+ else:
+ gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
+ dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
+ if len(gt) == 0 and len(dt) == 0:
+ return None
+
+ for g in gt:
+ # g['_ignore'] = g['ignore']
+ if g["ignore"] or (g["area"] < aRng[0] or g["area"] > aRng[1]):
+ g["_ignore"] = True
+ else:
+ g["_ignore"] = False
+
+ # sort dt highest score first, sort gt ignore last
+ gtind = np.argsort([g["_ignore"] for g in gt], kind="mergesort")
+ gt = [gt[i] for i in gtind]
+ dtind = np.argsort([-d["score"] for d in dt], kind="mergesort")
+ dt = [dt[i] for i in dtind[0:maxDet]]
+ iscrowd = [int(o.get("iscrowd", 0)) for o in gt]
+ # load computed ious
+ if p.iouType == "densepose":
+ # print('Checking the length', len(self.ious[imgId, catId]))
+ # if len(self.ious[imgId, catId]) == 0:
+ # print(self.ious[imgId, catId])
+ ious = (
+ self.ious[imgId, catId][0][:, gtind]
+ if len(self.ious[imgId, catId]) > 0
+ else self.ious[imgId, catId]
+ )
+ ioubs = (
+ self.ious[imgId, catId][1][:, gtind]
+ if len(self.ious[imgId, catId]) > 0
+ else self.ious[imgId, catId]
+ )
+ if self._dpEvalMode in {DensePoseEvalMode.GPSM, DensePoseEvalMode.IOU}:
+ iousM = (
+ self.real_ious[imgId, catId][:, gtind]
+ if len(self.real_ious[imgId, catId]) > 0
+ else self.real_ious[imgId, catId]
+ )
+ else:
+ ious = (
+ self.ious[imgId, catId][:, gtind]
+ if len(self.ious[imgId, catId]) > 0
+ else self.ious[imgId, catId]
+ )
+
+ T = len(p.iouThrs)
+ G = len(gt)
+ D = len(dt)
+ gtm = np.zeros((T, G))
+ dtm = np.zeros((T, D))
+ gtIg = np.array([g["_ignore"] for g in gt])
+ dtIg = np.zeros((T, D))
+ if np.all(gtIg) and p.iouType == "densepose":
+ dtIg = np.logical_or(dtIg, True)
+
+ if len(ious) > 0: # and not p.iouType == 'densepose':
+ for tind, t in enumerate(p.iouThrs):
+ for dind, d in enumerate(dt):
+ # information about best match so far (m=-1 -> unmatched)
+ iou = min([t, 1 - 1e-10])
+ m = -1
+ for gind, _g in enumerate(gt):
+ # if this gt already matched, and not a crowd, continue
+ if gtm[tind, gind] > 0 and not iscrowd[gind]:
+ continue
+ # if dt matched to reg gt, and on ignore gt, stop
+ if m > -1 and gtIg[m] == 0 and gtIg[gind] == 1:
+ break
+ if p.iouType == "densepose":
+ if self._dpEvalMode == DensePoseEvalMode.GPSM:
+ new_iou = np.sqrt(iousM[dind, gind] * ious[dind, gind])
+ elif self._dpEvalMode == DensePoseEvalMode.IOU:
+ new_iou = iousM[dind, gind]
+ elif self._dpEvalMode == DensePoseEvalMode.GPS:
+ new_iou = ious[dind, gind]
+ else:
+ new_iou = ious[dind, gind]
+ if new_iou < iou:
+ continue
+ if new_iou == 0.0:
+ continue
+ # if match successful and best so far, store appropriately
+ iou = new_iou
+ m = gind
+ # if match made store id of match for both dt and gt
+ if m == -1:
+ continue
+ dtIg[tind, dind] = gtIg[m]
+ dtm[tind, dind] = gt[m]["id"]
+ gtm[tind, m] = d["id"]
+
+ if p.iouType == "densepose":
+ if not len(ioubs) == 0:
+ for dind, d in enumerate(dt):
+ # information about best match so far (m=-1 -> unmatched)
+ if dtm[tind, dind] == 0:
+ ioub = 0.8
+ m = -1
+ for gind, _g in enumerate(gt):
+ # if this gt already matched, and not a crowd, continue
+ if gtm[tind, gind] > 0 and not iscrowd[gind]:
+ continue
+ # continue to next gt unless better match made
+ if ioubs[dind, gind] < ioub:
+ continue
+ # if match successful and best so far, store appropriately
+ ioub = ioubs[dind, gind]
+ m = gind
+ # if match made store id of match for both dt and gt
+ if m > -1:
+ dtIg[:, dind] = gtIg[m]
+ if gtIg[m]:
+ dtm[tind, dind] = gt[m]["id"]
+ gtm[tind, m] = d["id"]
+ # set unmatched detections outside of area range to ignore
+ a = np.array([d["area"] < aRng[0] or d["area"] > aRng[1] for d in dt]).reshape((1, len(dt)))
+ dtIg = np.logical_or(dtIg, np.logical_and(dtm == 0, np.repeat(a, T, 0)))
+ # store results for given image and category
+ # print('Done with the function', len(self.ious[imgId, catId]))
+ return {
+ "image_id": imgId,
+ "category_id": catId,
+ "aRng": aRng,
+ "maxDet": maxDet,
+ "dtIds": [d["id"] for d in dt],
+ "gtIds": [g["id"] for g in gt],
+ "dtMatches": dtm,
+ "gtMatches": gtm,
+ "dtScores": [d["score"] for d in dt],
+ "gtIgnore": gtIg,
+ "dtIgnore": dtIg,
+ }
+
+ def accumulate(self, p=None):
+ """
+ Accumulate per image evaluation results and store the result in self.eval
+ :param p: input params for evaluation
+ :return: None
+ """
+ logger.info("Accumulating evaluation results...")
+ tic = time.time()
+ if not self.evalImgs:
+ logger.info("Please run evaluate() first")
+ # allows input customized parameters
+ if p is None:
+ p = self.params
+ p.catIds = p.catIds if p.useCats == 1 else [-1]
+ T = len(p.iouThrs)
+ R = len(p.recThrs)
+ K = len(p.catIds) if p.useCats else 1
+ A = len(p.areaRng)
+ M = len(p.maxDets)
+ precision = -(np.ones((T, R, K, A, M))) # -1 for the precision of absent categories
+ recall = -(np.ones((T, K, A, M)))
+
+ # create dictionary for future indexing
+ logger.info("Categories: {}".format(p.catIds))
+ _pe = self._paramsEval
+ catIds = _pe.catIds if _pe.useCats else [-1]
+ setK = set(catIds)
+ setA = set(map(tuple, _pe.areaRng))
+ setM = set(_pe.maxDets)
+ setI = set(_pe.imgIds)
+ # get inds to evaluate
+ k_list = [n for n, k in enumerate(p.catIds) if k in setK]
+ m_list = [m for n, m in enumerate(p.maxDets) if m in setM]
+ a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA]
+ i_list = [n for n, i in enumerate(p.imgIds) if i in setI]
+ I0 = len(_pe.imgIds)
+ A0 = len(_pe.areaRng)
+ # retrieve E at each category, area range, and max number of detections
+ for k, k0 in enumerate(k_list):
+ Nk = k0 * A0 * I0
+ for a, a0 in enumerate(a_list):
+ Na = a0 * I0
+ for m, maxDet in enumerate(m_list):
+ E = [self.evalImgs[Nk + Na + i] for i in i_list]
+ E = [e for e in E if e is not None]
+ if len(E) == 0:
+ continue
+ dtScores = np.concatenate([e["dtScores"][0:maxDet] for e in E])
+
+                    # different sorting methods generate slightly different results;
+                    # mergesort is used to be consistent with the Matlab implementation.
+ inds = np.argsort(-dtScores, kind="mergesort")
+
+ dtm = np.concatenate([e["dtMatches"][:, 0:maxDet] for e in E], axis=1)[:, inds]
+ dtIg = np.concatenate([e["dtIgnore"][:, 0:maxDet] for e in E], axis=1)[:, inds]
+ gtIg = np.concatenate([e["gtIgnore"] for e in E])
+ npig = np.count_nonzero(gtIg == 0)
+ if npig == 0:
+ continue
+ tps = np.logical_and(dtm, np.logical_not(dtIg))
+ fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg))
+ tp_sum = np.cumsum(tps, axis=1).astype(dtype=float)
+ fp_sum = np.cumsum(fps, axis=1).astype(dtype=float)
+ for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
+ tp = np.array(tp)
+ fp = np.array(fp)
+ nd = len(tp)
+ rc = tp / npig
+ pr = tp / (fp + tp + np.spacing(1))
+ q = np.zeros((R,))
+
+ if nd:
+ recall[t, k, a, m] = rc[-1]
+ else:
+ recall[t, k, a, m] = 0
+
+                        # numpy is slow without cython optimization when accessing elements;
+                        # using python lists here gives a significant speed improvement
+ pr = pr.tolist()
+ q = q.tolist()
+
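+                        # precision envelope: make precision non-increasing in
+                        # recall before sampling it at the fixed recall thresholds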
+ for i in range(nd - 1, 0, -1):
+ if pr[i] > pr[i - 1]:
+ pr[i - 1] = pr[i]
+
+ inds = np.searchsorted(rc, p.recThrs, side="left")
+ try:
+ for ri, pi in enumerate(inds):
+ q[ri] = pr[pi]
+ except Exception:
+ pass
+ precision[t, :, k, a, m] = np.array(q)
+ logger.info(
+ "Final: max precision {}, min precision {}".format(np.max(precision), np.min(precision))
+ )
+ self.eval = {
+ "params": p,
+ "counts": [T, R, K, A, M],
+ "date": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+ "precision": precision,
+ "recall": recall,
+ }
+ toc = time.time()
+ logger.info("DONE (t={:0.2f}s).".format(toc - tic))
+
+ def summarize(self):
+ """
+ Compute and display summary metrics for evaluation results.
+ Note this function can *only* be applied on the default parameter setting
+ """
+
+ def _summarize(ap=1, iouThr=None, areaRng="all", maxDets=100):
+ p = self.params
+ iStr = " {:<18} {} @[ {}={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}"
+ titleStr = "Average Precision" if ap == 1 else "Average Recall"
+ typeStr = "(AP)" if ap == 1 else "(AR)"
+ measure = "IoU"
+ if self.params.iouType == "keypoints":
+ measure = "OKS"
+ elif self.params.iouType == "densepose":
+ measure = "OGPS"
+ iouStr = (
+ "{:0.2f}:{:0.2f}".format(p.iouThrs[0], p.iouThrs[-1])
+ if iouThr is None
+ else "{:0.2f}".format(iouThr)
+ )
+
+ aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
+ mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]
+ if ap == 1:
+ # dimension of precision: [TxRxKxAxM]
+ s = self.eval["precision"]
+ # IoU
+ if iouThr is not None:
+ t = np.where(np.abs(iouThr - p.iouThrs) < 0.001)[0]
+ s = s[t]
+ s = s[:, :, :, aind, mind]
+ else:
+ # dimension of recall: [TxKxAxM]
+ s = self.eval["recall"]
+ if iouThr is not None:
+ t = np.where(np.abs(iouThr - p.iouThrs) < 0.001)[0]
+ s = s[t]
+ s = s[:, :, aind, mind]
+ if len(s[s > -1]) == 0:
+ mean_s = -1
+ else:
+ mean_s = np.mean(s[s > -1])
+ logger.info(iStr.format(titleStr, typeStr, measure, iouStr, areaRng, maxDets, mean_s))
+ return mean_s
+
+ def _summarizeDets():
+ stats = np.zeros((12,))
+ stats[0] = _summarize(1)
+ stats[1] = _summarize(1, iouThr=0.5, maxDets=self.params.maxDets[2])
+ stats[2] = _summarize(1, iouThr=0.75, maxDets=self.params.maxDets[2])
+ stats[3] = _summarize(1, areaRng="small", maxDets=self.params.maxDets[2])
+ stats[4] = _summarize(1, areaRng="medium", maxDets=self.params.maxDets[2])
+ stats[5] = _summarize(1, areaRng="large", maxDets=self.params.maxDets[2])
+ stats[6] = _summarize(0, maxDets=self.params.maxDets[0])
+ stats[7] = _summarize(0, maxDets=self.params.maxDets[1])
+ stats[8] = _summarize(0, maxDets=self.params.maxDets[2])
+ stats[9] = _summarize(0, areaRng="small", maxDets=self.params.maxDets[2])
+ stats[10] = _summarize(0, areaRng="medium", maxDets=self.params.maxDets[2])
+ stats[11] = _summarize(0, areaRng="large", maxDets=self.params.maxDets[2])
+ return stats
+
+ def _summarizeKps():
+ stats = np.zeros((10,))
+ stats[0] = _summarize(1, maxDets=20)
+ stats[1] = _summarize(1, maxDets=20, iouThr=0.5)
+ stats[2] = _summarize(1, maxDets=20, iouThr=0.75)
+ stats[3] = _summarize(1, maxDets=20, areaRng="medium")
+ stats[4] = _summarize(1, maxDets=20, areaRng="large")
+ stats[5] = _summarize(0, maxDets=20)
+ stats[6] = _summarize(0, maxDets=20, iouThr=0.5)
+ stats[7] = _summarize(0, maxDets=20, iouThr=0.75)
+ stats[8] = _summarize(0, maxDets=20, areaRng="medium")
+ stats[9] = _summarize(0, maxDets=20, areaRng="large")
+ return stats
+
+ def _summarizeUvs():
+ stats = [_summarize(1, maxDets=self.params.maxDets[0])]
+ min_threshold = self.params.iouThrs.min()
+ if min_threshold <= 0.201:
+ stats += [_summarize(1, maxDets=self.params.maxDets[0], iouThr=0.2)]
+ if min_threshold <= 0.301:
+ stats += [_summarize(1, maxDets=self.params.maxDets[0], iouThr=0.3)]
+ if min_threshold <= 0.401:
+ stats += [_summarize(1, maxDets=self.params.maxDets[0], iouThr=0.4)]
+ stats += [
+ _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.5),
+ _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.75),
+ _summarize(1, maxDets=self.params.maxDets[0], areaRng="medium"),
+ _summarize(1, maxDets=self.params.maxDets[0], areaRng="large"),
+ _summarize(0, maxDets=self.params.maxDets[0]),
+ _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.5),
+ _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.75),
+ _summarize(0, maxDets=self.params.maxDets[0], areaRng="medium"),
+ _summarize(0, maxDets=self.params.maxDets[0], areaRng="large"),
+ ]
+ return np.array(stats)
+
+ def _summarizeUvsOld():
+ stats = np.zeros((18,))
+ stats[0] = _summarize(1, maxDets=self.params.maxDets[0])
+ stats[1] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.5)
+ stats[2] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.55)
+ stats[3] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.60)
+ stats[4] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.65)
+ stats[5] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.70)
+ stats[6] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.75)
+ stats[7] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.80)
+ stats[8] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.85)
+ stats[9] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.90)
+ stats[10] = _summarize(1, maxDets=self.params.maxDets[0], iouThr=0.95)
+ stats[11] = _summarize(1, maxDets=self.params.maxDets[0], areaRng="medium")
+ stats[12] = _summarize(1, maxDets=self.params.maxDets[0], areaRng="large")
+ stats[13] = _summarize(0, maxDets=self.params.maxDets[0])
+ stats[14] = _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.5)
+ stats[15] = _summarize(0, maxDets=self.params.maxDets[0], iouThr=0.75)
+ stats[16] = _summarize(0, maxDets=self.params.maxDets[0], areaRng="medium")
+ stats[17] = _summarize(0, maxDets=self.params.maxDets[0], areaRng="large")
+ return stats
+
+ if not self.eval:
+ raise Exception("Please run accumulate() first")
+ iouType = self.params.iouType
+ if iouType in ["segm", "bbox"]:
+ summarize = _summarizeDets
+ elif iouType in ["keypoints"]:
+ summarize = _summarizeKps
+ elif iouType in ["densepose"]:
+ summarize = _summarizeUvs
+ self.stats = summarize()
+
+ def __str__(self):
+ # summarize() only logs the metrics; return an empty string so str() remains valid
+ self.summarize()
+ return ""
+
+ # ================ functions for dense pose ==============================
+ def findAllClosestVertsUV(self, U_points, V_points, Index_points):
+ ClosestVerts = np.ones(Index_points.shape) * -1
+ for i in np.arange(24):
+ #
+ if (i + 1) in Index_points:
+ UVs = np.array(
+ [U_points[Index_points == (i + 1)], V_points[Index_points == (i + 1)]]
+ )
+ Current_Part_UVs = self.Part_UVs[i]
+ Current_Part_ClosestVertInds = self.Part_ClosestVertInds[i]
+ D = ssd.cdist(Current_Part_UVs.transpose(), UVs.transpose()).squeeze()
+ ClosestVerts[Index_points == (i + 1)] = Current_Part_ClosestVertInds[
+ np.argmin(D, axis=0)
+ ]
+ ClosestVertsTransformed = self.PDIST_transform[ClosestVerts.astype(int) - 1]
+ ClosestVertsTransformed[ClosestVerts < 0] = 0
+ return ClosestVertsTransformed
+
+ def findClosestVertsCse(self, embedding, py, px, mask, mesh_name):
+ mesh_vertex_embeddings = self.embedder(mesh_name)
+ pixel_embeddings = embedding[:, py, px].t().to(device="cuda")
+ mask_vals = mask[py, px]
+ edm = squared_euclidean_distance_matrix(pixel_embeddings, mesh_vertex_embeddings)
+ vertex_indices = edm.argmin(dim=1).cpu()
+ vertex_indices[mask_vals <= 0] = -1
+ return vertex_indices
+
+ def findAllClosestVertsGT(self, gt):
+ #
+ I_gt = np.array(gt["dp_I"])
+ U_gt = np.array(gt["dp_U"])
+ V_gt = np.array(gt["dp_V"])
+ ClosestVertsGT = np.ones(I_gt.shape) * -1
+ for i in np.arange(24):
+ if (i + 1) in I_gt:
+ UVs = np.array([U_gt[I_gt == (i + 1)], V_gt[I_gt == (i + 1)]])
+ Current_Part_UVs = self.Part_UVs[i]
+ Current_Part_ClosestVertInds = self.Part_ClosestVertInds[i]
+ D = ssd.cdist(Current_Part_UVs.transpose(), UVs.transpose()).squeeze()
+ ClosestVertsGT[I_gt == (i + 1)] = Current_Part_ClosestVertInds[np.argmin(D, axis=0)]
+ #
+ ClosestVertsGTTransformed = self.PDIST_transform[ClosestVertsGT.astype(int) - 1]
+ ClosestVertsGTTransformed[ClosestVertsGT < 0] = 0
+ return ClosestVertsGT, ClosestVertsGTTransformed
+
+ def getDistancesCse(self, cVertsGT, cVerts, mesh_name):
+ geodists_vertices = torch.ones_like(cVertsGT) * float("inf")
+ selected = (cVertsGT >= 0) * (cVerts >= 0)
+ mesh = create_mesh(mesh_name, "cpu")
+ geodists_vertices[selected] = mesh.geodists[cVertsGT[selected], cVerts[selected]]
+ return geodists_vertices.numpy()
+
+ def getDistancesUV(self, cVertsGT, cVerts):
+ #
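+ # self.Pdist_matrix stores pairwise geodesic distances between the n mesh vertices
+ # in condensed (upper-triangular) form; the arithmetic below recovers the flat
+ # index k of the (i, j) vertex pair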
+ n = 27554
+ dists = []
+ for d in range(len(cVertsGT)):
+ if cVertsGT[d] > 0:
+ if cVerts[d] > 0:
+ i = cVertsGT[d] - 1
+ j = cVerts[d] - 1
+ if j == i:
+ dists.append(0)
+ elif j > i:
+ ccc = i
+ i = j
+ j = ccc
+ i = n - i - 1
+ j = n - j - 1
+ k = (n * (n - 1) / 2) - (n - i) * ((n - i) - 1) / 2 + j - i - 1
+ k = (n * n - n) / 2 - k - 1
+ dists.append(self.Pdist_matrix[int(k)][0])
+ else:
+ i = n - i - 1
+ j = n - j - 1
+ k = (n * (n - 1) / 2) - (n - i) * ((n - i) - 1) / 2 + j - i - 1
+ k = (n * n - n) / 2 - k - 1
+ dists.append(self.Pdist_matrix[int(k)][0])
+ else:
+ dists.append(np.inf)
+ return np.atleast_1d(np.array(dists).squeeze())
+
+
+class Params:
+ """
+ Parameters for the COCO evaluation API
+ """
+
+ def setDetParams(self):
+ self.imgIds = []
+ self.catIds = []
+ # np.arange causes trouble: the values it generates can be slightly larger than the
+ # intended endpoints, so np.linspace is used instead
+ self.iouThrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True)
+ self.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True)
+ self.maxDets = [1, 10, 100]
+ self.areaRng = [
+ [0**2, 1e5**2],
+ [0**2, 32**2],
+ [32**2, 96**2],
+ [96**2, 1e5**2],
+ ]
+ self.areaRngLbl = ["all", "small", "medium", "large"]
+ self.useCats = 1
+
+ def setKpParams(self):
+ self.imgIds = []
+ self.catIds = []
+ # np.arange causes trouble: the values it generates can be slightly larger than the
+ # intended endpoints, so np.linspace is used instead
+ self.iouThrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True)
+ self.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True)
+ self.maxDets = [20]
+ self.areaRng = [[0**2, 1e5**2], [32**2, 96**2], [96**2, 1e5**2]]
+ self.areaRngLbl = ["all", "medium", "large"]
+ self.useCats = 1
+
+ def setUvParams(self):
+ self.imgIds = []
+ self.catIds = []
+ self.iouThrs = np.linspace(0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True)
+ self.recThrs = np.linspace(0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True)
+ self.maxDets = [20]
+ self.areaRng = [[0**2, 1e5**2], [32**2, 96**2], [96**2, 1e5**2]]
+ self.areaRngLbl = ["all", "medium", "large"]
+ self.useCats = 1
+
+ def __init__(self, iouType="segm"):
+ if iouType == "segm" or iouType == "bbox":
+ self.setDetParams()
+ elif iouType == "keypoints":
+ self.setKpParams()
+ elif iouType == "densepose":
+ self.setUvParams()
+ else:
+ raise Exception("iouType not supported")
+ self.iouType = iouType
+ # useSegm is deprecated
+ self.useSegm = None
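+
+
+# Typical driver sequence (a sketch, assuming `coco_gt` and `coco_dt` are pycocotools
+# COCO objects holding ground truth and loaded detections, and that the default
+# storage/embedder arguments suffice):
+#
+#     coco_eval = DensePoseCocoEval(coco_gt, coco_dt, "densepose")
+#     coco_eval.evaluate()
+#     coco_eval.accumulate()
+#     coco_eval.summarize()
+#
+# This is the sequence used by _derive_results_from_coco_eval in
+# densepose/evaluation/evaluator.py below.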
diff --git a/densepose/evaluation/evaluator.py b/densepose/evaluation/evaluator.py
new file mode 100644
index 0000000000000000000000000000000000000000..803d3dccbe60a637e349a22e3364f3c0b5f4f1e5
--- /dev/null
+++ b/densepose/evaluation/evaluator.py
@@ -0,0 +1,423 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+import contextlib
+import copy
+import io
+import itertools
+import logging
+import numpy as np
+import os
+from collections import OrderedDict
+from typing import Dict, Iterable, List, Optional
+import pycocotools.mask as mask_utils
+import torch
+from pycocotools.coco import COCO
+from tabulate import tabulate
+
+from detectron2.config import CfgNode
+from detectron2.data import MetadataCatalog
+from detectron2.evaluation import DatasetEvaluator
+from detectron2.structures import BoxMode
+from detectron2.utils.comm import gather, get_rank, is_main_process, synchronize
+from detectron2.utils.file_io import PathManager
+from detectron2.utils.logger import create_small_table
+
+from densepose.converters import ToChartResultConverter, ToMaskConverter
+from densepose.data.datasets.coco import maybe_filter_and_map_categories_cocoapi
+from densepose.structures import (
+ DensePoseChartPredictorOutput,
+ DensePoseEmbeddingPredictorOutput,
+ quantize_densepose_chart_result,
+)
+
+from .densepose_coco_evaluation import DensePoseCocoEval, DensePoseEvalMode
+from .mesh_alignment_evaluator import MeshAlignmentEvaluator
+from .tensor_storage import (
+ SingleProcessFileTensorStorage,
+ SingleProcessRamTensorStorage,
+ SingleProcessTensorStorage,
+ SizeData,
+ storage_gather,
+)
+
+
+class DensePoseCOCOEvaluator(DatasetEvaluator):
+ def __init__(
+ self,
+ dataset_name,
+ distributed,
+ output_dir=None,
+ evaluator_type: str = "iuv",
+ min_iou_threshold: float = 0.5,
+ storage: Optional[SingleProcessTensorStorage] = None,
+ embedder=None,
+ should_evaluate_mesh_alignment: bool = False,
+ mesh_alignment_mesh_names: Optional[List[str]] = None,
+ ):
+ self._embedder = embedder
+ self._distributed = distributed
+ self._output_dir = output_dir
+ self._evaluator_type = evaluator_type
+ self._storage = storage
+ self._should_evaluate_mesh_alignment = should_evaluate_mesh_alignment
+
+ assert not (
+ should_evaluate_mesh_alignment and embedder is None
+ ), "Mesh alignment evaluation is activated, but no vertex embedder provided!"
+ if should_evaluate_mesh_alignment:
+ self._mesh_alignment_evaluator = MeshAlignmentEvaluator(
+ embedder,
+ mesh_alignment_mesh_names,
+ )
+
+ self._cpu_device = torch.device("cpu")
+ self._logger = logging.getLogger(__name__)
+
+ self._metadata = MetadataCatalog.get(dataset_name)
+ self._min_threshold = min_iou_threshold
+ json_file = PathManager.get_local_path(self._metadata.json_file)
+ with contextlib.redirect_stdout(io.StringIO()):
+ self._coco_api = COCO(json_file)
+ maybe_filter_and_map_categories_cocoapi(dataset_name, self._coco_api)
+
+ def reset(self):
+ self._predictions = []
+
+ def process(self, inputs, outputs):
+ """
+ Args:
+ inputs: the inputs to a COCO model (e.g., GeneralizedRCNN).
+ It is a list of dict. Each dict corresponds to an image and
+ contains keys like "height", "width", "file_name", "image_id".
+ outputs: the outputs of a COCO model. It is a list of dicts with key
+ "instances" that contains :class:`Instances`.
+ The :class:`Instances` object needs to have `densepose` field.
+ """
+ for input, output in zip(inputs, outputs):
+ instances = output["instances"].to(self._cpu_device)
+ if not instances.has("pred_densepose"):
+ continue
+ prediction_list = prediction_to_dict(
+ instances,
+ input["image_id"],
+ self._embedder,
+ self._metadata.class_to_mesh_name,
+ self._storage is not None,
+ )
+ if self._storage is not None:
+ for prediction_dict in prediction_list:
+ dict_to_store = {}
+ for field_name in self._storage.data_schema:
+ dict_to_store[field_name] = prediction_dict[field_name]
+ record_id = self._storage.put(dict_to_store)
+ prediction_dict["record_id"] = record_id
+ prediction_dict["rank"] = get_rank()
+ for field_name in self._storage.data_schema:
+ del prediction_dict[field_name]
+ self._predictions.extend(prediction_list)
+
+ def evaluate(self, img_ids=None):
+ if self._distributed:
+ synchronize()
+ predictions = gather(self._predictions)
+ predictions = list(itertools.chain(*predictions))
+ else:
+ predictions = self._predictions
+
+ multi_storage = storage_gather(self._storage) if self._storage is not None else None
+
+ if not is_main_process():
+ return
+ return copy.deepcopy(self._eval_predictions(predictions, multi_storage, img_ids))
+
+ def _eval_predictions(self, predictions, multi_storage=None, img_ids=None):
+ """
+ Evaluate predictions on densepose.
+ Return results with the metrics of the tasks.
+ """
+ self._logger.info("Preparing results for COCO format ...")
+
+ if self._output_dir:
+ PathManager.mkdirs(self._output_dir)
+ file_path = os.path.join(self._output_dir, "coco_densepose_predictions.pth")
+ with PathManager.open(file_path, "wb") as f:
+ torch.save(predictions, f)
+
+ self._logger.info("Evaluating predictions ...")
+ res = OrderedDict()
+ results_gps, results_gpsm, results_segm = _evaluate_predictions_on_coco(
+ self._coco_api,
+ predictions,
+ multi_storage,
+ self._embedder,
+ class_names=self._metadata.get("thing_classes"),
+ min_threshold=self._min_threshold,
+ img_ids=img_ids,
+ )
+ res["densepose_gps"] = results_gps
+ res["densepose_gpsm"] = results_gpsm
+ res["densepose_segm"] = results_segm
+ if self._should_evaluate_mesh_alignment:
+ res["densepose_mesh_alignment"] = self._evaluate_mesh_alignment()
+ return res
+
+ def _evaluate_mesh_alignment(self):
+ self._logger.info("Mesh alignment evaluation ...")
+ mean_ge, mean_gps, per_mesh_metrics = self._mesh_alignment_evaluator.evaluate()
+ results = {
+ "GE": mean_ge * 100,
+ "GPS": mean_gps * 100,
+ }
+ mesh_names = set()
+ for metric_name in per_mesh_metrics:
+ for mesh_name, value in per_mesh_metrics[metric_name].items():
+ results[f"{metric_name}-{mesh_name}"] = value * 100
+ mesh_names.add(mesh_name)
+ self._print_mesh_alignment_results(results, mesh_names)
+ return results
+
+ def _print_mesh_alignment_results(self, results: Dict[str, float], mesh_names: Iterable[str]):
+ self._logger.info("Evaluation results for densepose, mesh alignment:")
+ self._logger.info(f'| {"Mesh":13s} | {"GErr":7s} | {"GPS":7s} |')
+ self._logger.info("| :-----------: | :-----: | :-----: |")
+ for mesh_name in mesh_names:
+ ge_key = f"GE-{mesh_name}"
+ ge_str = f"{results[ge_key]:.4f}" if ge_key in results else " "
+ gps_key = f"GPS-{mesh_name}"
+ gps_str = f"{results[gps_key]:.4f}" if gps_key in results else " "
+ self._logger.info(f"| {mesh_name:13s} | {ge_str:7s} | {gps_str:7s} |")
+ self._logger.info("| :-------------------------------: |")
+ ge_key = "GE"
+ ge_str = f"{results[ge_key]:.4f}" if ge_key in results else " "
+ gps_key = "GPS"
+ gps_str = f"{results[gps_key]:.4f}" if gps_key in results else " "
+ self._logger.info(f'| {"MEAN":13s} | {ge_str:7s} | {gps_str:7s} |')
+
+
+def prediction_to_dict(instances, img_id, embedder, class_to_mesh_name, use_storage):
+ """
+ Args:
+ instances (Instances): the output of the model
+ img_id (str): the image id in COCO
+
+ Returns:
+ list[dict]: the results in densepose evaluation format
+ """
+ scores = instances.scores.tolist()
+ classes = instances.pred_classes.tolist()
+ raw_boxes_xywh = BoxMode.convert(
+ instances.pred_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
+ )
+
+ if isinstance(instances.pred_densepose, DensePoseEmbeddingPredictorOutput):
+ results_densepose = densepose_cse_predictions_to_dict(
+ instances, embedder, class_to_mesh_name, use_storage
+ )
+ elif isinstance(instances.pred_densepose, DensePoseChartPredictorOutput):
+ if not use_storage:
+ results_densepose = densepose_chart_predictions_to_dict(instances)
+ else:
+ results_densepose = densepose_chart_predictions_to_storage_dict(instances)
+
+ results = []
+ for k in range(len(instances)):
+ result = {
+ "image_id": img_id,
+ "category_id": classes[k],
+ "bbox": raw_boxes_xywh[k].tolist(),
+ "score": scores[k],
+ }
+ results.append({**result, **results_densepose[k]})
+ return results
+
+
+def densepose_chart_predictions_to_dict(instances):
+ segmentations = ToMaskConverter.convert(
+ instances.pred_densepose, instances.pred_boxes, instances.image_size
+ )
+
+ results = []
+ for k in range(len(instances)):
+ densepose_results_quantized = quantize_densepose_chart_result(
+ ToChartResultConverter.convert(instances.pred_densepose[k], instances.pred_boxes[k])
+ )
+ densepose_results_quantized.labels_uv_uint8 = (
+ densepose_results_quantized.labels_uv_uint8.cpu()
+ )
+ segmentation = segmentations.tensor[k]
+ segmentation_encoded = mask_utils.encode(
+ np.require(segmentation.numpy(), dtype=np.uint8, requirements=["F"])
+ )
+ segmentation_encoded["counts"] = segmentation_encoded["counts"].decode("utf-8")
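+ # pycocotools returns the RLE "counts" as bytes; decode to str so the result can be
+ # serialized downstream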
+ result = {
+ "densepose": densepose_results_quantized,
+ "segmentation": segmentation_encoded,
+ }
+ results.append(result)
+ return results
+
+
+def densepose_chart_predictions_to_storage_dict(instances):
+ results = []
+ for k in range(len(instances)):
+ densepose_predictor_output = instances.pred_densepose[k]
+ result = {
+ "coarse_segm": densepose_predictor_output.coarse_segm.squeeze(0).cpu(),
+ "fine_segm": densepose_predictor_output.fine_segm.squeeze(0).cpu(),
+ "u": densepose_predictor_output.u.squeeze(0).cpu(),
+ "v": densepose_predictor_output.v.squeeze(0).cpu(),
+ }
+ results.append(result)
+ return results
+
+
+def densepose_cse_predictions_to_dict(instances, embedder, class_to_mesh_name, use_storage):
+ results = []
+ for k in range(len(instances)):
+ cse = instances.pred_densepose[k]
+ results.append(
+ {
+ "coarse_segm": cse.coarse_segm[0].cpu(),
+ "embedding": cse.embedding[0].cpu(),
+ }
+ )
+ return results
+
+
+def _evaluate_predictions_on_coco(
+ coco_gt,
+ coco_results,
+ multi_storage=None,
+ embedder=None,
+ class_names=None,
+ min_threshold: float = 0.5,
+ img_ids=None,
+):
+ logger = logging.getLogger(__name__)
+
+ densepose_metrics = _get_densepose_metrics(min_threshold)
+ if len(coco_results) == 0: # cocoapi does not handle empty results very well
+ logger.warning("No predictions from the model! Set scores to -1")
+ results_gps = {metric: -1 for metric in densepose_metrics}
+ results_gpsm = {metric: -1 for metric in densepose_metrics}
+ results_segm = {metric: -1 for metric in densepose_metrics}
+ return results_gps, results_gpsm, results_segm
+
+ coco_dt = coco_gt.loadRes(coco_results)
+
+ results = []
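+ # Three evaluation modes are reported, in this order: GPS (geodesic point similarity),
+ # GPSM (GPS combined with the segmentation mask IoU) and IOU (mask IoU only).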
+ for eval_mode_name in ["GPS", "GPSM", "IOU"]:
+ eval_mode = getattr(DensePoseEvalMode, eval_mode_name)
+ coco_eval = DensePoseCocoEval(
+ coco_gt, coco_dt, "densepose", multi_storage, embedder, dpEvalMode=eval_mode
+ )
+ result = _derive_results_from_coco_eval(
+ coco_eval, eval_mode_name, densepose_metrics, class_names, min_threshold, img_ids
+ )
+ results.append(result)
+ return results
+
+
+def _get_densepose_metrics(min_threshold: float = 0.5):
+ metrics = ["AP"]
+ if min_threshold <= 0.201:
+ metrics += ["AP20"]
+ if min_threshold <= 0.301:
+ metrics += ["AP30"]
+ if min_threshold <= 0.401:
+ metrics += ["AP40"]
+ metrics.extend(["AP50", "AP75", "APm", "APl", "AR", "AR50", "AR75", "ARm", "ARl"])
+ return metrics
+
+
+def _derive_results_from_coco_eval(
+ coco_eval, eval_mode_name, metrics, class_names, min_threshold: float, img_ids
+):
+ if img_ids is not None:
+ coco_eval.params.imgIds = img_ids
+ coco_eval.params.iouThrs = np.linspace(
+ min_threshold, 0.95, int(np.round((0.95 - min_threshold) / 0.05)) + 1, endpoint=True
+ )
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
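+ # coco_eval.stats is ordered exactly like the metric names returned by
+ # _get_densepose_metrics for the same min_threshold, so positional indexing is safe here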
+ results = {metric: float(coco_eval.stats[idx] * 100) for idx, metric in enumerate(metrics)}
+ logger = logging.getLogger(__name__)
+ logger.info(
+ f"Evaluation results for densepose, {eval_mode_name} metric: \n"
+ + create_small_table(results)
+ )
+ if class_names is None or len(class_names) <= 1:
+ return results
+
+ # Compute per-category AP, the same way as it is done in D2
+ # (see detectron2/evaluation/coco_evaluation.py):
+ precisions = coco_eval.eval["precision"]
+ # precision has dims (iou, recall, cls, area range, max dets)
+ assert len(class_names) == precisions.shape[2]
+
+ results_per_category = []
+ for idx, name in enumerate(class_names):
+ # area range index 0: all area ranges
+ # max dets index -1: typically 100 per image
+ precision = precisions[:, :, idx, 0, -1]
+ precision = precision[precision > -1]
+ ap = np.mean(precision) if precision.size else float("nan")
+ results_per_category.append((f"{name}", float(ap * 100)))
+
+ # tabulate it
+ n_cols = min(6, len(results_per_category) * 2)
+ results_flatten = list(itertools.chain(*results_per_category))
+ results_2d = itertools.zip_longest(*[results_flatten[i::n_cols] for i in range(n_cols)])
+ table = tabulate(
+ results_2d,
+ tablefmt="pipe",
+ floatfmt=".3f",
+ headers=["category", "AP"] * (n_cols // 2),
+ numalign="left",
+ )
+ logger.info(f"Per-category {eval_mode_name} AP: \n" + table)
+
+ results.update({"AP-" + name: ap for name, ap in results_per_category})
+ return results
+
+
+def build_densepose_evaluator_storage(cfg: CfgNode, output_folder: str):
+ storage_spec = cfg.DENSEPOSE_EVALUATION.STORAGE
+ if storage_spec == "none":
+ return None
+ evaluator_type = cfg.DENSEPOSE_EVALUATION.TYPE
+ # common output tensor sizes
+ hout = cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE
+ wout = cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE
+ n_csc = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
+ # specific output tensors
+ if evaluator_type == "iuv":
+ n_fsc = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1
+ schema = {
+ "coarse_segm": SizeData(dtype="float32", shape=(n_csc, hout, wout)),
+ "fine_segm": SizeData(dtype="float32", shape=(n_fsc, hout, wout)),
+ "u": SizeData(dtype="float32", shape=(n_fsc, hout, wout)),
+ "v": SizeData(dtype="float32", shape=(n_fsc, hout, wout)),
+ }
+ elif evaluator_type == "cse":
+ embed_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE
+ schema = {
+ "coarse_segm": SizeData(dtype="float32", shape=(n_csc, hout, wout)),
+ "embedding": SizeData(dtype="float32", shape=(embed_size, hout, wout)),
+ }
+ else:
+ raise ValueError(f"Unknown evaluator type: {evaluator_type}")
+ # storage types
+ if storage_spec == "ram":
+ storage = SingleProcessRamTensorStorage(schema, io.BytesIO())
+ elif storage_spec == "file":
+ fpath = os.path.join(output_folder, f"DensePoseEvaluatorStorage.{get_rank()}.bin")
+ PathManager.mkdirs(output_folder)
+ storage = SingleProcessFileTensorStorage(schema, fpath, "wb")
+ else:
+ raise ValueError(f"Unknown storage specification: {storage_spec}")
+ return storage
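+
+
+# Note: the schema shapes above must match the tensors produced by
+# densepose_chart_predictions_to_storage_dict / densepose_cse_predictions_to_dict earlier
+# in this file. As a rough size estimate (assuming the common defaults of a 112x112
+# heatmap, 24 patches and 2 coarse segmentation channels), one "iuv" record is
+# (2 + 3 * 25) * 112 * 112 * 4 bytes, i.e. about 3.7 MiB per detection.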
diff --git a/densepose/evaluation/mesh_alignment_evaluator.py b/densepose/evaluation/mesh_alignment_evaluator.py
new file mode 100644
index 0000000000000000000000000000000000000000..f6c76f3cf2d54250f7fa1d9a2a3a1d2c60eb0aad
--- /dev/null
+++ b/densepose/evaluation/mesh_alignment_evaluator.py
@@ -0,0 +1,68 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+# pyre-unsafe
+
+import json
+import logging
+from typing import List, Optional
+import torch
+from torch import nn
+
+from detectron2.utils.file_io import PathManager
+
+from densepose.structures.mesh import create_mesh
+
+
+class MeshAlignmentEvaluator:
+ """
+ Class for evaluation of 3D mesh alignment based on the learned vertex embeddings
+ """
+
+ def __init__(self, embedder: nn.Module, mesh_names: Optional[List[str]]):
+ self.embedder = embedder
+ # use the provided mesh names if not None and not an empty list
+ self.mesh_names = mesh_names if mesh_names else embedder.mesh_names
+ self.logger = logging.getLogger(__name__)
+ with PathManager.open(
+ "https://dl.fbaipublicfiles.com/densepose/data/cse/mesh_keyvertices_v0.json", "r"
+ ) as f:
+ self.mesh_keyvertices = json.load(f)
+
+ def evaluate(self):
+ ge_per_mesh = {}
+ gps_per_mesh = {}
+ for mesh_name_1 in self.mesh_names:
+ avg_errors = []
+ avg_gps = []
+ embeddings_1 = self.embedder(mesh_name_1)
+ keyvertices_1 = self.mesh_keyvertices[mesh_name_1]
+ keyvertex_names_1 = list(keyvertices_1.keys())
+ keyvertex_indices_1 = [keyvertices_1[name] for name in keyvertex_names_1]
+ for mesh_name_2 in self.mesh_names:
+ if mesh_name_1 == mesh_name_2:
+ continue
+ embeddings_2 = self.embedder(mesh_name_2)
+ keyvertices_2 = self.mesh_keyvertices[mesh_name_2]
+ sim_matrix_12 = embeddings_1[keyvertex_indices_1].mm(embeddings_2.T)
+ vertices_2_matching_keyvertices_1 = sim_matrix_12.argmax(axis=1)
+ mesh_2 = create_mesh(mesh_name_2, embeddings_2.device)
+ geodists = mesh_2.geodists[
+ vertices_2_matching_keyvertices_1,
+ [keyvertices_2[name] for name in keyvertex_names_1],
+ ]
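+ # geodesic point similarity (GPS): a Gaussian of the geodesic distances,
+ # normalized by a fixed constant (0.255)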
+ Current_Mean_Distances = 0.255
+ gps = (-(geodists**2) / (2 * (Current_Mean_Distances**2))).exp()
+ avg_errors.append(geodists.mean().item())
+ avg_gps.append(gps.mean().item())
+
+ ge_mean = torch.as_tensor(avg_errors).mean().item()
+ gps_mean = torch.as_tensor(avg_gps).mean().item()
+ ge_per_mesh[mesh_name_1] = ge_mean
+ gps_per_mesh[mesh_name_1] = gps_mean
+ ge_mean_global = torch.as_tensor(list(ge_per_mesh.values())).mean().item()
+ gps_mean_global = torch.as_tensor(list(gps_per_mesh.values())).mean().item()
+ per_mesh_metrics = {
+ "GE": ge_per_mesh,
+ "GPS": gps_per_mesh,
+ }
+ return ge_mean_global, gps_mean_global, per_mesh_metrics
diff --git a/densepose/evaluation/tensor_storage.py b/densepose/evaluation/tensor_storage.py
new file mode 100644
index 0000000000000000000000000000000000000000..369a29470807e60be377516f7910a9f95ab0a47d
--- /dev/null
+++ b/densepose/evaluation/tensor_storage.py
@@ -0,0 +1,241 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+import io
+import numpy as np
+import os
+from dataclasses import dataclass
+from functools import reduce
+from operator import mul
+from typing import BinaryIO, Dict, Optional, Tuple
+import torch
+
+from detectron2.utils.comm import gather, get_rank
+from detectron2.utils.file_io import PathManager
+
+
+@dataclass
+class SizeData:
+ dtype: str
+ shape: Tuple[int]
+
+
+def _calculate_record_field_size_b(data_schema: Dict[str, SizeData], field_name: str) -> int:
+ schema = data_schema[field_name]
+ element_size_b = np.dtype(schema.dtype).itemsize
+ record_field_size_b = reduce(mul, schema.shape) * element_size_b
+ return record_field_size_b
+
+
+def _calculate_record_size_b(data_schema: Dict[str, SizeData]) -> int:
+ record_size_b = 0
+ for field_name in data_schema:
+ record_field_size_b = _calculate_record_field_size_b(data_schema, field_name)
+ record_size_b += record_field_size_b
+ return record_size_b
+
+
+def _calculate_record_field_sizes_b(data_schema: Dict[str, SizeData]) -> Dict[str, int]:
+ field_sizes_b = {}
+ for field_name in data_schema:
+ field_sizes_b[field_name] = _calculate_record_field_size_b(data_schema, field_name)
+ return field_sizes_b
+
+
+class SingleProcessTensorStorage:
+ """
+ Compact tensor storage to keep tensor data of predefined size and type.
+ """
+
+ def __init__(self, data_schema: Dict[str, SizeData], storage_impl: BinaryIO):
+ """
+ Construct tensor storage based on information on data shape and size.
+ Internally uses numpy to interpret the type specification.
+ The storage must support operations `seek(offset, whence=os.SEEK_SET)` and
+ `read(size)` to be able to perform the `get` operation.
+ The storage must support operation `write(bytes)` to be able to perform
+ the `put` operation.
+
+ Args:
+ data_schema (dict: str -> SizeData): dictionary which maps tensor name
+ to its size data (shape and data type), e.g.
+ ```
+ {
+ "coarse_segm": SizeData(dtype="float32", shape=(112, 112)),
+ "embedding": SizeData(dtype="float32", shape=(16, 112, 112)),
+ }
+ ```
+ storage_impl (BinaryIO): io instance that handles file-like seek, read
+ and write operations, e.g. a file handle or a memory buffer like io.BytesIO
+ """
+ self.data_schema = data_schema
+ self.record_size_b = _calculate_record_size_b(data_schema)
+ self.record_field_sizes_b = _calculate_record_field_sizes_b(data_schema)
+ self.storage_impl = storage_impl
+ self.next_record_id = 0
+
+ def get(self, record_id: int) -> Dict[str, torch.Tensor]:
+ """
+ Load tensors from the storage by record ID
+
+ Args:
+ record_id (int): Record ID, for which to load the data
+
+ Return:
+ dict: str -> tensor: tensor name mapped to tensor data, recorded under the provided ID
+ """
+ self.storage_impl.seek(record_id * self.record_size_b, os.SEEK_SET)
+ data_bytes = self.storage_impl.read(self.record_size_b)
+ assert len(data_bytes) == self.record_size_b, (
+ f"Expected data size {self.record_size_b} B could not be read: "
+ f"got {len(data_bytes)} B"
+ )
+ record = {}
+ cur_idx = 0
+ # it's important to read and write in the same order
+ for field_name in sorted(self.data_schema):
+ schema = self.data_schema[field_name]
+ field_size_b = self.record_field_sizes_b[field_name]
+ chunk = data_bytes[cur_idx : cur_idx + field_size_b]
+ data_np = np.frombuffer(
+ chunk, dtype=schema.dtype, count=reduce(mul, schema.shape)
+ ).reshape(schema.shape)
+ record[field_name] = torch.from_numpy(data_np)
+ cur_idx += field_size_b
+ return record
+
+ def put(self, data: Dict[str, torch.Tensor]) -> int:
+ """
+ Store tensors in the storage
+
+ Args:
+ data (dict: str -> tensor): data to store, a dictionary which maps
+ tensor names into tensors; tensor shapes must match those specified
+ in data schema.
+ Return:
+ int: record ID, under which the data is stored
+ """
+ # it's important to read and write in the same order
+ for field_name in sorted(self.data_schema):
+ assert (
+ field_name in data
+ ), f"Field '{field_name}' not present in data: data keys are {data.keys()}"
+ value = data[field_name]
+ assert value.shape == self.data_schema[field_name].shape, (
+ f"Mismatched tensor shapes for field '{field_name}': "
+ f"expected {self.data_schema[field_name].shape}, got {value.shape}"
+ )
+ data_bytes = value.cpu().numpy().tobytes()
+ assert len(data_bytes) == self.record_field_sizes_b[field_name], (
+ f"Expected field {field_name} to be of size "
+ f"{self.record_field_sizes_b[field_name]} B, got {len(data_bytes)} B"
+ )
+ self.storage_impl.write(data_bytes)
+ record_id = self.next_record_id
+ self.next_record_id += 1
+ return record_id
+
+
+class SingleProcessFileTensorStorage(SingleProcessTensorStorage):
+ """
+ Implementation of a single process tensor storage which stores data in a file
+ """
+
+ def __init__(self, data_schema: Dict[str, SizeData], fpath: str, mode: str):
+ self.fpath = fpath
+ assert "b" in mode, f"Tensor storage should be opened in binary mode, got '{mode}'"
+ if "w" in mode:
+ # pyre-fixme[6]: For 2nd argument expected `Union[typing_extensions.Liter...
+ file_h = PathManager.open(fpath, mode)
+ elif "r" in mode:
+ local_fpath = PathManager.get_local_path(fpath)
+ file_h = open(local_fpath, mode)
+ else:
+ raise ValueError(f"Unsupported file mode {mode}, supported modes: rb, wb")
+ super().__init__(data_schema, file_h) # pyre-ignore[6]
+
+
+class SingleProcessRamTensorStorage(SingleProcessTensorStorage):
+ """
+ Implementation of a single process tensor storage which stores data in RAM
+ """
+
+ def __init__(self, data_schema: Dict[str, SizeData], buf: io.BytesIO):
+ super().__init__(data_schema, buf)
+
+
+class MultiProcessTensorStorage:
+ """
+ Representation of a set of tensor storages created by individual processes,
+ which allows those storages to be accessed from a single owner process. The storages
+ should either be shared with or broadcast to the owner process.
+ The processes are identified by their rank; data is uniquely identified by
+ the rank of the process and the record ID.
+ """
+
+ def __init__(self, rank_to_storage: Dict[int, SingleProcessTensorStorage]):
+ self.rank_to_storage = rank_to_storage
+
+ def get(self, rank: int, record_id: int) -> Dict[str, torch.Tensor]:
+ storage = self.rank_to_storage[rank]
+ return storage.get(record_id)
+
+ def put(self, rank: int, data: Dict[str, torch.Tensor]) -> int:
+ storage = self.rank_to_storage[rank]
+ return storage.put(data)
+
+
+class MultiProcessFileTensorStorage(MultiProcessTensorStorage):
+ def __init__(self, data_schema: Dict[str, SizeData], rank_to_fpath: Dict[int, str], mode: str):
+ rank_to_storage = {
+ rank: SingleProcessFileTensorStorage(data_schema, fpath, mode)
+ for rank, fpath in rank_to_fpath.items()
+ }
+ super().__init__(rank_to_storage) # pyre-ignore[6]
+
+
+class MultiProcessRamTensorStorage(MultiProcessTensorStorage):
+ def __init__(self, data_schema: Dict[str, SizeData], rank_to_buffer: Dict[int, io.BytesIO]):
+ rank_to_storage = {
+ rank: SingleProcessRamTensorStorage(data_schema, buf)
+ for rank, buf in rank_to_buffer.items()
+ }
+ super().__init__(rank_to_storage) # pyre-ignore[6]
+
+
+def _ram_storage_gather(
+ storage: SingleProcessRamTensorStorage, dst_rank: int = 0
+) -> Optional[MultiProcessRamTensorStorage]:
+ storage.storage_impl.seek(0, os.SEEK_SET)
+ # TODO: overhead, pickling a bytes object, can just pass bytes in a tensor directly
+ # see detectron2/utils.comm.py
+ data_list = gather(storage.storage_impl.read(), dst=dst_rank)
+ if get_rank() != dst_rank:
+ return None
+ rank_to_buffer = {i: io.BytesIO(data_list[i]) for i in range(len(data_list))}
+ multiprocess_storage = MultiProcessRamTensorStorage(storage.data_schema, rank_to_buffer)
+ return multiprocess_storage
+
+
+def _file_storage_gather(
+ storage: SingleProcessFileTensorStorage,
+ dst_rank: int = 0,
+ mode: str = "rb",
+) -> Optional[MultiProcessFileTensorStorage]:
+ storage.storage_impl.close()
+ fpath_list = gather(storage.fpath, dst=dst_rank)
+ if get_rank() != dst_rank:
+ return None
+ rank_to_fpath = {i: fpath_list[i] for i in range(len(fpath_list))}
+ return MultiProcessFileTensorStorage(storage.data_schema, rank_to_fpath, mode)
+
+
+def storage_gather(
+ storage: SingleProcessTensorStorage, dst_rank: int = 0
+) -> Optional[MultiProcessTensorStorage]:
+ if isinstance(storage, SingleProcessRamTensorStorage):
+ return _ram_storage_gather(storage, dst_rank)
+ elif isinstance(storage, SingleProcessFileTensorStorage):
+ return _file_storage_gather(storage, dst_rank)
+ raise Exception(f"Unsupported storage for gather operation: {storage}")
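+
+
+# Minimal round-trip sketch (a hypothetical 2x2 float32 field, not part of the API):
+#
+#     schema = {"coarse_segm": SizeData(dtype="float32", shape=(2, 2))}
+#     storage = SingleProcessRamTensorStorage(schema, io.BytesIO())
+#     record_id = storage.put({"coarse_segm": torch.zeros(2, 2)})
+#     restored = storage.get(record_id)["coarse_segm"]  # equal to the stored tensor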
diff --git a/densepose/modeling/__init__.py b/densepose/modeling/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..5c5b48b1fc6100dd531f7b61467876e222e40bdd
--- /dev/null
+++ b/densepose/modeling/__init__.py
@@ -0,0 +1,15 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from .confidence import DensePoseConfidenceModelConfig, DensePoseUVConfidenceType
+from .filter import DensePoseDataFilter
+from .inference import densepose_inference
+from .utils import initialize_module_params
+from .build import (
+ build_densepose_data_filter,
+ build_densepose_embedder,
+ build_densepose_head,
+ build_densepose_losses,
+ build_densepose_predictor,
+)
diff --git a/densepose/modeling/__pycache__/__init__.cpython-39.pyc b/densepose/modeling/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..cb514b114a95540c46fffd1c0739939bc23dfafe
Binary files /dev/null and b/densepose/modeling/__pycache__/__init__.cpython-39.pyc differ
diff --git a/densepose/modeling/__pycache__/build.cpython-39.pyc b/densepose/modeling/__pycache__/build.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ab20b2c6d876ced320e312146146e39f71d98dcc
Binary files /dev/null and b/densepose/modeling/__pycache__/build.cpython-39.pyc differ
diff --git a/densepose/modeling/__pycache__/confidence.cpython-39.pyc b/densepose/modeling/__pycache__/confidence.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2242d1bf50d79da3f554735c111a830accd10d1c
Binary files /dev/null and b/densepose/modeling/__pycache__/confidence.cpython-39.pyc differ
diff --git a/densepose/modeling/__pycache__/filter.cpython-39.pyc b/densepose/modeling/__pycache__/filter.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..471ab7a844279e79902f481460b9ed3fed620ffb
Binary files /dev/null and b/densepose/modeling/__pycache__/filter.cpython-39.pyc differ
diff --git a/densepose/modeling/__pycache__/hrfpn.cpython-39.pyc b/densepose/modeling/__pycache__/hrfpn.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ff803e930882092b2d2650444f2ab2060c823f09
Binary files /dev/null and b/densepose/modeling/__pycache__/hrfpn.cpython-39.pyc differ
diff --git a/densepose/modeling/__pycache__/hrnet.cpython-39.pyc b/densepose/modeling/__pycache__/hrnet.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d3b72be0bb4f87b08f7815dd2fbfb7a25ff9ff7c
Binary files /dev/null and b/densepose/modeling/__pycache__/hrnet.cpython-39.pyc differ
diff --git a/densepose/modeling/__pycache__/inference.cpython-39.pyc b/densepose/modeling/__pycache__/inference.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f691af36aa63bd6175423f0e430b988b19c106ef
Binary files /dev/null and b/densepose/modeling/__pycache__/inference.cpython-39.pyc differ
diff --git a/densepose/modeling/__pycache__/test_time_augmentation.cpython-39.pyc b/densepose/modeling/__pycache__/test_time_augmentation.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7d8c016bbaf816a2b867dd9f87a230883860c101
Binary files /dev/null and b/densepose/modeling/__pycache__/test_time_augmentation.cpython-39.pyc differ
diff --git a/densepose/modeling/__pycache__/utils.cpython-39.pyc b/densepose/modeling/__pycache__/utils.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..161f01ca964f996c8ec4870b4305bae0c14db66b
Binary files /dev/null and b/densepose/modeling/__pycache__/utils.cpython-39.pyc differ
diff --git a/densepose/modeling/build.py b/densepose/modeling/build.py
new file mode 100644
index 0000000000000000000000000000000000000000..82e40d9284eeb9c90bf5e2ac13a95f587c76a595
--- /dev/null
+++ b/densepose/modeling/build.py
@@ -0,0 +1,89 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from typing import Optional
+from torch import nn
+
+from detectron2.config import CfgNode
+
+from .cse.embedder import Embedder
+from .filter import DensePoseDataFilter
+
+
+def build_densepose_predictor(cfg: CfgNode, input_channels: int):
+ """
+ Create an instance of DensePose predictor based on configuration options.
+
+ Args:
+ cfg (CfgNode): configuration options
+ input_channels (int): input tensor size along the channel dimension
+ Return:
+ An instance of DensePose predictor
+ """
+ from .predictors import DENSEPOSE_PREDICTOR_REGISTRY
+
+ predictor_name = cfg.MODEL.ROI_DENSEPOSE_HEAD.PREDICTOR_NAME
+ return DENSEPOSE_PREDICTOR_REGISTRY.get(predictor_name)(cfg, input_channels)
+
+
+def build_densepose_data_filter(cfg: CfgNode):
+ """
+ Build DensePose data filter which selects data for training
+
+ Args:
+ cfg (CfgNode): configuration options
+
+ Return:
+ Callable: list(Tensor), list(Instances) -> list(Tensor), list(Instances)
+ An instance of DensePose filter, which takes feature tensors and proposals
+ as an input and returns filtered features and proposals
+ """
+ dp_filter = DensePoseDataFilter(cfg)
+ return dp_filter
+
+
+def build_densepose_head(cfg: CfgNode, input_channels: int):
+ """
+ Build DensePose head based on configuration options
+
+ Args:
+ cfg (CfgNode): configuration options
+ input_channels (int): input tensor size along the channel dimension
+ Return:
+ An instance of DensePose head
+ """
+ from .roi_heads.registry import ROI_DENSEPOSE_HEAD_REGISTRY
+
+ head_name = cfg.MODEL.ROI_DENSEPOSE_HEAD.NAME
+ return ROI_DENSEPOSE_HEAD_REGISTRY.get(head_name)(cfg, input_channels)
+
+
+def build_densepose_losses(cfg: CfgNode):
+ """
+ Build DensePose loss based on configuration options
+
+ Args:
+ cfg (CfgNode): configuration options
+ Return:
+ An instance of DensePose loss
+ """
+ from .losses import DENSEPOSE_LOSS_REGISTRY
+
+ loss_name = cfg.MODEL.ROI_DENSEPOSE_HEAD.LOSS_NAME
+ return DENSEPOSE_LOSS_REGISTRY.get(loss_name)(cfg)
+
+
+def build_densepose_embedder(cfg: CfgNode) -> Optional[nn.Module]:
+ """
+ Build embedder used to embed mesh vertices into an embedding space.
+ Embedder contains sub-embedders, one for each mesh ID.
+
+ Args:
+ cfg (CfgNode): configuration options
+ Return:
+ Embedding module
+ """
+ if cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDERS:
+ return Embedder(cfg)
+ return None
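+
+
+# Note: the predictor/head/loss registries are imported inside the build functions above,
+# presumably to avoid circular imports between the modeling submodules.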
diff --git a/densepose/modeling/confidence.py b/densepose/modeling/confidence.py
new file mode 100644
index 0000000000000000000000000000000000000000..364e389078e78935da9e432bc04b5530d2d9963f
--- /dev/null
+++ b/densepose/modeling/confidence.py
@@ -0,0 +1,75 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from dataclasses import dataclass
+from enum import Enum
+
+from detectron2.config import CfgNode
+
+
+class DensePoseUVConfidenceType(Enum):
+ """
+ Statistical model type for confidence learning, possible values:
+ - "iid_iso": statistically independent identically distributed residuals
+ with isotropic covariance
+ - "indep_aniso": statistically independent residuals with anisotropic
+ covariances
+ For details, see:
+ N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning
+ Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019
+ """
+
+ # fmt: off
+ IID_ISO = "iid_iso"
+ INDEP_ANISO = "indep_aniso"
+ # fmt: on
+
+
+@dataclass
+class DensePoseUVConfidenceConfig:
+ """
+ Configuration options for confidence on UV data
+ """
+
+ enabled: bool = False
+ # lower bound on UV confidences
+ epsilon: float = 0.01
+ type: DensePoseUVConfidenceType = DensePoseUVConfidenceType.IID_ISO
+
+
+@dataclass
+class DensePoseSegmConfidenceConfig:
+ """
+ Configuration options for confidence on segmentation
+ """
+
+ enabled: bool = False
+ # lower bound on confidence values
+ epsilon: float = 0.01
+
+
+@dataclass
+class DensePoseConfidenceModelConfig:
+ """
+ Configuration options for confidence models
+ """
+
+ # confidence for U and V values
+ uv_confidence: DensePoseUVConfidenceConfig
+ # segmentation confidence
+ segm_confidence: DensePoseSegmConfidenceConfig
+
+ @staticmethod
+ def from_cfg(cfg: CfgNode) -> "DensePoseConfidenceModelConfig":
+ return DensePoseConfidenceModelConfig(
+ uv_confidence=DensePoseUVConfidenceConfig(
+ enabled=cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.ENABLED,
+ epsilon=cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.EPSILON,
+ type=DensePoseUVConfidenceType(cfg.MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE.TYPE),
+ ),
+ segm_confidence=DensePoseSegmConfidenceConfig(
+ enabled=cfg.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE.ENABLED,
+ epsilon=cfg.MODEL.ROI_DENSEPOSE_HEAD.SEGM_CONFIDENCE.EPSILON,
+ ),
+ )
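+
+
+# Usage sketch (assuming `cfg` is a CfgNode that already carries the
+# MODEL.ROI_DENSEPOSE_HEAD.UV_CONFIDENCE / SEGM_CONFIDENCE options):
+#
+#     confidence_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg)
+#     if confidence_cfg.uv_confidence.enabled:
+#         uv_type = confidence_cfg.uv_confidence.type  # a DensePoseUVConfidenceType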
diff --git a/densepose/modeling/cse/__init__.py b/densepose/modeling/cse/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..80248c94c5cc23f1503a6338af225f63bc8cec42
--- /dev/null
+++ b/densepose/modeling/cse/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+# pyre-unsafe
+
+from .vertex_direct_embedder import VertexDirectEmbedder
+from .vertex_feature_embedder import VertexFeatureEmbedder
+from .embedder import Embedder
diff --git a/densepose/modeling/cse/__pycache__/__init__.cpython-39.pyc b/densepose/modeling/cse/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9c0f3d5a1eb6db87191fc1ca581c26a56dcb1309
Binary files /dev/null and b/densepose/modeling/cse/__pycache__/__init__.cpython-39.pyc differ
diff --git a/densepose/modeling/cse/__pycache__/embedder.cpython-39.pyc b/densepose/modeling/cse/__pycache__/embedder.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8ab612169300ac2dcc121fe19e98b48577a6c342
Binary files /dev/null and b/densepose/modeling/cse/__pycache__/embedder.cpython-39.pyc differ
diff --git a/densepose/modeling/cse/__pycache__/utils.cpython-39.pyc b/densepose/modeling/cse/__pycache__/utils.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5cd25bc909172cd84687f94dcbea5be158f32530
Binary files /dev/null and b/densepose/modeling/cse/__pycache__/utils.cpython-39.pyc differ
diff --git a/densepose/modeling/cse/__pycache__/vertex_direct_embedder.cpython-39.pyc b/densepose/modeling/cse/__pycache__/vertex_direct_embedder.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c0fb28165653bd12500b4bcc430ff3b6d8db5a82
Binary files /dev/null and b/densepose/modeling/cse/__pycache__/vertex_direct_embedder.cpython-39.pyc differ
diff --git a/densepose/modeling/cse/__pycache__/vertex_feature_embedder.cpython-39.pyc b/densepose/modeling/cse/__pycache__/vertex_feature_embedder.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f6c41c9a1caab804374bf4f8f91817d71f17c6ef
Binary files /dev/null and b/densepose/modeling/cse/__pycache__/vertex_feature_embedder.cpython-39.pyc differ
diff --git a/densepose/modeling/cse/embedder.py b/densepose/modeling/cse/embedder.py
new file mode 100644
index 0000000000000000000000000000000000000000..69082294acee57517b4b4ab8c11814b7c99e5232
--- /dev/null
+++ b/densepose/modeling/cse/embedder.py
@@ -0,0 +1,130 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+# pyre-unsafe
+
+import logging
+import numpy as np
+import pickle
+from enum import Enum
+from typing import Optional
+import torch
+from torch import nn
+
+from detectron2.config import CfgNode
+from detectron2.utils.file_io import PathManager
+
+from .vertex_direct_embedder import VertexDirectEmbedder
+from .vertex_feature_embedder import VertexFeatureEmbedder
+
+
+class EmbedderType(Enum):
+ """
+ Embedder type which defines how vertices are mapped into the embedding space:
+ - "vertex_direct": direct vertex embedding
+ - "vertex_feature": embedding vertex features
+ """
+
+ VERTEX_DIRECT = "vertex_direct"
+ VERTEX_FEATURE = "vertex_feature"
+
+
+def create_embedder(embedder_spec: CfgNode, embedder_dim: int) -> nn.Module:
+ """
+ Create an embedder based on the provided configuration
+
+ Args:
+ embedder_spec (CfgNode): embedder configuration
+ embedder_dim (int): embedding space dimensionality
+ Return:
+ An embedder instance for the specified configuration
+ Raises ValueError, in case of unexpected embedder type
+ """
+ embedder_type = EmbedderType(embedder_spec.TYPE)
+ if embedder_type == EmbedderType.VERTEX_DIRECT:
+ embedder = VertexDirectEmbedder(
+ num_vertices=embedder_spec.NUM_VERTICES,
+ embed_dim=embedder_dim,
+ )
+ if embedder_spec.INIT_FILE != "":
+ embedder.load(embedder_spec.INIT_FILE)
+ elif embedder_type == EmbedderType.VERTEX_FEATURE:
+ embedder = VertexFeatureEmbedder(
+ num_vertices=embedder_spec.NUM_VERTICES,
+ feature_dim=embedder_spec.FEATURE_DIM,
+ embed_dim=embedder_dim,
+ train_features=embedder_spec.FEATURES_TRAINABLE,
+ )
+ if embedder_spec.INIT_FILE != "":
+ embedder.load(embedder_spec.INIT_FILE)
+ else:
+ raise ValueError(f"Unexpected embedder type {embedder_type}")
+
+ if not embedder_spec.IS_TRAINABLE:
+ embedder.requires_grad_(False)
+
+ return embedder
+
+
+class Embedder(nn.Module):
+ """
+ Embedder module that serves as a container for embedders to use with different
+ meshes. Extends Module to automatically save / load state dict.
+ """
+
+ DEFAULT_MODEL_CHECKPOINT_PREFIX = "roi_heads.embedder."
+
+ def __init__(self, cfg: CfgNode):
+ """
+ Initialize mesh embedders. An embedder for mesh `i` is stored in a submodule
+ "embedder_{i}".
+
+ Args:
+ cfg (CfgNode): configuration options
+ """
+ super(Embedder, self).__init__()
+ self.mesh_names = set()
+ embedder_dim = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE
+ logger = logging.getLogger(__name__)
+ for mesh_name, embedder_spec in cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDERS.items():
+ logger.info(f"Adding embedder embedder_{mesh_name} with spec {embedder_spec}")
+ self.add_module(f"embedder_{mesh_name}", create_embedder(embedder_spec, embedder_dim))
+ self.mesh_names.add(mesh_name)
+ if cfg.MODEL.WEIGHTS != "":
+ self.load_from_model_checkpoint(cfg.MODEL.WEIGHTS)
+
+ def load_from_model_checkpoint(self, fpath: str, prefix: Optional[str] = None):
+ if prefix is None:
+ prefix = Embedder.DEFAULT_MODEL_CHECKPOINT_PREFIX
+ state_dict = None
+ if fpath.endswith(".pkl"):
+ with PathManager.open(fpath, "rb") as hFile:
+ state_dict = pickle.load(hFile, encoding="latin1")
+ else:
+ with PathManager.open(fpath, "rb") as hFile:
+ state_dict = torch.load(hFile, map_location=torch.device("cpu"))
+ if state_dict is not None and "model" in state_dict:
+ state_dict_local = {}
+ for key in state_dict["model"]:
+ if key.startswith(prefix):
+ v_key = state_dict["model"][key]
+ if isinstance(v_key, np.ndarray):
+ v_key = torch.from_numpy(v_key)
+ state_dict_local[key[len(prefix) :]] = v_key
+ # non-strict loading to finetune on different meshes
+ self.load_state_dict(state_dict_local, strict=False)
+
+ def forward(self, mesh_name: str) -> torch.Tensor:
+ """
+ Produce vertex embeddings for the specific mesh; vertex embeddings are
+ a tensor of shape [N, D] where:
+ N = number of vertices
+ D = number of dimensions in the embedding space
+ Args:
+ mesh_name (str): name of a mesh for which to obtain vertex embeddings
+ Return:
+ Vertex embeddings, a tensor of shape [N, D]
+ """
+ return getattr(self, f"embedder_{mesh_name}")()
+
+ def has_embeddings(self, mesh_name: str) -> bool:
+ return hasattr(self, f"embedder_{mesh_name}")
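+
+
+# Usage sketch ("smpl_27554" is an example mesh name; actual names come from
+# cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDERS):
+#
+#     embedder = build_densepose_embedder(cfg)  # Embedder or None
+#     if embedder is not None and embedder.has_embeddings("smpl_27554"):
+#         vertex_embeddings = embedder("smpl_27554")  # tensor [N, D]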
diff --git a/densepose/modeling/cse/utils.py b/densepose/modeling/cse/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..bb83b1af580ef76d8eddb03980fa14fe97298965
--- /dev/null
+++ b/densepose/modeling/cse/utils.py
@@ -0,0 +1,83 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+# pyre-unsafe
+
+import torch
+from torch.nn import functional as F
+
+
+def squared_euclidean_distance_matrix(pts1: torch.Tensor, pts2: torch.Tensor) -> torch.Tensor:
+ """
+ Get squared Euclidean Distance Matrix
+ Computes pairwise squared Euclidean distances between points
+
+ Args:
+ pts1: Tensor [M x D], M is the number of points, D is feature dimensionality
+ pts2: Tensor [N x D], N is the number of points, D is feature dimensionality
+
+ Return:
+ Tensor [M, N]: matrix of squared Euclidean distances; at index (m, n)
+ it contains || pts1[m] - pts2[n] ||^2
+ """
+ edm = torch.mm(-2 * pts1, pts2.t())
+ edm += (pts1 * pts1).sum(1, keepdim=True) + (pts2 * pts2).sum(1, keepdim=True).t()
+ return edm.contiguous()
+
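+# Example (sanity check of the formula above): for pts1 = torch.zeros(2, 3) and
+# pts2 = torch.ones(4, 3) the result is a [2, 4] matrix filled with 3.0, since each
+# of the 3 coordinates differs by 1.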
+
+def normalize_embeddings(embeddings: torch.Tensor, epsilon: float = 1e-6) -> torch.Tensor:
+ """
+ Normalize N D-dimensional embedding vectors arranged in a tensor [N, D]
+
+ Args:
+ embeddings (tensor [N, D]): N D-dimensional embedding vectors
+ epsilon (float): minimum value for a vector norm
+ Return:
+ Normalized embeddings (tensor [N, D]), such that L2 vector norms are all equal to 1.
+ """
+ return embeddings / torch.clamp(embeddings.norm(p=None, dim=1, keepdim=True), min=epsilon)
+
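+# Example: normalize_embeddings(torch.tensor([[3.0, 4.0]])) gives [[0.6, 0.8]];
+# each row is divided by its L2 norm (clamped from below by epsilon).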
+
+def get_closest_vertices_mask_from_ES(
+ E: torch.Tensor,
+ S: torch.Tensor,
+ h: int,
+ w: int,
+ mesh_vertex_embeddings: torch.Tensor,
+ device: torch.device,
+):
+ """
+ Interpolate Embeddings and Segmentations to the size of a given bounding box,
+ and compute closest vertices and the segmentation mask
+
+ Args:
+ E (tensor [1, D, H, W]): D-dimensional embedding vectors for every point of the
+ default-sized box
+ S (tensor [1, 2, H, W]): 2-dimensional segmentation mask for every point of the
+ default-sized box
+ h (int): height of the target bounding box
+ w (int): width of the target bounding box
+ mesh_vertex_embeddings (tensor [N, D]): vertex embeddings for a chosen mesh
+ N is the number of vertices in the mesh, D is feature dimensionality
+ device (torch.device): device to move the tensors to
+ Return:
+ Closest Vertices (tensor [h, w]), int, for every point of the resulting box
+ Segmentation mask (tensor [h, w]), boolean, for every point of the resulting box
+ """
+ embedding_resized = F.interpolate(E, size=(h, w), mode="bilinear")[0].to(device)
+ coarse_segm_resized = F.interpolate(S, size=(h, w), mode="bilinear")[0].to(device)
+ mask = coarse_segm_resized.argmax(0) > 0
+ closest_vertices = torch.zeros(mask.shape, dtype=torch.long, device=device)
+ all_embeddings = embedding_resized[:, mask].t()
+ size_chunk = 10_000 # Chunking to avoid possible OOM
+ edm = []
+ if len(all_embeddings) == 0:
+ return closest_vertices, mask
+ for chunk in range((len(all_embeddings) - 1) // size_chunk + 1):
+ chunk_embeddings = all_embeddings[size_chunk * chunk : size_chunk * (chunk + 1)]
+ edm.append(
+ torch.argmin(
+ squared_euclidean_distance_matrix(chunk_embeddings, mesh_vertex_embeddings), dim=1
+ )
+ )
+ closest_vertices[mask] = torch.cat(edm)
+ return closest_vertices, mask
diff --git a/densepose/modeling/cse/vertex_direct_embedder.py b/densepose/modeling/cse/vertex_direct_embedder.py
new file mode 100644
index 0000000000000000000000000000000000000000..32d92e7786336da0ed9582793620c33a3853195e
--- /dev/null
+++ b/densepose/modeling/cse/vertex_direct_embedder.py
@@ -0,0 +1,66 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+# pyre-unsafe
+
+import pickle
+import torch
+from torch import nn
+
+from detectron2.utils.file_io import PathManager
+
+from .utils import normalize_embeddings
+
+
+class VertexDirectEmbedder(nn.Module):
+ """
+ Class responsible for embedding vertices. Vertex embeddings take
+ the form of a tensor of size [N, D], where
+ N = number of vertices
+ D = number of dimensions in the embedding space
+ """
+
+ def __init__(self, num_vertices: int, embed_dim: int):
+ """
+ Initialize embedder; embeddings are zero-initialized
+
+ Args:
+ num_vertices (int): number of vertices to embed
+ embed_dim (int): number of dimensions in the embedding space
+ """
+ super(VertexDirectEmbedder, self).__init__()
+ self.embeddings = nn.Parameter(torch.Tensor(num_vertices, embed_dim))
+ self.reset_parameters()
+
+ @torch.no_grad()
+ def reset_parameters(self):
+ """
+ Reset embeddings to zeros (they are populated later via `load` or training)
+ """
+ self.embeddings.zero_()
+
+ def forward(self) -> torch.Tensor:
+ """
+ Produce vertex embeddings, a tensor of shape [N, D] where:
+ N = number of vertices
+ D = number of dimensions in the embedding space
+
+ Return:
+ Full vertex embeddings, a tensor of shape [N, D]
+ """
+ return normalize_embeddings(self.embeddings)
+
+ @torch.no_grad()
+ def load(self, fpath: str):
+ """
+ Load data from a file
+
+ Args:
+ fpath (str): file path to load data from
+ """
+ with PathManager.open(fpath, "rb") as hFile:
+ data = pickle.load(hFile)
+ for name in ["embeddings"]:
+ if name in data:
+ getattr(self, name).copy_(
+ torch.tensor(data[name]).float().to(device=getattr(self, name).device)
+ )
diff --git a/densepose/modeling/cse/vertex_feature_embedder.py b/densepose/modeling/cse/vertex_feature_embedder.py
new file mode 100644
index 0000000000000000000000000000000000000000..cb495f88bc5a205e3639d797910c899d6344cca5
--- /dev/null
+++ b/densepose/modeling/cse/vertex_feature_embedder.py
@@ -0,0 +1,77 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+# pyre-unsafe
+
+import pickle
+import torch
+from torch import nn
+
+from detectron2.utils.file_io import PathManager
+
+from .utils import normalize_embeddings
+
+
+class VertexFeatureEmbedder(nn.Module):
+ """
+ Class responsible for embedding vertex features. Mapping from
+ feature space to the embedding space is a tensor of size [K, D], where
+ K = number of dimensions in the feature space
+ D = number of dimensions in the embedding space
+ Vertex features are a tensor of size [N, K], where
+ N = number of vertices
+ K = number of dimensions in the feature space
+ Vertex embeddings are computed as F * E = tensor of size [N, D]
+ """
+
+ def __init__(
+ self, num_vertices: int, feature_dim: int, embed_dim: int, train_features: bool = False
+ ):
+ """
+ Initialize embedder, set embeddings to zero
+
+ Args:
+ num_vertices (int): number of vertices to embed
+ feature_dim (int): number of dimensions in the feature space
+ embed_dim (int): number of dimensions in the embedding space
+ train_features (bool): determines whether vertex features should
+ be trained (default: False)
+ """
+ super(VertexFeatureEmbedder, self).__init__()
+ if train_features:
+ self.features = nn.Parameter(torch.Tensor(num_vertices, feature_dim))
+ else:
+ self.register_buffer("features", torch.Tensor(num_vertices, feature_dim))
+ self.embeddings = nn.Parameter(torch.Tensor(feature_dim, embed_dim))
+ self.reset_parameters()
+
+ @torch.no_grad()
+ def reset_parameters(self):
+ self.features.zero_()
+ self.embeddings.zero_()
+
+ def forward(self) -> torch.Tensor:
+ """
+ Produce vertex embeddings, a tensor of shape [N, D] where:
+ N = number of vertices
+ D = number of dimensions in the embedding space
+
+ Return:
+ Full vertex embeddings, a tensor of shape [N, D]
+ """
+ return normalize_embeddings(torch.mm(self.features, self.embeddings))
+
+ @torch.no_grad()
+ def load(self, fpath: str):
+ """
+ Load data from a file
+
+ Args:
+ fpath (str): file path to load data from
+ """
+ with PathManager.open(fpath, "rb") as hFile:
+ data = pickle.load(hFile)
+ for name in ["features", "embeddings"]:
+ if name in data:
+ getattr(self, name).copy_(
+ torch.tensor(data[name]).float().to(device=getattr(self, name).device)
+ )
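+
+
+# Illustrative sketch (assumption): with features F of shape [N, K] and the
+# learned map E of shape [K, D], forward() returns normalize(F @ E), i.e. one
+# L2-normalized D-dimensional embedding per vertex.
+#
+#   embedder = VertexFeatureEmbedder(num_vertices=1000, feature_dim=256, embed_dim=16)
+#   embedder.load("vertex_features.pkl")  # hypothetical pickle with "features"/"embeddings" entries
+#   e = embedder()                        # tensor of shape [1000, 16]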
diff --git a/densepose/modeling/densepose_checkpoint.py b/densepose/modeling/densepose_checkpoint.py
new file mode 100644
index 0000000000000000000000000000000000000000..c85711e976efdf56f0c6494fd19636e7411be2b4
--- /dev/null
+++ b/densepose/modeling/densepose_checkpoint.py
@@ -0,0 +1,37 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+from collections import OrderedDict
+
+from detectron2.checkpoint import DetectionCheckpointer
+
+
+def _rename_HRNet_weights(weights):
+ # We detect and rename HRNet weights for DensePose. 1956 and 1716 are values that are
+ # common to all HRNet pretrained weights, and should be enough to accurately identify them
+ if (
+ len(weights["model"].keys()) == 1956
+ and len([k for k in weights["model"].keys() if k.startswith("stage")]) == 1716
+ ):
+ hrnet_weights = OrderedDict()
+ for k in weights["model"].keys():
+ hrnet_weights["backbone.bottom_up." + str(k)] = weights["model"][k]
+ return {"model": hrnet_weights}
+ else:
+ return weights
+
+
+class DensePoseCheckpointer(DetectionCheckpointer):
+ """
+ Same as :class:`DetectionCheckpointer`, but is able to handle HRNet weights
+ """
+
+ def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables):
+ super().__init__(model, save_dir, save_to_disk=save_to_disk, **checkpointables)
+
+ def _load_file(self, filename: str) -> object:
+ """
+ Adding hrnet support
+ """
+ weights = super()._load_file(filename)
+ return _rename_HRNet_weights(weights)
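+
+
+# Illustrative note (assumption about typical HRNet key names): a checkpoint key
+# such as "stage2.0.branches.0.0.conv1.weight" is re-exposed as
+# "backbone.bottom_up.stage2.0.branches.0.0.conv1.weight", matching the module
+# hierarchy used when HRNet serves as the bottom-up network of HRFPN.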
diff --git a/densepose/modeling/filter.py b/densepose/modeling/filter.py
new file mode 100644
index 0000000000000000000000000000000000000000..503321004e39c1bd96be3512a3811e33fed4d008
--- /dev/null
+++ b/densepose/modeling/filter.py
@@ -0,0 +1,96 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from typing import List
+import torch
+
+from detectron2.config import CfgNode
+from detectron2.structures import Instances
+from detectron2.structures.boxes import matched_pairwise_iou
+
+
+class DensePoseDataFilter:
+ def __init__(self, cfg: CfgNode):
+ self.iou_threshold = cfg.MODEL.ROI_DENSEPOSE_HEAD.FG_IOU_THRESHOLD
+ self.keep_masks = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS
+
+ @torch.no_grad()
+ def __call__(self, features: List[torch.Tensor], proposals_with_targets: List[Instances]):
+ """
+ Filters proposals with targets to keep only the ones relevant for
+ DensePose training
+
+ Args:
+ features (list[Tensor]): input data as a list of features,
+ each feature is a tensor. Axis 0 represents the number of
+ images `N` in the input data; axes 1-3 are channels,
+ height, and width, which may vary between features
+ (e.g., if a feature pyramid is used).
+ proposals_with_targets (list[Instances]): length `N` list of
+ `Instances`. The i-th `Instances` contains instances
+ (proposals, GT) for the i-th input image.
+ Returns:
+ list[Tensor]: filtered features
+ list[Instances]: filtered proposals
+ """
+ proposals_filtered = []
+ # TODO: the commented out code was supposed to correctly deal with situations
+ # where no valid DensePose GT is available for certain images. The corresponding
+ # image features were sliced and proposals were filtered. This led to performance
+ # deterioration, both in terms of runtime and in terms of evaluation results.
+ #
+ # feature_mask = torch.ones(
+ # len(proposals_with_targets),
+ # dtype=torch.bool,
+ # device=features[0].device if len(features) > 0 else torch.device("cpu"),
+ # )
+ for i, proposals_per_image in enumerate(proposals_with_targets):
+ if not proposals_per_image.has("gt_densepose") and (
+ not proposals_per_image.has("gt_masks") or not self.keep_masks
+ ):
+ # feature_mask[i] = 0
+ continue
+ gt_boxes = proposals_per_image.gt_boxes
+ est_boxes = proposals_per_image.proposal_boxes
+ # apply match threshold for densepose head
+ iou = matched_pairwise_iou(gt_boxes, est_boxes)
+ iou_select = iou > self.iou_threshold
+ proposals_per_image = proposals_per_image[iou_select] # pyre-ignore[6]
+
+ N_gt_boxes = len(proposals_per_image.gt_boxes)
+ assert N_gt_boxes == len(proposals_per_image.proposal_boxes), (
+ f"The number of GT boxes {N_gt_boxes} is different from the "
+ f"number of proposal boxes {len(proposals_per_image.proposal_boxes)}"
+ )
+ # filter out any target without suitable annotation
+ if self.keep_masks:
+ gt_masks = (
+ proposals_per_image.gt_masks
+ if hasattr(proposals_per_image, "gt_masks")
+ else [None] * N_gt_boxes
+ )
+ else:
+ gt_masks = [None] * N_gt_boxes
+ gt_densepose = (
+ proposals_per_image.gt_densepose
+ if hasattr(proposals_per_image, "gt_densepose")
+ else [None] * N_gt_boxes
+ )
+ assert len(gt_masks) == N_gt_boxes
+ assert len(gt_densepose) == N_gt_boxes
+ selected_indices = [
+ i
+ for i, (dp_target, mask_target) in enumerate(zip(gt_densepose, gt_masks))
+ if (dp_target is not None) or (mask_target is not None)
+ ]
+ # if not len(selected_indices):
+ # feature_mask[i] = 0
+ # continue
+ if len(selected_indices) != N_gt_boxes:
+ proposals_per_image = proposals_per_image[selected_indices] # pyre-ignore[6]
+ assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.proposal_boxes)
+ proposals_filtered.append(proposals_per_image)
+ # features_filtered = [feature[feature_mask] for feature in features]
+ # return features_filtered, proposals_filtered
+ return features, proposals_filtered
diff --git a/densepose/modeling/hrfpn.py b/densepose/modeling/hrfpn.py
new file mode 100644
index 0000000000000000000000000000000000000000..a19c3261198798738130267cb4c35022ddf8a9e6
--- /dev/null
+++ b/densepose/modeling/hrfpn.py
@@ -0,0 +1,184 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+"""
+MIT License
+Copyright (c) 2019 Microsoft
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+"""
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from detectron2.layers import ShapeSpec
+from detectron2.modeling.backbone import BACKBONE_REGISTRY
+from detectron2.modeling.backbone.backbone import Backbone
+
+from .hrnet import build_pose_hrnet_backbone
+
+
+class HRFPN(Backbone):
+ """HRFPN (High Resolution Feature Pyramids)
+ Transforms outputs of HRNet backbone so they are suitable for the ROI_heads
+ arXiv: https://arxiv.org/abs/1904.04514
+ Adapted from https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/necks/hrfpn.py
+ Args:
+ bottom_up (Backbone): HRNet backbone that provides the multi-resolution features
+ in_features (list): names of the input features (output of HRNet)
+ in_channels (list): number of channels for each branch
+ out_channels (int): output channels of feature pyramids
+ n_out_features (int): number of output stages
+ pooling (str): pooling for generating feature pyramids (from {MAX, AVG})
+ share_conv (bool): if True, share a single output conv across all pyramid levels; otherwise use one conv per level
+ """
+
+ def __init__(
+ self,
+ bottom_up,
+ in_features,
+ n_out_features,
+ in_channels,
+ out_channels,
+ pooling="AVG",
+ share_conv=False,
+ ):
+ super(HRFPN, self).__init__()
+ assert isinstance(in_channels, list)
+ self.bottom_up = bottom_up
+ self.in_features = in_features
+ self.n_out_features = n_out_features
+ self.in_channels = in_channels
+ self.out_channels = out_channels
+ self.num_ins = len(in_channels)
+ self.share_conv = share_conv
+
+ if self.share_conv:
+ self.fpn_conv = nn.Conv2d(
+ in_channels=out_channels, out_channels=out_channels, kernel_size=3, padding=1
+ )
+ else:
+ self.fpn_conv = nn.ModuleList()
+ for _ in range(self.n_out_features):
+ self.fpn_conv.append(
+ nn.Conv2d(
+ in_channels=out_channels,
+ out_channels=out_channels,
+ kernel_size=3,
+ padding=1,
+ )
+ )
+
+ # Custom change: Replaces a simple bilinear interpolation
+ self.interp_conv = nn.ModuleList()
+ for i in range(len(self.in_features)):
+ self.interp_conv.append(
+ nn.Sequential(
+ nn.ConvTranspose2d(
+ in_channels=in_channels[i],
+ out_channels=in_channels[i],
+ kernel_size=4,
+ stride=2**i,
+ padding=0,
+ output_padding=0,
+ bias=False,
+ ),
+ nn.BatchNorm2d(in_channels[i], momentum=0.1),
+ nn.ReLU(inplace=True),
+ )
+ )
+
+ # Custom change: Replaces the usual (reduction conv + pooling) pair with a single strided conv
+ self.reduction_pooling_conv = nn.ModuleList()
+ for i in range(self.n_out_features):
+ self.reduction_pooling_conv.append(
+ nn.Sequential(
+ nn.Conv2d(sum(in_channels), out_channels, kernel_size=2**i, stride=2**i),
+ nn.BatchNorm2d(out_channels, momentum=0.1),
+ nn.ReLU(inplace=True),
+ )
+ )
+
+ if pooling == "MAX":
+ self.pooling = F.max_pool2d
+ else:
+ self.pooling = F.avg_pool2d
+
+ self._out_features = []
+ self._out_feature_channels = {}
+ self._out_feature_strides = {}
+
+ for i in range(self.n_out_features):
+ self._out_features.append("p%d" % (i + 1))
+ self._out_feature_channels.update({self._out_features[-1]: self.out_channels})
+ self._out_feature_strides.update({self._out_features[-1]: 2 ** (i + 2)})
+
+ # default init_weights for conv(msra) and norm in ConvModule
+ def init_weights(self):
+ for m in self.modules():
+ if isinstance(m, nn.Conv2d):
+ nn.init.kaiming_normal_(m.weight, a=1)
+ nn.init.constant_(m.bias, 0)
+
+ def forward(self, inputs):
+ bottom_up_features = self.bottom_up(inputs)
+ assert len(bottom_up_features) == len(self.in_features)
+ inputs = [bottom_up_features[f] for f in self.in_features]
+
+ outs = []
+ for i in range(len(inputs)):
+ outs.append(self.interp_conv[i](inputs[i]))
+ shape_2 = min(o.shape[2] for o in outs)
+ shape_3 = min(o.shape[3] for o in outs)
+ out = torch.cat([o[:, :, :shape_2, :shape_3] for o in outs], dim=1)
+ outs = []
+ for i in range(self.n_out_features):
+ outs.append(self.reduction_pooling_conv[i](out))
+ for i in range(len(outs)): # Make shapes consistent
+ outs[-1 - i] = outs[-1 - i][
+ :, :, : outs[-1].shape[2] * 2**i, : outs[-1].shape[3] * 2**i
+ ]
+ outputs = []
+ for i in range(len(outs)):
+ if self.share_conv:
+ outputs.append(self.fpn_conv(outs[i]))
+ else:
+ outputs.append(self.fpn_conv[i](outs[i]))
+
+ assert len(self._out_features) == len(outputs)
+ return dict(zip(self._out_features, outputs))
+
+
+@BACKBONE_REGISTRY.register()
+def build_hrfpn_backbone(cfg, input_shape: ShapeSpec) -> HRFPN:
+
+ in_channels = cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS
+ in_features = ["p%d" % (i + 1) for i in range(cfg.MODEL.HRNET.STAGE4.NUM_BRANCHES)]
+ n_out_features = len(cfg.MODEL.ROI_HEADS.IN_FEATURES)
+ out_channels = cfg.MODEL.HRNET.HRFPN.OUT_CHANNELS
+ hrnet = build_pose_hrnet_backbone(cfg, input_shape)
+ hrfpn = HRFPN(
+ hrnet,
+ in_features,
+ n_out_features,
+ in_channels,
+ out_channels,
+ pooling="AVG",
+ share_conv=False,
+ )
+
+ return hrfpn
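+
+
+# Summary of the forward pass above (descriptive comment, no behavior change):
+# each HRNet branch is first upsampled by its interp_conv, the branches are
+# concatenated along channels, one strided conv per pyramid level produces
+# p1..pN at strides 4, 8, 16, ..., and a final 3x3 conv refines each level
+# before it is passed to the ROI heads.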
diff --git a/densepose/modeling/hrnet.py b/densepose/modeling/hrnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..f8e3cab545c7f999300676bb27fa0461abd143e2
--- /dev/null
+++ b/densepose/modeling/hrnet.py
@@ -0,0 +1,476 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+# ------------------------------------------------------------------------------
+# Copyright (c) Microsoft
+# Licensed under the MIT License.
+# Written by Bin Xiao (leoxiaobin@gmail.com)
+# Modified by Bowen Cheng (bcheng9@illinois.edu)
+# Adapted from https://github.com/HRNet/Higher-HRNet-Human-Pose-Estimation/blob/master/lib/models/pose_higher_hrnet.py # noqa
+# ------------------------------------------------------------------------------
+
+# pyre-unsafe
+
+from __future__ import absolute_import, division, print_function
+import logging
+import torch.nn as nn
+
+from detectron2.layers import ShapeSpec
+from detectron2.modeling.backbone import BACKBONE_REGISTRY
+from detectron2.modeling.backbone.backbone import Backbone
+
+BN_MOMENTUM = 0.1
+logger = logging.getLogger(__name__)
+
+__all__ = ["build_pose_hrnet_backbone", "PoseHigherResolutionNet"]
+
+
+def conv3x3(in_planes, out_planes, stride=1):
+ """3x3 convolution with padding"""
+ return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)
+
+
+class BasicBlock(nn.Module):
+ expansion = 1
+
+ def __init__(self, inplanes, planes, stride=1, downsample=None):
+ super(BasicBlock, self).__init__()
+ self.conv1 = conv3x3(inplanes, planes, stride)
+ self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
+ self.relu = nn.ReLU(inplace=True)
+ self.conv2 = conv3x3(planes, planes)
+ self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
+ self.downsample = downsample
+ self.stride = stride
+
+ def forward(self, x):
+ residual = x
+
+ out = self.conv1(x)
+ out = self.bn1(out)
+ out = self.relu(out)
+
+ out = self.conv2(out)
+ out = self.bn2(out)
+
+ if self.downsample is not None:
+ residual = self.downsample(x)
+
+ out += residual
+ out = self.relu(out)
+
+ return out
+
+
+class Bottleneck(nn.Module):
+ expansion = 4
+
+ def __init__(self, inplanes, planes, stride=1, downsample=None):
+ super(Bottleneck, self).__init__()
+ self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
+ self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
+ self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
+ self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
+ self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
+ self.bn3 = nn.BatchNorm2d(planes * self.expansion, momentum=BN_MOMENTUM)
+ self.relu = nn.ReLU(inplace=True)
+ self.downsample = downsample
+ self.stride = stride
+
+ def forward(self, x):
+ residual = x
+
+ out = self.conv1(x)
+ out = self.bn1(out)
+ out = self.relu(out)
+
+ out = self.conv2(out)
+ out = self.bn2(out)
+ out = self.relu(out)
+
+ out = self.conv3(out)
+ out = self.bn3(out)
+
+ if self.downsample is not None:
+ residual = self.downsample(x)
+
+ out += residual
+ out = self.relu(out)
+
+ return out
+
+
+class HighResolutionModule(nn.Module):
+ """HighResolutionModule
+ Building block of the PoseHigherResolutionNet (see lower)
+ arXiv: https://arxiv.org/abs/1908.10357
+ Args:
+ num_branches (int): number of branches of the module
+ blocks (type): block class used by the module (BasicBlock or Bottleneck)
+ num_blocks (list): number of blocks per branch
+ num_inchannels (list): number of input channels per branch
+ num_channels (list): number of channels of each branch
+ multi_scale_output (bool): only used by the last module of PoseHigherResolutionNet
+ """
+
+ def __init__(
+ self,
+ num_branches,
+ blocks,
+ num_blocks,
+ num_inchannels,
+ num_channels,
+ multi_scale_output=True,
+ ):
+ super(HighResolutionModule, self).__init__()
+ self._check_branches(num_branches, blocks, num_blocks, num_inchannels, num_channels)
+
+ self.num_inchannels = num_inchannels
+ self.num_branches = num_branches
+
+ self.multi_scale_output = multi_scale_output
+
+ self.branches = self._make_branches(num_branches, blocks, num_blocks, num_channels)
+ self.fuse_layers = self._make_fuse_layers()
+ self.relu = nn.ReLU(True)
+
+ def _check_branches(self, num_branches, blocks, num_blocks, num_inchannels, num_channels):
+ if num_branches != len(num_blocks):
+ error_msg = "NUM_BRANCHES({}) <> NUM_BLOCKS({})".format(num_branches, len(num_blocks))
+ logger.error(error_msg)
+ raise ValueError(error_msg)
+
+ if num_branches != len(num_channels):
+ error_msg = "NUM_BRANCHES({}) <> NUM_CHANNELS({})".format(
+ num_branches, len(num_channels)
+ )
+ logger.error(error_msg)
+ raise ValueError(error_msg)
+
+ if num_branches != len(num_inchannels):
+ error_msg = "NUM_BRANCHES({}) <> NUM_INCHANNELS({})".format(
+ num_branches, len(num_inchannels)
+ )
+ logger.error(error_msg)
+ raise ValueError(error_msg)
+
+ def _make_one_branch(self, branch_index, block, num_blocks, num_channels, stride=1):
+ downsample = None
+ if (
+ stride != 1
+ or self.num_inchannels[branch_index] != num_channels[branch_index] * block.expansion
+ ):
+ downsample = nn.Sequential(
+ nn.Conv2d(
+ self.num_inchannels[branch_index],
+ num_channels[branch_index] * block.expansion,
+ kernel_size=1,
+ stride=stride,
+ bias=False,
+ ),
+ nn.BatchNorm2d(num_channels[branch_index] * block.expansion, momentum=BN_MOMENTUM),
+ )
+
+ layers = []
+ layers.append(
+ block(self.num_inchannels[branch_index], num_channels[branch_index], stride, downsample)
+ )
+ self.num_inchannels[branch_index] = num_channels[branch_index] * block.expansion
+ for _ in range(1, num_blocks[branch_index]):
+ layers.append(block(self.num_inchannels[branch_index], num_channels[branch_index]))
+
+ return nn.Sequential(*layers)
+
+ def _make_branches(self, num_branches, block, num_blocks, num_channels):
+ branches = []
+
+ for i in range(num_branches):
+ branches.append(self._make_one_branch(i, block, num_blocks, num_channels))
+
+ return nn.ModuleList(branches)
+
+ def _make_fuse_layers(self):
+ if self.num_branches == 1:
+ return None
+
+ num_branches = self.num_branches
+ num_inchannels = self.num_inchannels
+ fuse_layers = []
+ for i in range(num_branches if self.multi_scale_output else 1):
+ fuse_layer = []
+ for j in range(num_branches):
+ if j > i:
+ fuse_layer.append(
+ nn.Sequential(
+ nn.Conv2d(num_inchannels[j], num_inchannels[i], 1, 1, 0, bias=False),
+ nn.BatchNorm2d(num_inchannels[i]),
+ nn.Upsample(scale_factor=2 ** (j - i), mode="nearest"),
+ )
+ )
+ elif j == i:
+ fuse_layer.append(None)
+ else:
+ conv3x3s = []
+ for k in range(i - j):
+ if k == i - j - 1:
+ num_outchannels_conv3x3 = num_inchannels[i]
+ conv3x3s.append(
+ nn.Sequential(
+ nn.Conv2d(
+ num_inchannels[j],
+ num_outchannels_conv3x3,
+ 3,
+ 2,
+ 1,
+ bias=False,
+ ),
+ nn.BatchNorm2d(num_outchannels_conv3x3),
+ )
+ )
+ else:
+ num_outchannels_conv3x3 = num_inchannels[j]
+ conv3x3s.append(
+ nn.Sequential(
+ nn.Conv2d(
+ num_inchannels[j],
+ num_outchannels_conv3x3,
+ 3,
+ 2,
+ 1,
+ bias=False,
+ ),
+ nn.BatchNorm2d(num_outchannels_conv3x3),
+ nn.ReLU(True),
+ )
+ )
+ fuse_layer.append(nn.Sequential(*conv3x3s))
+ fuse_layers.append(nn.ModuleList(fuse_layer))
+
+ return nn.ModuleList(fuse_layers)
+
+ def get_num_inchannels(self):
+ return self.num_inchannels
+
+ def forward(self, x):
+ if self.num_branches == 1:
+ return [self.branches[0](x[0])]
+
+ for i in range(self.num_branches):
+ x[i] = self.branches[i](x[i])
+
+ x_fuse = []
+
+ for i in range(len(self.fuse_layers)):
+ y = x[0] if i == 0 else self.fuse_layers[i][0](x[0])
+ for j in range(1, self.num_branches):
+ if i == j:
+ y = y + x[j]
+ else:
+ z = self.fuse_layers[i][j](x[j])[:, :, : y.shape[2], : y.shape[3]]
+ y = y + z
+ x_fuse.append(self.relu(y))
+
+ return x_fuse
+
+
+blocks_dict = {"BASIC": BasicBlock, "BOTTLENECK": Bottleneck}
+
+
+class PoseHigherResolutionNet(Backbone):
+ """PoseHigherResolutionNet
+ Composed of several HighResolutionModule tied together with ConvNets
+ Adapted from the GitHub version to fit with HRFPN and the Detectron2 infrastructure
+ arXiv: https://arxiv.org/abs/1908.10357
+ """
+
+ def __init__(self, cfg, **kwargs):
+ self.inplanes = cfg.MODEL.HRNET.STEM_INPLANES
+ super(PoseHigherResolutionNet, self).__init__()
+
+ # stem net
+ self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
+ self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
+ self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, bias=False)
+ self.bn2 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
+ self.relu = nn.ReLU(inplace=True)
+ self.layer1 = self._make_layer(Bottleneck, 64, 4)
+
+ self.stage2_cfg = cfg.MODEL.HRNET.STAGE2
+ num_channels = self.stage2_cfg.NUM_CHANNELS
+ block = blocks_dict[self.stage2_cfg.BLOCK]
+ num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
+ self.transition1 = self._make_transition_layer([256], num_channels)
+ self.stage2, pre_stage_channels = self._make_stage(self.stage2_cfg, num_channels)
+
+ self.stage3_cfg = cfg.MODEL.HRNET.STAGE3
+ num_channels = self.stage3_cfg.NUM_CHANNELS
+ block = blocks_dict[self.stage3_cfg.BLOCK]
+ num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
+ self.transition2 = self._make_transition_layer(pre_stage_channels, num_channels)
+ self.stage3, pre_stage_channels = self._make_stage(self.stage3_cfg, num_channels)
+
+ self.stage4_cfg = cfg.MODEL.HRNET.STAGE4
+ num_channels = self.stage4_cfg.NUM_CHANNELS
+ block = blocks_dict[self.stage4_cfg.BLOCK]
+ num_channels = [num_channels[i] * block.expansion for i in range(len(num_channels))]
+ self.transition3 = self._make_transition_layer(pre_stage_channels, num_channels)
+ self.stage4, pre_stage_channels = self._make_stage(
+ self.stage4_cfg, num_channels, multi_scale_output=True
+ )
+
+ self._out_features = []
+ self._out_feature_channels = {}
+ self._out_feature_strides = {}
+
+ for i in range(cfg.MODEL.HRNET.STAGE4.NUM_BRANCHES):
+ self._out_features.append("p%d" % (i + 1))
+ self._out_feature_channels.update(
+ {self._out_features[-1]: cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS[i]}
+ )
+ self._out_feature_strides.update({self._out_features[-1]: 1})
+
+ def _get_deconv_cfg(self, deconv_kernel):
+ if deconv_kernel == 4:
+ padding = 1
+ output_padding = 0
+ elif deconv_kernel == 3:
+ padding = 1
+ output_padding = 1
+ elif deconv_kernel == 2:
+ padding = 0
+ output_padding = 0
+
+ return deconv_kernel, padding, output_padding
+
+ def _make_transition_layer(self, num_channels_pre_layer, num_channels_cur_layer):
+ num_branches_cur = len(num_channels_cur_layer)
+ num_branches_pre = len(num_channels_pre_layer)
+
+ transition_layers = []
+ for i in range(num_branches_cur):
+ if i < num_branches_pre:
+ if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
+ transition_layers.append(
+ nn.Sequential(
+ nn.Conv2d(
+ num_channels_pre_layer[i],
+ num_channels_cur_layer[i],
+ 3,
+ 1,
+ 1,
+ bias=False,
+ ),
+ nn.BatchNorm2d(num_channels_cur_layer[i]),
+ nn.ReLU(inplace=True),
+ )
+ )
+ else:
+ transition_layers.append(None)
+ else:
+ conv3x3s = []
+ for j in range(i + 1 - num_branches_pre):
+ inchannels = num_channels_pre_layer[-1]
+ outchannels = (
+ num_channels_cur_layer[i] if j == i - num_branches_pre else inchannels
+ )
+ conv3x3s.append(
+ nn.Sequential(
+ nn.Conv2d(inchannels, outchannels, 3, 2, 1, bias=False),
+ nn.BatchNorm2d(outchannels),
+ nn.ReLU(inplace=True),
+ )
+ )
+ transition_layers.append(nn.Sequential(*conv3x3s))
+
+ return nn.ModuleList(transition_layers)
+
+ def _make_layer(self, block, planes, blocks, stride=1):
+ downsample = None
+ if stride != 1 or self.inplanes != planes * block.expansion:
+ downsample = nn.Sequential(
+ nn.Conv2d(
+ self.inplanes,
+ planes * block.expansion,
+ kernel_size=1,
+ stride=stride,
+ bias=False,
+ ),
+ nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
+ )
+
+ layers = []
+ layers.append(block(self.inplanes, planes, stride, downsample))
+ self.inplanes = planes * block.expansion
+ for _ in range(1, blocks):
+ layers.append(block(self.inplanes, planes))
+
+ return nn.Sequential(*layers)
+
+ def _make_stage(self, layer_config, num_inchannels, multi_scale_output=True):
+ num_modules = layer_config["NUM_MODULES"]
+ num_branches = layer_config["NUM_BRANCHES"]
+ num_blocks = layer_config["NUM_BLOCKS"]
+ num_channels = layer_config["NUM_CHANNELS"]
+ block = blocks_dict[layer_config["BLOCK"]]
+
+ modules = []
+ for i in range(num_modules):
+ # multi_scale_output is only used by the last module
+ if not multi_scale_output and i == num_modules - 1:
+ reset_multi_scale_output = False
+ else:
+ reset_multi_scale_output = True
+
+ modules.append(
+ HighResolutionModule(
+ num_branches,
+ block,
+ num_blocks,
+ num_inchannels,
+ num_channels,
+ reset_multi_scale_output,
+ )
+ )
+ num_inchannels = modules[-1].get_num_inchannels()
+
+ return nn.Sequential(*modules), num_inchannels
+
+ def forward(self, x):
+ x = self.conv1(x)
+ x = self.bn1(x)
+ x = self.relu(x)
+ x = self.conv2(x)
+ x = self.bn2(x)
+ x = self.relu(x)
+ x = self.layer1(x)
+
+ x_list = []
+ for i in range(self.stage2_cfg.NUM_BRANCHES):
+ if self.transition1[i] is not None:
+ x_list.append(self.transition1[i](x))
+ else:
+ x_list.append(x)
+ y_list = self.stage2(x_list)
+
+ x_list = []
+ for i in range(self.stage3_cfg.NUM_BRANCHES):
+ if self.transition2[i] is not None:
+ x_list.append(self.transition2[i](y_list[-1]))
+ else:
+ x_list.append(y_list[i])
+ y_list = self.stage3(x_list)
+
+ x_list = []
+ for i in range(self.stage4_cfg.NUM_BRANCHES):
+ if self.transition3[i] is not None:
+ x_list.append(self.transition3[i](y_list[-1]))
+ else:
+ x_list.append(y_list[i])
+ y_list = self.stage4(x_list)
+
+ assert len(self._out_features) == len(y_list)
+ return dict(zip(self._out_features, y_list)) # final_outputs
+
+
+@BACKBONE_REGISTRY.register()
+def build_pose_hrnet_backbone(cfg, input_shape: ShapeSpec):
+ model = PoseHigherResolutionNet(cfg)
+ return model
diff --git a/densepose/modeling/inference.py b/densepose/modeling/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..a797ff9b28e61827f5553045a6147ff3390d9fe3
--- /dev/null
+++ b/densepose/modeling/inference.py
@@ -0,0 +1,46 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+from dataclasses import fields
+from typing import Any, List
+import torch
+
+from detectron2.structures import Instances
+
+
+def densepose_inference(densepose_predictor_output: Any, detections: List[Instances]) -> None:
+ """
+ Splits DensePose predictor outputs into chunks, each chunk corresponds to
+ detections on one image. Predictor output chunks are stored in `pred_densepose`
+ attribute of the corresponding `Instances` object.
+
+ Args:
+ densepose_predictor_output: a dataclass instance (can be of different types,
+ depending on predictor used for inference). Each field can be `None`
+ (if the corresponding output was not inferred) or a tensor of size
+ [N, ...], where N = N_1 + N_2 + .. + N_k is a total number of
+ detections on all images, N_1 is the number of detections on image 1,
+ N_2 is the number of detections on image 2, etc.
+ detections: a list of objects of type `Instance`, k-th object corresponds
+ to detections on k-th image.
+ """
+ k = 0
+ for detection_i in detections:
+ if densepose_predictor_output is None:
+ # don't add `pred_densepose` attribute
+ continue
+ n_i = len(detection_i)
+
+ PredictorOutput = type(densepose_predictor_output)
+ output_i_dict = {}
+ # we assume here that `densepose_predictor_output` is a dataclass object
+ for field in fields(densepose_predictor_output):
+ field_value = getattr(densepose_predictor_output, field.name)
+ # slice tensors
+ if isinstance(field_value, torch.Tensor):
+ output_i_dict[field.name] = field_value[k : k + n_i]
+ # leave others as is
+ else:
+ output_i_dict[field.name] = field_value
+ detection_i.pred_densepose = PredictorOutput(**output_i_dict)
+ k += n_i
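+
+
+# Illustrative example (assumption): for a 3-image batch with detections of
+# lengths (3, 1, 2), a predictor output tensor of size [6, ...] is sliced into
+# chunks [0:3], [3:4] and [4:6], and each chunk is attached to the matching
+# Instances object as `pred_densepose`.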
diff --git a/densepose/modeling/losses/__init__.py b/densepose/modeling/losses/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..b028a23924b030e0bac4d554b61ed34f3110a798
--- /dev/null
+++ b/densepose/modeling/losses/__init__.py
@@ -0,0 +1,16 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from .chart import DensePoseChartLoss
+from .chart_with_confidences import DensePoseChartWithConfidenceLoss
+from .cse import DensePoseCseLoss
+from .registry import DENSEPOSE_LOSS_REGISTRY
+
+
+__all__ = [
+ "DensePoseChartLoss",
+ "DensePoseChartWithConfidenceLoss",
+ "DensePoseCseLoss",
+ "DENSEPOSE_LOSS_REGISTRY",
+]
diff --git a/densepose/modeling/losses/__pycache__/__init__.cpython-39.pyc b/densepose/modeling/losses/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b3a3ce2d26b95688e13ea6a6126f2567e1966d87
Binary files /dev/null and b/densepose/modeling/losses/__pycache__/__init__.cpython-39.pyc differ
diff --git a/densepose/modeling/losses/__pycache__/chart.cpython-39.pyc b/densepose/modeling/losses/__pycache__/chart.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..029fd17dbf7e3d1f385e13b45fc7527f73e4e463
Binary files /dev/null and b/densepose/modeling/losses/__pycache__/chart.cpython-39.pyc differ
diff --git a/densepose/modeling/losses/__pycache__/chart_with_confidences.cpython-39.pyc b/densepose/modeling/losses/__pycache__/chart_with_confidences.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a88b5814984b5d1f0c9eeb592cb4bc8e1d50de0e
Binary files /dev/null and b/densepose/modeling/losses/__pycache__/chart_with_confidences.cpython-39.pyc differ
diff --git a/densepose/modeling/losses/__pycache__/cse.cpython-39.pyc b/densepose/modeling/losses/__pycache__/cse.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7cda1964edaa396b5a94619f342eb6039a70ee80
Binary files /dev/null and b/densepose/modeling/losses/__pycache__/cse.cpython-39.pyc differ
diff --git a/densepose/modeling/losses/__pycache__/cycle_pix2shape.cpython-39.pyc b/densepose/modeling/losses/__pycache__/cycle_pix2shape.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3706fb0ab25036a20301e12d9502af8cf4eddb7d
Binary files /dev/null and b/densepose/modeling/losses/__pycache__/cycle_pix2shape.cpython-39.pyc differ
diff --git a/densepose/modeling/losses/__pycache__/cycle_shape2shape.cpython-39.pyc b/densepose/modeling/losses/__pycache__/cycle_shape2shape.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..cdd1c216b1509bc4de0b3a6661102b59f5a77cdf
Binary files /dev/null and b/densepose/modeling/losses/__pycache__/cycle_shape2shape.cpython-39.pyc differ
diff --git a/densepose/modeling/losses/__pycache__/embed.cpython-39.pyc b/densepose/modeling/losses/__pycache__/embed.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ef99139f9886765ea30c9995c7255bd92fba7d6d
Binary files /dev/null and b/densepose/modeling/losses/__pycache__/embed.cpython-39.pyc differ
diff --git a/densepose/modeling/losses/__pycache__/embed_utils.cpython-39.pyc b/densepose/modeling/losses/__pycache__/embed_utils.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ca295b2a5e5f8545c8990e5729e8633aff3d2b84
Binary files /dev/null and b/densepose/modeling/losses/__pycache__/embed_utils.cpython-39.pyc differ
diff --git a/densepose/modeling/losses/__pycache__/mask.cpython-39.pyc b/densepose/modeling/losses/__pycache__/mask.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..cf588c5d09d0db7b4f486626761f73542edacdea
Binary files /dev/null and b/densepose/modeling/losses/__pycache__/mask.cpython-39.pyc differ
diff --git a/densepose/modeling/losses/__pycache__/mask_or_segm.cpython-39.pyc b/densepose/modeling/losses/__pycache__/mask_or_segm.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..890fd6b429bd56cea359d09e37e59cf455f705d5
Binary files /dev/null and b/densepose/modeling/losses/__pycache__/mask_or_segm.cpython-39.pyc differ
diff --git a/densepose/modeling/losses/__pycache__/registry.cpython-39.pyc b/densepose/modeling/losses/__pycache__/registry.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8bab89c7a92e32b41e8ac7448492dee61e86bc17
Binary files /dev/null and b/densepose/modeling/losses/__pycache__/registry.cpython-39.pyc differ
diff --git a/densepose/modeling/losses/__pycache__/segm.cpython-39.pyc b/densepose/modeling/losses/__pycache__/segm.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..312e345d42fd61314ece091afeca1c5f67e17ab8
Binary files /dev/null and b/densepose/modeling/losses/__pycache__/segm.cpython-39.pyc differ
diff --git a/densepose/modeling/losses/__pycache__/soft_embed.cpython-39.pyc b/densepose/modeling/losses/__pycache__/soft_embed.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f32e803597d181a739ab8ca712483e4e9e0f2af0
Binary files /dev/null and b/densepose/modeling/losses/__pycache__/soft_embed.cpython-39.pyc differ
diff --git a/densepose/modeling/losses/__pycache__/utils.cpython-39.pyc b/densepose/modeling/losses/__pycache__/utils.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a86ee0660ff642cc8b786af4ba756990b9c031f9
Binary files /dev/null and b/densepose/modeling/losses/__pycache__/utils.cpython-39.pyc differ
diff --git a/densepose/modeling/losses/chart.py b/densepose/modeling/losses/chart.py
new file mode 100644
index 0000000000000000000000000000000000000000..770648f3d3fddbfc553c18a3e7f5101396913593
--- /dev/null
+++ b/densepose/modeling/losses/chart.py
@@ -0,0 +1,293 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from typing import Any, List
+import torch
+from torch.nn import functional as F
+
+from detectron2.config import CfgNode
+from detectron2.structures import Instances
+
+from .mask_or_segm import MaskOrSegmentationLoss
+from .registry import DENSEPOSE_LOSS_REGISTRY
+from .utils import (
+ BilinearInterpolationHelper,
+ ChartBasedAnnotationsAccumulator,
+ LossDict,
+ extract_packed_annotations_from_matches,
+)
+
+
+@DENSEPOSE_LOSS_REGISTRY.register()
+class DensePoseChartLoss:
+ """
+ DensePose loss for chart-based training. A mesh is split into charts,
+ each chart is given a label (I) and parametrized by 2 coordinates referred to
+ as U and V. Ground truth consists of a number of points annotated with
+ I, U and V values and coarse segmentation S defined for all pixels of the
+ object bounding box. In some cases (see `COARSE_SEGM_TRAINED_BY_MASKS`),
+ semantic segmentation annotations can be used as ground truth inputs as well.
+
+ Estimated values are tensors:
+ * U coordinates, tensor of shape [N, C, S, S]
+ * V coordinates, tensor of shape [N, C, S, S]
+ * fine segmentation estimates, tensor of shape [N, C, S, S] with raw unnormalized
+ scores for each fine segmentation label at each location
+ * coarse segmentation estimates, tensor of shape [N, D, S, S] with raw unnormalized
+ scores for each coarse segmentation label at each location
+ where N is the number of detections, C is the number of fine segmentation
+ labels, S is the estimate size ( = width = height) and D is the number of
+ coarse segmentation channels.
+
+ The losses are:
+ * regression (smooth L1) loss for U and V coordinates
+ * cross entropy loss for fine (I) and coarse (S) segmentations
+ Each loss has an associated weight
+ """
+
+ def __init__(self, cfg: CfgNode):
+ """
+ Initialize chart-based loss from configuration options
+
+ Args:
+ cfg (CfgNode): configuration options
+ """
+ # fmt: off
+ self.heatmap_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE
+ self.w_points = cfg.MODEL.ROI_DENSEPOSE_HEAD.POINT_REGRESSION_WEIGHTS
+ self.w_part = cfg.MODEL.ROI_DENSEPOSE_HEAD.PART_WEIGHTS
+ self.w_segm = cfg.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS
+ self.n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
+ # fmt: on
+ self.segm_trained_by_masks = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS
+ self.segm_loss = MaskOrSegmentationLoss(cfg)
+
+ def __call__(
+ self, proposals_with_gt: List[Instances], densepose_predictor_outputs: Any, **kwargs
+ ) -> LossDict:
+ """
+ Produce chart-based DensePose losses
+
+ Args:
+ proposals_with_gt (list of Instances): detections with associated ground truth data
+ densepose_predictor_outputs: an object of a dataclass that contains predictor outputs
+ with estimated values; assumed to have the following attributes:
+ * coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S]
+ * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
+ * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
+ * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
+ where N is the number of detections, C is the number of fine segmentation
+ labels, S is the estimate size ( = width = height) and D is the number of
+ coarse segmentation channels.
+
+ Return:
+ dict: str -> tensor: dict of losses with the following entries:
+ * `loss_densepose_U`: smooth L1 loss for U coordinate estimates
+ * `loss_densepose_V`: smooth L1 loss for V coordinate estimates
+ * `loss_densepose_I`: cross entropy for raw unnormalized scores for fine
+ segmentation estimates given ground truth labels;
+ * `loss_densepose_S`: cross entropy for raw unnormalized scores for coarse
+ segmentation estimates given ground truth labels;
+ """
+ # densepose outputs are computed for all images and all bounding boxes;
+ # i.e. if a batch has 4 images with (3, 1, 2, 1) proposals respectively,
+ # the outputs will have size(0) == 3+1+2+1 == 7
+
+ if not len(proposals_with_gt):
+ return self.produce_fake_densepose_losses(densepose_predictor_outputs)
+
+ accumulator = ChartBasedAnnotationsAccumulator()
+ packed_annotations = extract_packed_annotations_from_matches(proposals_with_gt, accumulator)
+
+ # NOTE: we need to keep the same computation graph on all the GPUs to
+ # perform reduction properly. Hence even if we have no data on one
+ # of the GPUs, we still need to generate the computation graph.
+ # Add fake (zero) loss in the form Tensor.sum() * 0
+ if packed_annotations is None:
+ return self.produce_fake_densepose_losses(densepose_predictor_outputs)
+
+ h, w = densepose_predictor_outputs.u.shape[2:]
+ interpolator = BilinearInterpolationHelper.from_matches(
+ packed_annotations,
+ (h, w),
+ )
+
+ j_valid_fg = interpolator.j_valid * ( # pyre-ignore[16]
+ packed_annotations.fine_segm_labels_gt > 0
+ )
+ # pyre-fixme[6]: For 1st param expected `Tensor` but got `int`.
+ if not torch.any(j_valid_fg):
+ return self.produce_fake_densepose_losses(densepose_predictor_outputs)
+
+ losses_uv = self.produce_densepose_losses_uv(
+ proposals_with_gt,
+ densepose_predictor_outputs,
+ packed_annotations,
+ interpolator,
+ j_valid_fg, # pyre-ignore[6]
+ )
+
+ losses_segm = self.produce_densepose_losses_segm(
+ proposals_with_gt,
+ densepose_predictor_outputs,
+ packed_annotations,
+ interpolator,
+ j_valid_fg, # pyre-ignore[6]
+ )
+
+ return {**losses_uv, **losses_segm}
+
+ def produce_fake_densepose_losses(self, densepose_predictor_outputs: Any) -> LossDict:
+ """
+ Fake losses for fine segmentation and U/V coordinates. These are used when
+ no suitable ground truth data was found in a batch. The loss has a value 0
+ and is primarily used to construct the computation graph, so that
+ `DistributedDataParallel` has similar graphs on all GPUs and can perform
+ reduction properly.
+
+ Args:
+ densepose_predictor_outputs: DensePose predictor outputs, an object
+ of a dataclass that is assumed to have the following attributes:
+ * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
+ * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
+ * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
+ Return:
+ dict: str -> tensor: dict of losses with the following entries:
+ * `loss_densepose_U`: has value 0
+ * `loss_densepose_V`: has value 0
+ * `loss_densepose_I`: has value 0
+ * `loss_densepose_S`: has value 0
+ """
+ losses_uv = self.produce_fake_densepose_losses_uv(densepose_predictor_outputs)
+ losses_segm = self.produce_fake_densepose_losses_segm(densepose_predictor_outputs)
+ return {**losses_uv, **losses_segm}
+
+ def produce_fake_densepose_losses_uv(self, densepose_predictor_outputs: Any) -> LossDict:
+ """
+ Fake losses for U/V coordinates. These are used when no suitable ground
+ truth data was found in a batch. The loss has a value 0
+ and is primarily used to construct the computation graph, so that
+ `DistributedDataParallel` has similar graphs on all GPUs and can perform
+ reduction properly.
+
+ Args:
+ densepose_predictor_outputs: DensePose predictor outputs, an object
+ of a dataclass that is assumed to have the following attributes:
+ * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
+ * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
+ Return:
+ dict: str -> tensor: dict of losses with the following entries:
+ * `loss_densepose_U`: has value 0
+ * `loss_densepose_V`: has value 0
+ """
+ return {
+ "loss_densepose_U": densepose_predictor_outputs.u.sum() * 0,
+ "loss_densepose_V": densepose_predictor_outputs.v.sum() * 0,
+ }
+
+ def produce_fake_densepose_losses_segm(self, densepose_predictor_outputs: Any) -> LossDict:
+ """
+ Fake losses for fine / coarse segmentation. These are used when
+ no suitable ground truth data was found in a batch. The loss has a value 0
+ and is primarily used to construct the computation graph, so that
+ `DistributedDataParallel` has similar graphs on all GPUs and can perform
+ reduction properly.
+
+ Args:
+ densepose_predictor_outputs: DensePose predictor outputs, an object
+ of a dataclass that is assumed to have the following attributes:
+ * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
+ * coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S]
+ Return:
+ dict: str -> tensor: dict of losses with the following entries:
+ * `loss_densepose_I`: has value 0
+ * `loss_densepose_S`: has value 0, added only if `segm_trained_by_masks` is False
+ """
+ losses = {
+ "loss_densepose_I": densepose_predictor_outputs.fine_segm.sum() * 0,
+ "loss_densepose_S": self.segm_loss.fake_value(densepose_predictor_outputs),
+ }
+ return losses
+
+ def produce_densepose_losses_uv(
+ self,
+ proposals_with_gt: List[Instances],
+ densepose_predictor_outputs: Any,
+ packed_annotations: Any,
+ interpolator: BilinearInterpolationHelper,
+ j_valid_fg: torch.Tensor,
+ ) -> LossDict:
+ """
+ Compute losses for U/V coordinates: smooth L1 loss between
+ estimated coordinates and the ground truth.
+
+ Args:
+ proposals_with_gt (list of Instances): detections with associated ground truth data
+ densepose_predictor_outputs: DensePose predictor outputs, an object
+ of a dataclass that is assumed to have the following attributes:
+ * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
+ * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
+ Return:
+ dict: str -> tensor: dict of losses with the following entries:
+ * `loss_densepose_U`: smooth L1 loss for U coordinate estimates
+ * `loss_densepose_V`: smooth L1 loss for V coordinate estimates
+ """
+ u_gt = packed_annotations.u_gt[j_valid_fg]
+ u_est = interpolator.extract_at_points(densepose_predictor_outputs.u)[j_valid_fg]
+ v_gt = packed_annotations.v_gt[j_valid_fg]
+ v_est = interpolator.extract_at_points(densepose_predictor_outputs.v)[j_valid_fg]
+ return {
+ "loss_densepose_U": F.smooth_l1_loss(u_est, u_gt, reduction="sum") * self.w_points,
+ "loss_densepose_V": F.smooth_l1_loss(v_est, v_gt, reduction="sum") * self.w_points,
+ }
+
+ def produce_densepose_losses_segm(
+ self,
+ proposals_with_gt: List[Instances],
+ densepose_predictor_outputs: Any,
+ packed_annotations: Any,
+ interpolator: BilinearInterpolationHelper,
+ j_valid_fg: torch.Tensor,
+ ) -> LossDict:
+ """
+ Losses for fine / coarse segmentation: cross-entropy
+ for segmentation unnormalized scores given ground truth labels at
+ annotated points for fine segmentation and dense mask annotations
+ for coarse segmentation.
+
+ Args:
+ proposals_with_gt (list of Instances): detections with associated ground truth data
+ densepose_predictor_outputs: DensePose predictor outputs, an object
+ of a dataclass that is assumed to have the following attributes:
+ * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
+ * coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S]
+ Return:
+ dict: str -> tensor: dict of losses with the following entries:
+ * `loss_densepose_I`: cross entropy for raw unnormalized scores for fine
+ segmentation estimates given ground truth labels
+ * `loss_densepose_S`: cross entropy for raw unnormalized scores for coarse
+ segmentation estimates given ground truth labels;
+ may be included if coarse segmentation is only trained
+ using DensePose ground truth; if additional supervision through
+ instance segmentation data is performed (`segm_trained_by_masks` is True),
+ this loss is handled by `produce_mask_losses` instead
+ """
+ fine_segm_gt = packed_annotations.fine_segm_labels_gt[
+ interpolator.j_valid # pyre-ignore[16]
+ ]
+ fine_segm_est = interpolator.extract_at_points(
+ densepose_predictor_outputs.fine_segm,
+ slice_fine_segm=slice(None),
+ w_ylo_xlo=interpolator.w_ylo_xlo[:, None], # pyre-ignore[16]
+ w_ylo_xhi=interpolator.w_ylo_xhi[:, None], # pyre-ignore[16]
+ w_yhi_xlo=interpolator.w_yhi_xlo[:, None], # pyre-ignore[16]
+ w_yhi_xhi=interpolator.w_yhi_xhi[:, None], # pyre-ignore[16]
+ )[interpolator.j_valid, :]
+ return {
+ "loss_densepose_I": F.cross_entropy(fine_segm_est, fine_segm_gt.long()) * self.w_part,
+ "loss_densepose_S": self.segm_loss(
+ proposals_with_gt, densepose_predictor_outputs, packed_annotations
+ )
+ * self.w_segm,
+ }
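+
+
+# Descriptive note (assumption, summarizing the weighting above): the total
+# chart-based loss is
+#   w_points * (smooth_l1(U) + smooth_l1(V)) + w_part * CE(fine_segm) + w_segm * CE(coarse_segm),
+# where the U/V and fine-segmentation terms are evaluated only at annotated
+# points and the coarse term may be delegated to the mask loss when
+# COARSE_SEGM_TRAINED_BY_MASKS is enabled.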
diff --git a/densepose/modeling/losses/chart_with_confidences.py b/densepose/modeling/losses/chart_with_confidences.py
new file mode 100644
index 0000000000000000000000000000000000000000..d061488d7d5fb8fe0e220e7dfe3f03ea2eda7977
--- /dev/null
+++ b/densepose/modeling/losses/chart_with_confidences.py
@@ -0,0 +1,211 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+import math
+from typing import Any, List
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from detectron2.config import CfgNode
+from detectron2.structures import Instances
+
+from .. import DensePoseConfidenceModelConfig, DensePoseUVConfidenceType
+from .chart import DensePoseChartLoss
+from .registry import DENSEPOSE_LOSS_REGISTRY
+from .utils import BilinearInterpolationHelper, LossDict
+
+
+@DENSEPOSE_LOSS_REGISTRY.register()
+class DensePoseChartWithConfidenceLoss(DensePoseChartLoss):
+ """ """
+
+ def __init__(self, cfg: CfgNode):
+ super().__init__(cfg)
+ self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg)
+ if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO:
+ self.uv_loss_with_confidences = IIDIsotropicGaussianUVLoss(
+ self.confidence_model_cfg.uv_confidence.epsilon
+ )
+ elif self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.INDEP_ANISO:
+ self.uv_loss_with_confidences = IndepAnisotropicGaussianUVLoss(
+ self.confidence_model_cfg.uv_confidence.epsilon
+ )
+
+ def produce_fake_densepose_losses_uv(self, densepose_predictor_outputs: Any) -> LossDict:
+ """
+ Overrides fake losses for fine segmentation and U/V coordinates to
+ include computation graphs for additional confidence parameters.
+ These are used when no suitable ground truth data was found in a batch.
+ The loss has a value 0 and is primarily used to construct the computation graph,
+ so that `DistributedDataParallel` has similar graphs on all GPUs and can
+ perform reduction properly.
+
+ Args:
+ densepose_predictor_outputs: DensePose predictor outputs, an object
+ of a dataclass that is assumed to have the following attributes:
+ * fine_segm - fine segmentation estimates, tensor of shape [N, C, S, S]
+ * u - U coordinate estimates per fine labels, tensor of shape [N, C, S, S]
+ * v - V coordinate estimates per fine labels, tensor of shape [N, C, S, S]
+ Return:
+ dict: str -> tensor: dict of losses with the following entries:
+ * `loss_densepose_UV`: has value 0 (when confidence estimation is enabled)
+ * `loss_densepose_U`, `loss_densepose_V`: have value 0 (otherwise)
+ """
+ conf_type = self.confidence_model_cfg.uv_confidence.type
+ if self.confidence_model_cfg.uv_confidence.enabled:
+ loss_uv = (
+ densepose_predictor_outputs.u.sum() + densepose_predictor_outputs.v.sum()
+ ) * 0
+ if conf_type == DensePoseUVConfidenceType.IID_ISO:
+ loss_uv += densepose_predictor_outputs.sigma_2.sum() * 0
+ elif conf_type == DensePoseUVConfidenceType.INDEP_ANISO:
+ loss_uv += (
+ densepose_predictor_outputs.sigma_2.sum()
+ + densepose_predictor_outputs.kappa_u.sum()
+ + densepose_predictor_outputs.kappa_v.sum()
+ ) * 0
+ return {"loss_densepose_UV": loss_uv}
+ else:
+ return super().produce_fake_densepose_losses_uv(densepose_predictor_outputs)
+
+ def produce_densepose_losses_uv(
+ self,
+ proposals_with_gt: List[Instances],
+ densepose_predictor_outputs: Any,
+ packed_annotations: Any,
+ interpolator: BilinearInterpolationHelper,
+ j_valid_fg: torch.Tensor,
+ ) -> LossDict:
+ conf_type = self.confidence_model_cfg.uv_confidence.type
+ if self.confidence_model_cfg.uv_confidence.enabled:
+ u_gt = packed_annotations.u_gt[j_valid_fg]
+ u_est = interpolator.extract_at_points(densepose_predictor_outputs.u)[j_valid_fg]
+ v_gt = packed_annotations.v_gt[j_valid_fg]
+ v_est = interpolator.extract_at_points(densepose_predictor_outputs.v)[j_valid_fg]
+ sigma_2_est = interpolator.extract_at_points(densepose_predictor_outputs.sigma_2)[
+ j_valid_fg
+ ]
+ if conf_type == DensePoseUVConfidenceType.IID_ISO:
+ return {
+ "loss_densepose_UV": (
+ self.uv_loss_with_confidences(u_est, v_est, sigma_2_est, u_gt, v_gt)
+ * self.w_points
+ )
+ }
+ elif conf_type in [DensePoseUVConfidenceType.INDEP_ANISO]:
+ kappa_u_est = interpolator.extract_at_points(densepose_predictor_outputs.kappa_u)[
+ j_valid_fg
+ ]
+ kappa_v_est = interpolator.extract_at_points(densepose_predictor_outputs.kappa_v)[
+ j_valid_fg
+ ]
+ return {
+ "loss_densepose_UV": (
+ self.uv_loss_with_confidences(
+ u_est, v_est, sigma_2_est, kappa_u_est, kappa_v_est, u_gt, v_gt
+ )
+ * self.w_points
+ )
+ }
+ return super().produce_densepose_losses_uv(
+ proposals_with_gt,
+ densepose_predictor_outputs,
+ packed_annotations,
+ interpolator,
+ j_valid_fg,
+ )
+
+
+class IIDIsotropicGaussianUVLoss(nn.Module):
+ """
+ Loss for the case of iid residuals with isotropic covariance:
+ $Sigma_i = sigma_i^2 I$
+ The loss (negative log likelihood) is then:
+ $1/2 sum_{i=1}^n (log(2 pi) + 2 log sigma_i^2 + ||delta_i||^2 / sigma_i^2)$,
+ where $delta_i=(u - u', v - v')$ is a 2D vector containing UV coordinates
+ difference between estimated and ground truth UV values
+ For details, see:
+ N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning
+ Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019
+ """
+
+ def __init__(self, sigma_lower_bound: float):
+ super(IIDIsotropicGaussianUVLoss, self).__init__()
+ self.sigma_lower_bound = sigma_lower_bound
+ self.log2pi = math.log(2 * math.pi)
+
+ def forward(
+ self,
+ u: torch.Tensor,
+ v: torch.Tensor,
+ sigma_u: torch.Tensor,
+ target_u: torch.Tensor,
+ target_v: torch.Tensor,
+ ):
+ # compute $\sigma_i^2$
+ # use sigma_lower_bound to avoid degenerate solution for variance
+ # (sigma -> 0)
+ sigma2 = F.softplus(sigma_u) + self.sigma_lower_bound
+ # compute \|delta_i\|^2
+ # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`.
+ delta_t_delta = (u - target_u) ** 2 + (v - target_v) ** 2
+ # the total loss from the formula above:
+ loss = 0.5 * (self.log2pi + 2 * torch.log(sigma2) + delta_t_delta / sigma2)
+ return loss.sum()
+
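+# Note (assumption, descriptive only): softplus(sigma_u) + sigma_lower_bound keeps
+# the predicted variance strictly positive, so the log and the division above are
+# well defined; up to an additive constant, each summand is the negative log
+# likelihood of a 2D isotropic Gaussian with variance sigma2.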
+
+class IndepAnisotropicGaussianUVLoss(nn.Module):
+ """
+ Loss for the case of independent residuals with anisotropic covariances:
+ $Sigma_i = sigma_i^2 I + r_i r_i^T$
+ The loss (negative log likelihood) is then:
+ $1/2 sum_{i=1}^n (log(2 pi)
+ + log sigma_i^2 (sigma_i^2 + ||r_i||^2)
+ + ||delta_i||^2 / sigma_i^2
+ - <delta_i, r_i>^2 / (sigma_i^2 * (sigma_i^2 + ||r_i||^2)))$,
+ where $delta_i=(u - u', v - v')$ is a 2D vector containing UV coordinates
+ difference between estimated and ground truth UV values
+ For details, see:
+ N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning
+ Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019
+ """
+
+ def __init__(self, sigma_lower_bound: float):
+ super(IndepAnisotropicGaussianUVLoss, self).__init__()
+ self.sigma_lower_bound = sigma_lower_bound
+ self.log2pi = math.log(2 * math.pi)
+
+ def forward(
+ self,
+ u: torch.Tensor,
+ v: torch.Tensor,
+ sigma_u: torch.Tensor,
+ kappa_u_est: torch.Tensor,
+ kappa_v_est: torch.Tensor,
+ target_u: torch.Tensor,
+ target_v: torch.Tensor,
+ ):
+ # compute $\sigma_i^2$
+ sigma2 = F.softplus(sigma_u) + self.sigma_lower_bound
+ # compute \|r_i\|^2
+ # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`.
+ r_sqnorm2 = kappa_u_est**2 + kappa_v_est**2
+ delta_u = u - target_u
+ delta_v = v - target_v
+ # compute \|delta_i\|^2
+ # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`.
+ delta_sqnorm = delta_u**2 + delta_v**2
+ delta_u_r_u = delta_u * kappa_u_est
+ delta_v_r_v = delta_v * kappa_v_est
+ # compute the scalar product <delta_i, r_i>
+ delta_r = delta_u_r_u + delta_v_r_v
+ # compute squared scalar product <delta_i, r_i>^2
+ # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`.
+ delta_r_sqnorm = delta_r**2
+ denom2 = sigma2 * (sigma2 + r_sqnorm2)
+ loss = 0.5 * (
+ self.log2pi + torch.log(denom2) + delta_sqnorm / sigma2 - delta_r_sqnorm / denom2
+ )
+ return loss.sum()
diff --git a/densepose/modeling/losses/cse.py b/densepose/modeling/losses/cse.py
new file mode 100644
index 0000000000000000000000000000000000000000..ffe219c5474392da8048bcf409257cbfce817236
--- /dev/null
+++ b/densepose/modeling/losses/cse.py
@@ -0,0 +1,117 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+# pyre-unsafe
+
+from typing import Any, List
+from torch import nn
+
+from detectron2.config import CfgNode
+from detectron2.structures import Instances
+
+from .cycle_pix2shape import PixToShapeCycleLoss
+from .cycle_shape2shape import ShapeToShapeCycleLoss
+from .embed import EmbeddingLoss
+from .embed_utils import CseAnnotationsAccumulator
+from .mask_or_segm import MaskOrSegmentationLoss
+from .registry import DENSEPOSE_LOSS_REGISTRY
+from .soft_embed import SoftEmbeddingLoss
+from .utils import BilinearInterpolationHelper, LossDict, extract_packed_annotations_from_matches
+
+
+@DENSEPOSE_LOSS_REGISTRY.register()
+class DensePoseCseLoss:
+ """ """
+
+ _EMBED_LOSS_REGISTRY = {
+ EmbeddingLoss.__name__: EmbeddingLoss,
+ SoftEmbeddingLoss.__name__: SoftEmbeddingLoss,
+ }
+
+ def __init__(self, cfg: CfgNode):
+ """
+ Initialize CSE loss from configuration options
+
+ Args:
+ cfg (CfgNode): configuration options
+ """
+ self.w_segm = cfg.MODEL.ROI_DENSEPOSE_HEAD.INDEX_WEIGHTS
+ self.w_embed = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_LOSS_WEIGHT
+ self.segm_loss = MaskOrSegmentationLoss(cfg)
+ self.embed_loss = DensePoseCseLoss.create_embed_loss(cfg)
+ self.do_shape2shape = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.ENABLED
+ if self.do_shape2shape:
+ self.w_shape2shape = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.WEIGHT
+ self.shape2shape_loss = ShapeToShapeCycleLoss(cfg)
+ self.do_pix2shape = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.ENABLED
+ if self.do_pix2shape:
+ self.w_pix2shape = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.WEIGHT
+ self.pix2shape_loss = PixToShapeCycleLoss(cfg)
+
+ @classmethod
+ def create_embed_loss(cls, cfg: CfgNode):
+ # registry not used here, since embedding losses are currently local
+ # and are not used anywhere else
+ return cls._EMBED_LOSS_REGISTRY[cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_LOSS_NAME](cfg)
+
+ def __call__(
+ self,
+ proposals_with_gt: List[Instances],
+ densepose_predictor_outputs: Any,
+ embedder: nn.Module,
+ ) -> LossDict:
+ if not len(proposals_with_gt):
+ return self.produce_fake_losses(densepose_predictor_outputs, embedder)
+ accumulator = CseAnnotationsAccumulator()
+ packed_annotations = extract_packed_annotations_from_matches(proposals_with_gt, accumulator)
+ if packed_annotations is None:
+ return self.produce_fake_losses(densepose_predictor_outputs, embedder)
+ h, w = densepose_predictor_outputs.embedding.shape[2:]
+ interpolator = BilinearInterpolationHelper.from_matches(
+ packed_annotations,
+ (h, w),
+ )
+ meshid_to_embed_losses = self.embed_loss(
+ proposals_with_gt,
+ densepose_predictor_outputs,
+ packed_annotations,
+ interpolator,
+ embedder,
+ )
+ embed_loss_dict = {
+ f"loss_densepose_E{meshid}": self.w_embed * meshid_to_embed_losses[meshid]
+ for meshid in meshid_to_embed_losses
+ }
+ all_loss_dict = {
+ "loss_densepose_S": self.w_segm
+ * self.segm_loss(proposals_with_gt, densepose_predictor_outputs, packed_annotations),
+ **embed_loss_dict,
+ }
+ if self.do_shape2shape:
+ all_loss_dict["loss_shape2shape"] = self.w_shape2shape * self.shape2shape_loss(embedder)
+ if self.do_pix2shape:
+ all_loss_dict["loss_pix2shape"] = self.w_pix2shape * self.pix2shape_loss(
+ proposals_with_gt, densepose_predictor_outputs, packed_annotations, embedder
+ )
+ return all_loss_dict
+
+ def produce_fake_losses(
+ self, densepose_predictor_outputs: Any, embedder: nn.Module
+ ) -> LossDict:
+ meshname_to_embed_losses = self.embed_loss.fake_values(
+ densepose_predictor_outputs, embedder=embedder
+ )
+ embed_loss_dict = {
+ f"loss_densepose_E{mesh_name}": meshname_to_embed_losses[mesh_name]
+ for mesh_name in meshname_to_embed_losses
+ }
+ all_loss_dict = {
+ "loss_densepose_S": self.segm_loss.fake_value(densepose_predictor_outputs),
+ **embed_loss_dict,
+ }
+ if self.do_shape2shape:
+ all_loss_dict["loss_shape2shape"] = self.shape2shape_loss.fake_value(embedder)
+ if self.do_pix2shape:
+ all_loss_dict["loss_pix2shape"] = self.pix2shape_loss.fake_value(
+ densepose_predictor_outputs, embedder
+ )
+ return all_loss_dict
diff --git a/densepose/modeling/losses/cycle_pix2shape.py b/densepose/modeling/losses/cycle_pix2shape.py
new file mode 100644
index 0000000000000000000000000000000000000000..f4e0a94a68370a994179d9d3eb5fb0ed9ed4af39
--- /dev/null
+++ b/densepose/modeling/losses/cycle_pix2shape.py
@@ -0,0 +1,154 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+# pyre-unsafe
+
+from typing import Any, List
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from detectron2.config import CfgNode
+from detectron2.structures import Instances
+
+from densepose.data.meshes.catalog import MeshCatalog
+from densepose.modeling.cse.utils import normalize_embeddings, squared_euclidean_distance_matrix
+
+from .embed_utils import PackedCseAnnotations
+from .mask import extract_data_for_mask_loss_from_matches
+
+
+def _create_pixel_dist_matrix(grid_size: int) -> torch.Tensor:
+ rows = torch.arange(grid_size)
+ cols = torch.arange(grid_size)
+ # at index `i` contains [row, col], where
+ # row = i // grid_size
+ # col = i % grid_size
+ pix_coords = (
+ torch.stack(torch.meshgrid(rows, cols), -1).reshape((grid_size * grid_size, 2)).float()
+ )
+ return squared_euclidean_distance_matrix(pix_coords, pix_coords)
+
+
+def _sample_fg_pixels_randperm(fg_mask: torch.Tensor, sample_size: int) -> torch.Tensor:
+ fg_mask_flattened = fg_mask.reshape((-1,))
+ num_pixels = int(fg_mask_flattened.sum().item())
+ fg_pixel_indices = fg_mask_flattened.nonzero(as_tuple=True)[0]
+ if (sample_size <= 0) or (num_pixels <= sample_size):
+ return fg_pixel_indices
+ sample_indices = torch.randperm(num_pixels, device=fg_mask.device)[:sample_size]
+ return fg_pixel_indices[sample_indices]
+
+
+def _sample_fg_pixels_multinomial(fg_mask: torch.Tensor, sample_size: int) -> torch.Tensor:
+ fg_mask_flattened = fg_mask.reshape((-1,))
+ num_pixels = int(fg_mask_flattened.sum().item())
+ if (sample_size <= 0) or (num_pixels <= sample_size):
+ return fg_mask_flattened.nonzero(as_tuple=True)[0]
+ return fg_mask_flattened.float().multinomial(sample_size, replacement=False)
+
+
+class PixToShapeCycleLoss(nn.Module):
+ """
+ Cycle loss for pixel-vertex correspondence
+ """
+
+ def __init__(self, cfg: CfgNode):
+ super().__init__()
+ self.shape_names = list(cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDERS.keys())
+ self.embed_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE
+ self.norm_p = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.NORM_P
+ self.use_all_meshes_not_gt_only = (
+ cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.USE_ALL_MESHES_NOT_GT_ONLY
+ )
+ self.num_pixels_to_sample = (
+ cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.NUM_PIXELS_TO_SAMPLE
+ )
+ self.pix_sigma = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.PIXEL_SIGMA
+ self.temperature_pix_to_vertex = (
+ cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.TEMPERATURE_PIXEL_TO_VERTEX
+ )
+ self.temperature_vertex_to_pix = (
+ cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.PIX_TO_SHAPE_CYCLE_LOSS.TEMPERATURE_VERTEX_TO_PIXEL
+ )
+ self.pixel_dists = _create_pixel_dist_matrix(cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE)
+
+ def forward(
+ self,
+ proposals_with_gt: List[Instances],
+ densepose_predictor_outputs: Any,
+ packed_annotations: PackedCseAnnotations,
+ embedder: nn.Module,
+ ):
+ """
+ Args:
+ proposals_with_gt (list of Instances): detections with associated
+ ground truth data; each item corresponds to instances detected
+ on 1 image; the number of items corresponds to the number of
+ images in a batch
+ densepose_predictor_outputs: an object of a dataclass that contains predictor
+ outputs with estimated values; assumed to have the following attributes:
+ * embedding - embedding estimates, tensor of shape [N, D, S, S], where
+ N = number of instances (= sum N_i, where N_i is the number of
+ instances on image i)
+ D = embedding space dimensionality (MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE)
+ S = output size (width and height)
+ packed_annotations (PackedCseAnnotations): contains various data useful
+ for loss computation, each data is packed into a single tensor
+ embedder (nn.Module): module that computes vertex embeddings for different meshes
+ """
+ pix_embeds = densepose_predictor_outputs.embedding
+ if self.pixel_dists.device != pix_embeds.device:
+ # should normally be done only once
+ self.pixel_dists = self.pixel_dists.to(device=pix_embeds.device)
+ with torch.no_grad():
+ mask_loss_data = extract_data_for_mask_loss_from_matches(
+ proposals_with_gt, densepose_predictor_outputs.coarse_segm
+ )
+ # GT masks - tensor of shape [N, S, S] of int64
+ masks_gt = mask_loss_data.masks_gt.long() # pyre-ignore[16]
+ assert len(pix_embeds) == len(masks_gt), (
+ f"Number of instances with embeddings {len(pix_embeds)} != "
+ f"number of instances with GT masks {len(masks_gt)}"
+ )
+ losses = []
+ mesh_names = (
+ self.shape_names
+ if self.use_all_meshes_not_gt_only
+ else [
+ MeshCatalog.get_mesh_name(mesh_id.item())
+ for mesh_id in packed_annotations.vertex_mesh_ids_gt.unique()
+ ]
+ )
+ for pixel_embeddings, mask_gt in zip(pix_embeds, masks_gt):
+ # pixel_embeddings [D, S, S]
+ # mask_gt [S, S]
+ for mesh_name in mesh_names:
+ mesh_vertex_embeddings = embedder(mesh_name)
+ # pixel indices [M]
+ pixel_indices_flattened = _sample_fg_pixels_randperm(
+ mask_gt, self.num_pixels_to_sample
+ )
+ # pixel distances [M, M]
+ pixel_dists = self.pixel_dists.to(pixel_embeddings.device)[
+ torch.meshgrid(pixel_indices_flattened, pixel_indices_flattened)
+ ]
+ # pixel embeddings [M, D]
+ pixel_embeddings_sampled = normalize_embeddings(
+ pixel_embeddings.reshape((self.embed_size, -1))[:, pixel_indices_flattened].T
+ )
+ # pixel-vertex similarity [M, K]
+ sim_matrix = pixel_embeddings_sampled.mm(mesh_vertex_embeddings.T)
+ c_pix_vertex = F.softmax(sim_matrix / self.temperature_pix_to_vertex, dim=1)
+ c_vertex_pix = F.softmax(sim_matrix.T / self.temperature_vertex_to_pix, dim=1)
+ c_cycle = c_pix_vertex.mm(c_vertex_pix)
+ loss_cycle = torch.norm(pixel_dists * c_cycle, p=self.norm_p)
+ losses.append(loss_cycle)
+
+ if len(losses) == 0:
+ return pix_embeds.sum() * 0
+ return torch.stack(losses, dim=0).mean()
+
+ def fake_value(self, densepose_predictor_outputs: Any, embedder: nn.Module):
+ losses = [embedder(mesh_name).sum() * 0 for mesh_name in embedder.mesh_names]
+ losses.append(densepose_predictor_outputs.embedding.sum() * 0)
+ return torch.mean(torch.stack(losses))
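
A minimal sketch of the cycle term computed inside `PixToShapeCycleLoss.forward`, with random stand-ins for the sampled pixel embeddings, the mesh vertex embeddings, and the pixel distance matrix (shapes, temperatures, and norm order are illustrative, not taken from the config):

```python
import torch
import torch.nn.functional as F

M, K, D = 32, 100, 16          # sampled pixels, mesh vertices, embedding dim
t_pix2vert, t_vert2pix, norm_p = 0.05, 0.05, 2

pix = F.normalize(torch.randn(M, D), dim=1)     # sampled pixel embeddings [M, D]
vert = F.normalize(torch.randn(K, D), dim=1)    # mesh vertex embeddings [K, D]
pts = torch.rand(M, 2)
pixel_dists = torch.cdist(pts, pts) ** 2        # squared pixel distances [M, M]

sim = pix.mm(vert.T)                                   # pixel-vertex similarities [M, K]
c_pix_vert = F.softmax(sim / t_pix2vert, dim=1)        # pixel -> vertex soft assignment
c_vert_pix = F.softmax(sim.T / t_vert2pix, dim=1)      # vertex -> pixel soft assignment
c_cycle = c_pix_vert.mm(c_vert_pix)                    # pixel -> pixel round trip [M, M]
# penalize round trips that land far from where they started
loss_cycle = torch.norm(pixel_dists * c_cycle, p=norm_p)
print(loss_cycle.item())
```
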
diff --git a/densepose/modeling/losses/cycle_shape2shape.py b/densepose/modeling/losses/cycle_shape2shape.py
new file mode 100644
index 0000000000000000000000000000000000000000..c6e2ca89a39f391eadc915154964f82d6ddecdd4
--- /dev/null
+++ b/densepose/modeling/losses/cycle_shape2shape.py
@@ -0,0 +1,119 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+# pyre-unsafe
+
+import random
+from typing import Tuple
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from detectron2.config import CfgNode
+
+from densepose.structures.mesh import create_mesh
+
+from .utils import sample_random_indices
+
+
+class ShapeToShapeCycleLoss(nn.Module):
+ """
+ Cycle Loss for Shapes.
+ Inspired by:
+ "Mapping in a Cycle: Sinkhorn Regularized Unsupervised Learning for Point Cloud Shapes".
+ """
+
+ def __init__(self, cfg: CfgNode):
+ super().__init__()
+ self.shape_names = list(cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDERS.keys())
+ self.all_shape_pairs = [
+ (x, y) for i, x in enumerate(self.shape_names) for y in self.shape_names[i + 1 :]
+ ]
+ random.shuffle(self.all_shape_pairs)
+ self.cur_pos = 0
+ self.norm_p = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.NORM_P
+ self.temperature = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.TEMPERATURE
+ self.max_num_vertices = (
+ cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.SHAPE_TO_SHAPE_CYCLE_LOSS.MAX_NUM_VERTICES
+ )
+
+ def _sample_random_pair(self) -> Tuple[str, str]:
+ """
+ Produce a random pair of different mesh names
+
+ Return:
+ tuple(str, str): a pair of different mesh names
+ """
+ if self.cur_pos >= len(self.all_shape_pairs):
+ random.shuffle(self.all_shape_pairs)
+ self.cur_pos = 0
+ shape_pair = self.all_shape_pairs[self.cur_pos]
+ self.cur_pos += 1
+ return shape_pair
+
+ def forward(self, embedder: nn.Module):
+ """
+ Do a forward pass with a random (src, dst) pair of shapes
+ Args:
+ embedder (nn.Module): module that computes vertex embeddings for different meshes
+ """
+ src_mesh_name, dst_mesh_name = self._sample_random_pair()
+ return self._forward_one_pair(embedder, src_mesh_name, dst_mesh_name)
+
+ def fake_value(self, embedder: nn.Module):
+ losses = []
+ for mesh_name in embedder.mesh_names:
+ losses.append(embedder(mesh_name).sum() * 0)
+ return torch.mean(torch.stack(losses))
+
+ def _get_embeddings_and_geodists_for_mesh(
+ self, embedder: nn.Module, mesh_name: str
+ ) -> Tuple[torch.Tensor, torch.Tensor]:
+ """
+ Produces embeddings and geodesic distance tensors for a given mesh. May subsample
+ the mesh, if it contains too many vertices (controlled by the
+ SHAPE_TO_SHAPE_CYCLE_LOSS.MAX_NUM_VERTICES config option).
+ Args:
+ embedder (nn.Module): module that computes embeddings for mesh vertices
+ mesh_name (str): mesh name
+ Return:
+ embeddings (torch.Tensor of size [N, D]): embeddings for selected mesh
+ vertices (N = number of selected vertices, D = embedding space dim)
+ geodists (torch.Tensor of size [N, N]): geodesic distances for the selected
+ mesh vertices (N = number of selected vertices)
+ """
+ embeddings = embedder(mesh_name)
+ indices = sample_random_indices(
+ embeddings.shape[0], self.max_num_vertices, embeddings.device
+ )
+ mesh = create_mesh(mesh_name, embeddings.device)
+ geodists = mesh.geodists
+ if indices is not None:
+ embeddings = embeddings[indices]
+ geodists = geodists[torch.meshgrid(indices, indices)]
+ return embeddings, geodists
+
+ def _forward_one_pair(
+ self, embedder: nn.Module, mesh_name_1: str, mesh_name_2: str
+ ) -> torch.Tensor:
+ """
+ Do a forward pass with a selected pair of meshes
+ Args:
+ embedder (nn.Module): module that computes vertex embeddings for different meshes
+ mesh_name_1 (str): first mesh name
+ mesh_name_2 (str): second mesh name
+ Return:
+ Tensor containing the loss value
+ """
+ embeddings_1, geodists_1 = self._get_embeddings_and_geodists_for_mesh(embedder, mesh_name_1)
+ embeddings_2, geodists_2 = self._get_embeddings_and_geodists_for_mesh(embedder, mesh_name_2)
+ sim_matrix_12 = embeddings_1.mm(embeddings_2.T)
+
+ c_12 = F.softmax(sim_matrix_12 / self.temperature, dim=1)
+ c_21 = F.softmax(sim_matrix_12.T / self.temperature, dim=1)
+ c_11 = c_12.mm(c_21)
+ c_22 = c_21.mm(c_12)
+
+ loss_cycle_11 = torch.norm(geodists_1 * c_11, p=self.norm_p)
+ loss_cycle_22 = torch.norm(geodists_2 * c_22, p=self.norm_p)
+
+ return loss_cycle_11 + loss_cycle_22
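
Similarly, a standalone sketch of `_forward_one_pair` with two random "meshes"; the embeddings and geodesic-distance matrices are random stand-ins, and only the cycle bookkeeping mirrors the code above:

```python
import torch
import torch.nn.functional as F

def cycle_pair_loss(emb1, geo1, emb2, geo2, temperature=0.05, norm_p=2):
    # soft assignment mesh1 -> mesh2 and back, as in _forward_one_pair
    sim_12 = emb1.mm(emb2.T)
    c_12 = F.softmax(sim_12 / temperature, dim=1)
    c_21 = F.softmax(sim_12.T / temperature, dim=1)
    c_11 = c_12.mm(c_21)   # mesh1 -> mesh1 round trip
    c_22 = c_21.mm(c_12)   # mesh2 -> mesh2 round trip
    # round trips should stay geodesically close to the starting vertex
    return torch.norm(geo1 * c_11, p=norm_p) + torch.norm(geo2 * c_22, p=norm_p)

N1, N2, D = 50, 80, 16
emb1 = F.normalize(torch.randn(N1, D), dim=1)
emb2 = F.normalize(torch.randn(N2, D), dim=1)
pts1, pts2 = torch.rand(N1, 3), torch.rand(N2, 3)
geo1 = torch.cdist(pts1, pts1)   # stand-in for mesh.geodists of mesh 1
geo2 = torch.cdist(pts2, pts2)   # stand-in for mesh.geodists of mesh 2
print(cycle_pair_loss(emb1, geo1, emb2, geo2).item())
```
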
diff --git a/densepose/modeling/losses/embed.py b/densepose/modeling/losses/embed.py
new file mode 100644
index 0000000000000000000000000000000000000000..2fc8a16a478649847a6ce9200004eb4da64bb01e
--- /dev/null
+++ b/densepose/modeling/losses/embed.py
@@ -0,0 +1,121 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+# pyre-unsafe
+
+from typing import Any, Dict, List
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from detectron2.config import CfgNode
+from detectron2.structures import Instances
+
+from densepose.data.meshes.catalog import MeshCatalog
+from densepose.modeling.cse.utils import normalize_embeddings, squared_euclidean_distance_matrix
+
+from .embed_utils import PackedCseAnnotations
+from .utils import BilinearInterpolationHelper
+
+
+class EmbeddingLoss:
+ """
+ Computes losses for estimated embeddings given annotated vertices.
+ Instances in a minibatch that correspond to the same mesh are grouped
+ together. For each group, loss is computed as cross-entropy for
+ unnormalized scores given ground truth mesh vertex ids.
+ Scores are based on squared distances between estimated vertex embeddings
+ and mesh vertex embeddings.
+ """
+
+ def __init__(self, cfg: CfgNode):
+ """
+ Initialize embedding loss from config
+ """
+ self.embdist_gauss_sigma = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDING_DIST_GAUSS_SIGMA
+
+ def __call__(
+ self,
+ proposals_with_gt: List[Instances],
+ densepose_predictor_outputs: Any,
+ packed_annotations: PackedCseAnnotations,
+ interpolator: BilinearInterpolationHelper,
+ embedder: nn.Module,
+ ) -> Dict[int, torch.Tensor]:
+ """
+ Produces losses for estimated embeddings given annotated vertices.
+ Embeddings for all the vertices of a mesh are computed by the embedder.
+ Embeddings for observed pixels are estimated by a predictor.
+ Losses are computed as cross-entropy for squared distances between
+ observed vertex embeddings and all mesh vertex embeddings given
+ ground truth vertex IDs.
+
+ Args:
+ proposals_with_gt (list of Instances): detections with associated
+ ground truth data; each item corresponds to instances detected
+ on 1 image; the number of items corresponds to the number of
+ images in a batch
+ densepose_predictor_outputs: an object of a dataclass that contains predictor
+ outputs with estimated values; assumed to have the following attributes:
+ * embedding - embedding estimates, tensor of shape [N, D, S, S], where
+ N = number of instances (= sum N_i, where N_i is the number of
+ instances on image i)
+ D = embedding space dimensionality (MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE)
+ S = output size (width and height)
+ packed_annotations (PackedCseAnnotations): contains various data useful
+ for loss computation, each data is packed into a single tensor
+ interpolator (BilinearInterpolationHelper): bilinear interpolation helper
+ embedder (nn.Module): module that computes vertex embeddings for different meshes
+ Return:
+ dict(int -> tensor): losses for different mesh IDs
+ """
+ losses = {}
+ for mesh_id_tensor in packed_annotations.vertex_mesh_ids_gt.unique():
+ mesh_id = mesh_id_tensor.item()
+ mesh_name = MeshCatalog.get_mesh_name(mesh_id)
+ # valid points are those that fall into estimated bbox
+ # and correspond to the current mesh
+ j_valid = interpolator.j_valid * ( # pyre-ignore[16]
+ packed_annotations.vertex_mesh_ids_gt == mesh_id
+ )
+ if not torch.any(j_valid):
+ continue
+ # extract estimated embeddings for valid points
+ # -> tensor [J, D]
+ vertex_embeddings_i = normalize_embeddings(
+ interpolator.extract_at_points(
+ densepose_predictor_outputs.embedding,
+ slice_fine_segm=slice(None),
+ w_ylo_xlo=interpolator.w_ylo_xlo[:, None], # pyre-ignore[16]
+ w_ylo_xhi=interpolator.w_ylo_xhi[:, None], # pyre-ignore[16]
+ w_yhi_xlo=interpolator.w_yhi_xlo[:, None], # pyre-ignore[16]
+ w_yhi_xhi=interpolator.w_yhi_xhi[:, None], # pyre-ignore[16]
+ )[j_valid, :]
+ )
+ # extract vertex ids for valid points
+ # -> tensor [J]
+ vertex_indices_i = packed_annotations.vertex_ids_gt[j_valid]
+ # embeddings for all mesh vertices
+ # -> tensor [K, D]
+ mesh_vertex_embeddings = embedder(mesh_name)
+ # unnormalized scores for valid points
+ # -> tensor [J, K]
+ scores = squared_euclidean_distance_matrix(
+ vertex_embeddings_i, mesh_vertex_embeddings
+ ) / (-self.embdist_gauss_sigma)
+ losses[mesh_name] = F.cross_entropy(scores, vertex_indices_i, ignore_index=-1)
+
+ for mesh_name in embedder.mesh_names:
+ if mesh_name not in losses:
+ losses[mesh_name] = self.fake_value(
+ densepose_predictor_outputs, embedder, mesh_name
+ )
+ return losses
+
+ def fake_values(self, densepose_predictor_outputs: Any, embedder: nn.Module):
+ losses = {}
+ for mesh_name in embedder.mesh_names:
+ losses[mesh_name] = self.fake_value(densepose_predictor_outputs, embedder, mesh_name)
+ return losses
+
+ def fake_value(self, densepose_predictor_outputs: Any, embedder: nn.Module, mesh_name: str):
+ return densepose_predictor_outputs.embedding.sum() * 0 + embedder(mesh_name).sum() * 0
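
A standalone sketch of how `EmbeddingLoss` turns embedding distances into a per-point classification over mesh vertices; the tensors are random stand-ins for the interpolated predictor outputs and `embedder(mesh_name)`, and the sigma value is illustrative:

```python
import torch
import torch.nn.functional as F

J, K, D = 64, 500, 16      # annotated points, mesh vertices, embedding dim
sigma = 0.1                # stand-in for EMBEDDING_DIST_GAUSS_SIGMA

pix_emb = F.normalize(torch.randn(J, D), dim=1)    # embeddings sampled at GT points
vert_emb = F.normalize(torch.randn(K, D), dim=1)   # embedder(mesh_name)
vertex_ids_gt = torch.randint(0, K, (J,))          # annotated vertex IDs

# same quantity as squared_euclidean_distance_matrix: closer vertices get higher scores
sq_dists = torch.cdist(pix_emb, vert_emb) ** 2     # [J, K]
scores = sq_dists / (-sigma)
loss = F.cross_entropy(scores, vertex_ids_gt, ignore_index=-1)
print(loss.item())
```
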
diff --git a/densepose/modeling/losses/embed_utils.py b/densepose/modeling/losses/embed_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..92210f002c0c181c4893a9115e84aaaad512f8e3
--- /dev/null
+++ b/densepose/modeling/losses/embed_utils.py
@@ -0,0 +1,139 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+# pyre-unsafe
+
+from dataclasses import dataclass
+from typing import Any, Optional
+import torch
+
+from detectron2.structures import BoxMode, Instances
+
+from .utils import AnnotationsAccumulator
+
+
+@dataclass
+class PackedCseAnnotations:
+ x_gt: torch.Tensor
+ y_gt: torch.Tensor
+ coarse_segm_gt: Optional[torch.Tensor]
+ vertex_mesh_ids_gt: torch.Tensor
+ vertex_ids_gt: torch.Tensor
+ bbox_xywh_gt: torch.Tensor
+ bbox_xywh_est: torch.Tensor
+ point_bbox_with_dp_indices: torch.Tensor
+ point_bbox_indices: torch.Tensor
+ bbox_indices: torch.Tensor
+
+
+class CseAnnotationsAccumulator(AnnotationsAccumulator):
+ """
+ Accumulates annotations by batches that correspond to objects detected on
+ individual images. Can pack them together into single tensors.
+ """
+
+ def __init__(self):
+ self.x_gt = []
+ self.y_gt = []
+ self.s_gt = []
+ self.vertex_mesh_ids_gt = []
+ self.vertex_ids_gt = []
+ self.bbox_xywh_gt = []
+ self.bbox_xywh_est = []
+ self.point_bbox_with_dp_indices = []
+ self.point_bbox_indices = []
+ self.bbox_indices = []
+ self.nxt_bbox_with_dp_index = 0
+ self.nxt_bbox_index = 0
+
+ def accumulate(self, instances_one_image: Instances):
+ """
+ Accumulate instances data for one image
+
+ Args:
+ instances_one_image (Instances): instances data to accumulate
+ """
+ boxes_xywh_est = BoxMode.convert(
+ instances_one_image.proposal_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
+ )
+ boxes_xywh_gt = BoxMode.convert(
+ instances_one_image.gt_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
+ )
+ n_matches = len(boxes_xywh_gt)
+ assert n_matches == len(
+ boxes_xywh_est
+ ), f"Got {len(boxes_xywh_est)} proposal boxes and {len(boxes_xywh_gt)} GT boxes"
+ if not n_matches:
+ # no matches between detections and GT
+ return
+ if (
+ not hasattr(instances_one_image, "gt_densepose")
+ or instances_one_image.gt_densepose is None
+ ):
+ # no densepose GT for the detections, just increase the bbox index
+ self.nxt_bbox_index += n_matches
+ return
+ for box_xywh_est, box_xywh_gt, dp_gt in zip(
+ boxes_xywh_est, boxes_xywh_gt, instances_one_image.gt_densepose
+ ):
+ if (dp_gt is not None) and (len(dp_gt.x) > 0):
+ # pyre-fixme[6]: For 1st argument expected `Tensor` but got `float`.
+ # pyre-fixme[6]: For 2nd argument expected `Tensor` but got `float`.
+ self._do_accumulate(box_xywh_gt, box_xywh_est, dp_gt)
+ self.nxt_bbox_index += 1
+
+ def _do_accumulate(self, box_xywh_gt: torch.Tensor, box_xywh_est: torch.Tensor, dp_gt: Any):
+ """
+ Accumulate instances data for one image, given that the data is not empty
+
+ Args:
+ box_xywh_gt (tensor): GT bounding box
+ box_xywh_est (tensor): estimated bounding box
+ dp_gt: GT densepose data with the following attributes:
+ - x: normalized X coordinates
+ - y: normalized Y coordinates
+ - segm: tensor of size [S, S] with coarse segmentation
+ - vertex_ids, mesh_id: GT mesh vertex IDs and the corresponding mesh ID
+ """
+ self.x_gt.append(dp_gt.x)
+ self.y_gt.append(dp_gt.y)
+ if hasattr(dp_gt, "segm"):
+ self.s_gt.append(dp_gt.segm.unsqueeze(0))
+ self.vertex_ids_gt.append(dp_gt.vertex_ids)
+ self.vertex_mesh_ids_gt.append(torch.full_like(dp_gt.vertex_ids, dp_gt.mesh_id))
+ self.bbox_xywh_gt.append(box_xywh_gt.view(-1, 4))
+ self.bbox_xywh_est.append(box_xywh_est.view(-1, 4))
+ self.point_bbox_with_dp_indices.append(
+ torch.full_like(dp_gt.vertex_ids, self.nxt_bbox_with_dp_index)
+ )
+ self.point_bbox_indices.append(torch.full_like(dp_gt.vertex_ids, self.nxt_bbox_index))
+ self.bbox_indices.append(self.nxt_bbox_index)
+ self.nxt_bbox_with_dp_index += 1
+
+ def pack(self) -> Optional[PackedCseAnnotations]:
+ """
+ Pack data into tensors
+ """
+ if not len(self.x_gt):
+ # TODO:
+ # returning proper empty annotations would require
+ # creating empty tensors of appropriate shape and
+ # type on an appropriate device;
+ # we return None so far to indicate empty annotations
+ return None
+ return PackedCseAnnotations(
+ x_gt=torch.cat(self.x_gt, 0),
+ y_gt=torch.cat(self.y_gt, 0),
+ vertex_mesh_ids_gt=torch.cat(self.vertex_mesh_ids_gt, 0),
+ vertex_ids_gt=torch.cat(self.vertex_ids_gt, 0),
+ # ignore segmentation annotations, if not all the instances contain those
+ coarse_segm_gt=(
+ torch.cat(self.s_gt, 0) if len(self.s_gt) == len(self.bbox_xywh_gt) else None
+ ),
+ bbox_xywh_gt=torch.cat(self.bbox_xywh_gt, 0),
+ bbox_xywh_est=torch.cat(self.bbox_xywh_est, 0),
+ point_bbox_with_dp_indices=torch.cat(self.point_bbox_with_dp_indices, 0),
+ point_bbox_indices=torch.cat(self.point_bbox_indices, 0),
+ bbox_indices=torch.as_tensor(
+ self.bbox_indices, dtype=torch.long, device=self.x_gt[0].device
+ ),
+ )
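
The accumulator above mostly concatenates per-instance point tensors while recording, for every point, the bounding box it came from. A detectron2-free sketch of that bookkeeping with toy tensors (the variable names are illustrative):

```python
import torch

# per-instance GT points (e.g. 3 points for box 0, 2 points for box 1)
x_per_box = [torch.tensor([0.1, 0.4, 0.9]), torch.tensor([0.2, 0.7])]
vertex_ids_per_box = [torch.tensor([12, 57, 3]), torch.tensor([99, 4])]

x_gt, vertex_ids_gt, point_bbox_indices = [], [], []
for bbox_index, (x, vids) in enumerate(zip(x_per_box, vertex_ids_per_box)):
    x_gt.append(x)
    vertex_ids_gt.append(vids)
    # every point remembers the index of its bounding box, as in the accumulators above
    point_bbox_indices.append(torch.full_like(vids, bbox_index))

packed_x = torch.cat(x_gt)                         # tensor([0.1, 0.4, 0.9, 0.2, 0.7])
packed_vids = torch.cat(vertex_ids_gt)             # tensor([12, 57, 3, 99, 4])
packed_point_bbox = torch.cat(point_bbox_indices)  # tensor([0, 0, 0, 1, 1])
```
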
diff --git a/densepose/modeling/losses/mask.py b/densepose/modeling/losses/mask.py
new file mode 100644
index 0000000000000000000000000000000000000000..2f8f75a425d288e1167eaf8cb48e4dc0f851ff45
--- /dev/null
+++ b/densepose/modeling/losses/mask.py
@@ -0,0 +1,127 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+# pyre-unsafe
+
+from dataclasses import dataclass
+from typing import Any, Iterable, List, Optional
+import torch
+from torch.nn import functional as F
+
+from detectron2.structures import Instances
+
+
+@dataclass
+class DataForMaskLoss:
+ """
+ Contains mask GT and estimated data for proposals from multiple images:
+ """
+
+ # tensor of size (K, H, W) containing GT labels
+ masks_gt: Optional[torch.Tensor] = None
+ # tensor of size (K, C, H, W) containing estimated scores
+ masks_est: Optional[torch.Tensor] = None
+
+
+def extract_data_for_mask_loss_from_matches(
+ proposals_targets: Iterable[Instances], estimated_segm: torch.Tensor
+) -> DataForMaskLoss:
+ """
+ Extract data for mask loss from instances that contain matched GT and
+ estimated bounding boxes.
+ Args:
+ proposals_targets: Iterable[Instances]
+ matched GT and estimated results, each item in the iterable
+ corresponds to data in 1 image
+ estimated_segm: tensor(K, C, S, S) of float - raw unnormalized
+ segmentation scores, here S is the size to which GT masks are
+ to be resized
+ Return:
+ DataForMaskLoss with:
+ masks_est: tensor(K, C, S, S) of float - class scores
+ masks_gt: tensor(K, S, S) of int64 - labels
+ """
+ data = DataForMaskLoss()
+ masks_gt = []
+ offset = 0
+ assert estimated_segm.shape[2] == estimated_segm.shape[3], (
+ f"Expected estimated segmentation to have a square shape, "
+ f"but the actual shape is {estimated_segm.shape[2:]}"
+ )
+ mask_size = estimated_segm.shape[2]
+ num_proposals = sum(inst.proposal_boxes.tensor.size(0) for inst in proposals_targets)
+ num_estimated = estimated_segm.shape[0]
+ assert (
+ num_proposals == num_estimated
+ ), "The number of proposals {} must be equal to the number of estimates {}".format(
+ num_proposals, num_estimated
+ )
+
+ for proposals_targets_per_image in proposals_targets:
+ n_i = proposals_targets_per_image.proposal_boxes.tensor.size(0)
+ if not n_i:
+ continue
+ gt_masks_per_image = proposals_targets_per_image.gt_masks.crop_and_resize(
+ proposals_targets_per_image.proposal_boxes.tensor, mask_size
+ ).to(device=estimated_segm.device)
+ masks_gt.append(gt_masks_per_image)
+ offset += n_i
+ if masks_gt:
+ data.masks_est = estimated_segm
+ data.masks_gt = torch.cat(masks_gt, dim=0)
+ return data
+
+
+class MaskLoss:
+ """
+ Mask loss as cross-entropy for raw unnormalized scores given ground truth labels.
+ Mask ground truth labels are defined for the whole image and not only the
+ bounding box of interest. They are stored as objects that are assumed to implement
+ the `crop_and_resize` interface (e.g. BitMasks, PolygonMasks).
+ """
+
+ def __call__(
+ self, proposals_with_gt: List[Instances], densepose_predictor_outputs: Any
+ ) -> torch.Tensor:
+ """
+ Computes segmentation loss as cross-entropy for raw unnormalized
+ scores given ground truth labels.
+
+ Args:
+ proposals_with_gt (list of Instances): detections with associated ground truth data
+ densepose_predictor_outputs: an object of a dataclass that contains predictor outputs
+ with estimated values; assumed to have the following attribute:
+ * coarse_segm (tensor of shape [N, D, S, S]): coarse segmentation estimates
+ as raw unnormalized scores
+ where N is the number of detections, S is the estimate size ( = width = height)
+ and D is the number of coarse segmentation channels.
+ Return:
+ Cross entropy for raw unnormalized scores for coarse segmentation given
+ ground truth labels from masks
+ """
+ if not len(proposals_with_gt):
+ return self.fake_value(densepose_predictor_outputs)
+ # densepose outputs are computed for all images and all bounding boxes;
+ # i.e. if a batch has 4 images with (3, 1, 2, 1) proposals respectively,
+ # the outputs will have size(0) == 3+1+2+1 == 7
+ with torch.no_grad():
+ mask_loss_data = extract_data_for_mask_loss_from_matches(
+ proposals_with_gt, densepose_predictor_outputs.coarse_segm
+ )
+ if (mask_loss_data.masks_gt is None) or (mask_loss_data.masks_est is None):
+ return self.fake_value(densepose_predictor_outputs)
+ return F.cross_entropy(mask_loss_data.masks_est, mask_loss_data.masks_gt.long())
+
+ def fake_value(self, densepose_predictor_outputs: Any) -> torch.Tensor:
+ """
+ Fake segmentation loss used when no suitable ground truth data
+ was found in a batch. The loss has a value 0 and is primarily used to
+ construct the computation graph, so that `DistributedDataParallel`
+ has similar graphs on all GPUs and can perform reduction properly.
+
+ Args:
+ densepose_predictor_outputs: DensePose predictor outputs, an object
+ of a dataclass that is assumed to have `coarse_segm`
+ attribute
+ Return:
+ Zero value loss with proper computation graph
+ """
+ return densepose_predictor_outputs.coarse_segm.sum() * 0
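
The core of `MaskLoss` is a single spatial cross-entropy between per-pixel class scores and integer labels obtained from `crop_and_resize`; a toy sketch with random tensors (shapes illustrative):

```python
import torch
import torch.nn.functional as F

K, C, S = 4, 2, 28                          # instances, coarse segm channels, mask size
masks_est = torch.randn(K, C, S, S)         # raw unnormalized scores
masks_gt = torch.randint(0, C, (K, S, S))   # per-pixel labels from cropped/resized GT masks
loss = F.cross_entropy(masks_est, masks_gt.long())
print(loss.item())
```
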
diff --git a/densepose/modeling/losses/mask_or_segm.py b/densepose/modeling/losses/mask_or_segm.py
new file mode 100644
index 0000000000000000000000000000000000000000..350a2ebf81b13839c3a16545984c05c1aa68f5bf
--- /dev/null
+++ b/densepose/modeling/losses/mask_or_segm.py
@@ -0,0 +1,74 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+# pyre-unsafe
+
+from typing import Any, List
+import torch
+
+from detectron2.config import CfgNode
+from detectron2.structures import Instances
+
+from .mask import MaskLoss
+from .segm import SegmentationLoss
+
+
+class MaskOrSegmentationLoss:
+ """
+ Mask or segmentation loss as cross-entropy for raw unnormalized scores
+ given ground truth labels. Ground truth labels are either defined by coarse
+ segmentation annotation, or by mask annotation, depending on the config
+ value MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS
+ """
+
+ def __init__(self, cfg: CfgNode):
+ """
+ Initialize segmentation loss from configuration options
+
+ Args:
+ cfg (CfgNode): configuration options
+ """
+ self.segm_trained_by_masks = cfg.MODEL.ROI_DENSEPOSE_HEAD.COARSE_SEGM_TRAINED_BY_MASKS
+ if self.segm_trained_by_masks:
+ self.mask_loss = MaskLoss()
+ self.segm_loss = SegmentationLoss(cfg)
+
+ def __call__(
+ self,
+ proposals_with_gt: List[Instances],
+ densepose_predictor_outputs: Any,
+ packed_annotations: Any,
+ ) -> torch.Tensor:
+ """
+ Compute segmentation loss as cross-entropy between aligned unnormalized
+ score estimates and ground truth; with ground truth given
+ either by masks, or by coarse segmentation annotations.
+
+ Args:
+ proposals_with_gt (list of Instances): detections with associated ground truth data
+ densepose_predictor_outputs: an object of a dataclass that contains predictor outputs
+ with estimated values; assumed to have the following attributes:
+ * coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S]
+ packed_annotations: packed annotations for efficient loss computation
+ Return:
+ tensor: loss value as cross-entropy for raw unnormalized scores
+ given ground truth labels
+ """
+ if self.segm_trained_by_masks:
+ return self.mask_loss(proposals_with_gt, densepose_predictor_outputs)
+ return self.segm_loss(proposals_with_gt, densepose_predictor_outputs, packed_annotations)
+
+ def fake_value(self, densepose_predictor_outputs: Any) -> torch.Tensor:
+ """
+ Fake segmentation loss used when no suitable ground truth data
+ was found in a batch. The loss has a value 0 and is primarily used to
+ construct the computation graph, so that `DistributedDataParallel`
+ has similar graphs on all GPUs and can perform reduction properly.
+
+ Args:
+ densepose_predictor_outputs: DensePose predictor outputs, an object
+ of a dataclass that is assumed to have `coarse_segm`
+ attribute
+ Return:
+ Zero value loss with proper computation graph
+ """
+ return densepose_predictor_outputs.coarse_segm.sum() * 0
diff --git a/densepose/modeling/losses/registry.py b/densepose/modeling/losses/registry.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e8db8e82343abd352482e3d740a6922a1e12ac5
--- /dev/null
+++ b/densepose/modeling/losses/registry.py
@@ -0,0 +1,7 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from detectron2.utils.registry import Registry
+
+DENSEPOSE_LOSS_REGISTRY = Registry("DENSEPOSE_LOSS")
diff --git a/densepose/modeling/losses/segm.py b/densepose/modeling/losses/segm.py
new file mode 100644
index 0000000000000000000000000000000000000000..cd04d129c1d05ee0f3273bc7256a60cf7cbe64b9
--- /dev/null
+++ b/densepose/modeling/losses/segm.py
@@ -0,0 +1,85 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+# pyre-unsafe
+
+from typing import Any, List
+import torch
+from torch.nn import functional as F
+
+from detectron2.config import CfgNode
+from detectron2.structures import Instances
+
+from .utils import resample_data
+
+
+class SegmentationLoss:
+ """
+ Segmentation loss as cross-entropy for raw unnormalized scores given ground truth
+ labels. Segmentation ground truth labels are defined for the bounding box of
+ interest at some fixed resolution [S, S], where
+ S = MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE.
+ """
+
+ def __init__(self, cfg: CfgNode):
+ """
+ Initialize segmentation loss from configuration options
+
+ Args:
+ cfg (CfgNode): configuration options
+ """
+ self.heatmap_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.HEATMAP_SIZE
+ self.n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
+
+ def __call__(
+ self,
+ proposals_with_gt: List[Instances],
+ densepose_predictor_outputs: Any,
+ packed_annotations: Any,
+ ) -> torch.Tensor:
+ """
+ Compute segmentation loss as cross-entropy on aligned segmentation
+ ground truth and estimated scores.
+
+ Args:
+ proposals_with_gt (list of Instances): detections with associated ground truth data
+ densepose_predictor_outputs: an object of a dataclass that contains predictor outputs
+ with estimated values; assumed to have the following attributes:
+ * coarse_segm - coarse segmentation estimates, tensor of shape [N, D, S, S]
+ packed_annotations: packed annotations for efficient loss computation;
+ the following attributes are used:
+ - coarse_segm_gt
+ - bbox_xywh_gt
+ - bbox_xywh_est
+ """
+ if packed_annotations.coarse_segm_gt is None:
+ return self.fake_value(densepose_predictor_outputs)
+ coarse_segm_est = densepose_predictor_outputs.coarse_segm[packed_annotations.bbox_indices]
+ with torch.no_grad():
+ coarse_segm_gt = resample_data(
+ packed_annotations.coarse_segm_gt.unsqueeze(1),
+ packed_annotations.bbox_xywh_gt,
+ packed_annotations.bbox_xywh_est,
+ self.heatmap_size,
+ self.heatmap_size,
+ mode="nearest",
+ padding_mode="zeros",
+ ).squeeze(1)
+ if self.n_segm_chan == 2:
+ coarse_segm_gt = coarse_segm_gt > 0
+ return F.cross_entropy(coarse_segm_est, coarse_segm_gt.long())
+
+ def fake_value(self, densepose_predictor_outputs: Any) -> torch.Tensor:
+ """
+ Fake segmentation loss used when no suitable ground truth data
+ was found in a batch. The loss has a value 0 and is primarily used to
+ construct the computation graph, so that `DistributedDataParallel`
+ has similar graphs on all GPUs and can perform reduction properly.
+
+ Args:
+ densepose_predictor_outputs: DensePose predictor outputs, an object
+ of a dataclass that is assumed to have `coarse_segm`
+ attribute
+ Return:
+ Zero value loss with proper computation graph
+ """
+ return densepose_predictor_outputs.coarse_segm.sum() * 0
diff --git a/densepose/modeling/losses/soft_embed.py b/densepose/modeling/losses/soft_embed.py
new file mode 100644
index 0000000000000000000000000000000000000000..f746d67a75738c9d38f84830f59f72da55a99280
--- /dev/null
+++ b/densepose/modeling/losses/soft_embed.py
@@ -0,0 +1,135 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+# pyre-unsafe
+
+from typing import Any, Dict, List
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from detectron2.config import CfgNode
+from detectron2.structures import Instances
+
+from densepose.data.meshes.catalog import MeshCatalog
+from densepose.modeling.cse.utils import normalize_embeddings, squared_euclidean_distance_matrix
+from densepose.structures.mesh import create_mesh
+
+from .embed_utils import PackedCseAnnotations
+from .utils import BilinearInterpolationHelper
+
+
+class SoftEmbeddingLoss:
+ """
+ Computes losses for estimated embeddings given annotated vertices.
+ Instances in a minibatch that correspond to the same mesh are grouped
+ together. For each group, loss is computed as cross-entropy for
+ unnormalized scores given ground truth mesh vertex ids.
+ Scores are based on:
+ 1) squared distances between estimated vertex embeddings
+ and mesh vertex embeddings;
+ 2) geodesic distances between vertices of a mesh
+ """
+
+ def __init__(self, cfg: CfgNode):
+ """
+ Initialize embedding loss from config
+ """
+ self.embdist_gauss_sigma = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBEDDING_DIST_GAUSS_SIGMA
+ self.geodist_gauss_sigma = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.GEODESIC_DIST_GAUSS_SIGMA
+
+ def __call__(
+ self,
+ proposals_with_gt: List[Instances],
+ densepose_predictor_outputs: Any,
+ packed_annotations: PackedCseAnnotations,
+ interpolator: BilinearInterpolationHelper,
+ embedder: nn.Module,
+ ) -> Dict[int, torch.Tensor]:
+ """
+ Produces losses for estimated embeddings given annotated vertices.
+ Embeddings for all the vertices of a mesh are computed by the embedder.
+ Embeddings for observed pixels are estimated by a predictor.
+ Losses are computed as a soft cross-entropy in which
+ 1) scores are based on squared distances between estimated vertex
+ embeddings and mesh vertex embeddings, and
+ 2) soft target distributions are based on geodesic distances between
+ mesh vertices around the ground truth vertex.
+
+ Args:
+ proposals_with_gt (list of Instances): detections with associated
+ ground truth data; each item corresponds to instances detected
+ on 1 image; the number of items corresponds to the number of
+ images in a batch
+ densepose_predictor_outputs: an object of a dataclass that contains predictor
+ outputs with estimated values; assumed to have the following attributes:
+ * embedding - embedding estimates, tensor of shape [N, D, S, S], where
+ N = number of instances (= sum N_i, where N_i is the number of
+ instances on image i)
+ D = embedding space dimensionality (MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE)
+ S = output size (width and height)
+ packed_annotations (PackedCseAnnotations): contains various data useful
+ for loss computation, each data is packed into a single tensor
+ interpolator (BilinearInterpolationHelper): bilinear interpolation helper
+ embedder (nn.Module): module that computes vertex embeddings for different meshes
+ Return:
+ dict(int -> tensor): losses for different mesh IDs
+ """
+ losses = {}
+ for mesh_id_tensor in packed_annotations.vertex_mesh_ids_gt.unique():
+ mesh_id = mesh_id_tensor.item()
+ mesh_name = MeshCatalog.get_mesh_name(mesh_id)
+ # valid points are those that fall into estimated bbox
+ # and correspond to the current mesh
+ j_valid = interpolator.j_valid * ( # pyre-ignore[16]
+ packed_annotations.vertex_mesh_ids_gt == mesh_id
+ )
+ if not torch.any(j_valid):
+ continue
+ # extract estimated embeddings for valid points
+ # -> tensor [J, D]
+ vertex_embeddings_i = normalize_embeddings(
+ interpolator.extract_at_points(
+ densepose_predictor_outputs.embedding,
+ slice_fine_segm=slice(None),
+ w_ylo_xlo=interpolator.w_ylo_xlo[:, None], # pyre-ignore[16]
+ w_ylo_xhi=interpolator.w_ylo_xhi[:, None], # pyre-ignore[16]
+ w_yhi_xlo=interpolator.w_yhi_xlo[:, None], # pyre-ignore[16]
+ w_yhi_xhi=interpolator.w_yhi_xhi[:, None], # pyre-ignore[16]
+ )[j_valid, :]
+ )
+ # extract vertex ids for valid points
+ # -> tensor [J]
+ vertex_indices_i = packed_annotations.vertex_ids_gt[j_valid]
+ # embeddings for all mesh vertices
+ # -> tensor [K, D]
+ mesh_vertex_embeddings = embedder(mesh_name)
+ # softmax values of geodesic distances for GT mesh vertices
+ # -> tensor [J, K]
+ mesh = create_mesh(mesh_name, mesh_vertex_embeddings.device)
+ geodist_softmax_values = F.softmax(
+ mesh.geodists[vertex_indices_i] / (-self.geodist_gauss_sigma), dim=1
+ )
+ # logsoftmax values for valid points
+ # -> tensor [J, K]
+ embdist_logsoftmax_values = F.log_softmax(
+ squared_euclidean_distance_matrix(vertex_embeddings_i, mesh_vertex_embeddings)
+ / (-self.embdist_gauss_sigma),
+ dim=1,
+ )
+ losses[mesh_name] = (-geodist_softmax_values * embdist_logsoftmax_values).sum(1).mean()
+
+ for mesh_name in embedder.mesh_names:
+ if mesh_name not in losses:
+ losses[mesh_name] = self.fake_value(
+ densepose_predictor_outputs, embedder, mesh_name
+ )
+ return losses
+
+ def fake_values(self, densepose_predictor_outputs: Any, embedder: nn.Module):
+ losses = {}
+ for mesh_name in embedder.mesh_names:
+ losses[mesh_name] = self.fake_value(densepose_predictor_outputs, embedder, mesh_name)
+ return losses
+
+ def fake_value(self, densepose_predictor_outputs: Any, embedder: nn.Module, mesh_name: str):
+ return densepose_predictor_outputs.embedding.sum() * 0 + embedder(mesh_name).sum() * 0
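
Compared to `EmbeddingLoss`, the soft variant replaces the one-hot target over vertices with a distribution derived from geodesic distances. A standalone sketch with random stand-ins for the embeddings and for `mesh.geodists` (sigma values illustrative):

```python
import torch
import torch.nn.functional as F

J, K, D = 64, 500, 16
embdist_sigma, geodist_sigma = 0.1, 0.5

pix_emb = F.normalize(torch.randn(J, D), dim=1)    # embeddings at annotated points
vert_emb = F.normalize(torch.randn(K, D), dim=1)   # embedder(mesh_name)
vertex_ids_gt = torch.randint(0, K, (J,))
pts = torch.rand(K, 3)
geodists = torch.cdist(pts, pts)                   # stand-in for mesh.geodists [K, K]

# soft targets: vertices geodesically close to the GT vertex get most of the mass
target = F.softmax(geodists[vertex_ids_gt] / (-geodist_sigma), dim=1)   # [J, K]
# log-probabilities from embedding distances
sq_dists = torch.cdist(pix_emb, vert_emb) ** 2                          # [J, K]
logprob = F.log_softmax(sq_dists / (-embdist_sigma), dim=1)
loss = (-target * logprob).sum(1).mean()
print(loss.item())
```
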
diff --git a/densepose/modeling/losses/utils.py b/densepose/modeling/losses/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..f865798760c798c814b4c12eb9c185a13fba7146
--- /dev/null
+++ b/densepose/modeling/losses/utils.py
@@ -0,0 +1,445 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional, Tuple
+import torch
+from torch.nn import functional as F
+
+from detectron2.structures import BoxMode, Instances
+
+from densepose import DensePoseDataRelative
+
+LossDict = Dict[str, torch.Tensor]
+
+
+def _linear_interpolation_utilities(v_norm, v0_src, size_src, v0_dst, size_dst, size_z):
+ """
+ Computes utility values for linear interpolation at points v.
+ The points are given as normalized offsets in the source interval
+ (v0_src, v0_src + size_src), more precisely:
+ v = v0_src + v_norm * size_src / 256.0
+ The computed utilities include lower points v_lo, upper points v_hi,
+ interpolation weights v_w and flags j_valid indicating whether the
+ points fall into the destination interval (v0_dst, v0_dst + size_dst).
+
+ Args:
+ v_norm (:obj: `torch.Tensor`): tensor of size N containing
+ normalized point offsets
+ v0_src (:obj: `torch.Tensor`): tensor of size N containing
+ left bounds of source intervals for normalized points
+ size_src (:obj: `torch.Tensor`): tensor of size N containing
+ source interval sizes for normalized points
+ v0_dst (:obj: `torch.Tensor`): tensor of size N containing
+ left bounds of destination intervals
+ size_dst (:obj: `torch.Tensor`): tensor of size N containing
+ destination interval sizes
+ size_z (int): interval size for data to be interpolated
+
+ Returns:
+ v_lo (:obj: `torch.Tensor`): int tensor of size N containing
+ indices of lower values used for interpolation, all values are
+ integers from [0, size_z - 1]
+ v_hi (:obj: `torch.Tensor`): int tensor of size N containing
+ indices of upper values used for interpolation, all values are
+ integers from [0, size_z - 1]
+ v_w (:obj: `torch.Tensor`): float tensor of size N containing
+ interpolation weights
+ j_valid (:obj: `torch.Tensor`): uint8 tensor of size N containing
+ 0 for points outside the destination interval
+ (v0_dst, v0_dst + size_dst) and 1 otherwise
+ """
+ v = v0_src + v_norm * size_src / 256.0
+ j_valid = (v - v0_dst >= 0) * (v - v0_dst < size_dst)
+ v_grid = (v - v0_dst) * size_z / size_dst
+ v_lo = v_grid.floor().long().clamp(min=0, max=size_z - 1)
+ v_hi = (v_lo + 1).clamp(max=size_z - 1)
+ v_grid = torch.min(v_hi.float(), v_grid)
+ v_w = v_grid - v_lo.float()
+ return v_lo, v_hi, v_w, j_valid
+
+
+class BilinearInterpolationHelper:
+ """
+ Args:
+ packed_annotations: object that contains packed annotations
+ j_valid (:obj: `torch.Tensor`): uint8 tensor of size M containing
+ 0 for points to be discarded and 1 for points to be selected
+ y_lo (:obj: `torch.Tensor`): int tensor of indices of upper values
+ in z_est for each point
+ y_hi (:obj: `torch.Tensor`): int tensor of indices of lower values
+ in z_est for each point
+ x_lo (:obj: `torch.Tensor`): int tensor of indices of left values
+ in z_est for each point
+ x_hi (:obj: `torch.Tensor`): int tensor of indices of right values
+ in z_est for each point
+ w_ylo_xlo (:obj: `torch.Tensor`): float tensor of size M;
+ contains upper-left value weight for each point
+ w_ylo_xhi (:obj: `torch.Tensor`): float tensor of size M;
+ contains upper-right value weight for each point
+ w_yhi_xlo (:obj: `torch.Tensor`): float tensor of size M;
+ contains lower-left value weight for each point
+ w_yhi_xhi (:obj: `torch.Tensor`): float tensor of size M;
+ contains lower-right value weight for each point
+ """
+
+ def __init__(
+ self,
+ packed_annotations: Any,
+ j_valid: torch.Tensor,
+ y_lo: torch.Tensor,
+ y_hi: torch.Tensor,
+ x_lo: torch.Tensor,
+ x_hi: torch.Tensor,
+ w_ylo_xlo: torch.Tensor,
+ w_ylo_xhi: torch.Tensor,
+ w_yhi_xlo: torch.Tensor,
+ w_yhi_xhi: torch.Tensor,
+ ):
+ for k, v in locals().items():
+ if k != "self":
+ setattr(self, k, v)
+
+ @staticmethod
+ def from_matches(
+ packed_annotations: Any, densepose_outputs_size_hw: Tuple[int, int]
+ ) -> "BilinearInterpolationHelper":
+ """
+ Args:
+ packed_annotations: annotations packed into tensors, the following
+ attributes are required:
+ - bbox_xywh_gt
+ - bbox_xywh_est
+ - x_gt
+ - y_gt
+ - point_bbox_with_dp_indices
+ - point_bbox_indices
+ densepose_outputs_size_hw (tuple [int, int]): resolution of
+ DensePose predictor outputs (H, W)
+ Return:
+ An instance of `BilinearInterpolationHelper` used to perform
+ interpolation for the given annotation points and output resolution
+ """
+
+ zh, zw = densepose_outputs_size_hw
+ x0_gt, y0_gt, w_gt, h_gt = packed_annotations.bbox_xywh_gt[
+ packed_annotations.point_bbox_with_dp_indices
+ ].unbind(dim=1)
+ x0_est, y0_est, w_est, h_est = packed_annotations.bbox_xywh_est[
+ packed_annotations.point_bbox_with_dp_indices
+ ].unbind(dim=1)
+ x_lo, x_hi, x_w, jx_valid = _linear_interpolation_utilities(
+ packed_annotations.x_gt, x0_gt, w_gt, x0_est, w_est, zw
+ )
+ y_lo, y_hi, y_w, jy_valid = _linear_interpolation_utilities(
+ packed_annotations.y_gt, y0_gt, h_gt, y0_est, h_est, zh
+ )
+ j_valid = jx_valid * jy_valid
+
+ w_ylo_xlo = (1.0 - x_w) * (1.0 - y_w)
+ w_ylo_xhi = x_w * (1.0 - y_w)
+ w_yhi_xlo = (1.0 - x_w) * y_w
+ w_yhi_xhi = x_w * y_w
+
+ return BilinearInterpolationHelper(
+ packed_annotations,
+ j_valid,
+ y_lo,
+ y_hi,
+ x_lo,
+ x_hi,
+ w_ylo_xlo, # pyre-ignore[6]
+ w_ylo_xhi,
+ # pyre-fixme[6]: Expected `Tensor` for 9th param but got `float`.
+ w_yhi_xlo,
+ w_yhi_xhi,
+ )
+
+ def extract_at_points(
+ self,
+ z_est,
+ slice_fine_segm=None,
+ w_ylo_xlo=None,
+ w_ylo_xhi=None,
+ w_yhi_xlo=None,
+ w_yhi_xhi=None,
+ ):
+ """
+ Extract ground truth values z_gt for valid point indices and estimated
+ values z_est using bilinear interpolation over top-left (y_lo, x_lo),
+ top-right (y_lo, x_hi), bottom-left (y_hi, x_lo) and bottom-right
+ (y_hi, x_hi) values in z_est with corresponding weights:
+ w_ylo_xlo, w_ylo_xhi, w_yhi_xlo and w_yhi_xhi.
+ Use slice_fine_segm to slice dim=1 in z_est
+ """
+ slice_fine_segm = (
+ self.packed_annotations.fine_segm_labels_gt
+ if slice_fine_segm is None
+ else slice_fine_segm
+ )
+ w_ylo_xlo = self.w_ylo_xlo if w_ylo_xlo is None else w_ylo_xlo
+ w_ylo_xhi = self.w_ylo_xhi if w_ylo_xhi is None else w_ylo_xhi
+ w_yhi_xlo = self.w_yhi_xlo if w_yhi_xlo is None else w_yhi_xlo
+ w_yhi_xhi = self.w_yhi_xhi if w_yhi_xhi is None else w_yhi_xhi
+
+ index_bbox = self.packed_annotations.point_bbox_indices
+ z_est_sampled = (
+ z_est[index_bbox, slice_fine_segm, self.y_lo, self.x_lo] * w_ylo_xlo
+ + z_est[index_bbox, slice_fine_segm, self.y_lo, self.x_hi] * w_ylo_xhi
+ + z_est[index_bbox, slice_fine_segm, self.y_hi, self.x_lo] * w_yhi_xlo
+ + z_est[index_bbox, slice_fine_segm, self.y_hi, self.x_hi] * w_yhi_xhi
+ )
+ return z_est_sampled
+
+
+def resample_data(
+ z, bbox_xywh_src, bbox_xywh_dst, wout, hout, mode: str = "nearest", padding_mode: str = "zeros"
+):
+ """
+ Args:
+ z (:obj: `torch.Tensor`): tensor of size (N,C,H,W) with data to be
+ resampled
+ bbox_xywh_src (:obj: `torch.Tensor`): tensor of size (N,4) containing
+ source bounding boxes in format XYWH
+ bbox_xywh_dst (:obj: `torch.Tensor`): tensor of size (N,4) containing
+ destination bounding boxes in format XYWH
+ Return:
+ zresampled (:obj: `torch.Tensor`): tensor of size (N, C, Hout, Wout)
+ with values of z resampled into the destination bounding boxes
+ """
+ n = bbox_xywh_src.size(0)
+ assert n == bbox_xywh_dst.size(0), (
+ "The number of "
+ "source ROIs for resampling ({}) should be equal to the number "
+ "of destination ROIs ({})".format(bbox_xywh_src.size(0), bbox_xywh_dst.size(0))
+ )
+ x0src, y0src, wsrc, hsrc = bbox_xywh_src.unbind(dim=1)
+ x0dst, y0dst, wdst, hdst = bbox_xywh_dst.unbind(dim=1)
+ x0dst_norm = 2 * (x0dst - x0src) / wsrc - 1
+ y0dst_norm = 2 * (y0dst - y0src) / hsrc - 1
+ x1dst_norm = 2 * (x0dst + wdst - x0src) / wsrc - 1
+ y1dst_norm = 2 * (y0dst + hdst - y0src) / hsrc - 1
+ grid_w = torch.arange(wout, device=z.device, dtype=torch.float) / wout
+ grid_h = torch.arange(hout, device=z.device, dtype=torch.float) / hout
+ grid_w_expanded = grid_w[None, None, :].expand(n, hout, wout)
+ grid_h_expanded = grid_h[None, :, None].expand(n, hout, wout)
+ dx_expanded = (x1dst_norm - x0dst_norm)[:, None, None].expand(n, hout, wout)
+ dy_expanded = (y1dst_norm - y0dst_norm)[:, None, None].expand(n, hout, wout)
+ x0_expanded = x0dst_norm[:, None, None].expand(n, hout, wout)
+ y0_expanded = y0dst_norm[:, None, None].expand(n, hout, wout)
+ grid_x = grid_w_expanded * dx_expanded + x0_expanded
+ grid_y = grid_h_expanded * dy_expanded + y0_expanded
+ grid = torch.stack((grid_x, grid_y), dim=3)
+ # resample Z from (N, C, H, W) into (N, C, Hout, Wout)
+ zresampled = F.grid_sample(z, grid, mode=mode, padding_mode=padding_mode, align_corners=True)
+ return zresampled
+
+
+class AnnotationsAccumulator(ABC):
+ """
+ Abstract class for an accumulator for annotations that can produce
+ dense annotations packed into tensors.
+ """
+
+ @abstractmethod
+ def accumulate(self, instances_one_image: Instances):
+ """
+ Accumulate instances data for one image
+
+ Args:
+ instances_one_image (Instances): instances data to accumulate
+ """
+ pass
+
+ @abstractmethod
+ def pack(self) -> Any:
+ """
+ Pack data into tensors
+ """
+ pass
+
+
+@dataclass
+class PackedChartBasedAnnotations:
+ """
+ Packed annotations for chart-based model training. The following attributes
+ are defined:
+ - fine_segm_labels_gt (tensor [K] of `int64`): GT fine segmentation point labels
+ - x_gt (tensor [K] of `float32`): GT normalized X point coordinates
+ - y_gt (tensor [K] of `float32`): GT normalized Y point coordinates
+ - u_gt (tensor [K] of `float32`): GT point U values
+ - v_gt (tensor [K] of `float32`): GT point V values
+ - coarse_segm_gt (tensor [N, S, S] of `float32`): GT segmentation for bounding boxes
+ - bbox_xywh_gt (tensor [N, 4] of `float32`): selected GT bounding boxes in
+ XYWH format
+ - bbox_xywh_est (tensor [N, 4] of `float32`): selected matching estimated
+ bounding boxes in XYWH format
+ - point_bbox_with_dp_indices (tensor [K] of `int64`): indices of bounding boxes
+ with DensePose annotations that correspond to the point data
+ - point_bbox_indices (tensor [K] of `int64`): indices of bounding boxes
+ (not necessarily the selected ones with DensePose data) that correspond
+ to the point data
+ - bbox_indices (tensor [N] of `int64`): global indices of selected bounding
+ boxes with DensePose annotations; these indices could be used to access
+ features that are computed for all bounding boxes, not only the ones with
+ DensePose annotations.
+ Here K is the total number of points and N is the total number of instances
+ with DensePose annotations.
+ """
+
+ fine_segm_labels_gt: torch.Tensor
+ x_gt: torch.Tensor
+ y_gt: torch.Tensor
+ u_gt: torch.Tensor
+ v_gt: torch.Tensor
+ coarse_segm_gt: Optional[torch.Tensor]
+ bbox_xywh_gt: torch.Tensor
+ bbox_xywh_est: torch.Tensor
+ point_bbox_with_dp_indices: torch.Tensor
+ point_bbox_indices: torch.Tensor
+ bbox_indices: torch.Tensor
+
+
+class ChartBasedAnnotationsAccumulator(AnnotationsAccumulator):
+ """
+ Accumulates annotations by batches that correspond to objects detected on
+ individual images. Can pack them together into single tensors.
+ """
+
+ def __init__(self):
+ self.i_gt = []
+ self.x_gt = []
+ self.y_gt = []
+ self.u_gt = []
+ self.v_gt = []
+ self.s_gt = []
+ self.bbox_xywh_gt = []
+ self.bbox_xywh_est = []
+ self.point_bbox_with_dp_indices = []
+ self.point_bbox_indices = []
+ self.bbox_indices = []
+ self.nxt_bbox_with_dp_index = 0
+ self.nxt_bbox_index = 0
+
+ def accumulate(self, instances_one_image: Instances):
+ """
+ Accumulate instances data for one image
+
+ Args:
+ instances_one_image (Instances): instances data to accumulate
+ """
+ boxes_xywh_est = BoxMode.convert(
+ instances_one_image.proposal_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
+ )
+ boxes_xywh_gt = BoxMode.convert(
+ instances_one_image.gt_boxes.tensor.clone(), BoxMode.XYXY_ABS, BoxMode.XYWH_ABS
+ )
+ n_matches = len(boxes_xywh_gt)
+ assert n_matches == len(
+ boxes_xywh_est
+ ), f"Got {len(boxes_xywh_est)} proposal boxes and {len(boxes_xywh_gt)} GT boxes"
+ if not n_matches:
+ # no matches between detections and GT
+ return
+ if (
+ not hasattr(instances_one_image, "gt_densepose")
+ or instances_one_image.gt_densepose is None
+ ):
+ # no densepose GT for the detections, just increase the bbox index
+ self.nxt_bbox_index += n_matches
+ return
+ for box_xywh_est, box_xywh_gt, dp_gt in zip(
+ boxes_xywh_est, boxes_xywh_gt, instances_one_image.gt_densepose
+ ):
+ if (dp_gt is not None) and (len(dp_gt.x) > 0):
+ # pyre-fixme[6]: For 1st argument expected `Tensor` but got `float`.
+ # pyre-fixme[6]: For 2nd argument expected `Tensor` but got `float`.
+ self._do_accumulate(box_xywh_gt, box_xywh_est, dp_gt)
+ self.nxt_bbox_index += 1
+
+ def _do_accumulate(
+ self, box_xywh_gt: torch.Tensor, box_xywh_est: torch.Tensor, dp_gt: DensePoseDataRelative
+ ):
+ """
+ Accumulate instances data for one image, given that the data is not empty
+
+ Args:
+ box_xywh_gt (tensor): GT bounding box
+ box_xywh_est (tensor): estimated bounding box
+ dp_gt (DensePoseDataRelative): GT densepose data
+ """
+ self.i_gt.append(dp_gt.i)
+ self.x_gt.append(dp_gt.x)
+ self.y_gt.append(dp_gt.y)
+ self.u_gt.append(dp_gt.u)
+ self.v_gt.append(dp_gt.v)
+ if hasattr(dp_gt, "segm"):
+ self.s_gt.append(dp_gt.segm.unsqueeze(0))
+ self.bbox_xywh_gt.append(box_xywh_gt.view(-1, 4))
+ self.bbox_xywh_est.append(box_xywh_est.view(-1, 4))
+ self.point_bbox_with_dp_indices.append(
+ torch.full_like(dp_gt.i, self.nxt_bbox_with_dp_index)
+ )
+ self.point_bbox_indices.append(torch.full_like(dp_gt.i, self.nxt_bbox_index))
+ self.bbox_indices.append(self.nxt_bbox_index)
+ self.nxt_bbox_with_dp_index += 1
+
+ def pack(self) -> Optional[PackedChartBasedAnnotations]:
+ """
+ Pack data into tensors
+ """
+ if not len(self.i_gt):
+ # TODO:
+ # returning proper empty annotations would require
+ # creating empty tensors of appropriate shape and
+ # type on an appropriate device;
+ # we return None so far to indicate empty annotations
+ return None
+ return PackedChartBasedAnnotations(
+ fine_segm_labels_gt=torch.cat(self.i_gt, 0).long(),
+ x_gt=torch.cat(self.x_gt, 0),
+ y_gt=torch.cat(self.y_gt, 0),
+ u_gt=torch.cat(self.u_gt, 0),
+ v_gt=torch.cat(self.v_gt, 0),
+ # ignore segmentation annotations, if not all the instances contain those
+ coarse_segm_gt=(
+ torch.cat(self.s_gt, 0) if len(self.s_gt) == len(self.bbox_xywh_gt) else None
+ ),
+ bbox_xywh_gt=torch.cat(self.bbox_xywh_gt, 0),
+ bbox_xywh_est=torch.cat(self.bbox_xywh_est, 0),
+ point_bbox_with_dp_indices=torch.cat(self.point_bbox_with_dp_indices, 0).long(),
+ point_bbox_indices=torch.cat(self.point_bbox_indices, 0).long(),
+ bbox_indices=torch.as_tensor(
+ self.bbox_indices, dtype=torch.long, device=self.x_gt[0].device
+ ).long(),
+ )
+
+
+def extract_packed_annotations_from_matches(
+ proposals_with_targets: List[Instances], accumulator: AnnotationsAccumulator
+) -> Any:
+ for proposals_targets_per_image in proposals_with_targets:
+ accumulator.accumulate(proposals_targets_per_image)
+ return accumulator.pack()
+
+
+def sample_random_indices(
+ n_indices: int, n_samples: int, device: Optional[torch.device] = None
+) -> Optional[torch.Tensor]:
+ """
+ Samples `n_samples` random indices from range `[0..n_indices - 1]`.
+ If `n_samples` is non-positive or `n_indices` does not exceed `n_samples`,
+ returns `None`, meaning that all indices are selected.
+ Args:
+ n_indices (int): total number of indices
+ n_samples (int): number of indices to sample
+ device (torch.device): the desired device of returned tensor
+ Return:
+ Tensor of selected vertex indices, or `None`, if all vertices are selected
+ """
+ if (n_samples <= 0) or (n_indices <= n_samples):
+ return None
+ indices = torch.randperm(n_indices, device=device)[:n_samples]
+ return indices
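+
+# Illustrative usage (editorial note, not part of the upstream file):
+#   sample_random_indices(100, 10) -> tensor of 10 distinct indices in [0, 99]
+#   sample_random_indices(5, 8)    -> None (n_indices <= n_samples: keep all indices)
+#   sample_random_indices(5, 0)    -> None (non-positive sample count: keep all indices)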
diff --git a/densepose/modeling/predictors/__init__.py b/densepose/modeling/predictors/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c749ea264690d0b4c85abc520e7476bc4365175d
--- /dev/null
+++ b/densepose/modeling/predictors/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from .chart import DensePoseChartPredictor
+from .chart_confidence import DensePoseChartConfidencePredictorMixin
+from .chart_with_confidence import DensePoseChartWithConfidencePredictor
+from .cse import DensePoseEmbeddingPredictor
+from .cse_confidence import DensePoseEmbeddingConfidencePredictorMixin
+from .cse_with_confidence import DensePoseEmbeddingWithConfidencePredictor
+from .registry import DENSEPOSE_PREDICTOR_REGISTRY
diff --git a/densepose/modeling/predictors/__pycache__/__init__.cpython-39.pyc b/densepose/modeling/predictors/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..49bf3176cb5463f326249cb7bef5a624c878b6f6
Binary files /dev/null and b/densepose/modeling/predictors/__pycache__/__init__.cpython-39.pyc differ
diff --git a/densepose/modeling/predictors/__pycache__/chart.cpython-39.pyc b/densepose/modeling/predictors/__pycache__/chart.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a9cf846e5b9f98daafadd44693d1b66516d11e03
Binary files /dev/null and b/densepose/modeling/predictors/__pycache__/chart.cpython-39.pyc differ
diff --git a/densepose/modeling/predictors/__pycache__/chart_confidence.cpython-39.pyc b/densepose/modeling/predictors/__pycache__/chart_confidence.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..09bafc804749b612f6473339a0689f69ffe605ea
Binary files /dev/null and b/densepose/modeling/predictors/__pycache__/chart_confidence.cpython-39.pyc differ
diff --git a/densepose/modeling/predictors/__pycache__/chart_with_confidence.cpython-39.pyc b/densepose/modeling/predictors/__pycache__/chart_with_confidence.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f0405d8e1e531989ce76f7210f8edf191f3eba2f
Binary files /dev/null and b/densepose/modeling/predictors/__pycache__/chart_with_confidence.cpython-39.pyc differ
diff --git a/densepose/modeling/predictors/__pycache__/cse.cpython-39.pyc b/densepose/modeling/predictors/__pycache__/cse.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..276180023c4b103763674faeb0575f1b2fb7e010
Binary files /dev/null and b/densepose/modeling/predictors/__pycache__/cse.cpython-39.pyc differ
diff --git a/densepose/modeling/predictors/__pycache__/cse_confidence.cpython-39.pyc b/densepose/modeling/predictors/__pycache__/cse_confidence.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ace37119c22ef8933bd8c42fd62ce5442a9af1e7
Binary files /dev/null and b/densepose/modeling/predictors/__pycache__/cse_confidence.cpython-39.pyc differ
diff --git a/densepose/modeling/predictors/__pycache__/cse_with_confidence.cpython-39.pyc b/densepose/modeling/predictors/__pycache__/cse_with_confidence.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fa6f8a1d7721b2502fb97fecfec63e3d1d4b4e8b
Binary files /dev/null and b/densepose/modeling/predictors/__pycache__/cse_with_confidence.cpython-39.pyc differ
diff --git a/densepose/modeling/predictors/__pycache__/registry.cpython-39.pyc b/densepose/modeling/predictors/__pycache__/registry.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0b40ea844c3f65ff969c2162fe021d2d9b680fab
Binary files /dev/null and b/densepose/modeling/predictors/__pycache__/registry.cpython-39.pyc differ
diff --git a/densepose/modeling/predictors/chart.py b/densepose/modeling/predictors/chart.py
new file mode 100644
index 0000000000000000000000000000000000000000..67fc401d70fe5e7d7baec3530d435955d4a23f7c
--- /dev/null
+++ b/densepose/modeling/predictors/chart.py
@@ -0,0 +1,96 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+import torch
+from torch import nn
+
+from detectron2.config import CfgNode
+from detectron2.layers import ConvTranspose2d, interpolate
+
+from ...structures import DensePoseChartPredictorOutput
+from ..utils import initialize_module_params
+from .registry import DENSEPOSE_PREDICTOR_REGISTRY
+
+
+@DENSEPOSE_PREDICTOR_REGISTRY.register()
+class DensePoseChartPredictor(nn.Module):
+ """
+ Predictor (last layers of a DensePose model) that takes DensePose head outputs as an input
+ and produces 4 tensors which represent DensePose results for predefined body parts
+ (patches / charts):
+ * coarse segmentation, a tensor of shape [N, K, Hout, Wout]
+ * fine segmentation, a tensor of shape [N, C, Hout, Wout]
+ * U coordinates, a tensor of shape [N, C, Hout, Wout]
+ * V coordinates, a tensor of shape [N, C, Hout, Wout]
+ where
+ - N is the number of instances
+ - K is the number of coarse segmentation channels (
+ 2 = foreground / background,
+ 15 = one of 14 body parts / background)
+ - C is the number of fine segmentation channels (
+ 24 fine body parts / background)
+ - Hout and Wout are height and width of predictions
+ """
+
+ def __init__(self, cfg: CfgNode, input_channels: int):
+ """
+ Initialize predictor using configuration options
+
+ Args:
+ cfg (CfgNode): configuration options
+ input_channels (int): input tensor size along the channel dimension
+ """
+ super().__init__()
+ dim_in = input_channels
+ n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
+ dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1
+ kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
+ # coarse segmentation
+ self.ann_index_lowres = ConvTranspose2d(
+ dim_in, n_segm_chan, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
+ )
+ # fine segmentation
+ self.index_uv_lowres = ConvTranspose2d(
+ dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
+ )
+ # U
+ self.u_lowres = ConvTranspose2d(
+ dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
+ )
+ # V
+ self.v_lowres = ConvTranspose2d(
+ dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
+ )
+ self.scale_factor = cfg.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE
+ initialize_module_params(self)
+
+ def interp2d(self, tensor_nchw: torch.Tensor):
+ """
+ Bilinear interpolation method to be used for upscaling
+
+ Args:
+ tensor_nchw (tensor): tensor of shape (N, C, H, W)
+ Return:
+ tensor of shape (N, C, Hout, Wout), where Hout and Wout are computed
+ by applying the scale factor to H and W
+ """
+ return interpolate(
+ tensor_nchw, scale_factor=self.scale_factor, mode="bilinear", align_corners=False
+ )
+
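+    # Editorial note: with typical config values (DECONV_KERNEL = 4, UP_SCALE = 2) each
+    # lowres head above is upsampled 2x by its stride-2 transposed convolution and a
+    # further 2x by `interp2d`, so `forward` below returns predictions at 4x the spatial
+    # size of `head_outputs`.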
+ def forward(self, head_outputs: torch.Tensor):
+ """
+ Perform forward step on DensePose head outputs
+
+ Args:
+ head_outputs (tensor): DensePose head outputs, tensor of shape [N, D, H, W]
+ Return:
+ An instance of DensePoseChartPredictorOutput
+ """
+ return DensePoseChartPredictorOutput(
+ coarse_segm=self.interp2d(self.ann_index_lowres(head_outputs)),
+ fine_segm=self.interp2d(self.index_uv_lowres(head_outputs)),
+ u=self.interp2d(self.u_lowres(head_outputs)),
+ v=self.interp2d(self.v_lowres(head_outputs)),
+ )
diff --git a/densepose/modeling/predictors/chart_confidence.py b/densepose/modeling/predictors/chart_confidence.py
new file mode 100644
index 0000000000000000000000000000000000000000..d2220efa3a8c48e8f86bb4d1d11b3643c3cd6157
--- /dev/null
+++ b/densepose/modeling/predictors/chart_confidence.py
@@ -0,0 +1,176 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from typing import Any
+import torch
+from torch.nn import functional as F
+
+from detectron2.config import CfgNode
+from detectron2.layers import ConvTranspose2d
+
+from ...structures import decorate_predictor_output_class_with_confidences
+from ..confidence import DensePoseConfidenceModelConfig, DensePoseUVConfidenceType
+from ..utils import initialize_module_params
+
+
+class DensePoseChartConfidencePredictorMixin:
+ """
+ Predictor contains the last layers of a DensePose model that take DensePose head
+ outputs as an input and produce model outputs. Confidence predictor mixin is used
+ to generate confidences for segmentation and UV tensors estimated by some
+ base predictor. Several assumptions need to hold for the base predictor:
+ 1) the `forward` method must return SIUV tuple as the first result (
+ S = coarse segmentation, I = fine segmentation, U and V are intrinsic
+ chart coordinates)
+ 2) `interp2d` method must be defined to perform bilinear interpolation;
+ the same method is typically used for SIUV and confidences
+ Confidence predictor mixin provides confidence estimates, as described in:
+ N. Neverova et al., Correlated Uncertainty for Learning Dense Correspondences
+ from Noisy Labels, NeurIPS 2019
+ A. Sanakoyeu et al., Transferring Dense Pose to Proximal Animal Classes, CVPR 2020
+ """
+
+ def __init__(self, cfg: CfgNode, input_channels: int):
+ """
+ Initialize confidence predictor using configuration options.
+
+ Args:
+ cfg (CfgNode): configuration options
+ input_channels (int): number of input channels
+ """
+ # we rely on base predictor to call nn.Module.__init__
+ super().__init__(cfg, input_channels) # pyre-ignore[19]
+ self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg)
+ self._initialize_confidence_estimation_layers(cfg, input_channels)
+ self._registry = {}
+ initialize_module_params(self) # pyre-ignore[6]
+
+ def _initialize_confidence_estimation_layers(self, cfg: CfgNode, dim_in: int):
+ """
+ Initialize confidence estimation layers based on configuration options
+
+ Args:
+ cfg (CfgNode): configuration options
+ dim_in (int): number of input channels
+ """
+ dim_out_patches = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_PATCHES + 1
+ kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
+ if self.confidence_model_cfg.uv_confidence.enabled:
+ if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO:
+ self.sigma_2_lowres = ConvTranspose2d( # pyre-ignore[16]
+ dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
+ )
+ elif (
+ self.confidence_model_cfg.uv_confidence.type
+ == DensePoseUVConfidenceType.INDEP_ANISO
+ ):
+ self.sigma_2_lowres = ConvTranspose2d(
+ dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
+ )
+ self.kappa_u_lowres = ConvTranspose2d( # pyre-ignore[16]
+ dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
+ )
+ self.kappa_v_lowres = ConvTranspose2d( # pyre-ignore[16]
+ dim_in, dim_out_patches, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
+ )
+ else:
+ raise ValueError(
+ f"Unknown confidence model type: "
+ f"{self.confidence_model_cfg.confidence_model_type}"
+ )
+ if self.confidence_model_cfg.segm_confidence.enabled:
+ self.fine_segm_confidence_lowres = ConvTranspose2d( # pyre-ignore[16]
+ dim_in, 1, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
+ )
+ self.coarse_segm_confidence_lowres = ConvTranspose2d( # pyre-ignore[16]
+ dim_in, 1, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
+ )
+
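+    # Editorial summary: depending on the config, the layers created above are
+    # `sigma_2_lowres` (iid_iso), plus `kappa_u_lowres`/`kappa_v_lowres` (indep_aniso),
+    # and single-channel `fine_segm_confidence_lowres`/`coarse_segm_confidence_lowres`
+    # when segmentation confidence is enabled; `forward` below upsamples them with the
+    # base predictor's `interp2d` and passes the segmentation ones through softplus.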
+ def forward(self, head_outputs: torch.Tensor):
+ """
+ Perform forward operation on head outputs used as inputs for the predictor.
+ Calls forward method from the base predictor and uses its outputs to compute
+ confidences.
+
+ Args:
+ head_outputs (Tensor): head outputs used as predictor inputs
+ Return:
+ An instance of outputs with confidences,
+ see `decorate_predictor_output_class_with_confidences`
+ """
+ # assuming base class returns SIUV estimates in its first result
+ base_predictor_outputs = super().forward(head_outputs) # pyre-ignore[16]
+
+ # create output instance by extending base predictor outputs:
+ output = self._create_output_instance(base_predictor_outputs)
+
+ if self.confidence_model_cfg.uv_confidence.enabled:
+ if self.confidence_model_cfg.uv_confidence.type == DensePoseUVConfidenceType.IID_ISO:
+ # assuming base class defines interp2d method for bilinear interpolation
+ output.sigma_2 = self.interp2d(self.sigma_2_lowres(head_outputs)) # pyre-ignore[16]
+ elif (
+ self.confidence_model_cfg.uv_confidence.type
+ == DensePoseUVConfidenceType.INDEP_ANISO
+ ):
+ # assuming base class defines interp2d method for bilinear interpolation
+ output.sigma_2 = self.interp2d(self.sigma_2_lowres(head_outputs))
+ output.kappa_u = self.interp2d(self.kappa_u_lowres(head_outputs)) # pyre-ignore[16]
+ output.kappa_v = self.interp2d(self.kappa_v_lowres(head_outputs)) # pyre-ignore[16]
+ else:
+ raise ValueError(
+ f"Unknown confidence model type: "
+ f"{self.confidence_model_cfg.confidence_model_type}"
+ )
+ if self.confidence_model_cfg.segm_confidence.enabled:
+ # base predictor outputs are assumed to have `fine_segm` and `coarse_segm` attributes
+ # base predictor is assumed to define `interp2d` method for bilinear interpolation
+ output.fine_segm_confidence = (
+ F.softplus(
+ self.interp2d(self.fine_segm_confidence_lowres(head_outputs)) # pyre-ignore[16]
+ )
+ + self.confidence_model_cfg.segm_confidence.epsilon
+ )
+ output.fine_segm = base_predictor_outputs.fine_segm * torch.repeat_interleave(
+ output.fine_segm_confidence, base_predictor_outputs.fine_segm.shape[1], dim=1
+ )
+ output.coarse_segm_confidence = (
+ F.softplus(
+ self.interp2d(
+ self.coarse_segm_confidence_lowres(head_outputs) # pyre-ignore[16]
+ )
+ )
+ + self.confidence_model_cfg.segm_confidence.epsilon
+ )
+ output.coarse_segm = base_predictor_outputs.coarse_segm * torch.repeat_interleave(
+ output.coarse_segm_confidence, base_predictor_outputs.coarse_segm.shape[1], dim=1
+ )
+
+ return output
+
+ def _create_output_instance(self, base_predictor_outputs: Any):
+ """
+ Create an instance of predictor outputs by copying the outputs from the
+ base predictor and initializing confidence
+
+ Args:
+ base_predictor_outputs: an instance of base predictor outputs
+ (the outputs type is assumed to be a dataclass)
+ Return:
+ An instance of outputs with confidences
+ """
+ PredictorOutput = decorate_predictor_output_class_with_confidences(
+ type(base_predictor_outputs) # pyre-ignore[6]
+ )
+ # base_predictor_outputs is assumed to be a dataclass
+ # reassign all the fields from base_predictor_outputs (no deep copy!), add new fields
+ output = PredictorOutput(
+ **base_predictor_outputs.__dict__,
+ coarse_segm_confidence=None,
+ fine_segm_confidence=None,
+ sigma_1=None,
+ sigma_2=None,
+ kappa_u=None,
+ kappa_v=None,
+ )
+ return output
diff --git a/densepose/modeling/predictors/chart_with_confidence.py b/densepose/modeling/predictors/chart_with_confidence.py
new file mode 100644
index 0000000000000000000000000000000000000000..902032c77c65408e0268077f776bd957e80091a1
--- /dev/null
+++ b/densepose/modeling/predictors/chart_with_confidence.py
@@ -0,0 +1,17 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from . import DensePoseChartConfidencePredictorMixin, DensePoseChartPredictor
+from .registry import DENSEPOSE_PREDICTOR_REGISTRY
+
+
+@DENSEPOSE_PREDICTOR_REGISTRY.register()
+class DensePoseChartWithConfidencePredictor(
+ DensePoseChartConfidencePredictorMixin, DensePoseChartPredictor
+):
+ """
+ Predictor that combines chart and chart confidence estimation
+ """
+
+ pass
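+    # Editorial note: the mixin is listed first so that, under Python's MRO, its
+    # `__init__` and `forward` wrap those of `DensePoseChartPredictor`, adding
+    # confidence channels on top of the base SIUV outputs.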
diff --git a/densepose/modeling/predictors/cse.py b/densepose/modeling/predictors/cse.py
new file mode 100644
index 0000000000000000000000000000000000000000..8494b7975bab1f64e704c4d7c6bdcca4a43ba817
--- /dev/null
+++ b/densepose/modeling/predictors/cse.py
@@ -0,0 +1,72 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+# pyre-unsafe
+
+import torch
+from torch import nn
+
+from detectron2.config import CfgNode
+from detectron2.layers import ConvTranspose2d, interpolate
+
+from ...structures import DensePoseEmbeddingPredictorOutput
+from ..utils import initialize_module_params
+from .registry import DENSEPOSE_PREDICTOR_REGISTRY
+
+
+@DENSEPOSE_PREDICTOR_REGISTRY.register()
+class DensePoseEmbeddingPredictor(nn.Module):
+ """
+ Last layers of a DensePose model that take DensePose head outputs as an input
+ and produce model outputs for continuous surface embeddings (CSE).
+ """
+
+ def __init__(self, cfg: CfgNode, input_channels: int):
+ """
+ Initialize predictor using configuration options
+
+ Args:
+ cfg (CfgNode): configuration options
+ input_channels (int): input tensor size along the channel dimension
+ """
+ super().__init__()
+ dim_in = input_channels
+ n_segm_chan = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
+ embed_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE
+ kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
+ # coarse segmentation
+ self.coarse_segm_lowres = ConvTranspose2d(
+ dim_in, n_segm_chan, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
+ )
+ # embedding
+ self.embed_lowres = ConvTranspose2d(
+ dim_in, embed_size, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
+ )
+ self.scale_factor = cfg.MODEL.ROI_DENSEPOSE_HEAD.UP_SCALE
+ initialize_module_params(self)
+
+ def interp2d(self, tensor_nchw: torch.Tensor):
+ """
+ Bilinear interpolation method to be used for upscaling
+
+ Args:
+ tensor_nchw (tensor): tensor of shape (N, C, H, W)
+ Return:
+ tensor of shape (N, C, Hout, Wout), where Hout and Wout are computed
+ by applying the scale factor to H and W
+ """
+ return interpolate(
+ tensor_nchw, scale_factor=self.scale_factor, mode="bilinear", align_corners=False
+ )
+
+ def forward(self, head_outputs):
+ """
+ Perform forward step on DensePose head outputs
+
+ Args:
+ head_outputs (tensor): DensePose head outputs, tensor of shape [N, D, H, W]
+ """
+ embed_lowres = self.embed_lowres(head_outputs)
+ coarse_segm_lowres = self.coarse_segm_lowres(head_outputs)
+ embed = self.interp2d(embed_lowres)
+ coarse_segm = self.interp2d(coarse_segm_lowres)
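+        # embed: [N, CSE.EMBED_SIZE, Hout, Wout]; coarse_segm: [N, NUM_COARSE_SEGM_CHANNELS,
+        # Hout, Wout] -- both upsampled from the lowres heads by `interp2d`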
+ return DensePoseEmbeddingPredictorOutput(embedding=embed, coarse_segm=coarse_segm)
diff --git a/densepose/modeling/predictors/cse_confidence.py b/densepose/modeling/predictors/cse_confidence.py
new file mode 100644
index 0000000000000000000000000000000000000000..7d19b354fa14eb5f79e584c090f2bc0cb4d28c5f
--- /dev/null
+++ b/densepose/modeling/predictors/cse_confidence.py
@@ -0,0 +1,117 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from typing import Any
+import torch
+from torch.nn import functional as F
+
+from detectron2.config import CfgNode
+from detectron2.layers import ConvTranspose2d
+
+from densepose.modeling.confidence import DensePoseConfidenceModelConfig
+from densepose.modeling.utils import initialize_module_params
+from densepose.structures import decorate_cse_predictor_output_class_with_confidences
+
+
+class DensePoseEmbeddingConfidencePredictorMixin:
+ """
+ Predictor contains the last layers of a DensePose model that take DensePose head
+ outputs as an input and produce model outputs. Confidence predictor mixin is used
+ to generate confidences for coarse segmentation estimated by some
+ base predictor. Several assumptions need to hold for the base predictor:
+ 1) the `forward` method must return CSE DensePose head outputs,
+ tensor of shape [N, D, H, W]
+ 2) `interp2d` method must be defined to perform bilinear interpolation;
+ the same method is typically used for masks and confidences
+ Confidence predictor mixin provides confidence estimates, as described in:
+ N. Neverova et al., Correlated Uncertainty for Learning Dense Correspondences
+ from Noisy Labels, NeurIPS 2019
+ A. Sanakoyeu et al., Transferring Dense Pose to Proximal Animal Classes, CVPR 2020
+ """
+
+ def __init__(self, cfg: CfgNode, input_channels: int):
+ """
+ Initialize confidence predictor using configuration options.
+
+ Args:
+ cfg (CfgNode): configuration options
+ input_channels (int): number of input channels
+ """
+ # we rely on base predictor to call nn.Module.__init__
+ super().__init__(cfg, input_channels) # pyre-ignore[19]
+ self.confidence_model_cfg = DensePoseConfidenceModelConfig.from_cfg(cfg)
+ self._initialize_confidence_estimation_layers(cfg, input_channels)
+ self._registry = {}
+ initialize_module_params(self) # pyre-ignore[6]
+
+ def _initialize_confidence_estimation_layers(self, cfg: CfgNode, dim_in: int):
+ """
+ Initialize confidence estimation layers based on configuration options
+
+ Args:
+ cfg (CfgNode): configuration options
+ dim_in (int): number of input channels
+ """
+ kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECONV_KERNEL
+ if self.confidence_model_cfg.segm_confidence.enabled:
+ self.coarse_segm_confidence_lowres = ConvTranspose2d( # pyre-ignore[16]
+ dim_in, 1, kernel_size, stride=2, padding=int(kernel_size / 2 - 1)
+ )
+
+ def forward(self, head_outputs: torch.Tensor):
+ """
+ Perform forward operation on head outputs used as inputs for the predictor.
+ Calls forward method from the base predictor and uses its outputs to compute
+ confidences.
+
+ Args:
+ head_outputs (Tensor): head outputs used as predictor inputs
+ Return:
+ An instance of outputs with confidences,
+ see `decorate_cse_predictor_output_class_with_confidences`
+ """
+        # assuming base class returns CSE predictor outputs (embeddings + coarse segmentation)
+ base_predictor_outputs = super().forward(head_outputs) # pyre-ignore[16]
+
+ # create output instance by extending base predictor outputs:
+ output = self._create_output_instance(base_predictor_outputs)
+
+ if self.confidence_model_cfg.segm_confidence.enabled:
+ # base predictor outputs are assumed to have `coarse_segm` attribute
+ # base predictor is assumed to define `interp2d` method for bilinear interpolation
+ output.coarse_segm_confidence = (
+ F.softplus(
+ self.interp2d( # pyre-ignore[16]
+ self.coarse_segm_confidence_lowres(head_outputs) # pyre-ignore[16]
+ )
+ )
+ + self.confidence_model_cfg.segm_confidence.epsilon
+ )
+ output.coarse_segm = base_predictor_outputs.coarse_segm * torch.repeat_interleave(
+ output.coarse_segm_confidence, base_predictor_outputs.coarse_segm.shape[1], dim=1
+ )
+
+ return output
+
+ def _create_output_instance(self, base_predictor_outputs: Any):
+ """
+ Create an instance of predictor outputs by copying the outputs from the
+ base predictor and initializing confidence
+
+ Args:
+ base_predictor_outputs: an instance of base predictor outputs
+ (the outputs type is assumed to be a dataclass)
+ Return:
+ An instance of outputs with confidences
+ """
+ PredictorOutput = decorate_cse_predictor_output_class_with_confidences(
+ type(base_predictor_outputs) # pyre-ignore[6]
+ )
+ # base_predictor_outputs is assumed to be a dataclass
+ # reassign all the fields from base_predictor_outputs (no deep copy!), add new fields
+ output = PredictorOutput(
+ **base_predictor_outputs.__dict__,
+ coarse_segm_confidence=None,
+ )
+ return output
diff --git a/densepose/modeling/predictors/cse_with_confidence.py b/densepose/modeling/predictors/cse_with_confidence.py
new file mode 100644
index 0000000000000000000000000000000000000000..02389dbcbe734c89e6eb86757d877c9657fd12b1
--- /dev/null
+++ b/densepose/modeling/predictors/cse_with_confidence.py
@@ -0,0 +1,17 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from . import DensePoseEmbeddingConfidencePredictorMixin, DensePoseEmbeddingPredictor
+from .registry import DENSEPOSE_PREDICTOR_REGISTRY
+
+
+@DENSEPOSE_PREDICTOR_REGISTRY.register()
+class DensePoseEmbeddingWithConfidencePredictor(
+ DensePoseEmbeddingConfidencePredictorMixin, DensePoseEmbeddingPredictor
+):
+ """
+ Predictor that combines CSE and CSE confidence estimation
+ """
+
+ pass
diff --git a/densepose/modeling/predictors/registry.py b/densepose/modeling/predictors/registry.py
new file mode 100644
index 0000000000000000000000000000000000000000..c883ba3538e8d8e5b11c68811fdf1990a2964a71
--- /dev/null
+++ b/densepose/modeling/predictors/registry.py
@@ -0,0 +1,7 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from detectron2.utils.registry import Registry
+
+DENSEPOSE_PREDICTOR_REGISTRY = Registry("DENSEPOSE_PREDICTOR")
diff --git a/densepose/modeling/roi_heads/__init__.py b/densepose/modeling/roi_heads/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..0a055a65454517876107c621ba53e4742fa5eb54
--- /dev/null
+++ b/densepose/modeling/roi_heads/__init__.py
@@ -0,0 +1,8 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from .v1convx import DensePoseV1ConvXHead
+from .deeplab import DensePoseDeepLabHead
+from .registry import ROI_DENSEPOSE_HEAD_REGISTRY
+from .roi_head import Decoder, DensePoseROIHeads
diff --git a/densepose/modeling/roi_heads/__pycache__/__init__.cpython-39.pyc b/densepose/modeling/roi_heads/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..987d23c7c539e2deb35c7c0c0dc2116151c5856e
Binary files /dev/null and b/densepose/modeling/roi_heads/__pycache__/__init__.cpython-39.pyc differ
diff --git a/densepose/modeling/roi_heads/__pycache__/deeplab.cpython-39.pyc b/densepose/modeling/roi_heads/__pycache__/deeplab.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b5197471220a5e71b609f73fe171249e91e34201
Binary files /dev/null and b/densepose/modeling/roi_heads/__pycache__/deeplab.cpython-39.pyc differ
diff --git a/densepose/modeling/roi_heads/__pycache__/registry.cpython-39.pyc b/densepose/modeling/roi_heads/__pycache__/registry.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..dc43717ab47c4470f2044a68ade8f1aa4ad265b0
Binary files /dev/null and b/densepose/modeling/roi_heads/__pycache__/registry.cpython-39.pyc differ
diff --git a/densepose/modeling/roi_heads/__pycache__/roi_head.cpython-39.pyc b/densepose/modeling/roi_heads/__pycache__/roi_head.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d7c9d77481b049d645ec7e4d5998a83a3557a72a
Binary files /dev/null and b/densepose/modeling/roi_heads/__pycache__/roi_head.cpython-39.pyc differ
diff --git a/densepose/modeling/roi_heads/__pycache__/v1convx.cpython-39.pyc b/densepose/modeling/roi_heads/__pycache__/v1convx.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..264b30c248d452b1c141da59c32b3b4787428a71
Binary files /dev/null and b/densepose/modeling/roi_heads/__pycache__/v1convx.cpython-39.pyc differ
diff --git a/densepose/modeling/roi_heads/deeplab.py b/densepose/modeling/roi_heads/deeplab.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f42d20681a34b319c15967548839ffffa77c89a
--- /dev/null
+++ b/densepose/modeling/roi_heads/deeplab.py
@@ -0,0 +1,265 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+import fvcore.nn.weight_init as weight_init
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from detectron2.config import CfgNode
+from detectron2.layers import Conv2d
+
+from .registry import ROI_DENSEPOSE_HEAD_REGISTRY
+
+
+@ROI_DENSEPOSE_HEAD_REGISTRY.register()
+class DensePoseDeepLabHead(nn.Module):
+ """
+ DensePose head using DeepLabV3 model from
+ "Rethinking Atrous Convolution for Semantic Image Segmentation"
+    (https://arxiv.org/abs/1706.05587).
+ """
+
+ def __init__(self, cfg: CfgNode, input_channels: int):
+ super(DensePoseDeepLabHead, self).__init__()
+ # fmt: off
+ hidden_dim = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM
+ kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL
+ norm = cfg.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NORM
+ self.n_stacked_convs = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS
+ self.use_nonlocal = cfg.MODEL.ROI_DENSEPOSE_HEAD.DEEPLAB.NONLOCAL_ON
+ # fmt: on
+ pad_size = kernel_size // 2
+ n_channels = input_channels
+
+ self.ASPP = ASPP(input_channels, [6, 12, 56], n_channels) # 6, 12, 56
+ self.add_module("ASPP", self.ASPP)
+
+ if self.use_nonlocal:
+ self.NLBlock = NONLocalBlock2D(input_channels, bn_layer=True)
+ self.add_module("NLBlock", self.NLBlock)
+ # weight_init.c2_msra_fill(self.ASPP)
+
+ for i in range(self.n_stacked_convs):
+ norm_module = nn.GroupNorm(32, hidden_dim) if norm == "GN" else None
+ layer = Conv2d(
+ n_channels,
+ hidden_dim,
+ kernel_size,
+ stride=1,
+ padding=pad_size,
+ bias=not norm,
+ norm=norm_module,
+ )
+ weight_init.c2_msra_fill(layer)
+ n_channels = hidden_dim
+ layer_name = self._get_layer_name(i)
+ self.add_module(layer_name, layer)
+ self.n_out_channels = hidden_dim
+ # initialize_module_params(self)
+
+ def forward(self, features):
+ x0 = features
+ x = self.ASPP(x0)
+ if self.use_nonlocal:
+ x = self.NLBlock(x)
+ output = x
+ for i in range(self.n_stacked_convs):
+ layer_name = self._get_layer_name(i)
+ x = getattr(self, layer_name)(x)
+ x = F.relu(x)
+ output = x
+ return output
+
+ def _get_layer_name(self, i: int):
+ layer_name = "body_conv_fcn{}".format(i + 1)
+ return layer_name
+
+
+# Copied from
+# https://github.com/pytorch/vision/blob/master/torchvision/models/segmentation/deeplabv3.py
+# See https://arxiv.org/pdf/1706.05587.pdf for details
+class ASPPConv(nn.Sequential):
+ def __init__(self, in_channels, out_channels, dilation):
+ modules = [
+ nn.Conv2d(
+ in_channels, out_channels, 3, padding=dilation, dilation=dilation, bias=False
+ ),
+ nn.GroupNorm(32, out_channels),
+ nn.ReLU(),
+ ]
+ super(ASPPConv, self).__init__(*modules)
+
+
+class ASPPPooling(nn.Sequential):
+ def __init__(self, in_channels, out_channels):
+ super(ASPPPooling, self).__init__(
+ nn.AdaptiveAvgPool2d(1),
+ nn.Conv2d(in_channels, out_channels, 1, bias=False),
+ nn.GroupNorm(32, out_channels),
+ nn.ReLU(),
+ )
+
+ def forward(self, x):
+ size = x.shape[-2:]
+ x = super(ASPPPooling, self).forward(x)
+ return F.interpolate(x, size=size, mode="bilinear", align_corners=False)
+
+
+class ASPP(nn.Module):
+ def __init__(self, in_channels, atrous_rates, out_channels):
+ super(ASPP, self).__init__()
+ modules = []
+ modules.append(
+ nn.Sequential(
+ nn.Conv2d(in_channels, out_channels, 1, bias=False),
+ nn.GroupNorm(32, out_channels),
+ nn.ReLU(),
+ )
+ )
+
+ rate1, rate2, rate3 = tuple(atrous_rates)
+ modules.append(ASPPConv(in_channels, out_channels, rate1))
+ modules.append(ASPPConv(in_channels, out_channels, rate2))
+ modules.append(ASPPConv(in_channels, out_channels, rate3))
+ modules.append(ASPPPooling(in_channels, out_channels))
+
+ self.convs = nn.ModuleList(modules)
+
+ self.project = nn.Sequential(
+ nn.Conv2d(5 * out_channels, out_channels, 1, bias=False),
+ # nn.BatchNorm2d(out_channels),
+ nn.ReLU(),
+ # nn.Dropout(0.5)
+ )
+
+ def forward(self, x):
+ res = []
+ for conv in self.convs:
+ res.append(conv(x))
+ res = torch.cat(res, dim=1)
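+        # `res` concatenates the five parallel branches (a 1x1 conv, three atrous convs
+        # and global average pooling), hence the 5 * out_channels input to `project`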
+ return self.project(res)
+
+
+# copied from
+# https://github.com/AlexHex7/Non-local_pytorch/blob/master/lib/non_local_embedded_gaussian.py
+# See https://arxiv.org/abs/1711.07971 for details
+class _NonLocalBlockND(nn.Module):
+ def __init__(
+ self, in_channels, inter_channels=None, dimension=3, sub_sample=True, bn_layer=True
+ ):
+ super(_NonLocalBlockND, self).__init__()
+
+ assert dimension in [1, 2, 3]
+
+ self.dimension = dimension
+ self.sub_sample = sub_sample
+
+ self.in_channels = in_channels
+ self.inter_channels = inter_channels
+
+ if self.inter_channels is None:
+ self.inter_channels = in_channels // 2
+ if self.inter_channels == 0:
+ self.inter_channels = 1
+
+ if dimension == 3:
+ conv_nd = nn.Conv3d
+ max_pool_layer = nn.MaxPool3d(kernel_size=(1, 2, 2))
+            bn = nn.GroupNorm  # used in place of nn.BatchNorm3d; instantiated as bn(32, in_channels) below
+ elif dimension == 2:
+ conv_nd = nn.Conv2d
+ max_pool_layer = nn.MaxPool2d(kernel_size=(2, 2))
+            bn = nn.GroupNorm  # used in place of nn.BatchNorm2d
+ else:
+ conv_nd = nn.Conv1d
+ max_pool_layer = nn.MaxPool1d(kernel_size=2)
+            bn = nn.GroupNorm  # used in place of nn.BatchNorm1d
+
+ self.g = conv_nd(
+ in_channels=self.in_channels,
+ out_channels=self.inter_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ )
+
+ if bn_layer:
+ self.W = nn.Sequential(
+ conv_nd(
+ in_channels=self.inter_channels,
+ out_channels=self.in_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ ),
+ bn(32, self.in_channels),
+ )
+ nn.init.constant_(self.W[1].weight, 0)
+ nn.init.constant_(self.W[1].bias, 0)
+ else:
+ self.W = conv_nd(
+ in_channels=self.inter_channels,
+ out_channels=self.in_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ )
+ nn.init.constant_(self.W.weight, 0)
+ nn.init.constant_(self.W.bias, 0)
+
+ self.theta = conv_nd(
+ in_channels=self.in_channels,
+ out_channels=self.inter_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ )
+ self.phi = conv_nd(
+ in_channels=self.in_channels,
+ out_channels=self.inter_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0,
+ )
+
+ if sub_sample:
+ self.g = nn.Sequential(self.g, max_pool_layer)
+ self.phi = nn.Sequential(self.phi, max_pool_layer)
+
+ def forward(self, x):
+ """
+ :param x: (b, c, t, h, w)
+ :return:
+ """
+
+ batch_size = x.size(0)
+
+ g_x = self.g(x).view(batch_size, self.inter_channels, -1)
+ g_x = g_x.permute(0, 2, 1)
+
+ theta_x = self.theta(x).view(batch_size, self.inter_channels, -1)
+ theta_x = theta_x.permute(0, 2, 1)
+ phi_x = self.phi(x).view(batch_size, self.inter_channels, -1)
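+        # theta_x: (b, THW, inter_channels), phi_x: (b, inter_channels, T'H'W'), where the
+        # primed size reflects the optional max-pool sub-sampling of `phi`/`g`; the matmul
+        # below gives pairwise embedded-Gaussian affinities, normalized by the softmax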
+ f = torch.matmul(theta_x, phi_x)
+ f_div_C = F.softmax(f, dim=-1)
+
+ y = torch.matmul(f_div_C, g_x)
+ y = y.permute(0, 2, 1).contiguous()
+ y = y.view(batch_size, self.inter_channels, *x.size()[2:])
+ W_y = self.W(y)
+ z = W_y + x
+
+ return z
+
+
+class NONLocalBlock2D(_NonLocalBlockND):
+ def __init__(self, in_channels, inter_channels=None, sub_sample=True, bn_layer=True):
+ super(NONLocalBlock2D, self).__init__(
+ in_channels,
+ inter_channels=inter_channels,
+ dimension=2,
+ sub_sample=sub_sample,
+ bn_layer=bn_layer,
+ )
diff --git a/densepose/modeling/roi_heads/registry.py b/densepose/modeling/roi_heads/registry.py
new file mode 100644
index 0000000000000000000000000000000000000000..89514279ffba6a65fc499e03bc0177ed8039482f
--- /dev/null
+++ b/densepose/modeling/roi_heads/registry.py
@@ -0,0 +1,7 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from detectron2.utils.registry import Registry
+
+ROI_DENSEPOSE_HEAD_REGISTRY = Registry("ROI_DENSEPOSE_HEAD")
diff --git a/densepose/modeling/roi_heads/roi_head.py b/densepose/modeling/roi_heads/roi_head.py
new file mode 100644
index 0000000000000000000000000000000000000000..2b8f869f84aa59a09286c421123b31c9db436ae6
--- /dev/null
+++ b/densepose/modeling/roi_heads/roi_head.py
@@ -0,0 +1,220 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+import numpy as np
+from typing import Dict, List, Optional
+import fvcore.nn.weight_init as weight_init
+import torch
+import torch.nn as nn
+from torch.nn import functional as F
+
+from detectron2.layers import Conv2d, ShapeSpec, get_norm
+from detectron2.modeling import ROI_HEADS_REGISTRY, StandardROIHeads
+from detectron2.modeling.poolers import ROIPooler
+from detectron2.modeling.roi_heads import select_foreground_proposals
+from detectron2.structures import ImageList, Instances
+
+from .. import (
+ build_densepose_data_filter,
+ build_densepose_embedder,
+ build_densepose_head,
+ build_densepose_losses,
+ build_densepose_predictor,
+ densepose_inference,
+)
+
+
+class Decoder(nn.Module):
+ """
+ A semantic segmentation head described in detail in the Panoptic Feature Pyramid Networks paper
+ (https://arxiv.org/abs/1901.02446). It takes FPN features as input and merges information from
+    all levels of the FPN into a single output.
+ """
+
+ def __init__(self, cfg, input_shape: Dict[str, ShapeSpec], in_features):
+ super(Decoder, self).__init__()
+
+ # fmt: off
+ self.in_features = in_features
+ feature_strides = {k: v.stride for k, v in input_shape.items()}
+ feature_channels = {k: v.channels for k, v in input_shape.items()}
+ num_classes = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NUM_CLASSES
+ conv_dims = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_CONV_DIMS
+ self.common_stride = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_COMMON_STRIDE
+ norm = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_NORM
+ # fmt: on
+
+ self.scale_heads = []
+ for in_feature in self.in_features:
+ head_ops = []
+ head_length = max(
+ 1, int(np.log2(feature_strides[in_feature]) - np.log2(self.common_stride))
+ )
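+            # head_length = number of conv (+ 2x upsample) stages needed to bring this
+            # feature level to the common stride, e.g. stride 32 -> stride 4 takes 3 stages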
+ for k in range(head_length):
+ conv = Conv2d(
+ feature_channels[in_feature] if k == 0 else conv_dims,
+ conv_dims,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ bias=not norm,
+ norm=get_norm(norm, conv_dims),
+ activation=F.relu,
+ )
+ weight_init.c2_msra_fill(conv)
+ head_ops.append(conv)
+ if feature_strides[in_feature] != self.common_stride:
+ head_ops.append(
+ nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False)
+ )
+ self.scale_heads.append(nn.Sequential(*head_ops))
+ self.add_module(in_feature, self.scale_heads[-1])
+ self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0)
+ weight_init.c2_msra_fill(self.predictor)
+
+ def forward(self, features: List[torch.Tensor]):
+ for i, _ in enumerate(self.in_features):
+ if i == 0:
+ x = self.scale_heads[i](features[i])
+ else:
+ x = x + self.scale_heads[i](features[i])
+ x = self.predictor(x)
+ return x
+
+
+@ROI_HEADS_REGISTRY.register()
+class DensePoseROIHeads(StandardROIHeads):
+ """
+ A Standard ROIHeads which contains an addition of DensePose head.
+ """
+
+ def __init__(self, cfg, input_shape):
+ super().__init__(cfg, input_shape)
+ self._init_densepose_head(cfg, input_shape)
+
+ def _init_densepose_head(self, cfg, input_shape):
+ # fmt: off
+ self.densepose_on = cfg.MODEL.DENSEPOSE_ON
+ if not self.densepose_on:
+ return
+ self.densepose_data_filter = build_densepose_data_filter(cfg)
+ dp_pooler_resolution = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_RESOLUTION
+ dp_pooler_sampling_ratio = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_SAMPLING_RATIO
+ dp_pooler_type = cfg.MODEL.ROI_DENSEPOSE_HEAD.POOLER_TYPE
+ self.use_decoder = cfg.MODEL.ROI_DENSEPOSE_HEAD.DECODER_ON
+ # fmt: on
+ if self.use_decoder:
+ dp_pooler_scales = (1.0 / input_shape[self.in_features[0]].stride,)
+ else:
+ dp_pooler_scales = tuple(1.0 / input_shape[k].stride for k in self.in_features)
+ in_channels = [input_shape[f].channels for f in self.in_features][0]
+
+ if self.use_decoder:
+ self.decoder = Decoder(cfg, input_shape, self.in_features)
+
+ self.densepose_pooler = ROIPooler(
+ output_size=dp_pooler_resolution,
+ scales=dp_pooler_scales,
+ sampling_ratio=dp_pooler_sampling_ratio,
+ pooler_type=dp_pooler_type,
+ )
+ self.densepose_head = build_densepose_head(cfg, in_channels)
+ self.densepose_predictor = build_densepose_predictor(
+ cfg, self.densepose_head.n_out_channels
+ )
+ self.densepose_losses = build_densepose_losses(cfg)
+ self.embedder = build_densepose_embedder(cfg)
+
+ def _forward_densepose(self, features: Dict[str, torch.Tensor], instances: List[Instances]):
+ """
+ Forward logic of the densepose prediction branch.
+
+ Args:
+ features (dict[str, Tensor]): input data as a mapping from feature
+ map name to tensor. Axis 0 represents the number of images `N` in
+ the input data; axes 1-3 are channels, height, and width, which may
+ vary between feature maps (e.g., if a feature pyramid is used).
+ instances (list[Instances]): length `N` list of `Instances`. The i-th
+                `Instances` contains instances for the i-th input image.
+ In training, they can be the proposals.
+ In inference, they can be the predicted boxes.
+
+ Returns:
+ In training, a dict of losses.
+ In inference, update `instances` with new fields "densepose" and return it.
+ """
+ if not self.densepose_on:
+ return {} if self.training else instances
+
+ features_list = [features[f] for f in self.in_features]
+ if self.training:
+ proposals, _ = select_foreground_proposals(instances, self.num_classes)
+ features_list, proposals = self.densepose_data_filter(features_list, proposals)
+ if len(proposals) > 0:
+ proposal_boxes = [x.proposal_boxes for x in proposals]
+
+ if self.use_decoder:
+ features_list = [self.decoder(features_list)]
+
+ features_dp = self.densepose_pooler(features_list, proposal_boxes)
+ densepose_head_outputs = self.densepose_head(features_dp)
+ densepose_predictor_outputs = self.densepose_predictor(densepose_head_outputs)
+ densepose_loss_dict = self.densepose_losses(
+ proposals, densepose_predictor_outputs, embedder=self.embedder
+ )
+ return densepose_loss_dict
+ else:
+ pred_boxes = [x.pred_boxes for x in instances]
+
+ if self.use_decoder:
+ features_list = [self.decoder(features_list)]
+
+ features_dp = self.densepose_pooler(features_list, pred_boxes)
+ if len(features_dp) > 0:
+ densepose_head_outputs = self.densepose_head(features_dp)
+ densepose_predictor_outputs = self.densepose_predictor(densepose_head_outputs)
+ else:
+ densepose_predictor_outputs = None
+
+ densepose_inference(densepose_predictor_outputs, instances)
+ return instances
+
+ def forward(
+ self,
+ images: ImageList,
+ features: Dict[str, torch.Tensor],
+ proposals: List[Instances],
+ targets: Optional[List[Instances]] = None,
+ ):
+ instances, losses = super().forward(images, features, proposals, targets)
+ del targets, images
+
+ if self.training:
+ losses.update(self._forward_densepose(features, instances))
+ return instances, losses
+
+ def forward_with_given_boxes(
+ self, features: Dict[str, torch.Tensor], instances: List[Instances]
+ ):
+ """
+ Use the given boxes in `instances` to produce other (non-box) per-ROI outputs.
+
+        This is useful for downstream tasks where a box is known, but other attributes
+        (outputs of other heads) need to be obtained.
+ Test-time augmentation also uses this.
+
+ Args:
+ features: same as in `forward()`
+ instances (list[Instances]): instances to predict other outputs. Expect the keys
+ "pred_boxes" and "pred_classes" to exist.
+
+ Returns:
+ instances (list[Instances]):
+ the same `Instances` objects, with extra
+ fields such as `pred_masks` or `pred_keypoints`.
+ """
+
+ instances = super().forward_with_given_boxes(features, instances)
+ instances = self._forward_densepose(features, instances)
+ return instances
diff --git a/densepose/modeling/roi_heads/v1convx.py b/densepose/modeling/roi_heads/v1convx.py
new file mode 100644
index 0000000000000000000000000000000000000000..d81c375c5a488af4cb9ab41676d5e6688f740e61
--- /dev/null
+++ b/densepose/modeling/roi_heads/v1convx.py
@@ -0,0 +1,66 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from detectron2.config import CfgNode
+from detectron2.layers import Conv2d
+
+from ..utils import initialize_module_params
+from .registry import ROI_DENSEPOSE_HEAD_REGISTRY
+
+
+@ROI_DENSEPOSE_HEAD_REGISTRY.register()
+class DensePoseV1ConvXHead(nn.Module):
+ """
+ Fully convolutional DensePose head.
+ """
+
+ def __init__(self, cfg: CfgNode, input_channels: int):
+ """
+ Initialize DensePose fully convolutional head
+
+ Args:
+ cfg (CfgNode): configuration options
+ input_channels (int): number of input channels
+ """
+ super(DensePoseV1ConvXHead, self).__init__()
+ # fmt: off
+ hidden_dim = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_DIM
+ kernel_size = cfg.MODEL.ROI_DENSEPOSE_HEAD.CONV_HEAD_KERNEL
+ self.n_stacked_convs = cfg.MODEL.ROI_DENSEPOSE_HEAD.NUM_STACKED_CONVS
+ # fmt: on
+ pad_size = kernel_size // 2
+ n_channels = input_channels
+ for i in range(self.n_stacked_convs):
+ layer = Conv2d(n_channels, hidden_dim, kernel_size, stride=1, padding=pad_size)
+ layer_name = self._get_layer_name(i)
+ self.add_module(layer_name, layer)
+ n_channels = hidden_dim
+ self.n_out_channels = n_channels
+ initialize_module_params(self)
+
+ def forward(self, features: torch.Tensor):
+ """
+ Apply DensePose fully convolutional head to the input features
+
+ Args:
+ features (tensor): input features
+ Result:
+ A tensor of DensePose head outputs
+ """
+ x = features
+ output = x
+ for i in range(self.n_stacked_convs):
+ layer_name = self._get_layer_name(i)
+ x = getattr(self, layer_name)(x)
+ x = F.relu(x)
+ output = x
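+            # `output` is overwritten on every iteration, so the head returns the
+            # activation of the last stacked conv (with `self.n_out_channels` channels)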
+ return output
+
+ def _get_layer_name(self, i: int):
+ layer_name = "body_conv_fcn{}".format(i + 1)
+ return layer_name
diff --git a/densepose/modeling/test_time_augmentation.py b/densepose/modeling/test_time_augmentation.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e4cfa038f2fe3072a2520978ff4408df9bca5b3
--- /dev/null
+++ b/densepose/modeling/test_time_augmentation.py
@@ -0,0 +1,209 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+import copy
+import numpy as np
+import torch
+from fvcore.transforms import HFlipTransform, TransformList
+from torch.nn import functional as F
+
+from detectron2.data.transforms import RandomRotation, RotationTransform, apply_transform_gens
+from detectron2.modeling.postprocessing import detector_postprocess
+from detectron2.modeling.test_time_augmentation import DatasetMapperTTA, GeneralizedRCNNWithTTA
+
+from ..converters import HFlipConverter
+
+
+class DensePoseDatasetMapperTTA(DatasetMapperTTA):
+ def __init__(self, cfg):
+ super().__init__(cfg=cfg)
+ self.angles = cfg.TEST.AUG.ROTATION_ANGLES
+
+ def __call__(self, dataset_dict):
+ ret = super().__call__(dataset_dict=dataset_dict)
+ numpy_image = dataset_dict["image"].permute(1, 2, 0).numpy()
+ for angle in self.angles:
+ rotate = RandomRotation(angle=angle, expand=True)
+ new_numpy_image, tfms = apply_transform_gens([rotate], np.copy(numpy_image))
+ torch_image = torch.from_numpy(np.ascontiguousarray(new_numpy_image.transpose(2, 0, 1)))
+ dic = copy.deepcopy(dataset_dict)
+ # In DatasetMapperTTA, there is a pre_tfm transform (resize or no-op) that is
+ # added at the beginning of each TransformList. That's '.transforms[0]'.
+ dic["transforms"] = TransformList(
+ [ret[-1]["transforms"].transforms[0]] + tfms.transforms
+ )
+ dic["image"] = torch_image
+ ret.append(dic)
+ return ret
+
+
+class DensePoseGeneralizedRCNNWithTTA(GeneralizedRCNNWithTTA):
+ def __init__(self, cfg, model, transform_data, tta_mapper=None, batch_size=1):
+ """
+ Args:
+ cfg (CfgNode):
+ model (GeneralizedRCNN): a GeneralizedRCNN to apply TTA on.
+ transform_data (DensePoseTransformData): contains symmetry label
+ transforms used for horizontal flip
+ tta_mapper (callable): takes a dataset dict and returns a list of
+ augmented versions of the dataset dict. Defaults to
+ `DatasetMapperTTA(cfg)`.
+ batch_size (int): batch the augmented images into this batch size for inference.
+ """
+ self._transform_data = transform_data.to(model.device)
+ super().__init__(cfg=cfg, model=model, tta_mapper=tta_mapper, batch_size=batch_size)
+
+ # the implementation follows closely the one from detectron2/modeling
+ def _inference_one_image(self, input):
+ """
+ Args:
+ input (dict): one dataset dict with "image" field being a CHW tensor
+
+ Returns:
+ dict: one output dict
+ """
+ orig_shape = (input["height"], input["width"])
+ # For some reason, resize with uint8 slightly increases box AP but decreases densepose AP
+ input["image"] = input["image"].to(torch.uint8)
+ augmented_inputs, tfms = self._get_augmented_inputs(input)
+ # Detect boxes from all augmented versions
+ with self._turn_off_roi_heads(["mask_on", "keypoint_on", "densepose_on"]):
+ # temporarily disable roi heads
+ all_boxes, all_scores, all_classes = self._get_augmented_boxes(augmented_inputs, tfms)
+ merged_instances = self._merge_detections(all_boxes, all_scores, all_classes, orig_shape)
+
+ if self.cfg.MODEL.MASK_ON or self.cfg.MODEL.DENSEPOSE_ON:
+ # Use the detected boxes to obtain new fields
+ augmented_instances = self._rescale_detected_boxes(
+ augmented_inputs, merged_instances, tfms
+ )
+ # run forward on the detected boxes
+ outputs = self._batch_inference(augmented_inputs, augmented_instances)
+ # Delete now useless variables to avoid being out of memory
+ del augmented_inputs, augmented_instances
+ # average the predictions
+ if self.cfg.MODEL.MASK_ON:
+ merged_instances.pred_masks = self._reduce_pred_masks(outputs, tfms)
+ if self.cfg.MODEL.DENSEPOSE_ON:
+ merged_instances.pred_densepose = self._reduce_pred_densepose(outputs, tfms)
+ # postprocess
+ merged_instances = detector_postprocess(merged_instances, *orig_shape)
+ return {"instances": merged_instances}
+ else:
+ return {"instances": merged_instances}
+
+ def _get_augmented_boxes(self, augmented_inputs, tfms):
+ # Heavily based on detectron2/modeling/test_time_augmentation.py
+ # Only difference is that RotationTransform is excluded from bbox computation
+ # 1: forward with all augmented images
+ outputs = self._batch_inference(augmented_inputs)
+ # 2: union the results
+ all_boxes = []
+ all_scores = []
+ all_classes = []
+ for output, tfm in zip(outputs, tfms):
+ # Need to inverse the transforms on boxes, to obtain results on original image
+ if not any(isinstance(t, RotationTransform) for t in tfm.transforms):
+ # Some transforms can't compute bbox correctly
+ pred_boxes = output.pred_boxes.tensor
+ original_pred_boxes = tfm.inverse().apply_box(pred_boxes.cpu().numpy())
+ all_boxes.append(torch.from_numpy(original_pred_boxes).to(pred_boxes.device))
+ all_scores.extend(output.scores)
+ all_classes.extend(output.pred_classes)
+ all_boxes = torch.cat(all_boxes, dim=0)
+ return all_boxes, all_scores, all_classes
+
+ def _reduce_pred_densepose(self, outputs, tfms):
+ # Should apply inverse transforms on densepose preds.
+        # We assume only rotation, resize & flip are used. The DensePose predictions are
+        # a scale-invariant (per-box) representation, so resize needs no special handling;
+        # rotation and flip are inverted below.
+ for idx, (output, tfm) in enumerate(zip(outputs, tfms)):
+ for t in tfm.transforms:
+ for attr in ["coarse_segm", "fine_segm", "u", "v"]:
+ setattr(
+ output.pred_densepose,
+ attr,
+ _inverse_rotation(
+ getattr(output.pred_densepose, attr), output.pred_boxes.tensor, t
+ ),
+ )
+ if any(isinstance(t, HFlipTransform) for t in tfm.transforms):
+ output.pred_densepose = HFlipConverter.convert(
+ output.pred_densepose, self._transform_data
+ )
+ self._incremental_avg_dp(outputs[0].pred_densepose, output.pred_densepose, idx)
+ return outputs[0].pred_densepose
+
+ # incrementally computed average: u_(n + 1) = u_n + (x_(n+1) - u_n) / (n + 1).
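+    # Worked example (editorial): idx = 0 gives avg = x_1; idx = 1 gives (x_1 + x_2) / 2;
+    # idx = 2 gives (mean(x_1, x_2) * 2 + x_3) / 3 -- i.e. the running mean of all
+    # predictions seen so far.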
+ def _incremental_avg_dp(self, avg, new_el, idx):
+ for attr in ["coarse_segm", "fine_segm", "u", "v"]:
+ setattr(avg, attr, (getattr(avg, attr) * idx + getattr(new_el, attr)) / (idx + 1))
+ if idx:
+ # Deletion of the > 0 index intermediary values to prevent GPU OOM
+ setattr(new_el, attr, None)
+ return avg
+
+
+def _inverse_rotation(densepose_attrs, boxes, transform):
+ # resample outputs to image size and rotate back the densepose preds
+ # on the rotated images to the space of the original image
+ if len(boxes) == 0 or not isinstance(transform, RotationTransform):
+ return densepose_attrs
+ boxes = boxes.int().cpu().numpy()
+ wh_boxes = boxes[:, 2:] - boxes[:, :2] # bboxes in the rotated space
+ inv_boxes = rotate_box_inverse(transform, boxes).astype(int) # bboxes in original image
+ wh_diff = (inv_boxes[:, 2:] - inv_boxes[:, :2] - wh_boxes) // 2 # diff between new/old bboxes
+ rotation_matrix = torch.tensor([transform.rm_image]).to(device=densepose_attrs.device).float()
+ rotation_matrix[:, :, -1] = 0
+ # To apply grid_sample for rotation, we need to have enough space to fit the original and
+ # rotated bboxes. l_bds and r_bds are the left/right bounds that will be used to
+ # crop the difference once the rotation is done
+ l_bds = np.maximum(0, -wh_diff)
+ for i in range(len(densepose_attrs)):
+ if min(wh_boxes[i]) <= 0:
+ continue
+ densepose_attr = densepose_attrs[[i]].clone()
+ # 1. Interpolate densepose attribute to size of the rotated bbox
+ densepose_attr = F.interpolate(densepose_attr, wh_boxes[i].tolist()[::-1], mode="bilinear")
+ # 2. Pad the interpolated attribute so it has room for the original + rotated bbox
+ densepose_attr = F.pad(densepose_attr, tuple(np.repeat(np.maximum(0, wh_diff[i]), 2)))
+ # 3. Compute rotation grid and transform
+ grid = F.affine_grid(rotation_matrix, size=densepose_attr.shape)
+ densepose_attr = F.grid_sample(densepose_attr, grid)
+ # 4. Compute right bounds and crop the densepose_attr to the size of the original bbox
+ r_bds = densepose_attr.shape[2:][::-1] - l_bds[i]
+ densepose_attr = densepose_attr[:, :, l_bds[i][1] : r_bds[1], l_bds[i][0] : r_bds[0]]
+ if min(densepose_attr.shape) > 0:
+ # Interpolate back to the original size of the densepose attribute
+ densepose_attr = F.interpolate(
+ densepose_attr, densepose_attrs.shape[-2:], mode="bilinear"
+ )
+ # Adding a very small probability to the background class to fill padded zones
+ densepose_attr[:, 0] += 1e-10
+ densepose_attrs[i] = densepose_attr
+ return densepose_attrs
+
+
+def rotate_box_inverse(rot_tfm, rotated_box):
+ """
+    `rotated_box` is an N x 4 array of [x0, y0, x1, y1] boxes.
+    When a bbox is rotated, it grows, because the new axis-aligned box must surround the
+    tilted bbox; so a bbox that is rotated and then inverse-rotated ends up much bigger
+    than the original. This function inverts the rotation on the box and also resizes it
+    to its original size.
+ """
+    # 1. Compute the inverse rotation of the rotated bboxes (bigger than the originals)
+ invrot_box = rot_tfm.inverse().apply_box(rotated_box)
+ h, w = rotated_box[:, 3] - rotated_box[:, 1], rotated_box[:, 2] - rotated_box[:, 0]
+ ih, iw = invrot_box[:, 3] - invrot_box[:, 1], invrot_box[:, 2] - invrot_box[:, 0]
+ assert 2 * rot_tfm.abs_sin**2 != 1, "45 degrees angle can't be inverted"
+ # 2. Inverse the corresponding computation in the rotation transform
+ # to get the original height/width of the rotated boxes
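+    # (editorial derivation) if (H, W) is the original box size, the rotated axis-aligned
+    # box measures h = H * abs_cos + W * abs_sin and w = W * abs_cos + H * abs_sin;
+    # solving this 2x2 system gives the formulas below, whose denominator
+    # 1 - 2 * abs_sin**2 = abs_cos**2 - abs_sin**2 vanishes at 45 degrees (the assert above)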
+ orig_h = (h * rot_tfm.abs_cos - w * rot_tfm.abs_sin) / (1 - 2 * rot_tfm.abs_sin**2)
+ orig_w = (w * rot_tfm.abs_cos - h * rot_tfm.abs_sin) / (1 - 2 * rot_tfm.abs_sin**2)
+ # 3. Resize the inverse-rotated bboxes to their original size
+ invrot_box[:, 0] += (iw - orig_w) / 2
+ invrot_box[:, 1] += (ih - orig_h) / 2
+ invrot_box[:, 2] -= (iw - orig_w) / 2
+ invrot_box[:, 3] -= (ih - orig_h) / 2
+
+ return invrot_box
diff --git a/densepose/modeling/utils.py b/densepose/modeling/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..64f53369b5ae3bc69f064c590e0837583ebc213e
--- /dev/null
+++ b/densepose/modeling/utils.py
@@ -0,0 +1,13 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from torch import nn
+
+
+def initialize_module_params(module: nn.Module) -> None:
+ for name, param in module.named_parameters():
+ if "bias" in name:
+ nn.init.constant_(param, 0)
+ elif "weight" in name:
+ nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu")
diff --git a/densepose/structures/__init__.py b/densepose/structures/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..4ee84836219994a54bb1249c90a7d0d6f8b72e8b
--- /dev/null
+++ b/densepose/structures/__init__.py
@@ -0,0 +1,19 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from .chart import DensePoseChartPredictorOutput
+from .chart_confidence import decorate_predictor_output_class_with_confidences
+from .cse_confidence import decorate_cse_predictor_output_class_with_confidences
+from .chart_result import (
+ DensePoseChartResult,
+ DensePoseChartResultWithConfidences,
+ quantize_densepose_chart_result,
+ compress_quantized_densepose_chart_result,
+ decompress_compressed_densepose_chart_result,
+)
+from .cse import DensePoseEmbeddingPredictorOutput
+from .data_relative import DensePoseDataRelative
+from .list import DensePoseList
+from .mesh import Mesh, create_mesh
+from .transform_data import DensePoseTransformData, normalized_coords_transform
diff --git a/densepose/structures/__pycache__/__init__.cpython-39.pyc b/densepose/structures/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4014f415bc4f953968367d17acd1e2057668f6e4
Binary files /dev/null and b/densepose/structures/__pycache__/__init__.cpython-39.pyc differ
diff --git a/densepose/structures/__pycache__/chart.cpython-39.pyc b/densepose/structures/__pycache__/chart.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..10743b5e6630c2a321422b93ecfc45c2d2826267
Binary files /dev/null and b/densepose/structures/__pycache__/chart.cpython-39.pyc differ
diff --git a/densepose/structures/__pycache__/chart_confidence.cpython-39.pyc b/densepose/structures/__pycache__/chart_confidence.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f94586ecddcfb840662e7f20219eff31575ed704
Binary files /dev/null and b/densepose/structures/__pycache__/chart_confidence.cpython-39.pyc differ
diff --git a/densepose/structures/__pycache__/chart_result.cpython-39.pyc b/densepose/structures/__pycache__/chart_result.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a17b1df1d6266225326e715425e1ed2d0e866974
Binary files /dev/null and b/densepose/structures/__pycache__/chart_result.cpython-39.pyc differ
diff --git a/densepose/structures/__pycache__/cse.cpython-39.pyc b/densepose/structures/__pycache__/cse.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..eb47655ba11bd3caf3dfd39948fda288a560e121
Binary files /dev/null and b/densepose/structures/__pycache__/cse.cpython-39.pyc differ
diff --git a/densepose/structures/__pycache__/cse_confidence.cpython-39.pyc b/densepose/structures/__pycache__/cse_confidence.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a270e69545008b6a113dc1e60bce28e2f04cded3
Binary files /dev/null and b/densepose/structures/__pycache__/cse_confidence.cpython-39.pyc differ
diff --git a/densepose/structures/__pycache__/data_relative.cpython-39.pyc b/densepose/structures/__pycache__/data_relative.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..97064160c973fc0cca20a6668bb97c1b60f63132
Binary files /dev/null and b/densepose/structures/__pycache__/data_relative.cpython-39.pyc differ
diff --git a/densepose/structures/__pycache__/list.cpython-39.pyc b/densepose/structures/__pycache__/list.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..41381ad6470111845b23978de8c37c9fc018e6d2
Binary files /dev/null and b/densepose/structures/__pycache__/list.cpython-39.pyc differ
diff --git a/densepose/structures/__pycache__/mesh.cpython-39.pyc b/densepose/structures/__pycache__/mesh.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c1acd657eedcb1e8b6c5f8c96943a27c1343c6d9
Binary files /dev/null and b/densepose/structures/__pycache__/mesh.cpython-39.pyc differ
diff --git a/densepose/structures/__pycache__/transform_data.cpython-39.pyc b/densepose/structures/__pycache__/transform_data.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fb7e717c72243246caecb18cb0e917d414ea14b0
Binary files /dev/null and b/densepose/structures/__pycache__/transform_data.cpython-39.pyc differ
diff --git a/densepose/structures/chart.py b/densepose/structures/chart.py
new file mode 100644
index 0000000000000000000000000000000000000000..4f8640ef3dc9ca7e66e1a639e2e23211300dbbac
--- /dev/null
+++ b/densepose/structures/chart.py
@@ -0,0 +1,72 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from dataclasses import dataclass
+from typing import Union
+import torch
+
+
+@dataclass
+class DensePoseChartPredictorOutput:
+ """
+ Predictor output that contains segmentation and inner coordinates predictions for predefined
+ body parts:
+ * coarse segmentation, a tensor of shape [N, K, Hout, Wout]
+ * fine segmentation, a tensor of shape [N, C, Hout, Wout]
+ * U coordinates, a tensor of shape [N, C, Hout, Wout]
+ * V coordinates, a tensor of shape [N, C, Hout, Wout]
+ where
+ - N is the number of instances
+ - K is the number of coarse segmentation channels (
+ 2 = foreground / background,
+ 15 = one of 14 body parts / background)
+ - C is the number of fine segmentation channels (
+ 24 fine body parts / background)
+ - Hout and Wout are height and width of predictions
+ """
+
+ coarse_segm: torch.Tensor
+ fine_segm: torch.Tensor
+ u: torch.Tensor
+ v: torch.Tensor
+
+ def __len__(self):
+ """
+ Number of instances (N) in the output
+ """
+ return self.coarse_segm.size(0)
+
+ def __getitem__(
+ self, item: Union[int, slice, torch.BoolTensor]
+ ) -> "DensePoseChartPredictorOutput":
+ """
+ Get outputs for the selected instance(s)
+
+ Args:
+ item (int or slice or tensor): selected items
+ """
+ if isinstance(item, int):
+ return DensePoseChartPredictorOutput(
+ coarse_segm=self.coarse_segm[item].unsqueeze(0),
+ fine_segm=self.fine_segm[item].unsqueeze(0),
+ u=self.u[item].unsqueeze(0),
+ v=self.v[item].unsqueeze(0),
+ )
+ else:
+ return DensePoseChartPredictorOutput(
+ coarse_segm=self.coarse_segm[item],
+ fine_segm=self.fine_segm[item],
+ u=self.u[item],
+ v=self.v[item],
+ )
+
+ def to(self, device: torch.device):
+ """
+ Transfers all tensors to the given device
+ """
+ coarse_segm = self.coarse_segm.to(device)
+ fine_segm = self.fine_segm.to(device)
+ u = self.u.to(device)
+ v = self.v.to(device)
+ return DensePoseChartPredictorOutput(coarse_segm=coarse_segm, fine_segm=fine_segm, u=u, v=v)
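
A small sketch of how `DensePoseChartPredictorOutput` is constructed and indexed (tensor sizes are illustrative, not prescribed by the class):

```python
import torch
from densepose.structures import DensePoseChartPredictorOutput

N, K, C, H, W = 4, 2, 25, 112, 112   # instances, coarse channels, fine channels, output size
output = DensePoseChartPredictorOutput(
    coarse_segm=torch.rand(N, K, H, W),
    fine_segm=torch.rand(N, C, H, W),
    u=torch.rand(N, C, H, W),
    v=torch.rand(N, C, H, W),
)
assert len(output) == N
single = output[0]                     # int indexing keeps a leading batch dim of 1
assert single.coarse_segm.shape == (1, K, H, W)
subset = output[torch.tensor([True, False, True, False])]   # boolean mask selection
assert len(subset) == 2
```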
diff --git a/densepose/structures/chart_confidence.py b/densepose/structures/chart_confidence.py
new file mode 100644
index 0000000000000000000000000000000000000000..faec3a0f161939591a8424058871d50198327b08
--- /dev/null
+++ b/densepose/structures/chart_confidence.py
@@ -0,0 +1,100 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from dataclasses import make_dataclass
+from functools import lru_cache
+from typing import Any, Optional
+import torch
+
+
+@lru_cache(maxsize=None)
+def decorate_predictor_output_class_with_confidences(BasePredictorOutput: type) -> type:
+ """
+ Create a new output class from an existing one by adding new attributes
+ related to confidence estimation:
+ - sigma_1 (tensor)
+ - sigma_2 (tensor)
+ - kappa_u (tensor)
+ - kappa_v (tensor)
+ - fine_segm_confidence (tensor)
+ - coarse_segm_confidence (tensor)
+
+ Details on confidence estimation parameters can be found in:
+ N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning
+ Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019
+ A. Sanakoyeu et al., Transferring Dense Pose to Proximal Animal Classes, CVPR 2020
+
+    The new class inherits from the provided `BasePredictorOutput` class;
+    its name is composed of the name of the provided class and the
+    "WithConfidences" suffix.
+
+ Args:
+ BasePredictorOutput (type): output type to which confidence data
+ is to be added, assumed to be a dataclass
+ Return:
+ New dataclass derived from the provided one that has attributes
+ for confidence estimation
+ """
+
+ PredictorOutput = make_dataclass(
+ BasePredictorOutput.__name__ + "WithConfidences",
+ fields=[
+ ("sigma_1", Optional[torch.Tensor], None),
+ ("sigma_2", Optional[torch.Tensor], None),
+ ("kappa_u", Optional[torch.Tensor], None),
+ ("kappa_v", Optional[torch.Tensor], None),
+ ("fine_segm_confidence", Optional[torch.Tensor], None),
+ ("coarse_segm_confidence", Optional[torch.Tensor], None),
+ ],
+ bases=(BasePredictorOutput,),
+ )
+
+ # add possibility to index PredictorOutput
+
+ def slice_if_not_none(data, item):
+ if data is None:
+ return None
+ if isinstance(item, int):
+ return data[item].unsqueeze(0)
+ return data[item]
+
+ def PredictorOutput_getitem(self, item):
+ PredictorOutput = type(self)
+ base_predictor_output_sliced = super(PredictorOutput, self).__getitem__(item)
+ return PredictorOutput(
+ **base_predictor_output_sliced.__dict__,
+ coarse_segm_confidence=slice_if_not_none(self.coarse_segm_confidence, item),
+ fine_segm_confidence=slice_if_not_none(self.fine_segm_confidence, item),
+ sigma_1=slice_if_not_none(self.sigma_1, item),
+ sigma_2=slice_if_not_none(self.sigma_2, item),
+ kappa_u=slice_if_not_none(self.kappa_u, item),
+ kappa_v=slice_if_not_none(self.kappa_v, item),
+ )
+
+ PredictorOutput.__getitem__ = PredictorOutput_getitem
+
+ def PredictorOutput_to(self, device: torch.device):
+ """
+ Transfers all tensors to the given device
+ """
+ PredictorOutput = type(self)
+ base_predictor_output_to = super(PredictorOutput, self).to(device) # pyre-ignore[16]
+
+ def to_device_if_tensor(var: Any):
+ if isinstance(var, torch.Tensor):
+ return var.to(device)
+ return var
+
+ return PredictorOutput(
+ **base_predictor_output_to.__dict__,
+ sigma_1=to_device_if_tensor(self.sigma_1),
+ sigma_2=to_device_if_tensor(self.sigma_2),
+ kappa_u=to_device_if_tensor(self.kappa_u),
+ kappa_v=to_device_if_tensor(self.kappa_v),
+ fine_segm_confidence=to_device_if_tensor(self.fine_segm_confidence),
+ coarse_segm_confidence=to_device_if_tensor(self.coarse_segm_confidence),
+ )
+
+ PredictorOutput.to = PredictorOutput_to
+ return PredictorOutput
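
A sketch of how the decorator is intended to be applied to a chart predictor output (shapes are illustrative; confidence fields that are not supplied default to `None`):

```python
import torch
from densepose.structures import (
    DensePoseChartPredictorOutput,
    decorate_predictor_output_class_with_confidences,
)

OutputWithConfidences = decorate_predictor_output_class_with_confidences(
    DensePoseChartPredictorOutput
)
N, C, H, W = 2, 25, 56, 56
out = OutputWithConfidences(
    coarse_segm=torch.rand(N, 2, H, W),
    fine_segm=torch.rand(N, C, H, W),
    u=torch.rand(N, C, H, W),
    v=torch.rand(N, C, H, W),
    sigma_2=torch.rand(N, C, H, W),   # remaining confidence fields stay None
)
first = out[0]                        # slicing also slices the confidence tensors
assert first.sigma_2.shape[0] == 1 and first.kappa_u is None
assert type(out).__name__ == "DensePoseChartPredictorOutputWithConfidences"
```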
diff --git a/densepose/structures/chart_result.py b/densepose/structures/chart_result.py
new file mode 100644
index 0000000000000000000000000000000000000000..3a9e56dee9fb81fd6a6596c524dcd9f2e471af19
--- /dev/null
+++ b/densepose/structures/chart_result.py
@@ -0,0 +1,185 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from dataclasses import dataclass
+from typing import Any, Optional, Tuple
+import torch
+
+
+@dataclass
+class DensePoseChartResult:
+ """
+ DensePose results for chart-based methods represented by labels and inner
+ coordinates (U, V) of individual charts. Each chart is a 2D manifold
+ that has an associated label and is parameterized by two coordinates U and V.
+ Both U and V take values in [0, 1].
+ Thus the results are represented by two tensors:
+ - labels (tensor [H, W] of long): contains estimated label for each pixel of
+ the detection bounding box of size (H, W)
+ - uv (tensor [2, H, W] of float): contains estimated U and V coordinates
+ for each pixel of the detection bounding box of size (H, W)
+ """
+
+ labels: torch.Tensor
+ uv: torch.Tensor
+
+ def to(self, device: torch.device):
+ """
+ Transfers all tensors to the given device
+ """
+ labels = self.labels.to(device)
+ uv = self.uv.to(device)
+ return DensePoseChartResult(labels=labels, uv=uv)
+
+
+@dataclass
+class DensePoseChartResultWithConfidences:
+ """
+ We add confidence values to DensePoseChartResult
+ Thus the results are represented by two tensors:
+ - labels (tensor [H, W] of long): contains estimated label for each pixel of
+ the detection bounding box of size (H, W)
+ - uv (tensor [2, H, W] of float): contains estimated U and V coordinates
+ for each pixel of the detection bounding box of size (H, W)
+ Plus one [H, W] tensor of float for each confidence type
+ """
+
+ labels: torch.Tensor
+ uv: torch.Tensor
+ sigma_1: Optional[torch.Tensor] = None
+ sigma_2: Optional[torch.Tensor] = None
+ kappa_u: Optional[torch.Tensor] = None
+ kappa_v: Optional[torch.Tensor] = None
+ fine_segm_confidence: Optional[torch.Tensor] = None
+ coarse_segm_confidence: Optional[torch.Tensor] = None
+
+ def to(self, device: torch.device):
+ """
+ Transfers all tensors to the given device, except if their value is None
+ """
+
+ def to_device_if_tensor(var: Any):
+ if isinstance(var, torch.Tensor):
+ return var.to(device)
+ return var
+
+ return DensePoseChartResultWithConfidences(
+ labels=self.labels.to(device),
+ uv=self.uv.to(device),
+ sigma_1=to_device_if_tensor(self.sigma_1),
+ sigma_2=to_device_if_tensor(self.sigma_2),
+ kappa_u=to_device_if_tensor(self.kappa_u),
+ kappa_v=to_device_if_tensor(self.kappa_v),
+ fine_segm_confidence=to_device_if_tensor(self.fine_segm_confidence),
+ coarse_segm_confidence=to_device_if_tensor(self.coarse_segm_confidence),
+ )
+
+
+@dataclass
+class DensePoseChartResultQuantized:
+ """
+ DensePose results for chart-based methods represented by labels and quantized
+ inner coordinates (U, V) of individual charts. Each chart is a 2D manifold
+ that has an associated label and is parameterized by two coordinates U and V.
+ Both U and V take values in [0, 1].
+ Quantized coordinates Uq and Vq have uint8 values which are obtained as:
+ Uq = U * 255 (hence 0 <= Uq <= 255)
+ Vq = V * 255 (hence 0 <= Vq <= 255)
+ Thus the results are represented by one tensor:
+ - labels_uv_uint8 (tensor [3, H, W] of uint8): contains estimated label
+ and quantized coordinates Uq and Vq for each pixel of the detection
+ bounding box of size (H, W)
+ """
+
+ labels_uv_uint8: torch.Tensor
+
+ def to(self, device: torch.device):
+ """
+ Transfers all tensors to the given device
+ """
+ labels_uv_uint8 = self.labels_uv_uint8.to(device)
+ return DensePoseChartResultQuantized(labels_uv_uint8=labels_uv_uint8)
+
+
+@dataclass
+class DensePoseChartResultCompressed:
+ """
+ DensePose results for chart-based methods represented by a PNG-encoded string.
+ The tensor of quantized DensePose results of size [3, H, W] is considered
+ as an image with 3 color channels. PNG compression is applied and the result
+ is stored as a Base64-encoded string. The following attributes are defined:
+ - shape_chw (tuple of 3 int): contains shape of the result tensor
+ (number of channels, height, width)
+ - labels_uv_str (str): contains Base64-encoded results tensor of size
+ [3, H, W] compressed with PNG compression methods
+ """
+
+ shape_chw: Tuple[int, int, int]
+ labels_uv_str: str
+
+
+def quantize_densepose_chart_result(result: DensePoseChartResult) -> DensePoseChartResultQuantized:
+ """
+ Applies quantization to DensePose chart-based result.
+
+ Args:
+ result (DensePoseChartResult): DensePose chart-based result
+ Return:
+ Quantized DensePose chart-based result (DensePoseChartResultQuantized)
+ """
+ h, w = result.labels.shape
+ labels_uv_uint8 = torch.zeros([3, h, w], dtype=torch.uint8, device=result.labels.device)
+ labels_uv_uint8[0] = result.labels
+ labels_uv_uint8[1:] = (result.uv * 255).clamp(0, 255).byte()
+ return DensePoseChartResultQuantized(labels_uv_uint8=labels_uv_uint8)
+
+
+def compress_quantized_densepose_chart_result(
+ result: DensePoseChartResultQuantized,
+) -> DensePoseChartResultCompressed:
+ """
+ Compresses quantized DensePose chart-based result
+
+ Args:
+ result (DensePoseChartResultQuantized): quantized DensePose chart-based result
+ Return:
+ Compressed DensePose chart-based result (DensePoseChartResultCompressed)
+ """
+ import base64
+ import numpy as np
+ from io import BytesIO
+ from PIL import Image
+
+ labels_uv_uint8_np_chw = result.labels_uv_uint8.cpu().numpy()
+ labels_uv_uint8_np_hwc = np.moveaxis(labels_uv_uint8_np_chw, 0, -1)
+ im = Image.fromarray(labels_uv_uint8_np_hwc)
+ fstream = BytesIO()
+ im.save(fstream, format="png", optimize=True)
+ labels_uv_str = base64.encodebytes(fstream.getvalue()).decode()
+ shape_chw = labels_uv_uint8_np_chw.shape
+ return DensePoseChartResultCompressed(labels_uv_str=labels_uv_str, shape_chw=shape_chw)
+
+
+def decompress_compressed_densepose_chart_result(
+ result: DensePoseChartResultCompressed,
+) -> DensePoseChartResultQuantized:
+ """
+ Decompresses DensePose chart-based result encoded into a base64 string
+
+ Args:
+ result (DensePoseChartResultCompressed): compressed DensePose chart result
+ Return:
+ Quantized DensePose chart-based result (DensePoseChartResultQuantized)
+ """
+ import base64
+ import numpy as np
+ from io import BytesIO
+ from PIL import Image
+
+ fstream = BytesIO(base64.decodebytes(result.labels_uv_str.encode()))
+ im = Image.open(fstream)
+ labels_uv_uint8_np_chw = np.moveaxis(np.array(im, dtype=np.uint8), -1, 0)
+ return DensePoseChartResultQuantized(
+ labels_uv_uint8=torch.from_numpy(labels_uv_uint8_np_chw.reshape(result.shape_chw))
+ )
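
The three helpers above form a lossless round trip once a result has been quantized to `uint8`; a sketch with a fake result (sizes are arbitrary):

```python
import torch
from densepose.structures import (
    DensePoseChartResult,
    quantize_densepose_chart_result,
    compress_quantized_densepose_chart_result,
    decompress_compressed_densepose_chart_result,
)

H, W = 64, 48
result = DensePoseChartResult(
    labels=torch.randint(0, 25, (H, W), dtype=torch.long),
    uv=torch.rand(2, H, W),
)
quantized = quantize_densepose_chart_result(result)                 # uint8 [3, H, W]
compressed = compress_quantized_densepose_chart_result(quantized)   # base64-encoded PNG
restored = decompress_compressed_densepose_chart_result(compressed)
# PNG compression is lossless, so the quantized tensor is recovered exactly.
assert torch.equal(restored.labels_uv_uint8, quantized.labels_uv_uint8)
```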
diff --git a/densepose/structures/cse.py b/densepose/structures/cse.py
new file mode 100644
index 0000000000000000000000000000000000000000..381f1384a8d4d42f81cda8ff1558002149bdea74
--- /dev/null
+++ b/densepose/structures/cse.py
@@ -0,0 +1,54 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+# pyre-unsafe
+
+from dataclasses import dataclass
+from typing import Union
+import torch
+
+
+@dataclass
+class DensePoseEmbeddingPredictorOutput:
+ """
+ Predictor output that contains embedding and coarse segmentation data:
+ * embedding: float tensor of size [N, D, H, W], contains estimated embeddings
+ * coarse_segm: float tensor of size [N, K, H, W]
+ Here D = MODEL.ROI_DENSEPOSE_HEAD.CSE.EMBED_SIZE
+ K = MODEL.ROI_DENSEPOSE_HEAD.NUM_COARSE_SEGM_CHANNELS
+ """
+
+ embedding: torch.Tensor
+ coarse_segm: torch.Tensor
+
+ def __len__(self):
+ """
+ Number of instances (N) in the output
+ """
+ return self.coarse_segm.size(0)
+
+ def __getitem__(
+ self, item: Union[int, slice, torch.BoolTensor]
+ ) -> "DensePoseEmbeddingPredictorOutput":
+ """
+ Get outputs for the selected instance(s)
+
+ Args:
+ item (int or slice or tensor): selected items
+ """
+ if isinstance(item, int):
+ return DensePoseEmbeddingPredictorOutput(
+ coarse_segm=self.coarse_segm[item].unsqueeze(0),
+ embedding=self.embedding[item].unsqueeze(0),
+ )
+ else:
+ return DensePoseEmbeddingPredictorOutput(
+ coarse_segm=self.coarse_segm[item], embedding=self.embedding[item]
+ )
+
+ def to(self, device: torch.device):
+ """
+ Transfers all tensors to the given device
+ """
+ coarse_segm = self.coarse_segm.to(device)
+ embedding = self.embedding.to(device)
+ return DensePoseEmbeddingPredictorOutput(coarse_segm=coarse_segm, embedding=embedding)
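
Usage mirrors the chart-based predictor output; a minimal sketch with illustrative sizes:

```python
import torch
from densepose.structures import DensePoseEmbeddingPredictorOutput

N, D, K, H, W = 3, 16, 2, 56, 56   # D = embedding size, K = coarse segm channels
cse_out = DensePoseEmbeddingPredictorOutput(
    embedding=torch.rand(N, D, H, W),
    coarse_segm=torch.rand(N, K, H, W),
)
assert len(cse_out) == N
assert len(cse_out[1:]) == N - 1   # slicing returns another predictor output
```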
diff --git a/densepose/structures/cse_confidence.py b/densepose/structures/cse_confidence.py
new file mode 100644
index 0000000000000000000000000000000000000000..251a7e823e38931fb1b86b017417538af5350944
--- /dev/null
+++ b/densepose/structures/cse_confidence.py
@@ -0,0 +1,80 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+
+from dataclasses import make_dataclass
+from functools import lru_cache
+from typing import Any, Optional
+import torch
+
+
+@lru_cache(maxsize=None)
+def decorate_cse_predictor_output_class_with_confidences(BasePredictorOutput: type) -> type:
+ """
+ Create a new output class from an existing one by adding new attributes
+ related to confidence estimation:
+ - coarse_segm_confidence (tensor)
+
+ Details on confidence estimation parameters can be found in:
+ N. Neverova, D. Novotny, A. Vedaldi "Correlated Uncertainty for Learning
+ Dense Correspondences from Noisy Labels", p. 918--926, in Proc. NIPS 2019
+ A. Sanakoyeu et al., Transferring Dense Pose to Proximal Animal Classes, CVPR 2020
+
+    The new class inherits from the provided `BasePredictorOutput` class;
+    its name is composed of the name of the provided class and the
+    "WithConfidences" suffix.
+
+ Args:
+ BasePredictorOutput (type): output type to which confidence data
+ is to be added, assumed to be a dataclass
+ Return:
+ New dataclass derived from the provided one that has attributes
+ for confidence estimation
+ """
+
+ PredictorOutput = make_dataclass(
+ BasePredictorOutput.__name__ + "WithConfidences",
+ fields=[
+ ("coarse_segm_confidence", Optional[torch.Tensor], None),
+ ],
+ bases=(BasePredictorOutput,),
+ )
+
+ # add possibility to index PredictorOutput
+
+ def slice_if_not_none(data, item):
+ if data is None:
+ return None
+ if isinstance(item, int):
+ return data[item].unsqueeze(0)
+ return data[item]
+
+ def PredictorOutput_getitem(self, item):
+ PredictorOutput = type(self)
+ base_predictor_output_sliced = super(PredictorOutput, self).__getitem__(item)
+ return PredictorOutput(
+ **base_predictor_output_sliced.__dict__,
+ coarse_segm_confidence=slice_if_not_none(self.coarse_segm_confidence, item),
+ )
+
+ PredictorOutput.__getitem__ = PredictorOutput_getitem
+
+ def PredictorOutput_to(self, device: torch.device):
+ """
+ Transfers all tensors to the given device
+ """
+ PredictorOutput = type(self)
+ base_predictor_output_to = super(PredictorOutput, self).to(device) # pyre-ignore[16]
+
+ def to_device_if_tensor(var: Any):
+ if isinstance(var, torch.Tensor):
+ return var.to(device)
+ return var
+
+ return PredictorOutput(
+ **base_predictor_output_to.__dict__,
+ coarse_segm_confidence=to_device_if_tensor(self.coarse_segm_confidence),
+ )
+
+ PredictorOutput.to = PredictorOutput_to
+ return PredictorOutput
diff --git a/densepose/structures/data_relative.py b/densepose/structures/data_relative.py
new file mode 100644
index 0000000000000000000000000000000000000000..bcf27ef9bb69f5d9f74d6499e55408e8d4ec5803
--- /dev/null
+++ b/densepose/structures/data_relative.py
@@ -0,0 +1,245 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+import numpy as np
+import torch
+from torch.nn import functional as F
+
+from densepose.data.meshes.catalog import MeshCatalog
+from densepose.structures.mesh import load_mesh_symmetry
+from densepose.structures.transform_data import DensePoseTransformData
+
+
+class DensePoseDataRelative:
+ """
+ Dense pose relative annotations that can be applied to any bounding box:
+ x - normalized X coordinates [0, 255] of annotated points
+ y - normalized Y coordinates [0, 255] of annotated points
+ i - body part labels 0,...,24 for annotated points
+ u - body part U coordinates [0, 1] for annotated points
+ v - body part V coordinates [0, 1] for annotated points
+ segm - 256x256 segmentation mask with values 0,...,14
+ To obtain absolute x and y data wrt some bounding box one needs to first
+ divide the data by 256, multiply by the respective bounding box size
+ and add bounding box offset:
+ x_img = x0 + x_norm * w / 256.0
+ y_img = y0 + y_norm * h / 256.0
+ Segmentation masks are typically sampled to get image-based masks.
+ """
+
+ # Key for normalized X coordinates in annotation dict
+ X_KEY = "dp_x"
+ # Key for normalized Y coordinates in annotation dict
+ Y_KEY = "dp_y"
+ # Key for U part coordinates in annotation dict (used in chart-based annotations)
+ U_KEY = "dp_U"
+ # Key for V part coordinates in annotation dict (used in chart-based annotations)
+ V_KEY = "dp_V"
+ # Key for I point labels in annotation dict (used in chart-based annotations)
+ I_KEY = "dp_I"
+ # Key for segmentation mask in annotation dict
+ S_KEY = "dp_masks"
+ # Key for vertex ids (used in continuous surface embeddings annotations)
+ VERTEX_IDS_KEY = "dp_vertex"
+ # Key for mesh id (used in continuous surface embeddings annotations)
+ MESH_NAME_KEY = "ref_model"
+ # Number of body parts in segmentation masks
+ N_BODY_PARTS = 14
+ # Number of parts in point labels
+ N_PART_LABELS = 24
+ MASK_SIZE = 256
+
+ def __init__(self, annotation, cleanup=False):
+ self.x = torch.as_tensor(annotation[DensePoseDataRelative.X_KEY])
+ self.y = torch.as_tensor(annotation[DensePoseDataRelative.Y_KEY])
+ if (
+ DensePoseDataRelative.I_KEY in annotation
+ and DensePoseDataRelative.U_KEY in annotation
+ and DensePoseDataRelative.V_KEY in annotation
+ ):
+ self.i = torch.as_tensor(annotation[DensePoseDataRelative.I_KEY])
+ self.u = torch.as_tensor(annotation[DensePoseDataRelative.U_KEY])
+ self.v = torch.as_tensor(annotation[DensePoseDataRelative.V_KEY])
+ if (
+ DensePoseDataRelative.VERTEX_IDS_KEY in annotation
+ and DensePoseDataRelative.MESH_NAME_KEY in annotation
+ ):
+ self.vertex_ids = torch.as_tensor(
+ annotation[DensePoseDataRelative.VERTEX_IDS_KEY], dtype=torch.long
+ )
+ self.mesh_id = MeshCatalog.get_mesh_id(annotation[DensePoseDataRelative.MESH_NAME_KEY])
+ if DensePoseDataRelative.S_KEY in annotation:
+ self.segm = DensePoseDataRelative.extract_segmentation_mask(annotation)
+ self.device = torch.device("cpu")
+ if cleanup:
+ DensePoseDataRelative.cleanup_annotation(annotation)
+
+ def to(self, device):
+ if self.device == device:
+ return self
+ new_data = DensePoseDataRelative.__new__(DensePoseDataRelative)
+ new_data.x = self.x.to(device)
+ new_data.y = self.y.to(device)
+ for attr in ["i", "u", "v", "vertex_ids", "segm"]:
+ if hasattr(self, attr):
+ setattr(new_data, attr, getattr(self, attr).to(device))
+ if hasattr(self, "mesh_id"):
+ new_data.mesh_id = self.mesh_id
+ new_data.device = device
+ return new_data
+
+ @staticmethod
+ def extract_segmentation_mask(annotation):
+ import pycocotools.mask as mask_utils
+
+ # TODO: annotation instance is accepted if it contains either
+ # DensePose segmentation or instance segmentation. However, here we
+ # only rely on DensePose segmentation
+ poly_specs = annotation[DensePoseDataRelative.S_KEY]
+ if isinstance(poly_specs, torch.Tensor):
+ # data is already given as mask tensors, no need to decode
+ return poly_specs
+ segm = torch.zeros((DensePoseDataRelative.MASK_SIZE,) * 2, dtype=torch.float32)
+ if isinstance(poly_specs, dict):
+ if poly_specs:
+ mask = mask_utils.decode(poly_specs)
+ segm[mask > 0] = 1
+ else:
+ for i in range(len(poly_specs)):
+ poly_i = poly_specs[i]
+ if poly_i:
+ mask_i = mask_utils.decode(poly_i)
+ segm[mask_i > 0] = i + 1
+ return segm
+
+ @staticmethod
+ def validate_annotation(annotation):
+ for key in [
+ DensePoseDataRelative.X_KEY,
+ DensePoseDataRelative.Y_KEY,
+ ]:
+ if key not in annotation:
+ return False, "no {key} data in the annotation".format(key=key)
+ valid_for_iuv_setting = all(
+ key in annotation
+ for key in [
+ DensePoseDataRelative.I_KEY,
+ DensePoseDataRelative.U_KEY,
+ DensePoseDataRelative.V_KEY,
+ ]
+ )
+ valid_for_cse_setting = all(
+ key in annotation
+ for key in [
+ DensePoseDataRelative.VERTEX_IDS_KEY,
+ DensePoseDataRelative.MESH_NAME_KEY,
+ ]
+ )
+ if not valid_for_iuv_setting and not valid_for_cse_setting:
+ return (
+ False,
+ "expected either {} (IUV setting) or {} (CSE setting) annotations".format(
+ ", ".join(
+ [
+ DensePoseDataRelative.I_KEY,
+ DensePoseDataRelative.U_KEY,
+ DensePoseDataRelative.V_KEY,
+ ]
+ ),
+ ", ".join(
+ [
+ DensePoseDataRelative.VERTEX_IDS_KEY,
+ DensePoseDataRelative.MESH_NAME_KEY,
+ ]
+ ),
+ ),
+ )
+ return True, None
+
+ @staticmethod
+ def cleanup_annotation(annotation):
+ for key in [
+ DensePoseDataRelative.X_KEY,
+ DensePoseDataRelative.Y_KEY,
+ DensePoseDataRelative.I_KEY,
+ DensePoseDataRelative.U_KEY,
+ DensePoseDataRelative.V_KEY,
+ DensePoseDataRelative.S_KEY,
+ DensePoseDataRelative.VERTEX_IDS_KEY,
+ DensePoseDataRelative.MESH_NAME_KEY,
+ ]:
+ if key in annotation:
+ del annotation[key]
+
+ def apply_transform(self, transforms, densepose_transform_data):
+ self._transform_pts(transforms, densepose_transform_data)
+ if hasattr(self, "segm"):
+ self._transform_segm(transforms, densepose_transform_data)
+
+ def _transform_pts(self, transforms, dp_transform_data):
+ import detectron2.data.transforms as T
+
+ # NOTE: This assumes that HorizFlipTransform is the only one that does flip
+ do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1
+ if do_hflip:
+ self.x = self.MASK_SIZE - self.x
+ if hasattr(self, "i"):
+ self._flip_iuv_semantics(dp_transform_data)
+ if hasattr(self, "vertex_ids"):
+ self._flip_vertices()
+
+ for t in transforms.transforms:
+ if isinstance(t, T.RotationTransform):
+ xy_scale = np.array((t.w, t.h)) / DensePoseDataRelative.MASK_SIZE
+ xy = t.apply_coords(np.stack((self.x, self.y), axis=1) * xy_scale)
+ self.x, self.y = torch.tensor(xy / xy_scale, dtype=self.x.dtype).T
+
+ def _flip_iuv_semantics(self, dp_transform_data: DensePoseTransformData) -> None:
+ i_old = self.i.clone()
+ uv_symmetries = dp_transform_data.uv_symmetries
+ pt_label_symmetries = dp_transform_data.point_label_symmetries
+ for i in range(self.N_PART_LABELS):
+ if i + 1 in i_old:
+ annot_indices_i = i_old == i + 1
+ if pt_label_symmetries[i + 1] != i + 1:
+ self.i[annot_indices_i] = pt_label_symmetries[i + 1]
+ u_loc = (self.u[annot_indices_i] * 255).long()
+ v_loc = (self.v[annot_indices_i] * 255).long()
+ self.u[annot_indices_i] = uv_symmetries["U_transforms"][i][v_loc, u_loc].to(
+ device=self.u.device
+ )
+ self.v[annot_indices_i] = uv_symmetries["V_transforms"][i][v_loc, u_loc].to(
+ device=self.v.device
+ )
+
+ def _flip_vertices(self):
+ mesh_info = MeshCatalog[MeshCatalog.get_mesh_name(self.mesh_id)]
+ mesh_symmetry = (
+ load_mesh_symmetry(mesh_info.symmetry) if mesh_info.symmetry is not None else None
+ )
+ self.vertex_ids = mesh_symmetry["vertex_transforms"][self.vertex_ids]
+
+ def _transform_segm(self, transforms, dp_transform_data):
+ import detectron2.data.transforms as T
+
+ # NOTE: This assumes that HorizFlipTransform is the only one that does flip
+ do_hflip = sum(isinstance(t, T.HFlipTransform) for t in transforms.transforms) % 2 == 1
+ if do_hflip:
+ self.segm = torch.flip(self.segm, [1])
+ self._flip_segm_semantics(dp_transform_data)
+
+ for t in transforms.transforms:
+ if isinstance(t, T.RotationTransform):
+ self._transform_segm_rotation(t)
+
+ def _flip_segm_semantics(self, dp_transform_data):
+ old_segm = self.segm.clone()
+ mask_label_symmetries = dp_transform_data.mask_label_symmetries
+ for i in range(self.N_BODY_PARTS):
+ if mask_label_symmetries[i + 1] != i + 1:
+ self.segm[old_segm == i + 1] = mask_label_symmetries[i + 1]
+
+ def _transform_segm_rotation(self, rotation):
+ self.segm = F.interpolate(self.segm[None, None, :], (rotation.h, rotation.w)).numpy()
+ self.segm = torch.tensor(rotation.apply_segmentation(self.segm[0, 0]))[None, None, :]
+ self.segm = F.interpolate(self.segm, [DensePoseDataRelative.MASK_SIZE] * 2)[0, 0]
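
The class docstring's formula for converting the stored [0, 255] point coordinates into absolute image coordinates, worked through with a hypothetical bounding box:

```python
import numpy as np

# Hypothetical annotated points (normalized to [0, 255]) and instance box.
x_norm = np.array([0.0, 128.0, 255.0])
y_norm = np.array([64.0, 128.0, 192.0])
x0, y0, w, h = 30.0, 50.0, 200.0, 400.0   # box top-left corner and size in pixels

x_img = x0 + x_norm * w / 256.0   # -> [ 30. , 130. , 229.22]
y_img = y0 + y_norm * h / 256.0   # -> [150. , 250. , 350. ]
```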
diff --git a/densepose/structures/list.py b/densepose/structures/list.py
new file mode 100644
index 0000000000000000000000000000000000000000..e7dde3acd42ff33c103a50bcf6eebff21a59ce53
--- /dev/null
+++ b/densepose/structures/list.py
@@ -0,0 +1,72 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+import torch
+
+from densepose.structures.data_relative import DensePoseDataRelative
+
+
+class DensePoseList:
+
+ _TORCH_DEVICE_CPU = torch.device("cpu")
+
+ def __init__(self, densepose_datas, boxes_xyxy_abs, image_size_hw, device=_TORCH_DEVICE_CPU):
+ assert len(densepose_datas) == len(
+ boxes_xyxy_abs
+        ), "Attempt to initialize DensePoseList with {} DensePose data entries " "and {} boxes".format(
+ len(densepose_datas), len(boxes_xyxy_abs)
+ )
+ self.densepose_datas = []
+ for densepose_data in densepose_datas:
+            assert isinstance(densepose_data, DensePoseDataRelative) or densepose_data is None, (
+                "Attempt to initialize DensePoseList with DensePose data "
+ "of type {}, expected DensePoseDataRelative".format(type(densepose_data))
+ )
+ densepose_data_ondevice = (
+ densepose_data.to(device) if densepose_data is not None else None
+ )
+ self.densepose_datas.append(densepose_data_ondevice)
+ self.boxes_xyxy_abs = boxes_xyxy_abs.to(device)
+ self.image_size_hw = image_size_hw
+ self.device = device
+
+ def to(self, device):
+ if self.device == device:
+ return self
+ return DensePoseList(self.densepose_datas, self.boxes_xyxy_abs, self.image_size_hw, device)
+
+ def __iter__(self):
+ return iter(self.densepose_datas)
+
+ def __len__(self):
+ return len(self.densepose_datas)
+
+ def __repr__(self):
+ s = self.__class__.__name__ + "("
+ s += "num_instances={}, ".format(len(self.densepose_datas))
+ s += "image_width={}, ".format(self.image_size_hw[1])
+ s += "image_height={})".format(self.image_size_hw[0])
+ return s
+
+ def __getitem__(self, item):
+ if isinstance(item, int):
+ densepose_data_rel = self.densepose_datas[item]
+ return densepose_data_rel
+ elif isinstance(item, slice):
+ densepose_datas_rel = self.densepose_datas[item]
+ boxes_xyxy_abs = self.boxes_xyxy_abs[item]
+ return DensePoseList(
+ densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device
+ )
+ elif isinstance(item, torch.Tensor) and (item.dtype == torch.bool):
+ densepose_datas_rel = [self.densepose_datas[i] for i, x in enumerate(item) if x > 0]
+ boxes_xyxy_abs = self.boxes_xyxy_abs[item]
+ return DensePoseList(
+ densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device
+ )
+ else:
+ densepose_datas_rel = [self.densepose_datas[i] for i in item]
+ boxes_xyxy_abs = self.boxes_xyxy_abs[item]
+ return DensePoseList(
+ densepose_datas_rel, boxes_xyxy_abs, self.image_size_hw, self.device
+ )
diff --git a/densepose/structures/mesh.py b/densepose/structures/mesh.py
new file mode 100644
index 0000000000000000000000000000000000000000..faaad9cb5650f5e6a1bef76c599d5fd370238e4c
--- /dev/null
+++ b/densepose/structures/mesh.py
@@ -0,0 +1,172 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+# pyre-unsafe
+
+import pickle
+from functools import lru_cache
+from typing import Dict, Optional, Tuple
+import torch
+
+from detectron2.utils.file_io import PathManager
+
+from densepose.data.meshes.catalog import MeshCatalog, MeshInfo
+
+
+def _maybe_copy_to_device(
+ attribute: Optional[torch.Tensor], device: torch.device
+) -> Optional[torch.Tensor]:
+ if attribute is None:
+ return None
+ return attribute.to(device)
+
+
+class Mesh:
+ def __init__(
+ self,
+ vertices: Optional[torch.Tensor] = None,
+ faces: Optional[torch.Tensor] = None,
+ geodists: Optional[torch.Tensor] = None,
+ symmetry: Optional[Dict[str, torch.Tensor]] = None,
+ texcoords: Optional[torch.Tensor] = None,
+ mesh_info: Optional[MeshInfo] = None,
+ device: Optional[torch.device] = None,
+ ):
+ """
+ Args:
+ vertices (tensor [N, 3] of float32): vertex coordinates in 3D
+ faces (tensor [M, 3] of long): triangular face represented as 3
+ vertex indices
+ geodists (tensor [N, N] of float32): geodesic distances from
+ vertex `i` to vertex `j` (optional, default: None)
+ symmetry (dict: str -> tensor): various mesh symmetry data:
+ - "vertex_transforms": vertex mapping under horizontal flip,
+ tensor of size [N] of type long; vertex `i` is mapped to
+ vertex `tensor[i]` (optional, default: None)
+ texcoords (tensor [N, 2] of float32): texture coordinates, i.e. global
+ and normalized mesh UVs (optional, default: None)
+ mesh_info (MeshInfo type): necessary to load the attributes on-the-go,
+ can be used instead of passing all the variables one by one
+ device (torch.device): device of the Mesh. If not provided, will use
+ the device of the vertices
+ """
+ self._vertices = vertices
+ self._faces = faces
+ self._geodists = geodists
+ self._symmetry = symmetry
+ self._texcoords = texcoords
+ self.mesh_info = mesh_info
+ self.device = device
+
+ assert self._vertices is not None or self.mesh_info is not None
+
+ all_fields = [self._vertices, self._faces, self._geodists, self._texcoords]
+
+ if self.device is None:
+ for field in all_fields:
+ if field is not None:
+ self.device = field.device
+ break
+ if self.device is None and symmetry is not None:
+ for key in symmetry:
+ self.device = symmetry[key].device
+ break
+ self.device = torch.device("cpu") if self.device is None else self.device
+
+ assert all([var.device == self.device for var in all_fields if var is not None])
+ if symmetry:
+ assert all(symmetry[key].device == self.device for key in symmetry)
+        if texcoords is not None and vertices is not None:
+ assert len(vertices) == len(texcoords)
+
+ def to(self, device: torch.device):
+ device_symmetry = self._symmetry
+ if device_symmetry:
+ device_symmetry = {key: value.to(device) for key, value in device_symmetry.items()}
+ return Mesh(
+ _maybe_copy_to_device(self._vertices, device),
+ _maybe_copy_to_device(self._faces, device),
+ _maybe_copy_to_device(self._geodists, device),
+ device_symmetry,
+ _maybe_copy_to_device(self._texcoords, device),
+ self.mesh_info,
+ device,
+ )
+
+ @property
+ def vertices(self):
+ if self._vertices is None and self.mesh_info is not None:
+ self._vertices = load_mesh_data(self.mesh_info.data, "vertices", self.device)
+ return self._vertices
+
+ @property
+ def faces(self):
+ if self._faces is None and self.mesh_info is not None:
+ self._faces = load_mesh_data(self.mesh_info.data, "faces", self.device)
+ return self._faces
+
+ @property
+ def geodists(self):
+ if self._geodists is None and self.mesh_info is not None:
+ self._geodists = load_mesh_auxiliary_data(self.mesh_info.geodists, self.device)
+ return self._geodists
+
+ @property
+ def symmetry(self):
+ if self._symmetry is None and self.mesh_info is not None:
+ self._symmetry = load_mesh_symmetry(self.mesh_info.symmetry, self.device)
+ return self._symmetry
+
+ @property
+ def texcoords(self):
+ if self._texcoords is None and self.mesh_info is not None:
+ self._texcoords = load_mesh_auxiliary_data(self.mesh_info.texcoords, self.device)
+ return self._texcoords
+
+ def get_geodists(self):
+ if self.geodists is None:
+ self.geodists = self._compute_geodists()
+ return self.geodists
+
+ def _compute_geodists(self):
+ # TODO: compute using Laplace-Beltrami
+ geodists = None
+ return geodists
+
+
+def load_mesh_data(
+ mesh_fpath: str, field: str, device: Optional[torch.device] = None
+) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor]]:
+ with PathManager.open(mesh_fpath, "rb") as hFile:
+ # pyre-fixme[7]: Expected `Tuple[Optional[Tensor], Optional[Tensor]]` but
+ # got `Tensor`.
+ return torch.as_tensor(pickle.load(hFile)[field], dtype=torch.float).to(device)
+ return None
+
+
+def load_mesh_auxiliary_data(
+ fpath: str, device: Optional[torch.device] = None
+) -> Optional[torch.Tensor]:
+ fpath_local = PathManager.get_local_path(fpath)
+ with PathManager.open(fpath_local, "rb") as hFile:
+ return torch.as_tensor(pickle.load(hFile), dtype=torch.float).to(device)
+ return None
+
+
+@lru_cache()
+def load_mesh_symmetry(
+ symmetry_fpath: str, device: Optional[torch.device] = None
+) -> Optional[Dict[str, torch.Tensor]]:
+ with PathManager.open(symmetry_fpath, "rb") as hFile:
+ symmetry_loaded = pickle.load(hFile)
+ symmetry = {
+ "vertex_transforms": torch.as_tensor(
+ symmetry_loaded["vertex_transforms"], dtype=torch.long
+ ).to(device),
+ }
+ return symmetry
+ return None
+
+
+@lru_cache()
+def create_mesh(mesh_name: str, device: Optional[torch.device] = None) -> Mesh:
+ return Mesh(mesh_info=MeshCatalog[mesh_name], device=device)
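
`Mesh` can be built either lazily from a `MeshInfo` entry in `MeshCatalog` (as `create_mesh` does) or eagerly from in-memory tensors; a minimal in-memory sketch:

```python
import torch
from densepose.structures import Mesh

# A single-triangle mesh; no catalog entry or data files are required.
tri = Mesh(
    vertices=torch.tensor([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]]),
    faces=torch.tensor([[0, 1, 2]], dtype=torch.long),
)
assert tri.device == torch.device("cpu")   # inferred from the vertex tensor
if torch.cuda.is_available():
    tri = tri.to(torch.device("cuda"))     # returns a new Mesh on the target device
```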
diff --git a/densepose/structures/transform_data.py b/densepose/structures/transform_data.py
new file mode 100644
index 0000000000000000000000000000000000000000..c85ec88514205679d39808a794c00613a8c0f495
--- /dev/null
+++ b/densepose/structures/transform_data.py
@@ -0,0 +1,73 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+from typing import BinaryIO, Dict, Union
+import torch
+
+
+def normalized_coords_transform(x0, y0, w, h):
+ """
+ Coordinates transform that maps top left corner to (-1, -1) and bottom
+ right corner to (1, 1). Used for torch.grid_sample to initialize the
+ grid
+ """
+
+ def f(p):
+ return (2 * (p[0] - x0) / w - 1, 2 * (p[1] - y0) / h - 1)
+
+ return f
+
+
+class DensePoseTransformData:
+
+ # Horizontal symmetry label transforms used for horizontal flip
+ MASK_LABEL_SYMMETRIES = [0, 1, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 14]
+ # fmt: off
+ POINT_LABEL_SYMMETRIES = [ 0, 1, 2, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15, 18, 17, 20, 19, 22, 21, 24, 23] # noqa
+ # fmt: on
+
+ def __init__(self, uv_symmetries: Dict[str, torch.Tensor], device: torch.device):
+ self.mask_label_symmetries = DensePoseTransformData.MASK_LABEL_SYMMETRIES
+ self.point_label_symmetries = DensePoseTransformData.POINT_LABEL_SYMMETRIES
+ self.uv_symmetries = uv_symmetries
+ self.device = torch.device("cpu")
+
+ def to(self, device: torch.device, copy: bool = False) -> "DensePoseTransformData":
+ """
+ Convert transform data to the specified device
+
+ Args:
+ device (torch.device): device to convert the data to
+ copy (bool): flag that specifies whether to copy or to reference the data
+ in case the device is the same
+ Return:
+ An instance of `DensePoseTransformData` with data stored on the specified device
+ """
+ if self.device == device and not copy:
+ return self
+ uv_symmetry_map = {}
+ for key in self.uv_symmetries:
+ uv_symmetry_map[key] = self.uv_symmetries[key].to(device=device, copy=copy)
+ return DensePoseTransformData(uv_symmetry_map, device)
+
+ @staticmethod
+ def load(io: Union[str, BinaryIO]):
+ """
+ Args:
+ io: (str or binary file-like object): input file to load data from
+ Returns:
+ An instance of `DensePoseTransformData` with transforms loaded from the file
+ """
+ import scipy.io
+
+ uv_symmetry_map = scipy.io.loadmat(io)
+ uv_symmetry_map_torch = {}
+ for key in ["U_transforms", "V_transforms"]:
+ uv_symmetry_map_torch[key] = []
+ map_src = uv_symmetry_map[key]
+ map_dst = uv_symmetry_map_torch[key]
+ for i in range(map_src.shape[1]):
+ map_dst.append(torch.from_numpy(map_src[0, i]).to(dtype=torch.float))
+ uv_symmetry_map_torch[key] = torch.stack(map_dst, dim=0)
+ transform_data = DensePoseTransformData(uv_symmetry_map_torch, device=torch.device("cpu"))
+ return transform_data
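
`normalized_coords_transform` maps the box corners onto the [-1, 1] grid expected by `torch.nn.functional.grid_sample`; a quick check with an arbitrary box:

```python
from densepose.structures import normalized_coords_transform

f = normalized_coords_transform(x0=10, y0=20, w=100, h=50)
assert f((10, 20)) == (-1.0, -1.0)    # top-left corner
assert f((110, 70)) == (1.0, 1.0)     # bottom-right corner
assert f((60, 45)) == (0.0, 0.0)      # box center
```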
diff --git a/densepose/utils/__init__.py b/densepose/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/densepose/utils/__pycache__/__init__.cpython-39.pyc b/densepose/utils/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..45529431ff8dcc517cf540085bd2918a4b48bc20
Binary files /dev/null and b/densepose/utils/__pycache__/__init__.cpython-39.pyc differ
diff --git a/densepose/utils/__pycache__/transform.cpython-39.pyc b/densepose/utils/__pycache__/transform.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..979e5ed0aa7dd396155869a64262d1daa6db9f69
Binary files /dev/null and b/densepose/utils/__pycache__/transform.cpython-39.pyc differ
diff --git a/densepose/utils/dbhelper.py b/densepose/utils/dbhelper.py
new file mode 100644
index 0000000000000000000000000000000000000000..ba380303a06f42674aa59f03690504f825b56ed7
--- /dev/null
+++ b/densepose/utils/dbhelper.py
@@ -0,0 +1,149 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+from typing import Any, Dict, Optional, Tuple
+
+
+class EntrySelector:
+ """
+ Base class for entry selectors
+ """
+
+ @staticmethod
+ def from_string(spec: str) -> "EntrySelector":
+ if spec == "*":
+ return AllEntrySelector()
+ return FieldEntrySelector(spec)
+
+
+class AllEntrySelector(EntrySelector):
+ """
+ Selector that accepts all entries
+ """
+
+ SPECIFIER = "*"
+
+ def __call__(self, entry):
+ return True
+
+
+class FieldEntrySelector(EntrySelector):
+ """
+ Selector that accepts only entries that match provided field
+ specifier(s). Only a limited set of specifiers is supported for now:
+      <specifiers> ::= <specifier>[<comma><specifiers>]
+      <specifier> ::= <field_name>[<type_delim><type>]<equal><value_or_range>
+      <field_name> is a valid identifier
+      <type> ::= "int" | "str"
+      <equal> ::= "="
+      <comma> ::= ","
+      <type_delim> ::= ":"
+      <value_or_range> ::= <value> | <range>
+      <range> ::= <value><range_delim><value>
+      <range_delim> ::= "-"
+      <value> is a string without spaces and special symbols
+        (e.g. <comma>, <equal>, <type_delim>, <range_delim>)
+ """
+
+ _SPEC_DELIM = ","
+ _TYPE_DELIM = ":"
+ _RANGE_DELIM = "-"
+ _EQUAL = "="
+ _ERROR_PREFIX = "Invalid field selector specifier"
+
+ class _FieldEntryValuePredicate:
+ """
+ Predicate that checks strict equality for the specified entry field
+ """
+
+ def __init__(self, name: str, typespec: Optional[str], value: str):
+ import builtins
+
+ self.name = name
+ self.type = getattr(builtins, typespec) if typespec is not None else str
+ self.value = value
+
+ def __call__(self, entry):
+ return entry[self.name] == self.type(self.value)
+
+ class _FieldEntryRangePredicate:
+ """
+ Predicate that checks whether an entry field falls into the specified range
+ """
+
+ def __init__(self, name: str, typespec: Optional[str], vmin: str, vmax: str):
+ import builtins
+
+ self.name = name
+ self.type = getattr(builtins, typespec) if typespec is not None else str
+ self.vmin = vmin
+ self.vmax = vmax
+
+ def __call__(self, entry):
+ return (entry[self.name] >= self.type(self.vmin)) and (
+ entry[self.name] <= self.type(self.vmax)
+ )
+
+ def __init__(self, spec: str):
+ self._predicates = self._parse_specifier_into_predicates(spec)
+
+ def __call__(self, entry: Dict[str, Any]):
+ for predicate in self._predicates:
+ if not predicate(entry):
+ return False
+ return True
+
+ def _parse_specifier_into_predicates(self, spec: str):
+ predicates = []
+ specs = spec.split(self._SPEC_DELIM)
+ for subspec in specs:
+ eq_idx = subspec.find(self._EQUAL)
+ if eq_idx > 0:
+ field_name_with_type = subspec[:eq_idx]
+ field_name, field_type = self._parse_field_name_type(field_name_with_type)
+ field_value_or_range = subspec[eq_idx + 1 :]
+ if self._is_range_spec(field_value_or_range):
+ vmin, vmax = self._get_range_spec(field_value_or_range)
+ predicate = FieldEntrySelector._FieldEntryRangePredicate(
+ field_name, field_type, vmin, vmax
+ )
+ else:
+ predicate = FieldEntrySelector._FieldEntryValuePredicate(
+ field_name, field_type, field_value_or_range
+ )
+ predicates.append(predicate)
+ elif eq_idx == 0:
+ self._parse_error(f'"{subspec}", field name is empty!')
+ else:
+                self._parse_error(f'"{subspec}", should have format ' "<field>=<value_or_range>!")
+ return predicates
+
+ def _parse_field_name_type(self, field_name_with_type: str) -> Tuple[str, Optional[str]]:
+ type_delim_idx = field_name_with_type.find(self._TYPE_DELIM)
+ if type_delim_idx > 0:
+ field_name = field_name_with_type[:type_delim_idx]
+ field_type = field_name_with_type[type_delim_idx + 1 :]
+ elif type_delim_idx == 0:
+ self._parse_error(f'"{field_name_with_type}", field name is empty!')
+ else:
+ field_name = field_name_with_type
+ field_type = None
+ # pyre-fixme[61]: `field_name` may not be initialized here.
+ # pyre-fixme[61]: `field_type` may not be initialized here.
+ return field_name, field_type
+
+ def _is_range_spec(self, field_value_or_range):
+ delim_idx = field_value_or_range.find(self._RANGE_DELIM)
+ return delim_idx > 0
+
+ def _get_range_spec(self, field_value_or_range):
+ if self._is_range_spec(field_value_or_range):
+ delim_idx = field_value_or_range.find(self._RANGE_DELIM)
+ vmin = field_value_or_range[:delim_idx]
+ vmax = field_value_or_range[delim_idx + 1 :]
+ return vmin, vmax
+ else:
+            self._parse_error(f'"{field_value_or_range}", range of values expected!')
+
+ def _parse_error(self, msg):
+ raise ValueError(f"{self._ERROR_PREFIX}: {msg}")
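
A sketch of the specifier grammar in practice (field names and values below are made up): an equality test on a string field combined with an integer range test.

```python
from densepose.utils.dbhelper import EntrySelector

selector = EntrySelector.from_string("dataset=coco,score:int=3-7")
assert selector({"dataset": "coco", "score": 5})        # both predicates hold
assert not selector({"dataset": "coco", "score": 9})    # 9 is outside 3-7
assert not selector({"dataset": "lvis", "score": 5})    # wrong dataset value

assert EntrySelector.from_string("*")({"anything": 0})  # "*" accepts every entry
```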
diff --git a/densepose/utils/logger.py b/densepose/utils/logger.py
new file mode 100644
index 0000000000000000000000000000000000000000..7aad2c0895afff0514c59b10cc80d01e47d50918
--- /dev/null
+++ b/densepose/utils/logger.py
@@ -0,0 +1,15 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+import logging
+
+
+def verbosity_to_level(verbosity) -> int:
+ if verbosity is not None:
+ if verbosity == 0:
+ return logging.WARNING
+ elif verbosity == 1:
+ return logging.INFO
+ elif verbosity >= 2:
+ return logging.DEBUG
+ return logging.WARNING
diff --git a/densepose/utils/transform.py b/densepose/utils/transform.py
new file mode 100644
index 0000000000000000000000000000000000000000..5f8a8ba038588bf8c014390f8b8feadfcdc40307
--- /dev/null
+++ b/densepose/utils/transform.py
@@ -0,0 +1,17 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+from detectron2.data import MetadataCatalog
+from detectron2.utils.file_io import PathManager
+
+from densepose import DensePoseTransformData
+
+
+def load_for_dataset(dataset_name):
+ path = MetadataCatalog.get(dataset_name).densepose_transform_src
+ densepose_transform_data_fpath = PathManager.get_local_path(path)
+ return DensePoseTransformData.load(densepose_transform_data_fpath)
+
+
+def load_from_cfg(cfg):
+ return load_for_dataset(cfg.DATASETS.TEST[0])
diff --git a/densepose/vis/__init__.py b/densepose/vis/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/densepose/vis/__pycache__/__init__.cpython-39.pyc b/densepose/vis/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e293aa2ffa8b0ce5cf6b69d26a05c881c7deebea
Binary files /dev/null and b/densepose/vis/__pycache__/__init__.cpython-39.pyc differ
diff --git a/densepose/vis/__pycache__/base.cpython-39.pyc b/densepose/vis/__pycache__/base.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5c565bdf280e17a433fc48caec6d3378f6e17251
Binary files /dev/null and b/densepose/vis/__pycache__/base.cpython-39.pyc differ
diff --git a/densepose/vis/__pycache__/bounding_box.cpython-39.pyc b/densepose/vis/__pycache__/bounding_box.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..07c03156bc94a24482afefb71c54c580536020ce
Binary files /dev/null and b/densepose/vis/__pycache__/bounding_box.cpython-39.pyc differ
diff --git a/densepose/vis/__pycache__/densepose_outputs_vertex.cpython-39.pyc b/densepose/vis/__pycache__/densepose_outputs_vertex.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..11feb65e9310f0ee96445fb31fd31de9e129b8d5
Binary files /dev/null and b/densepose/vis/__pycache__/densepose_outputs_vertex.cpython-39.pyc differ
diff --git a/densepose/vis/__pycache__/densepose_results.cpython-39.pyc b/densepose/vis/__pycache__/densepose_results.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..af6e1f537ba749d0897ee69e4036ff43ff40cb26
Binary files /dev/null and b/densepose/vis/__pycache__/densepose_results.cpython-39.pyc differ
diff --git a/densepose/vis/__pycache__/densepose_results_textures.cpython-39.pyc b/densepose/vis/__pycache__/densepose_results_textures.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..336c6dc9692946ffcd3ddaa47cfab22f3a893bc3
Binary files /dev/null and b/densepose/vis/__pycache__/densepose_results_textures.cpython-39.pyc differ
diff --git a/densepose/vis/__pycache__/extractor.cpython-39.pyc b/densepose/vis/__pycache__/extractor.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..940ab2d0ade32e7013675a6c02fecab6a9f2836d
Binary files /dev/null and b/densepose/vis/__pycache__/extractor.cpython-39.pyc differ
diff --git a/densepose/vis/base.py b/densepose/vis/base.py
new file mode 100644
index 0000000000000000000000000000000000000000..6a7b07000c41f49386de5d7752c0d277b9da1979
--- /dev/null
+++ b/densepose/vis/base.py
@@ -0,0 +1,193 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+import logging
+import numpy as np
+import cv2
+import torch
+
+Image = np.ndarray
+Boxes = torch.Tensor
+
+
+class MatrixVisualizer:
+ """
+ Base visualizer for matrix data
+ """
+
+ def __init__(
+ self,
+ inplace=True,
+ cmap=cv2.COLORMAP_PARULA,
+ val_scale=1.0,
+ alpha=0.7,
+ interp_method_matrix=cv2.INTER_LINEAR,
+ interp_method_mask=cv2.INTER_NEAREST,
+ ):
+ self.inplace = inplace
+ self.cmap = cmap
+ self.val_scale = val_scale
+ self.alpha = alpha
+ self.interp_method_matrix = interp_method_matrix
+ self.interp_method_mask = interp_method_mask
+
+ def visualize(self, image_bgr, mask, matrix, bbox_xywh):
+ self._check_image(image_bgr)
+ self._check_mask_matrix(mask, matrix)
+ if self.inplace:
+ image_target_bgr = image_bgr
+ else:
+ image_target_bgr = image_bgr * 0
+ x, y, w, h = [int(v) for v in bbox_xywh]
+ if w <= 0 or h <= 0:
+ return image_bgr
+ mask, matrix = self._resize(mask, matrix, w, h)
+ mask_bg = np.tile((mask == 0)[:, :, np.newaxis], [1, 1, 3])
+ matrix_scaled = matrix.astype(np.float32) * self.val_scale
+ _EPSILON = 1e-6
+ if np.any(matrix_scaled > 255 + _EPSILON):
+ logger = logging.getLogger(__name__)
+ logger.warning(
+ f"Matrix has values > {255 + _EPSILON} after " f"scaling, clipping to [0..255]"
+ )
+ matrix_scaled_8u = matrix_scaled.clip(0, 255).astype(np.uint8)
+ matrix_vis = cv2.applyColorMap(matrix_scaled_8u, self.cmap)
+ matrix_vis[mask_bg] = image_target_bgr[y : y + h, x : x + w, :][mask_bg]
+ image_target_bgr[y : y + h, x : x + w, :] = (
+ image_target_bgr[y : y + h, x : x + w, :] * (1.0 - self.alpha) + matrix_vis * self.alpha
+ )
+ return image_target_bgr.astype(np.uint8)
+
+ def _resize(self, mask, matrix, w, h):
+ if (w != mask.shape[1]) or (h != mask.shape[0]):
+            mask = cv2.resize(mask, (w, h), interpolation=self.interp_method_mask)
+ if (w != matrix.shape[1]) or (h != matrix.shape[0]):
+            matrix = cv2.resize(matrix, (w, h), interpolation=self.interp_method_matrix)
+ return mask, matrix
+
+ def _check_image(self, image_rgb):
+ assert len(image_rgb.shape) == 3
+ assert image_rgb.shape[2] == 3
+ assert image_rgb.dtype == np.uint8
+
+ def _check_mask_matrix(self, mask, matrix):
+ assert len(matrix.shape) == 2
+ assert len(mask.shape) == 2
+ assert mask.dtype == np.uint8
+
+
+class RectangleVisualizer:
+
+ _COLOR_GREEN = (18, 127, 15)
+
+ def __init__(self, color=_COLOR_GREEN, thickness=1):
+ self.color = color
+ self.thickness = thickness
+
+ def visualize(self, image_bgr, bbox_xywh, color=None, thickness=None):
+ x, y, w, h = bbox_xywh
+ color = color or self.color
+ thickness = thickness or self.thickness
+ cv2.rectangle(image_bgr, (int(x), int(y)), (int(x + w), int(y + h)), color, thickness)
+ return image_bgr
+
+
+class PointsVisualizer:
+
+ _COLOR_GREEN = (18, 127, 15)
+
+ def __init__(self, color_bgr=_COLOR_GREEN, r=5):
+ self.color_bgr = color_bgr
+ self.r = r
+
+ def visualize(self, image_bgr, pts_xy, colors_bgr=None, rs=None):
+ for j, pt_xy in enumerate(pts_xy):
+ x, y = pt_xy
+ color_bgr = colors_bgr[j] if colors_bgr is not None else self.color_bgr
+ r = rs[j] if rs is not None else self.r
+ cv2.circle(image_bgr, (x, y), r, color_bgr, -1)
+ return image_bgr
+
+
+class TextVisualizer:
+
+ _COLOR_GRAY = (218, 227, 218)
+ _COLOR_WHITE = (255, 255, 255)
+
+ def __init__(
+ self,
+ font_face=cv2.FONT_HERSHEY_SIMPLEX,
+ font_color_bgr=_COLOR_GRAY,
+ font_scale=0.35,
+ font_line_type=cv2.LINE_AA,
+ font_line_thickness=1,
+ fill_color_bgr=_COLOR_WHITE,
+ fill_color_transparency=1.0,
+ frame_color_bgr=_COLOR_WHITE,
+ frame_color_transparency=1.0,
+ frame_thickness=1,
+ ):
+ self.font_face = font_face
+ self.font_color_bgr = font_color_bgr
+ self.font_scale = font_scale
+ self.font_line_type = font_line_type
+ self.font_line_thickness = font_line_thickness
+ self.fill_color_bgr = fill_color_bgr
+ self.fill_color_transparency = fill_color_transparency
+ self.frame_color_bgr = frame_color_bgr
+ self.frame_color_transparency = frame_color_transparency
+ self.frame_thickness = frame_thickness
+
+ def visualize(self, image_bgr, txt, topleft_xy):
+ txt_w, txt_h = self.get_text_size_wh(txt)
+ topleft_xy = tuple(map(int, topleft_xy))
+ x, y = topleft_xy
+ if self.frame_color_transparency < 1.0:
+ t = self.frame_thickness
+ image_bgr[y - t : y + txt_h + t, x - t : x + txt_w + t, :] = (
+ image_bgr[y - t : y + txt_h + t, x - t : x + txt_w + t, :]
+ * self.frame_color_transparency
+ + np.array(self.frame_color_bgr) * (1.0 - self.frame_color_transparency)
+ ).astype(float)
+ if self.fill_color_transparency < 1.0:
+ image_bgr[y : y + txt_h, x : x + txt_w, :] = (
+ image_bgr[y : y + txt_h, x : x + txt_w, :] * self.fill_color_transparency
+ + np.array(self.fill_color_bgr) * (1.0 - self.fill_color_transparency)
+ ).astype(float)
+ cv2.putText(
+ image_bgr,
+ txt,
+ topleft_xy,
+ self.font_face,
+ self.font_scale,
+ self.font_color_bgr,
+ self.font_line_thickness,
+ self.font_line_type,
+ )
+ return image_bgr
+
+ def get_text_size_wh(self, txt):
+ ((txt_w, txt_h), _) = cv2.getTextSize(
+ txt, self.font_face, self.font_scale, self.font_line_thickness
+ )
+ return txt_w, txt_h
+
+
+class CompoundVisualizer:
+ def __init__(self, visualizers):
+ self.visualizers = visualizers
+
+ def visualize(self, image_bgr, data):
+ assert len(data) == len(
+ self.visualizers
+        ), "The number of data items {} should match the number of visualizers" " {}".format(
+ len(data), len(self.visualizers)
+ )
+ image = image_bgr
+ for i, visualizer in enumerate(self.visualizers):
+ image = visualizer.visualize(image, data[i])
+ return image
+
+ def __str__(self):
+ visualizer_str = ", ".join([str(v) for v in self.visualizers])
+ return "Compound Visualizer [{}]".format(visualizer_str)
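
A sketch of `MatrixVisualizer` overlaying a fake part-index map onto a blank BGR frame (image size, bbox, and colormap scaling chosen purely for illustration):

```python
import numpy as np
from densepose.vis.base import MatrixVisualizer

image_bgr = np.zeros((240, 320, 3), dtype=np.uint8)
matrix = np.random.randint(0, 15, (64, 64), dtype=np.uint8)   # 14 parts + background
mask = (matrix > 0).astype(np.uint8)                          # foreground mask

vis = MatrixVisualizer(val_scale=255.0 / 14)   # stretch part indices to [0, 255]
image_vis = vis.visualize(image_bgr, mask, matrix, bbox_xywh=(100, 80, 64, 64))
assert image_vis.shape == image_bgr.shape and image_vis.dtype == np.uint8
```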
diff --git a/densepose/vis/bounding_box.py b/densepose/vis/bounding_box.py
new file mode 100644
index 0000000000000000000000000000000000000000..a88ba0ce74b8da539ea3a25c703a9795be8163a6
--- /dev/null
+++ b/densepose/vis/bounding_box.py
@@ -0,0 +1,39 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+from .base import RectangleVisualizer, TextVisualizer
+
+
+class BoundingBoxVisualizer:
+ def __init__(self):
+ self.rectangle_visualizer = RectangleVisualizer()
+
+ def visualize(self, image_bgr, boxes_xywh):
+ for bbox_xywh in boxes_xywh:
+ image_bgr = self.rectangle_visualizer.visualize(image_bgr, bbox_xywh)
+ return image_bgr
+
+
+class ScoredBoundingBoxVisualizer:
+ def __init__(self, bbox_visualizer_params=None, score_visualizer_params=None, **kwargs):
+ if bbox_visualizer_params is None:
+ bbox_visualizer_params = {}
+ if score_visualizer_params is None:
+ score_visualizer_params = {}
+ self.visualizer_bbox = RectangleVisualizer(**bbox_visualizer_params)
+ self.visualizer_score = TextVisualizer(**score_visualizer_params)
+
+ def visualize(self, image_bgr, scored_bboxes):
+ boxes_xywh, box_scores = scored_bboxes
+ assert len(boxes_xywh) == len(
+ box_scores
+ ), "Number of bounding boxes {} should be equal to the number of scores {}".format(
+ len(boxes_xywh), len(box_scores)
+ )
+ for i, box_xywh in enumerate(boxes_xywh):
+ score_i = box_scores[i]
+ image_bgr = self.visualizer_bbox.visualize(image_bgr, box_xywh)
+ score_txt = "{0:6.4f}".format(score_i)
+ topleft_xy = box_xywh[0], box_xywh[1]
+ image_bgr = self.visualizer_score.visualize(image_bgr, score_txt, topleft_xy)
+ return image_bgr
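
`ScoredBoundingBoxVisualizer` expects a `(boxes_xywh, scores)` pair; a minimal sketch on a blank frame with made-up detections:

```python
import numpy as np
from densepose.vis.bounding_box import ScoredBoundingBoxVisualizer

frame = np.zeros((200, 200, 3), dtype=np.uint8)
boxes_xywh = [(10, 30, 50, 80), (90, 40, 60, 60)]   # hypothetical detections
scores = [0.91, 0.47]
frame = ScoredBoundingBoxVisualizer().visualize(frame, (boxes_xywh, scores))
```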
diff --git a/densepose/vis/densepose_data_points.py b/densepose/vis/densepose_data_points.py
new file mode 100644
index 0000000000000000000000000000000000000000..de809f64ee09a50291999774d91443e3edd869ea
--- /dev/null
+++ b/densepose/vis/densepose_data_points.py
@@ -0,0 +1,108 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+import numpy as np
+from typing import Iterable, Optional, Tuple
+import cv2
+
+from densepose.structures import DensePoseDataRelative
+
+from .base import Boxes, Image, MatrixVisualizer, PointsVisualizer
+
+
+class DensePoseDataCoarseSegmentationVisualizer:
+ """
+ Visualizer for ground truth segmentation
+ """
+
+ def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7, **kwargs):
+ self.mask_visualizer = MatrixVisualizer(
+ inplace=inplace,
+ cmap=cmap,
+ val_scale=255.0 / DensePoseDataRelative.N_BODY_PARTS,
+ alpha=alpha,
+ )
+
+ def visualize(
+ self,
+ image_bgr: Image,
+ bbox_densepose_datas: Optional[Tuple[Iterable[Boxes], Iterable[DensePoseDataRelative]]],
+ ) -> Image:
+ if bbox_densepose_datas is None:
+ return image_bgr
+ for bbox_xywh, densepose_data in zip(*bbox_densepose_datas):
+ matrix = densepose_data.segm.numpy()
+ mask = np.zeros(matrix.shape, dtype=np.uint8)
+ mask[matrix > 0] = 1
+ image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh.numpy())
+ return image_bgr
+
+
+class DensePoseDataPointsVisualizer:
+ def __init__(self, densepose_data_to_value_fn=None, cmap=cv2.COLORMAP_PARULA, **kwargs):
+ self.points_visualizer = PointsVisualizer()
+ self.densepose_data_to_value_fn = densepose_data_to_value_fn
+ self.cmap = cmap
+
+ def visualize(
+ self,
+ image_bgr: Image,
+ bbox_densepose_datas: Optional[Tuple[Iterable[Boxes], Iterable[DensePoseDataRelative]]],
+ ) -> Image:
+ if bbox_densepose_datas is None:
+ return image_bgr
+ for bbox_xywh, densepose_data in zip(*bbox_densepose_datas):
+ x0, y0, w, h = bbox_xywh.numpy()
+ x = densepose_data.x.numpy() * w / 255.0 + x0
+ y = densepose_data.y.numpy() * h / 255.0 + y0
+ pts_xy = zip(x, y)
+ if self.densepose_data_to_value_fn is None:
+ image_bgr = self.points_visualizer.visualize(image_bgr, pts_xy)
+ else:
+ v = self.densepose_data_to_value_fn(densepose_data)
+ img_colors_bgr = cv2.applyColorMap(v, self.cmap)
+ colors_bgr = [
+ [int(v) for v in img_color_bgr.ravel()] for img_color_bgr in img_colors_bgr
+ ]
+ image_bgr = self.points_visualizer.visualize(image_bgr, pts_xy, colors_bgr)
+ return image_bgr
+
+
+def _densepose_data_u_for_cmap(densepose_data):
+ u = np.clip(densepose_data.u.numpy(), 0, 1) * 255.0
+ return u.astype(np.uint8)
+
+
+def _densepose_data_v_for_cmap(densepose_data):
+ v = np.clip(densepose_data.v.numpy(), 0, 1) * 255.0
+ return v.astype(np.uint8)
+
+
+def _densepose_data_i_for_cmap(densepose_data):
+ i = (
+ np.clip(densepose_data.i.numpy(), 0.0, DensePoseDataRelative.N_PART_LABELS)
+ * 255.0
+ / DensePoseDataRelative.N_PART_LABELS
+ )
+ return i.astype(np.uint8)
+
+
+class DensePoseDataPointsUVisualizer(DensePoseDataPointsVisualizer):
+ def __init__(self, **kwargs):
+ super(DensePoseDataPointsUVisualizer, self).__init__(
+ densepose_data_to_value_fn=_densepose_data_u_for_cmap, **kwargs
+ )
+
+
+class DensePoseDataPointsVVisualizer(DensePoseDataPointsVisualizer):
+ def __init__(self, **kwargs):
+ super(DensePoseDataPointsVVisualizer, self).__init__(
+ densepose_data_to_value_fn=_densepose_data_v_for_cmap, **kwargs
+ )
+
+
+class DensePoseDataPointsIVisualizer(DensePoseDataPointsVisualizer):
+ def __init__(self, **kwargs):
+ super(DensePoseDataPointsIVisualizer, self).__init__(
+ densepose_data_to_value_fn=_densepose_data_i_for_cmap, **kwargs
+ )
diff --git a/densepose/vis/densepose_outputs_iuv.py b/densepose/vis/densepose_outputs_iuv.py
new file mode 100644
index 0000000000000000000000000000000000000000..960ffba0d4146eda0a4dcd2220c724d944834b33
--- /dev/null
+++ b/densepose/vis/densepose_outputs_iuv.py
@@ -0,0 +1,103 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+import numpy as np
+from typing import Optional, Tuple
+import cv2
+
+from densepose.structures import DensePoseDataRelative
+
+from ..structures import DensePoseChartPredictorOutput
+from .base import Boxes, Image, MatrixVisualizer
+
+
+class DensePoseOutputsVisualizer:
+ def __init__(
+ self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7, to_visualize=None, **kwargs
+ ):
+ assert to_visualize in "IUV", "can only visualize IUV"
+ self.to_visualize = to_visualize
+
+ if self.to_visualize == "I":
+ val_scale = 255.0 / DensePoseDataRelative.N_PART_LABELS
+ else:
+ val_scale = 1.0
+ self.mask_visualizer = MatrixVisualizer(
+ inplace=inplace, cmap=cmap, val_scale=val_scale, alpha=alpha
+ )
+
+ def visualize(
+ self,
+ image_bgr: Image,
+ dp_output_with_bboxes: Tuple[Optional[DensePoseChartPredictorOutput], Optional[Boxes]],
+ ) -> Image:
+ densepose_output, bboxes_xywh = dp_output_with_bboxes
+ if densepose_output is None or bboxes_xywh is None:
+ return image_bgr
+
+ assert isinstance(
+ densepose_output, DensePoseChartPredictorOutput
+ ), "DensePoseChartPredictorOutput expected, {} encountered".format(type(densepose_output))
+
+ S = densepose_output.coarse_segm
+ I = densepose_output.fine_segm # noqa
+ U = densepose_output.u
+ V = densepose_output.v
+ N = S.size(0)
+ assert N == I.size(
+ 0
+ ), "densepose outputs S {} and I {}" " should have equal first dim size".format(
+ S.size(), I.size()
+ )
+ assert N == U.size(
+ 0
+ ), "densepose outputs S {} and U {}" " should have equal first dim size".format(
+ S.size(), U.size()
+ )
+ assert N == V.size(
+ 0
+ ), "densepose outputs S {} and V {}" " should have equal first dim size".format(
+ S.size(), V.size()
+ )
+ assert N == len(
+ bboxes_xywh
+ ), "number of bounding boxes {}" " should be equal to first dim size of outputs {}".format(
+ len(bboxes_xywh), N
+ )
+ for n in range(N):
+ Sn = S[n].argmax(dim=0)
+ In = I[n].argmax(dim=0) * (Sn > 0).long()
+ segmentation = In.cpu().numpy().astype(np.uint8)
+ mask = np.zeros(segmentation.shape, dtype=np.uint8)
+ mask[segmentation > 0] = 1
+ bbox_xywh = bboxes_xywh[n]
+
+ if self.to_visualize == "I":
+ vis = segmentation
+ elif self.to_visualize in "UV":
+ U_or_Vn = {"U": U, "V": V}[self.to_visualize][n].cpu().numpy().astype(np.float32)
+ vis = np.zeros(segmentation.shape, dtype=np.float32)
+ for partId in range(U_or_Vn.shape[0]):
+ vis[segmentation == partId] = (
+ U_or_Vn[partId][segmentation == partId].clip(0, 1) * 255
+ )
+
+ # pyre-fixme[61]: `vis` may not be initialized here.
+ image_bgr = self.mask_visualizer.visualize(image_bgr, mask, vis, bbox_xywh)
+
+ return image_bgr
+
+
+class DensePoseOutputsUVisualizer(DensePoseOutputsVisualizer):
+ def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7, **kwargs):
+ super().__init__(inplace=inplace, cmap=cmap, alpha=alpha, to_visualize="U", **kwargs)
+
+
+class DensePoseOutputsVVisualizer(DensePoseOutputsVisualizer):
+ def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7, **kwargs):
+ super().__init__(inplace=inplace, cmap=cmap, alpha=alpha, to_visualize="V", **kwargs)
+
+
+class DensePoseOutputsFineSegmentationVisualizer(DensePoseOutputsVisualizer):
+ def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7, **kwargs):
+ super().__init__(inplace=inplace, cmap=cmap, alpha=alpha, to_visualize="I", **kwargs)
diff --git a/densepose/vis/densepose_outputs_vertex.py b/densepose/vis/densepose_outputs_vertex.py
new file mode 100644
index 0000000000000000000000000000000000000000..fe296fcf81e5711eea21049a1b4de17eb2541b3f
--- /dev/null
+++ b/densepose/vis/densepose_outputs_vertex.py
@@ -0,0 +1,231 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+# pyre-unsafe
+import json
+import numpy as np
+from functools import lru_cache
+from typing import Dict, List, Optional, Tuple
+import cv2
+import torch
+
+from detectron2.utils.file_io import PathManager
+
+from densepose.modeling import build_densepose_embedder
+from densepose.modeling.cse.utils import get_closest_vertices_mask_from_ES
+
+from ..data.utils import get_class_to_mesh_name_mapping
+from ..structures import DensePoseEmbeddingPredictorOutput
+from ..structures.mesh import create_mesh
+from .base import Boxes, Image, MatrixVisualizer
+from .densepose_results_textures import get_texture_atlas
+
+
+@lru_cache()
+def get_xyz_vertex_embedding(mesh_name: str, device: torch.device):
+ if mesh_name == "smpl_27554":
+ embed_path = PathManager.get_local_path(
+ "https://dl.fbaipublicfiles.com/densepose/data/cse/mds_d=256.npy"
+ )
+ embed_map, _ = np.load(embed_path, allow_pickle=True)
+ embed_map = torch.tensor(embed_map).float()[:, 0]
+ embed_map -= embed_map.min()
+ embed_map /= embed_map.max()
+ else:
+ mesh = create_mesh(mesh_name, device)
+ embed_map = mesh.vertices.sum(dim=1)
+ embed_map -= embed_map.min()
+ embed_map /= embed_map.max()
+ embed_map = embed_map**2
+ return embed_map
+
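+# get_xyz_vertex_embedding maps every mesh vertex to a scalar in [0, 1] that is later
+# used as a pseudo-color: for "smpl_27554" it loads a precomputed MDS embedding and
+# min-max normalizes its first column, while for any other mesh it normalizes (and
+# squares) the sum of the vertex coordinates. The lru_cache avoids re-downloading or
+# rebuilding the mesh embedding for every detection.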
+
+class DensePoseOutputsVertexVisualizer:
+ def __init__(
+ self,
+ cfg,
+ inplace=True,
+ cmap=cv2.COLORMAP_JET,
+ alpha=0.7,
+ device="cuda",
+ default_class=0,
+ **kwargs,
+ ):
+ self.mask_visualizer = MatrixVisualizer(
+ inplace=inplace, cmap=cmap, val_scale=1.0, alpha=alpha
+ )
+ self.class_to_mesh_name = get_class_to_mesh_name_mapping(cfg)
+ self.embedder = build_densepose_embedder(cfg)
+ self.device = torch.device(device)
+ self.default_class = default_class
+
+ self.mesh_vertex_embeddings = {
+ mesh_name: self.embedder(mesh_name).to(self.device)
+ for mesh_name in self.class_to_mesh_name.values()
+ if self.embedder.has_embeddings(mesh_name)
+ }
+
+ def visualize(
+ self,
+ image_bgr: Image,
+ outputs_boxes_xywh_classes: Tuple[
+ Optional[DensePoseEmbeddingPredictorOutput], Optional[Boxes], Optional[List[int]]
+ ],
+ ) -> Image:
+ if outputs_boxes_xywh_classes[0] is None:
+ return image_bgr
+
+ S, E, N, bboxes_xywh, pred_classes = self.extract_and_check_outputs_and_boxes(
+ outputs_boxes_xywh_classes
+ )
+
+ for n in range(N):
+ x, y, w, h = bboxes_xywh[n].int().tolist()
+ mesh_name = self.class_to_mesh_name[pred_classes[n]]
+ closest_vertices, mask = get_closest_vertices_mask_from_ES(
+ E[[n]],
+ S[[n]],
+ h,
+ w,
+ self.mesh_vertex_embeddings[mesh_name],
+ self.device,
+ )
+ embed_map = get_xyz_vertex_embedding(mesh_name, self.device)
+ vis = (embed_map[closest_vertices].clip(0, 1) * 255.0).cpu().numpy()
+ mask_numpy = mask.cpu().numpy().astype(dtype=np.uint8)
+ image_bgr = self.mask_visualizer.visualize(image_bgr, mask_numpy, vis, [x, y, w, h])
+
+ return image_bgr
+
+ def extract_and_check_outputs_and_boxes(self, outputs_boxes_xywh_classes):
+
+ densepose_output, bboxes_xywh, pred_classes = outputs_boxes_xywh_classes
+
+ if pred_classes is None:
+ pred_classes = [self.default_class] * len(bboxes_xywh)
+
+ assert isinstance(
+ densepose_output, DensePoseEmbeddingPredictorOutput
+ ), "DensePoseEmbeddingPredictorOutput expected, {} encountered".format(
+ type(densepose_output)
+ )
+
+ S = densepose_output.coarse_segm
+ E = densepose_output.embedding
+ N = S.size(0)
+ assert N == E.size(
+ 0
+ ), "CSE coarse_segm {} and embeddings {}" " should have equal first dim size".format(
+ S.size(), E.size()
+ )
+ assert N == len(
+ bboxes_xywh
+ ), "number of bounding boxes {}" " should be equal to first dim size of outputs {}".format(
+ len(bboxes_xywh), N
+ )
+ assert N == len(pred_classes), (
+ "number of predicted classes {}"
+ " should be equal to first dim size of outputs {}".format(len(bboxes_xywh), N)
+ )
+
+ return S, E, N, bboxes_xywh, pred_classes
+
+
+def get_texture_atlases(json_str: Optional[str]) -> Optional[Dict[str, Optional[np.ndarray]]]:
+ """
+ json_str is a JSON string representing a mesh_name -> texture_atlas_path dictionary
+ """
+ if json_str is None:
+ return None
+
+ paths = json.loads(json_str)
+ return {mesh_name: get_texture_atlas(path) for mesh_name, path in paths.items()}
+
+
+class DensePoseOutputsTextureVisualizer(DensePoseOutputsVertexVisualizer):
+ def __init__(
+ self,
+ cfg,
+ texture_atlases_dict,
+ device="cuda",
+ default_class=0,
+ **kwargs,
+ ):
+ self.embedder = build_densepose_embedder(cfg)
+
+ self.texture_image_dict = {}
+ self.alpha_dict = {}
+
+ for mesh_name in texture_atlases_dict.keys():
+ if texture_atlases_dict[mesh_name].shape[-1] == 4: # Image with alpha channel
+ self.alpha_dict[mesh_name] = texture_atlases_dict[mesh_name][:, :, -1] / 255.0
+ self.texture_image_dict[mesh_name] = texture_atlases_dict[mesh_name][:, :, :3]
+ else:
+ self.alpha_dict[mesh_name] = texture_atlases_dict[mesh_name].sum(axis=-1) > 0
+ self.texture_image_dict[mesh_name] = texture_atlases_dict[mesh_name]
+
+ self.device = torch.device(device)
+ self.class_to_mesh_name = get_class_to_mesh_name_mapping(cfg)
+ self.default_class = default_class
+
+ self.mesh_vertex_embeddings = {
+ mesh_name: self.embedder(mesh_name).to(self.device)
+ for mesh_name in self.class_to_mesh_name.values()
+ }
+
+ def visualize(
+ self,
+ image_bgr: Image,
+ outputs_boxes_xywh_classes: Tuple[
+ Optional[DensePoseEmbeddingPredictorOutput], Optional[Boxes], Optional[List[int]]
+ ],
+ ) -> Image:
+ image_target_bgr = image_bgr.copy()
+ if outputs_boxes_xywh_classes[0] is None:
+ return image_target_bgr
+
+ S, E, N, bboxes_xywh, pred_classes = self.extract_and_check_outputs_and_boxes(
+ outputs_boxes_xywh_classes
+ )
+
+ meshes = {
+ p: create_mesh(self.class_to_mesh_name[p], self.device) for p in np.unique(pred_classes)
+ }
+
+ for n in range(N):
+ x, y, w, h = bboxes_xywh[n].int().cpu().numpy()
+ mesh_name = self.class_to_mesh_name[pred_classes[n]]
+ closest_vertices, mask = get_closest_vertices_mask_from_ES(
+ E[[n]],
+ S[[n]],
+ h,
+ w,
+ self.mesh_vertex_embeddings[mesh_name],
+ self.device,
+ )
+ uv_array = meshes[pred_classes[n]].texcoords[closest_vertices].permute((2, 0, 1))
+ uv_array = uv_array.cpu().numpy().clip(0, 1)
+ textured_image = self.generate_image_with_texture(
+ image_target_bgr[y : y + h, x : x + w],
+ uv_array,
+ mask.cpu().numpy(),
+ self.class_to_mesh_name[pred_classes[n]],
+ )
+ if textured_image is None:
+ continue
+ image_target_bgr[y : y + h, x : x + w] = textured_image
+
+ return image_target_bgr
+
+ def generate_image_with_texture(self, bbox_image_bgr, uv_array, mask, mesh_name):
+ alpha = self.alpha_dict.get(mesh_name)
+ texture_image = self.texture_image_dict.get(mesh_name)
+ if alpha is None or texture_image is None:
+ return None
+ U, V = uv_array
+ x_index = (U * texture_image.shape[1]).astype(int)
+ y_index = (V * texture_image.shape[0]).astype(int)
+ local_texture = texture_image[y_index, x_index][mask]
+ local_alpha = np.expand_dims(alpha[y_index, x_index][mask], -1)
+ output_image = bbox_image_bgr.copy()
+ output_image[mask] = output_image[mask] * (1 - local_alpha) + local_texture * local_alpha
+ return output_image.astype(np.uint8)
diff --git a/densepose/vis/densepose_results.py b/densepose/vis/densepose_results.py
new file mode 100644
index 0000000000000000000000000000000000000000..d49a3828339b6ff03735924d3621396ca8f00e5c
--- /dev/null
+++ b/densepose/vis/densepose_results.py
@@ -0,0 +1,357 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+import logging
+import numpy as np
+from typing import List, Optional, Tuple
+import cv2
+import torch
+
+from densepose.structures import DensePoseDataRelative
+
+from ..structures import DensePoseChartResult
+from .base import Boxes, Image, MatrixVisualizer
+
+
+class DensePoseResultsVisualizer:
+ def visualize(
+ self,
+ image_bgr: Image,
+ results_and_boxes_xywh: Tuple[Optional[List[DensePoseChartResult]], Optional[Boxes]],
+ ) -> Image:
+ densepose_result, boxes_xywh = results_and_boxes_xywh
+ if densepose_result is None or boxes_xywh is None:
+ return image_bgr
+
+ boxes_xywh = boxes_xywh.cpu().numpy()
+ context = self.create_visualization_context(image_bgr)
+ for i, result in enumerate(densepose_result):
+ iuv_array = torch.cat(
+ (result.labels[None].type(torch.float32), result.uv * 255.0)
+ ).type(torch.uint8)
+ self.visualize_iuv_arr(context, iuv_array.cpu().numpy(), boxes_xywh[i])
+ image_bgr = self.context_to_image_bgr(context)
+ return image_bgr
+
+ def create_visualization_context(self, image_bgr: Image):
+ return image_bgr
+
+ def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh) -> None:
+ pass
+
+ def context_to_image_bgr(self, context):
+ return context
+
+ def get_image_bgr_from_context(self, context):
+ return context
+
+
+class DensePoseMaskedColormapResultsVisualizer(DensePoseResultsVisualizer):
+ def __init__(
+ self,
+ data_extractor,
+ segm_extractor,
+ inplace=True,
+ cmap=cv2.COLORMAP_PARULA,
+ alpha=0.7,
+ val_scale=1.0,
+ **kwargs,
+ ):
+ self.mask_visualizer = MatrixVisualizer(
+ inplace=inplace, cmap=cmap, val_scale=val_scale, alpha=alpha
+ )
+ self.data_extractor = data_extractor
+ self.segm_extractor = segm_extractor
+
+ def context_to_image_bgr(self, context):
+ return context
+
+ def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh) -> None:
+ image_bgr = self.get_image_bgr_from_context(context)
+ matrix = self.data_extractor(iuv_arr)
+ segm = self.segm_extractor(iuv_arr)
+ mask = np.zeros(matrix.shape, dtype=np.uint8)
+ mask[segm > 0] = 1
+ image_bgr = self.mask_visualizer.visualize(image_bgr, mask, matrix, bbox_xywh)
+
+
+def _extract_i_from_iuvarr(iuv_arr):
+ return iuv_arr[0, :, :]
+
+
+def _extract_u_from_iuvarr(iuv_arr):
+ return iuv_arr[1, :, :]
+
+
+def _extract_v_from_iuvarr(iuv_arr):
+ return iuv_arr[2, :, :]
+
+
+class DensePoseResultsMplContourVisualizer(DensePoseResultsVisualizer):
+ def __init__(self, levels=10, **kwargs):
+ self.levels = levels
+ self.plot_args = kwargs
+
+ def create_visualization_context(self, image_bgr: Image):
+ import matplotlib.pyplot as plt
+ from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
+
+ context = {}
+ context["image_bgr"] = image_bgr
+ dpi = 100
+ height_inches = float(image_bgr.shape[0]) / dpi
+ width_inches = float(image_bgr.shape[1]) / dpi
+ fig = plt.figure(figsize=(width_inches, height_inches), dpi=dpi)
+ plt.axes([0, 0, 1, 1])
+ plt.axis("off")
+ context["fig"] = fig
+ canvas = FigureCanvas(fig)
+ context["canvas"] = canvas
+ extent = (0, image_bgr.shape[1], image_bgr.shape[0], 0)
+ plt.imshow(image_bgr[:, :, ::-1], extent=extent)
+ return context
+
+ def context_to_image_bgr(self, context):
+ fig = context["fig"]
+ w, h = map(int, fig.get_size_inches() * fig.get_dpi())
+ canvas = context["canvas"]
+ canvas.draw()
+        image_1d = np.frombuffer(canvas.tostring_rgb(), dtype="uint8")
+ image_rgb = image_1d.reshape(h, w, 3)
+ image_bgr = image_rgb[:, :, ::-1].copy()
+ return image_bgr
+
+ def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh: Boxes) -> None:
+ import matplotlib.pyplot as plt
+
+ u = _extract_u_from_iuvarr(iuv_arr).astype(float) / 255.0
+ v = _extract_v_from_iuvarr(iuv_arr).astype(float) / 255.0
+ extent = (
+ bbox_xywh[0],
+ bbox_xywh[0] + bbox_xywh[2],
+ bbox_xywh[1],
+ bbox_xywh[1] + bbox_xywh[3],
+ )
+ plt.contour(u, self.levels, extent=extent, **self.plot_args)
+ plt.contour(v, self.levels, extent=extent, **self.plot_args)
+
+
+class DensePoseResultsCustomContourVisualizer(DensePoseResultsVisualizer):
+ """
+ Contour visualization using marching squares
+ """
+
+ def __init__(self, levels=10, **kwargs):
+ # TODO: colormap is hardcoded
+ cmap = cv2.COLORMAP_PARULA
+ if isinstance(levels, int):
+ self.levels = np.linspace(0, 1, levels)
+ else:
+ self.levels = levels
+ if "linewidths" in kwargs:
+ self.linewidths = kwargs["linewidths"]
+ else:
+ self.linewidths = [1] * len(self.levels)
+ self.plot_args = kwargs
+ img_colors_bgr = cv2.applyColorMap((self.levels * 255).astype(np.uint8), cmap)
+ self.level_colors_bgr = [
+ [int(v) for v in img_color_bgr.ravel()] for img_color_bgr in img_colors_bgr
+ ]
+
+ def visualize_iuv_arr(self, context, iuv_arr: np.ndarray, bbox_xywh: Boxes) -> None:
+ image_bgr = self.get_image_bgr_from_context(context)
+ segm = _extract_i_from_iuvarr(iuv_arr)
+ u = _extract_u_from_iuvarr(iuv_arr).astype(float) / 255.0
+ v = _extract_v_from_iuvarr(iuv_arr).astype(float) / 255.0
+ self._contours(image_bgr, u, segm, bbox_xywh)
+ self._contours(image_bgr, v, segm, bbox_xywh)
+
+ def _contours(self, image_bgr, arr, segm, bbox_xywh):
+ for part_idx in range(1, DensePoseDataRelative.N_PART_LABELS + 1):
+ mask = segm == part_idx
+ if not np.any(mask):
+ continue
+ arr_min = np.amin(arr[mask])
+ arr_max = np.amax(arr[mask])
+ I, J = np.nonzero(mask)
+ i0 = np.amin(I)
+ i1 = np.amax(I) + 1
+ j0 = np.amin(J)
+ j1 = np.amax(J) + 1
+ if (j1 == j0 + 1) or (i1 == i0 + 1):
+ continue
+ Nw = arr.shape[1] - 1
+ Nh = arr.shape[0] - 1
+ for level_idx, level in enumerate(self.levels):
+ if (level < arr_min) or (level > arr_max):
+ continue
+ vp = arr[i0:i1, j0:j1] >= level
+ bin_codes = vp[:-1, :-1] + vp[1:, :-1] * 2 + vp[1:, 1:] * 4 + vp[:-1, 1:] * 8
+ mp = mask[i0:i1, j0:j1]
+ bin_mask_codes = mp[:-1, :-1] + mp[1:, :-1] * 2 + mp[1:, 1:] * 4 + mp[:-1, 1:] * 8
+ it = np.nditer(bin_codes, flags=["multi_index"])
+ color_bgr = self.level_colors_bgr[level_idx]
+ linewidth = self.linewidths[level_idx]
+ while not it.finished:
+ if (it[0] != 0) and (it[0] != 15):
+ i, j = it.multi_index
+ if bin_mask_codes[i, j] != 0:
+ self._draw_line(
+ image_bgr,
+ arr,
+ mask,
+ level,
+ color_bgr,
+ linewidth,
+ it[0],
+ it.multi_index,
+ bbox_xywh,
+ Nw,
+ Nh,
+ (i0, j0),
+ )
+ it.iternext()
+
+ def _draw_line(
+ self,
+ image_bgr,
+ arr,
+ mask,
+ v,
+ color_bgr,
+ linewidth,
+ bin_code,
+ multi_idx,
+ bbox_xywh,
+ Nw,
+ Nh,
+ offset,
+ ):
+ lines = self._bin_code_2_lines(arr, v, bin_code, multi_idx, Nw, Nh, offset)
+ x0, y0, w, h = bbox_xywh
+ x1 = x0 + w
+ y1 = y0 + h
+ for line in lines:
+ x0r, y0r = line[0]
+ x1r, y1r = line[1]
+ pt0 = (int(x0 + x0r * (x1 - x0)), int(y0 + y0r * (y1 - y0)))
+ pt1 = (int(x0 + x1r * (x1 - x0)), int(y0 + y1r * (y1 - y0)))
+ cv2.line(image_bgr, pt0, pt1, color_bgr, linewidth)
+
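+    # `bin_code` is the 4-bit marching-squares cell code built in _contours as
+    #   vp[:-1, :-1] * 1 + vp[1:, :-1] * 2 + vp[1:, 1:] * 4 + vp[:-1, 1:] * 8,
+    # so bit 1 corresponds to corner (i, j) = v0, bit 2 to (i + 1, j) = v1,
+    # bit 4 to (i + 1, j + 1) = v2 and bit 8 to (i, j + 1) = v3. Codes 0 and 15
+    # (all corners below / above the level) are skipped by the caller, and
+    # complementary codes such as 1 and 14 cross the same cell edges, which is why
+    # the branches below handle them in pairs; the ambiguous codes 5 and 10 emit
+    # two segments.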
+ def _bin_code_2_lines(self, arr, v, bin_code, multi_idx, Nw, Nh, offset):
+ i0, j0 = offset
+ i, j = multi_idx
+ i += i0
+ j += j0
+ v0, v1, v2, v3 = arr[i, j], arr[i + 1, j], arr[i + 1, j + 1], arr[i, j + 1]
+ x0i = float(j) / Nw
+ y0j = float(i) / Nh
+ He = 1.0 / Nh
+ We = 1.0 / Nw
+ if (bin_code == 1) or (bin_code == 14):
+ a = (v - v0) / (v1 - v0)
+ b = (v - v0) / (v3 - v0)
+ pt1 = (x0i, y0j + a * He)
+ pt2 = (x0i + b * We, y0j)
+ return [(pt1, pt2)]
+ elif (bin_code == 2) or (bin_code == 13):
+ a = (v - v0) / (v1 - v0)
+ b = (v - v1) / (v2 - v1)
+ pt1 = (x0i, y0j + a * He)
+ pt2 = (x0i + b * We, y0j + He)
+ return [(pt1, pt2)]
+ elif (bin_code == 3) or (bin_code == 12):
+ a = (v - v0) / (v3 - v0)
+ b = (v - v1) / (v2 - v1)
+ pt1 = (x0i + a * We, y0j)
+ pt2 = (x0i + b * We, y0j + He)
+ return [(pt1, pt2)]
+ elif (bin_code == 4) or (bin_code == 11):
+ a = (v - v1) / (v2 - v1)
+ b = (v - v3) / (v2 - v3)
+ pt1 = (x0i + a * We, y0j + He)
+ pt2 = (x0i + We, y0j + b * He)
+ return [(pt1, pt2)]
+ elif (bin_code == 6) or (bin_code == 9):
+ a = (v - v0) / (v1 - v0)
+ b = (v - v3) / (v2 - v3)
+ pt1 = (x0i, y0j + a * He)
+ pt2 = (x0i + We, y0j + b * He)
+ return [(pt1, pt2)]
+ elif (bin_code == 7) or (bin_code == 8):
+ a = (v - v0) / (v3 - v0)
+ b = (v - v3) / (v2 - v3)
+ pt1 = (x0i + a * We, y0j)
+ pt2 = (x0i + We, y0j + b * He)
+ return [(pt1, pt2)]
+ elif bin_code == 5:
+ a1 = (v - v0) / (v1 - v0)
+ b1 = (v - v1) / (v2 - v1)
+ pt11 = (x0i, y0j + a1 * He)
+ pt12 = (x0i + b1 * We, y0j + He)
+ a2 = (v - v0) / (v3 - v0)
+ b2 = (v - v3) / (v2 - v3)
+ pt21 = (x0i + a2 * We, y0j)
+ pt22 = (x0i + We, y0j + b2 * He)
+ return [(pt11, pt12), (pt21, pt22)]
+ elif bin_code == 10:
+ a1 = (v - v0) / (v3 - v0)
+ b1 = (v - v0) / (v1 - v0)
+ pt11 = (x0i + a1 * We, y0j)
+ pt12 = (x0i, y0j + b1 * He)
+ a2 = (v - v1) / (v2 - v1)
+ b2 = (v - v3) / (v2 - v3)
+ pt21 = (x0i + a2 * We, y0j + He)
+ pt22 = (x0i + We, y0j + b2 * He)
+ return [(pt11, pt12), (pt21, pt22)]
+ return []
+
+
+try:
+ import matplotlib
+
+ matplotlib.use("Agg")
+ DensePoseResultsContourVisualizer = DensePoseResultsMplContourVisualizer
+except ModuleNotFoundError:
+ logger = logging.getLogger(__name__)
+ logger.warning("Could not import matplotlib, using custom contour visualizer")
+ DensePoseResultsContourVisualizer = DensePoseResultsCustomContourVisualizer
+
+
+class DensePoseResultsFineSegmentationVisualizer(DensePoseMaskedColormapResultsVisualizer):
+ def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7, **kwargs):
+ super(DensePoseResultsFineSegmentationVisualizer, self).__init__(
+ _extract_i_from_iuvarr,
+ _extract_i_from_iuvarr,
+ inplace,
+ cmap,
+ alpha,
+ val_scale=255.0 / DensePoseDataRelative.N_PART_LABELS,
+ **kwargs,
+ )
+
+
+class DensePoseResultsUVisualizer(DensePoseMaskedColormapResultsVisualizer):
+ def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7, **kwargs):
+ super(DensePoseResultsUVisualizer, self).__init__(
+ _extract_u_from_iuvarr,
+ _extract_i_from_iuvarr,
+ inplace,
+ cmap,
+ alpha,
+ val_scale=1.0,
+ **kwargs,
+ )
+
+
+class DensePoseResultsVVisualizer(DensePoseMaskedColormapResultsVisualizer):
+ def __init__(self, inplace=True, cmap=cv2.COLORMAP_PARULA, alpha=0.7, **kwargs):
+ super(DensePoseResultsVVisualizer, self).__init__(
+ _extract_v_from_iuvarr,
+ _extract_i_from_iuvarr,
+ inplace,
+ cmap,
+ alpha,
+ val_scale=1.0,
+ **kwargs,
+ )
diff --git a/densepose/vis/densepose_results_textures.py b/densepose/vis/densepose_results_textures.py
new file mode 100644
index 0000000000000000000000000000000000000000..aa33b861100b796f411f3aade1c03a68c279262e
--- /dev/null
+++ b/densepose/vis/densepose_results_textures.py
@@ -0,0 +1,93 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+import numpy as np
+from typing import List, Optional, Tuple
+import torch
+
+from detectron2.data.detection_utils import read_image
+
+from ..structures import DensePoseChartResult
+from .base import Boxes, Image
+from .densepose_results import DensePoseResultsVisualizer
+
+
+def get_texture_atlas(path: Optional[str]) -> Optional[np.ndarray]:
+ if path is None:
+ return None
+
+ # Reading images like that downsamples 16-bit images to 8-bit
+ # If 16-bit images are needed, we can replace that by cv2.imread with the
+ # cv2.IMREAD_UNCHANGED flag (with cv2 we also need it to keep alpha channels)
+ # The rest of the pipeline would need to be adapted to 16-bit images too
+ bgr_image = read_image(path)
+ rgb_image = np.copy(bgr_image) # Convert BGR -> RGB
+ rgb_image[:, :, :3] = rgb_image[:, :, 2::-1] # Works with alpha channel
+ return rgb_image
+
+
+class DensePoseResultsVisualizerWithTexture(DensePoseResultsVisualizer):
+ """
+ texture_atlas: An image, size 6N * 4N, with N * N squares for each of the 24 body parts.
+ It must follow the grid found at https://github.com/facebookresearch/DensePose/blob/master/DensePoseData/demo_data/texture_atlas_200.png # noqa
+ For each body part, U is proportional to the x coordinate, and (1 - V) to y
+ """
+
+ def __init__(self, texture_atlas, **kwargs):
+ self.texture_atlas = texture_atlas
+ self.body_part_size = texture_atlas.shape[0] // 6
+ assert self.body_part_size == texture_atlas.shape[1] // 4
+
+ def visualize(
+ self,
+ image_bgr: Image,
+ results_and_boxes_xywh: Tuple[Optional[List[DensePoseChartResult]], Optional[Boxes]],
+ ) -> Image:
+ densepose_result, boxes_xywh = results_and_boxes_xywh
+ if densepose_result is None or boxes_xywh is None:
+ return image_bgr
+
+ boxes_xywh = boxes_xywh.int().cpu().numpy()
+ texture_image, alpha = self.get_texture()
+ for i, result in enumerate(densepose_result):
+ iuv_array = torch.cat((result.labels[None], result.uv.clamp(0, 1)))
+ x, y, w, h = boxes_xywh[i]
+ bbox_image = image_bgr[y : y + h, x : x + w]
+ image_bgr[y : y + h, x : x + w] = self.generate_image_with_texture(
+ texture_image, alpha, bbox_image, iuv_array.cpu().numpy()
+ )
+ return image_bgr
+
+ def get_texture(self):
+ N = self.body_part_size
+ texture_image = np.zeros([24, N, N, self.texture_atlas.shape[-1]])
+ for i in range(4):
+ for j in range(6):
+ texture_image[(6 * i + j), :, :, :] = self.texture_atlas[
+ N * j : N * (j + 1), N * i : N * (i + 1), :
+ ]
+
+ if texture_image.shape[-1] == 4: # Image with alpha channel
+ alpha = texture_image[:, :, :, -1] / 255.0
+ texture_image = texture_image[:, :, :, :3]
+ else:
+ alpha = texture_image.sum(axis=-1) > 0
+
+ return texture_image, alpha
+
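+    # get_texture above slices the 6N x 4N atlas column-major into 24 tiles of size
+    # N x N, so tile index 6 * i + j (i in 0..3, j in 0..5) holds DensePose part label
+    # PartInd = 6 * i + j + 1. Within each tile, U indexes the x axis and (1 - V) the
+    # y axis, which is exactly how the texture is sampled below.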
+ def generate_image_with_texture(self, texture_image, alpha, bbox_image_bgr, iuv_array):
+
+ I, U, V = iuv_array
+ generated_image_bgr = bbox_image_bgr.copy()
+
+ for PartInd in range(1, 25):
+ x, y = np.where(I == PartInd)
+ x_index = (U[x, y] * (self.body_part_size - 1)).astype(int)
+ y_index = ((1 - V[x, y]) * (self.body_part_size - 1)).astype(int)
+ part_alpha = np.expand_dims(alpha[PartInd - 1, y_index, x_index], -1)
+ generated_image_bgr[I == PartInd] = (
+ generated_image_bgr[I == PartInd] * (1 - part_alpha)
+ + texture_image[PartInd - 1, y_index, x_index] * part_alpha
+ )
+
+ return generated_image_bgr.astype(np.uint8)
diff --git a/densepose/vis/extractor.py b/densepose/vis/extractor.py
new file mode 100644
index 0000000000000000000000000000000000000000..cdc52a51955750a178521b8ed9442b31dd9f1ebb
--- /dev/null
+++ b/densepose/vis/extractor.py
@@ -0,0 +1,201 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+
+# pyre-unsafe
+import logging
+from typing import List, Optional, Sequence, Tuple
+import torch
+
+from detectron2.layers.nms import batched_nms
+from detectron2.structures.instances import Instances
+
+from densepose.converters import ToChartResultConverterWithConfidences
+from densepose.structures import (
+ DensePoseChartResultWithConfidences,
+ DensePoseEmbeddingPredictorOutput,
+)
+from densepose.vis.bounding_box import BoundingBoxVisualizer, ScoredBoundingBoxVisualizer
+from densepose.vis.densepose_outputs_vertex import DensePoseOutputsVertexVisualizer
+from densepose.vis.densepose_results import DensePoseResultsVisualizer
+
+from .base import CompoundVisualizer
+
+Scores = Sequence[float]
+DensePoseChartResultsWithConfidences = List[DensePoseChartResultWithConfidences]
+
+
+def extract_scores_from_instances(instances: Instances, select=None):
+ if instances.has("scores"):
+ return instances.scores if select is None else instances.scores[select]
+ return None
+
+
+def extract_boxes_xywh_from_instances(instances: Instances, select=None):
+ if instances.has("pred_boxes"):
+ boxes_xywh = instances.pred_boxes.tensor.clone()
+ boxes_xywh[:, 2] -= boxes_xywh[:, 0]
+ boxes_xywh[:, 3] -= boxes_xywh[:, 1]
+ return boxes_xywh if select is None else boxes_xywh[select]
+ return None
+
+
+def create_extractor(visualizer: object):
+ """
+ Create an extractor for the provided visualizer
+ """
+ if isinstance(visualizer, CompoundVisualizer):
+ extractors = [create_extractor(v) for v in visualizer.visualizers]
+ return CompoundExtractor(extractors)
+ elif isinstance(visualizer, DensePoseResultsVisualizer):
+ return DensePoseResultExtractor()
+ elif isinstance(visualizer, ScoredBoundingBoxVisualizer):
+ return CompoundExtractor([extract_boxes_xywh_from_instances, extract_scores_from_instances])
+ elif isinstance(visualizer, BoundingBoxVisualizer):
+ return extract_boxes_xywh_from_instances
+ elif isinstance(visualizer, DensePoseOutputsVertexVisualizer):
+ return DensePoseOutputsExtractor()
+ else:
+ logger = logging.getLogger(__name__)
+ logger.error(f"Could not create extractor for {visualizer}")
+ return None
+
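+# Usage sketch for pairing extractors with visualizers, assuming `instances` is a
+# detectron2 `Instances` object produced by a DensePose predictor and `image_bgr` is a
+# BGR numpy image (the visualizer import below lives in this package's
+# densepose_results module):
+#
+#   from densepose.vis.densepose_results import DensePoseResultsFineSegmentationVisualizer
+#   visualizer = CompoundVisualizer(
+#       [DensePoseResultsFineSegmentationVisualizer(), BoundingBoxVisualizer()]
+#   )
+#   extractor = create_extractor(visualizer)
+#   data = extractor(instances)
+#   image_vis = visualizer.visualize(image_bgr, data)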
+
+class BoundingBoxExtractor:
+ """
+ Extracts bounding boxes from instances
+ """
+
+ def __call__(self, instances: Instances):
+ boxes_xywh = extract_boxes_xywh_from_instances(instances)
+ return boxes_xywh
+
+
+class ScoredBoundingBoxExtractor:
+ """
+ Extracts bounding boxes from instances
+ """
+
+ def __call__(self, instances: Instances, select=None):
+ scores = extract_scores_from_instances(instances)
+ boxes_xywh = extract_boxes_xywh_from_instances(instances)
+ if (scores is None) or (boxes_xywh is None):
+ return (boxes_xywh, scores)
+ if select is not None:
+ scores = scores[select]
+ boxes_xywh = boxes_xywh[select]
+ return (boxes_xywh, scores)
+
+
+class DensePoseResultExtractor:
+ """
+ Extracts DensePose chart result with confidences from instances
+ """
+
+ def __call__(
+ self, instances: Instances, select=None
+ ) -> Tuple[Optional[DensePoseChartResultsWithConfidences], Optional[torch.Tensor]]:
+ if instances.has("pred_densepose") and instances.has("pred_boxes"):
+ dpout = instances.pred_densepose
+ boxes_xyxy = instances.pred_boxes
+ boxes_xywh = extract_boxes_xywh_from_instances(instances)
+ if select is not None:
+ dpout = dpout[select]
+ boxes_xyxy = boxes_xyxy[select]
+ converter = ToChartResultConverterWithConfidences()
+ results = [converter.convert(dpout[i], boxes_xyxy[[i]]) for i in range(len(dpout))]
+ return results, boxes_xywh
+ else:
+ return None, None
+
+
+class DensePoseOutputsExtractor:
+ """
+ Extracts DensePose result from instances
+ """
+
+ def __call__(
+ self,
+ instances: Instances,
+ select=None,
+ ) -> Tuple[
+ Optional[DensePoseEmbeddingPredictorOutput], Optional[torch.Tensor], Optional[List[int]]
+ ]:
+ if not (instances.has("pred_densepose") and instances.has("pred_boxes")):
+ return None, None, None
+
+ dpout = instances.pred_densepose
+ boxes_xyxy = instances.pred_boxes
+ boxes_xywh = extract_boxes_xywh_from_instances(instances)
+
+ if instances.has("pred_classes"):
+ classes = instances.pred_classes.tolist()
+ else:
+ classes = None
+
+ if select is not None:
+ dpout = dpout[select]
+ boxes_xyxy = boxes_xyxy[select]
+ if classes is not None:
+ classes = classes[select]
+
+ return dpout, boxes_xywh, classes
+
+
+class CompoundExtractor:
+ """
+ Extracts data for CompoundVisualizer
+ """
+
+ def __init__(self, extractors):
+ self.extractors = extractors
+
+ def __call__(self, instances: Instances, select=None):
+ datas = []
+ for extractor in self.extractors:
+ data = extractor(instances, select)
+ datas.append(data)
+ return datas
+
+
+class NmsFilteredExtractor:
+ """
+ Extracts data in the format accepted by NmsFilteredVisualizer
+ """
+
+ def __init__(self, extractor, iou_threshold):
+ self.extractor = extractor
+ self.iou_threshold = iou_threshold
+
+ def __call__(self, instances: Instances, select=None):
+ scores = extract_scores_from_instances(instances)
+ boxes_xywh = extract_boxes_xywh_from_instances(instances)
+ if boxes_xywh is None:
+ return None
+ select_local_idx = batched_nms(
+ boxes_xywh,
+ scores,
+ torch.zeros(len(scores), dtype=torch.int32),
+ iou_threshold=self.iou_threshold,
+ ).squeeze()
+ select_local = torch.zeros(len(boxes_xywh), dtype=torch.bool, device=boxes_xywh.device)
+ select_local[select_local_idx] = True
+ select = select_local if select is None else (select & select_local)
+ return self.extractor(instances, select=select)
+
+
+class ScoreThresholdedExtractor:
+ """
+ Extracts data in the format accepted by ScoreThresholdedVisualizer
+ """
+
+ def __init__(self, extractor, min_score):
+ self.extractor = extractor
+ self.min_score = min_score
+
+ def __call__(self, instances: Instances, select=None):
+ scores = extract_scores_from_instances(instances)
+ if scores is None:
+ return None
+ select_local = scores > self.min_score
+ select = select_local if select is None else (select & select_local)
+ data = self.extractor(instances, select=select)
+ return data
diff --git a/model/DensePose/__pycache__/__init__.cpython-39.pyc b/model/DensePose/__pycache__/__init__.cpython-39.pyc
index e28a4bf3960d96c7a845132b2fba691fa9fb269d..ea0e8015d651fe0abda5d0c48544ee25079c7c24 100644
Binary files a/model/DensePose/__pycache__/__init__.cpython-39.pyc and b/model/DensePose/__pycache__/__init__.cpython-39.pyc differ
diff --git a/model/SCHP/__init__.py b/model/SCHP/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ab2f5709a10b48c0f73c7c4d6c176e29fa6ff088
--- /dev/null
+++ b/model/SCHP/__init__.py
@@ -0,0 +1,163 @@
+from model.SCHP import networks
+from model.SCHP.utils.transforms import get_affine_transform, transform_logits
+
+from collections import OrderedDict
+import torch
+import numpy as np
+import cv2
+from PIL import Image
+from torchvision import transforms
+
+def get_palette(num_cls):
+ """ Returns the color map for visualizing the segmentation mask.
+ Args:
+ num_cls: Number of classes
+ Returns:
+ The color map
+ """
+ n = num_cls
+ palette = [0] * (n * 3)
+ for j in range(0, n):
+ lab = j
+ palette[j * 3 + 0] = 0
+ palette[j * 3 + 1] = 0
+ palette[j * 3 + 2] = 0
+ i = 0
+ while lab:
+ palette[j * 3 + 0] |= (((lab >> 0) & 1) << (7 - i))
+ palette[j * 3 + 1] |= (((lab >> 1) & 1) << (7 - i))
+ palette[j * 3 + 2] |= (((lab >> 2) & 1) << (7 - i))
+ i += 1
+ lab >>= 3
+ return palette
+
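+# get_palette spreads the bits of each class index across the R/G/B bit planes (the
+# usual PASCAL-VOC-style label colormap). Minimal usage sketch, assuming `parsing` is
+# an HxW uint8 label map such as the one produced in SCHP.__call__ below:
+#
+#   palette = get_palette(20)        # 20 classes for the 'lip' setting
+#   seg = Image.fromarray(parsing)
+#   seg.putpalette(palette)
+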
+dataset_settings = {
+ 'lip': {
+ 'input_size': [473, 473],
+ 'num_classes': 20,
+ 'label': ['Background', 'Hat', 'Hair', 'Glove', 'Sunglasses', 'Upper-clothes', 'Dress', 'Coat',
+ 'Socks', 'Pants', 'Jumpsuits', 'Scarf', 'Skirt', 'Face', 'Left-arm', 'Right-arm',
+ 'Left-leg', 'Right-leg', 'Left-shoe', 'Right-shoe']
+ },
+ 'atr': {
+ 'input_size': [512, 512],
+ 'num_classes': 18,
+ 'label': ['Background', 'Hat', 'Hair', 'Sunglasses', 'Upper-clothes', 'Skirt', 'Pants', 'Dress', 'Belt',
+ 'Left-shoe', 'Right-shoe', 'Face', 'Left-leg', 'Right-leg', 'Left-arm', 'Right-arm', 'Bag', 'Scarf']
+ },
+ 'pascal': {
+ 'input_size': [512, 512],
+ 'num_classes': 7,
+ 'label': ['Background', 'Head', 'Torso', 'Upper Arms', 'Lower Arms', 'Upper Legs', 'Lower Legs'],
+ }
+}
+
+class SCHP:
+ def __init__(self, ckpt_path, device):
+ dataset_type = None
+ if 'lip' in ckpt_path:
+ dataset_type = 'lip'
+ elif 'atr' in ckpt_path:
+ dataset_type = 'atr'
+ elif 'pascal' in ckpt_path:
+ dataset_type = 'pascal'
+ assert dataset_type is not None, 'Dataset type not found in checkpoint path'
+ self.device = device
+ self.num_classes = dataset_settings[dataset_type]['num_classes']
+ self.input_size = dataset_settings[dataset_type]['input_size']
+ self.aspect_ratio = self.input_size[1] * 1.0 / self.input_size[0]
+ self.palette = get_palette(self.num_classes)
+
+ self.label = dataset_settings[dataset_type]['label']
+ self.model = networks.init_model('resnet101', num_classes=self.num_classes, pretrained=None).to(device)
+ self.load_ckpt(ckpt_path)
+ self.model.eval()
+
+ self.transform = transforms.Compose([
+ transforms.ToTensor(),
+ transforms.Normalize(mean=[0.406, 0.456, 0.485], std=[0.225, 0.224, 0.229])
+ ])
+ self.upsample = torch.nn.Upsample(size=self.input_size, mode='bilinear', align_corners=True)
+
+
+ def load_ckpt(self, ckpt_path):
+ state_dict = torch.load(ckpt_path, map_location='cpu')['state_dict']
+ new_state_dict = OrderedDict()
+ for k, v in state_dict.items():
+ name = k[7:] # remove `module.`
+ new_state_dict[name] = v
+ self.model.load_state_dict(new_state_dict)
+
+ def _box2cs(self, box):
+ x, y, w, h = box[:4]
+ return self._xywh2cs(x, y, w, h)
+
+ def _xywh2cs(self, x, y, w, h):
+ center = np.zeros((2), dtype=np.float32)
+ center[0] = x + w * 0.5
+ center[1] = y + h * 0.5
+ if w > self.aspect_ratio * h:
+ h = w * 1.0 / self.aspect_ratio
+ elif w < self.aspect_ratio * h:
+ w = h * self.aspect_ratio
+ scale = np.array([w, h], dtype=np.float32)
+ return center, scale
+
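+    # _xywh2cs converts a pixel box into the (center, scale) pair expected by
+    # get_affine_transform: the box is padded so its w / h ratio matches the network
+    # aspect ratio, and `scale` stays in pixels (preprocess below passes the whole
+    # image via _box2cs([0, 0, w - 1, h - 1])).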
+ def preprocess(self, image):
+ if isinstance(image, str):
+ img = cv2.imread(image, cv2.IMREAD_COLOR)
+        elif isinstance(image, Image.Image):
+            # convert a PIL image to an HWC numpy array
+            img = np.array(image)
+        else:
+            raise TypeError('Unsupported input type for SCHP preprocess: {}'.format(type(image)))
+
+ h, w, _ = img.shape
+ # Get person center and scale
+ person_center, s = self._box2cs([0, 0, w - 1, h - 1])
+ r = 0
+ trans = get_affine_transform(person_center, s, r, self.input_size)
+ input = cv2.warpAffine(
+ img,
+ trans,
+ (int(self.input_size[1]), int(self.input_size[0])),
+ flags=cv2.INTER_LINEAR,
+ borderMode=cv2.BORDER_CONSTANT,
+ borderValue=(0, 0, 0))
+
+ input = self.transform(input).to(self.device).unsqueeze(0)
+ meta = {
+ 'center': person_center,
+ 'height': h,
+ 'width': w,
+ 'scale': s,
+ 'rotation': r
+ }
+ return input, meta
+
+
+ def __call__(self, image_or_path):
+ if isinstance(image_or_path, list):
+ image_list = []
+ meta_list = []
+ for image in image_or_path:
+ image, meta = self.preprocess(image)
+ image_list.append(image)
+ meta_list.append(meta)
+ image = torch.cat(image_list, dim=0)
+ else:
+ image, meta = self.preprocess(image_or_path)
+ meta_list = [meta]
+
+ output = self.model(image)
+ upsample_outputs = self.upsample(output[0][-1])
+ upsample_outputs = upsample_outputs.permute(0, 2, 3, 1) # BCHW -> BHWC
+
+ output_img_list = []
+ for upsample_output, meta in zip(upsample_outputs, meta_list):
+ c, s, w, h = meta['center'], meta['scale'], meta['width'], meta['height']
+ logits_result = transform_logits(upsample_output.data.cpu().numpy(), c, s, w, h, input_size=self.input_size)
+ parsing_result = np.argmax(logits_result, axis=2)
+ output_img = Image.fromarray(np.asarray(parsing_result, dtype=np.uint8))
+ output_img.putpalette(self.palette)
+ output_img_list.append(output_img)
+
+ return output_img_list[0] if len(output_img_list) == 1 else output_img_list
\ No newline at end of file
diff --git a/model/SCHP/__pycache__/__init__.cpython-39.pyc b/model/SCHP/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7ec10278c747231147fa4dfdeda08c9d38446cc4
Binary files /dev/null and b/model/SCHP/__pycache__/__init__.cpython-39.pyc differ
diff --git a/model/SCHP/networks/AugmentCE2P.py b/model/SCHP/networks/AugmentCE2P.py
new file mode 100644
index 0000000000000000000000000000000000000000..246a87ebca53c7ed089f4288ca6d91ba9ded7f32
--- /dev/null
+++ b/model/SCHP/networks/AugmentCE2P.py
@@ -0,0 +1,337 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8 -*-
+
+"""
+@Author : Peike Li
+@Contact : peike.li@yahoo.com
+@File : AugmentCE2P.py
+@Time : 8/4/19 3:35 PM
+@Desc :
+@License : This source code is licensed under the license found in the
+ LICENSE file in the root directory of this source tree.
+"""
+
+import functools
+
+import torch
+import torch.nn as nn
+from torch.nn import functional as F
+# Note here we adopt the InplaceABNSync implementation from https://github.com/mapillary/inplace_abn
+# By default, the InplaceABNSync module contains a BatchNorm Layer and a LeakyReLu layer
+from inplace_abn import InPlaceABNSync
+
+BatchNorm2d = functools.partial(InPlaceABNSync, activation='identity')
+
+affine_par = True
+
+pretrained_settings = {
+ 'resnet101': {
+ 'imagenet': {
+ 'input_space': 'BGR',
+ 'input_size': [3, 224, 224],
+ 'input_range': [0, 1],
+ 'mean': [0.406, 0.456, 0.485],
+ 'std': [0.225, 0.224, 0.229],
+ 'num_classes': 1000
+ }
+ },
+}
+
+
+def conv3x3(in_planes, out_planes, stride=1):
+ "3x3 convolution with padding"
+ return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
+ padding=1, bias=False)
+
+
+class Bottleneck(nn.Module):
+ expansion = 4
+
+    def __init__(self, inplanes, planes, stride=1, dilation=1, downsample=None, first_dilation=1, multi_grid=1):
+ super(Bottleneck, self).__init__()
+ self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
+ self.bn1 = BatchNorm2d(planes)
+ self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
+ padding=dilation * multi_grid, dilation=dilation * multi_grid, bias=False)
+ self.bn2 = BatchNorm2d(planes)
+ self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
+ self.bn3 = BatchNorm2d(planes * 4)
+ self.relu = nn.ReLU(inplace=False)
+ self.relu_inplace = nn.ReLU(inplace=True)
+ self.downsample = downsample
+ self.dilation = dilation
+ self.stride = stride
+
+ def forward(self, x):
+ residual = x
+
+ out = self.conv1(x)
+ out = self.bn1(out)
+ out = self.relu(out)
+
+ out = self.conv2(out)
+ out = self.bn2(out)
+ out = self.relu(out)
+
+ out = self.conv3(out)
+ out = self.bn3(out)
+
+ if self.downsample is not None:
+ residual = self.downsample(x)
+
+ out = out + residual
+ out = self.relu_inplace(out)
+
+ return out
+
+
+class PSPModule(nn.Module):
+ """
+ Reference:
+ Zhao, Hengshuang, et al. *"Pyramid scene parsing network."*
+ """
+
+ def __init__(self, features, out_features=512, sizes=(1, 2, 3, 6)):
+ super(PSPModule, self).__init__()
+
+ self.stages = []
+ self.stages = nn.ModuleList([self._make_stage(features, out_features, size) for size in sizes])
+ self.bottleneck = nn.Sequential(
+ nn.Conv2d(features + len(sizes) * out_features, out_features, kernel_size=3, padding=1, dilation=1,
+ bias=False),
+ InPlaceABNSync(out_features),
+ )
+
+ def _make_stage(self, features, out_features, size):
+ prior = nn.AdaptiveAvgPool2d(output_size=(size, size))
+ conv = nn.Conv2d(features, out_features, kernel_size=1, bias=False)
+ bn = InPlaceABNSync(out_features)
+ return nn.Sequential(prior, conv, bn)
+
+ def forward(self, feats):
+ h, w = feats.size(2), feats.size(3)
+ priors = [F.interpolate(input=stage(feats), size=(h, w), mode='bilinear', align_corners=True) for stage in
+ self.stages] + [feats]
+ bottle = self.bottleneck(torch.cat(priors, 1))
+ return bottle
+
+
+class ASPPModule(nn.Module):
+ """
+ Reference:
+ Chen, Liang-Chieh, et al. *"Rethinking Atrous Convolution for Semantic Image Segmentation."*
+ """
+
+ def __init__(self, features, inner_features=256, out_features=512, dilations=(12, 24, 36)):
+ super(ASPPModule, self).__init__()
+
+ self.conv1 = nn.Sequential(nn.AdaptiveAvgPool2d((1, 1)),
+ nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1,
+ bias=False),
+ InPlaceABNSync(inner_features))
+ self.conv2 = nn.Sequential(
+ nn.Conv2d(features, inner_features, kernel_size=1, padding=0, dilation=1, bias=False),
+ InPlaceABNSync(inner_features))
+ self.conv3 = nn.Sequential(
+ nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[0], dilation=dilations[0], bias=False),
+ InPlaceABNSync(inner_features))
+ self.conv4 = nn.Sequential(
+ nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[1], dilation=dilations[1], bias=False),
+ InPlaceABNSync(inner_features))
+ self.conv5 = nn.Sequential(
+ nn.Conv2d(features, inner_features, kernel_size=3, padding=dilations[2], dilation=dilations[2], bias=False),
+ InPlaceABNSync(inner_features))
+
+ self.bottleneck = nn.Sequential(
+ nn.Conv2d(inner_features * 5, out_features, kernel_size=1, padding=0, dilation=1, bias=False),
+ InPlaceABNSync(out_features),
+ nn.Dropout2d(0.1)
+ )
+
+ def forward(self, x):
+ _, _, h, w = x.size()
+
+ feat1 = F.interpolate(self.conv1(x), size=(h, w), mode='bilinear', align_corners=True)
+
+ feat2 = self.conv2(x)
+ feat3 = self.conv3(x)
+ feat4 = self.conv4(x)
+ feat5 = self.conv5(x)
+ out = torch.cat((feat1, feat2, feat3, feat4, feat5), 1)
+
+ bottle = self.bottleneck(out)
+ return bottle
+
+
+class Edge_Module(nn.Module):
+ """
+ Edge Learning Branch
+ """
+
+ def __init__(self, in_fea=[256, 512, 1024], mid_fea=256, out_fea=2):
+ super(Edge_Module, self).__init__()
+
+ self.conv1 = nn.Sequential(
+ nn.Conv2d(in_fea[0], mid_fea, kernel_size=1, padding=0, dilation=1, bias=False),
+ InPlaceABNSync(mid_fea)
+ )
+ self.conv2 = nn.Sequential(
+ nn.Conv2d(in_fea[1], mid_fea, kernel_size=1, padding=0, dilation=1, bias=False),
+ InPlaceABNSync(mid_fea)
+ )
+ self.conv3 = nn.Sequential(
+ nn.Conv2d(in_fea[2], mid_fea, kernel_size=1, padding=0, dilation=1, bias=False),
+ InPlaceABNSync(mid_fea)
+ )
+ self.conv4 = nn.Conv2d(mid_fea, out_fea, kernel_size=3, padding=1, dilation=1, bias=True)
+ self.conv5 = nn.Conv2d(out_fea * 3, out_fea, kernel_size=1, padding=0, dilation=1, bias=True)
+
+ def forward(self, x1, x2, x3):
+ _, _, h, w = x1.size()
+
+ edge1_fea = self.conv1(x1)
+ edge1 = self.conv4(edge1_fea)
+ edge2_fea = self.conv2(x2)
+ edge2 = self.conv4(edge2_fea)
+ edge3_fea = self.conv3(x3)
+ edge3 = self.conv4(edge3_fea)
+
+ edge2_fea = F.interpolate(edge2_fea, size=(h, w), mode='bilinear', align_corners=True)
+ edge3_fea = F.interpolate(edge3_fea, size=(h, w), mode='bilinear', align_corners=True)
+ edge2 = F.interpolate(edge2, size=(h, w), mode='bilinear', align_corners=True)
+ edge3 = F.interpolate(edge3, size=(h, w), mode='bilinear', align_corners=True)
+
+ edge = torch.cat([edge1, edge2, edge3], dim=1)
+ edge_fea = torch.cat([edge1_fea, edge2_fea, edge3_fea], dim=1)
+ edge = self.conv5(edge)
+
+ return edge, edge_fea
+
+
+class Decoder_Module(nn.Module):
+ """
+ Parsing Branch Decoder Module.
+ """
+
+ def __init__(self, num_classes):
+ super(Decoder_Module, self).__init__()
+ self.conv1 = nn.Sequential(
+ nn.Conv2d(512, 256, kernel_size=1, padding=0, dilation=1, bias=False),
+ InPlaceABNSync(256)
+ )
+ self.conv2 = nn.Sequential(
+ nn.Conv2d(256, 48, kernel_size=1, stride=1, padding=0, dilation=1, bias=False),
+ InPlaceABNSync(48)
+ )
+ self.conv3 = nn.Sequential(
+ nn.Conv2d(304, 256, kernel_size=1, padding=0, dilation=1, bias=False),
+ InPlaceABNSync(256),
+ nn.Conv2d(256, 256, kernel_size=1, padding=0, dilation=1, bias=False),
+ InPlaceABNSync(256)
+ )
+
+ self.conv4 = nn.Conv2d(256, num_classes, kernel_size=1, padding=0, dilation=1, bias=True)
+
+ def forward(self, xt, xl):
+ _, _, h, w = xl.size()
+ xt = F.interpolate(self.conv1(xt), size=(h, w), mode='bilinear', align_corners=True)
+ xl = self.conv2(xl)
+ x = torch.cat([xt, xl], dim=1)
+ x = self.conv3(x)
+ seg = self.conv4(x)
+ return seg, x
+
+
+class ResNet(nn.Module):
+ def __init__(self, block, layers, num_classes):
+ self.inplanes = 128
+ super(ResNet, self).__init__()
+ self.conv1 = conv3x3(3, 64, stride=2)
+ self.bn1 = BatchNorm2d(64)
+ self.relu1 = nn.ReLU(inplace=False)
+ self.conv2 = conv3x3(64, 64)
+ self.bn2 = BatchNorm2d(64)
+ self.relu2 = nn.ReLU(inplace=False)
+ self.conv3 = conv3x3(64, 128)
+ self.bn3 = BatchNorm2d(128)
+ self.relu3 = nn.ReLU(inplace=False)
+
+ self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+
+ self.layer1 = self._make_layer(block, 64, layers[0])
+ self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
+ self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
+ self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=2, multi_grid=(1, 1, 1))
+
+ self.context_encoding = PSPModule(2048, 512)
+
+ self.edge = Edge_Module()
+ self.decoder = Decoder_Module(num_classes)
+
+ self.fushion = nn.Sequential(
+ nn.Conv2d(1024, 256, kernel_size=1, padding=0, dilation=1, bias=False),
+ InPlaceABNSync(256),
+ nn.Dropout2d(0.1),
+ nn.Conv2d(256, num_classes, kernel_size=1, padding=0, dilation=1, bias=True)
+ )
+
+ def _make_layer(self, block, planes, blocks, stride=1, dilation=1, multi_grid=1):
+ downsample = None
+ if stride != 1 or self.inplanes != planes * block.expansion:
+ downsample = nn.Sequential(
+ nn.Conv2d(self.inplanes, planes * block.expansion,
+ kernel_size=1, stride=stride, bias=False),
+ BatchNorm2d(planes * block.expansion, affine=affine_par))
+
+ layers = []
+ generate_multi_grid = lambda index, grids: grids[index % len(grids)] if isinstance(grids, tuple) else 1
+ layers.append(block(self.inplanes, planes, stride, dilation=dilation, downsample=downsample,
+ multi_grid=generate_multi_grid(0, multi_grid)))
+ self.inplanes = planes * block.expansion
+ for i in range(1, blocks):
+ layers.append(
+ block(self.inplanes, planes, dilation=dilation, multi_grid=generate_multi_grid(i, multi_grid)))
+
+ return nn.Sequential(*layers)
+
+ def forward(self, x):
+ x = self.relu1(self.bn1(self.conv1(x)))
+ x = self.relu2(self.bn2(self.conv2(x)))
+ x = self.relu3(self.bn3(self.conv3(x)))
+ x = self.maxpool(x)
+ x2 = self.layer1(x)
+ x3 = self.layer2(x2)
+ x4 = self.layer3(x3)
+ x5 = self.layer4(x4)
+ x = self.context_encoding(x5)
+ parsing_result, parsing_fea = self.decoder(x, x2)
+ # Edge Branch
+ edge_result, edge_fea = self.edge(x2, x3, x4)
+ # Fusion Branch
+ x = torch.cat([parsing_fea, edge_fea], dim=1)
+ fusion_result = self.fushion(x)
+ return [[parsing_result, fusion_result], [edge_result]]
+
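+# ResNet.forward returns a nested list [[parsing_result, fusion_result], [edge_result]];
+# the SCHP wrapper in model/SCHP/__init__.py consumes output[0][-1], i.e. the
+# fusion-branch parsing logits, and upsamples them back to the network input size.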
+
+def initialize_pretrained_model(model, settings, pretrained='./models/resnet101-imagenet.pth'):
+ model.input_space = settings['input_space']
+ model.input_size = settings['input_size']
+ model.input_range = settings['input_range']
+ model.mean = settings['mean']
+ model.std = settings['std']
+
+ if pretrained is not None:
+ saved_state_dict = torch.load(pretrained)
+ new_params = model.state_dict().copy()
+ for i in saved_state_dict:
+ i_parts = i.split('.')
+ if not i_parts[0] == 'fc':
+ new_params['.'.join(i_parts[0:])] = saved_state_dict[i]
+ model.load_state_dict(new_params)
+
+
+def resnet101(num_classes=20, pretrained='./models/resnet101-imagenet.pth'):
+ model = ResNet(Bottleneck, [3, 4, 23, 3], num_classes)
+ settings = pretrained_settings['resnet101']['imagenet']
+ initialize_pretrained_model(model, settings, pretrained)
+ return model
diff --git a/model/SCHP/networks/__init__.py b/model/SCHP/networks/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..d037294541626d38b3ef521b0690bfd4a36e864f
--- /dev/null
+++ b/model/SCHP/networks/__init__.py
@@ -0,0 +1,13 @@
+from __future__ import absolute_import
+
+from model.SCHP.networks.AugmentCE2P import resnet101
+
+__factory = {
+ 'resnet101': resnet101,
+}
+
+
+def init_model(name, *args, **kwargs):
+ if name not in __factory.keys():
+ raise KeyError("Unknown model arch: {}".format(name))
+ return __factory[name](*args, **kwargs)
\ No newline at end of file
diff --git a/model/SCHP/networks/__pycache__/AugmentCE2P.cpython-39.pyc b/model/SCHP/networks/__pycache__/AugmentCE2P.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d3d9f79122a57789ab8464a90bf3d7e97eee3e47
Binary files /dev/null and b/model/SCHP/networks/__pycache__/AugmentCE2P.cpython-39.pyc differ
diff --git a/model/SCHP/networks/__pycache__/__init__.cpython-39.pyc b/model/SCHP/networks/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..91208568875f4ae9c46b5509b6e0f28f2853e26a
Binary files /dev/null and b/model/SCHP/networks/__pycache__/__init__.cpython-39.pyc differ
diff --git a/model/SCHP/utils/__pycache__/transforms.cpython-39.pyc b/model/SCHP/utils/__pycache__/transforms.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6d75c45636b662f0f51be89dbd80233e882a7d98
Binary files /dev/null and b/model/SCHP/utils/__pycache__/transforms.cpython-39.pyc differ
diff --git a/model/SCHP/utils/transforms.py b/model/SCHP/utils/transforms.py
new file mode 100644
index 0000000000000000000000000000000000000000..1442a728938ca19fcb4ac21ae6588266df45631c
--- /dev/null
+++ b/model/SCHP/utils/transforms.py
@@ -0,0 +1,167 @@
+# ------------------------------------------------------------------------------
+# Copyright (c) Microsoft
+# Licensed under the MIT License.
+# Written by Bin Xiao (Bin.Xiao@microsoft.com)
+# ------------------------------------------------------------------------------
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import cv2
+import torch
+
+class BRG2Tensor_transform(object):
+ def __call__(self, pic):
+ img = torch.from_numpy(pic.transpose((2, 0, 1)))
+ if isinstance(img, torch.ByteTensor):
+ return img.float()
+ else:
+ return img
+
+class BGR2RGB_transform(object):
+ def __call__(self, tensor):
+ return tensor[[2,1,0],:,:]
+
+def flip_back(output_flipped, matched_parts):
+ '''
+    output_flipped: numpy.ndarray(batch_size, num_joints, height, width)
+ '''
+ assert output_flipped.ndim == 4,\
+ 'output_flipped should be [batch_size, num_joints, height, width]'
+
+ output_flipped = output_flipped[:, :, :, ::-1]
+
+ for pair in matched_parts:
+ tmp = output_flipped[:, pair[0], :, :].copy()
+ output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :]
+ output_flipped[:, pair[1], :, :] = tmp
+
+ return output_flipped
+
+
+def fliplr_joints(joints, joints_vis, width, matched_parts):
+ """
+ flip coords
+ """
+ # Flip horizontal
+ joints[:, 0] = width - joints[:, 0] - 1
+
+ # Change left-right parts
+ for pair in matched_parts:
+ joints[pair[0], :], joints[pair[1], :] = \
+ joints[pair[1], :], joints[pair[0], :].copy()
+ joints_vis[pair[0], :], joints_vis[pair[1], :] = \
+ joints_vis[pair[1], :], joints_vis[pair[0], :].copy()
+
+ return joints*joints_vis, joints_vis
+
+
+def transform_preds(coords, center, scale, input_size):
+ target_coords = np.zeros(coords.shape)
+ trans = get_affine_transform(center, scale, 0, input_size, inv=1)
+ for p in range(coords.shape[0]):
+ target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans)
+ return target_coords
+
+def transform_parsing(pred, center, scale, width, height, input_size):
+
+ trans = get_affine_transform(center, scale, 0, input_size, inv=1)
+ target_pred = cv2.warpAffine(
+ pred,
+ trans,
+        (int(width), int(height)),
+ flags=cv2.INTER_NEAREST,
+ borderMode=cv2.BORDER_CONSTANT,
+ borderValue=(0))
+
+ return target_pred
+
+def transform_logits(logits, center, scale, width, height, input_size):
+
+ trans = get_affine_transform(center, scale, 0, input_size, inv=1)
+ channel = logits.shape[2]
+ target_logits = []
+ for i in range(channel):
+ target_logit = cv2.warpAffine(
+ logits[:,:,i],
+ trans,
+            (int(width), int(height)),
+ flags=cv2.INTER_LINEAR,
+ borderMode=cv2.BORDER_CONSTANT,
+ borderValue=(0))
+ target_logits.append(target_logit)
+ target_logits = np.stack(target_logits,axis=2)
+
+ return target_logits
+
+
+def get_affine_transform(center,
+ scale,
+ rot,
+ output_size,
+ shift=np.array([0, 0], dtype=np.float32),
+ inv=0):
+    if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
+        # scalar scale: expand to a symmetric (w, h) pair
+        scale = np.array([scale, scale])
+
+ scale_tmp = scale
+
+ src_w = scale_tmp[0]
+ dst_w = output_size[1]
+ dst_h = output_size[0]
+
+ rot_rad = np.pi * rot / 180
+ src_dir = get_dir([0, src_w * -0.5], rot_rad)
+ dst_dir = np.array([0, (dst_w-1) * -0.5], np.float32)
+
+ src = np.zeros((3, 2), dtype=np.float32)
+ dst = np.zeros((3, 2), dtype=np.float32)
+ src[0, :] = center + scale_tmp * shift
+ src[1, :] = center + src_dir + scale_tmp * shift
+ dst[0, :] = [(dst_w-1) * 0.5, (dst_h-1) * 0.5]
+ dst[1, :] = np.array([(dst_w-1) * 0.5, (dst_h-1) * 0.5]) + dst_dir
+
+ src[2:, :] = get_3rd_point(src[0, :], src[1, :])
+ dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])
+
+ if inv:
+ trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
+ else:
+ trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
+
+ return trans
+
+
+def affine_transform(pt, t):
+ new_pt = np.array([pt[0], pt[1], 1.]).T
+ new_pt = np.dot(t, new_pt)
+ return new_pt[:2]
+
+
+def get_3rd_point(a, b):
+ direct = a - b
+ return b + np.array([-direct[1], direct[0]], dtype=np.float32)
+
+
+def get_dir(src_point, rot_rad):
+ sn, cs = np.sin(rot_rad), np.cos(rot_rad)
+
+ src_result = [0, 0]
+ src_result[0] = src_point[0] * cs - src_point[1] * sn
+ src_result[1] = src_point[0] * sn + src_point[1] * cs
+
+ return src_result
+
+
+def crop(img, center, scale, output_size, rot=0):
+ trans = get_affine_transform(center, scale, rot, output_size)
+
+ dst_img = cv2.warpAffine(img,
+ trans,
+ (int(output_size[1]), int(output_size[0])),
+ flags=cv2.INTER_LINEAR)
+
+ return dst_img
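
The helpers above implement the usual top-down crop pipeline: build an affine transform from a person box (center, scale) to the network input size, run the parser on the warped crop, then invert the transform to project per-class predictions back onto the original image. A minimal sketch of that round trip follows; the 473x473 input size, whole-image person box, dummy image, and class count are placeholder assumptions for illustration, not part of this diff:

```python
import numpy as np
import cv2

from model.SCHP.utils.transforms import get_affine_transform, transform_logits

# Stand-in for a real photo (a real pipeline would cv2.imread a person image).
img = np.zeros((1024, 768, 3), dtype=np.uint8)
h, w = img.shape[:2]

center = np.array([w / 2.0, h / 2.0], dtype=np.float32)  # person-box center (whole image here)
scale = np.array([w, h], dtype=np.float32)                # person-box size
input_size = (473, 473)                                   # (height, width) fed to the parser

# Forward: warp the person region into the network's input resolution.
trans = get_affine_transform(center, scale, rot=0, output_size=input_size)
inp = cv2.warpAffine(img, trans, (input_size[1], input_size[0]), flags=cv2.INTER_LINEAR)

# Stand-in for parser output: (H, W, num_classes) logits on the cropped input.
num_classes = 20
logits = np.random.rand(input_size[0], input_size[1], num_classes).astype(np.float32)

# Inverse: project per-class logits back to the original resolution, then arg-max.
full_logits = transform_logits(logits, center, scale, w, h, input_size)
parsing = np.argmax(full_logits, axis=2).astype(np.uint8)  # full-size label map
```
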
diff --git a/model/__pycache__/attn_processor.cpython-39.pyc b/model/__pycache__/attn_processor.cpython-39.pyc
index a521d17a1e187759ba4e60c5cd5f20d560e21d04..196e72f2acdbcd585a77a2eb0b3de35a831db501 100644
Binary files a/model/__pycache__/attn_processor.cpython-39.pyc and b/model/__pycache__/attn_processor.cpython-39.pyc differ
diff --git a/model/__pycache__/cloth_masker.cpython-39.pyc b/model/__pycache__/cloth_masker.cpython-39.pyc
index ec20a4ab103e71f2571cf50ef4dffc6d26b4b932..4d7eaf357437890e88449578283c341cf27dd8ba 100644
Binary files a/model/__pycache__/cloth_masker.cpython-39.pyc and b/model/__pycache__/cloth_masker.cpython-39.pyc differ
diff --git a/model/__pycache__/pipeline.cpython-39.pyc b/model/__pycache__/pipeline.cpython-39.pyc
index 0e7b8e10ffe9c935375999604205b5e0851d5789..167acbffcdd861af178c127e2be0d7e9e709d6f1 100644
Binary files a/model/__pycache__/pipeline.cpython-39.pyc and b/model/__pycache__/pipeline.cpython-39.pyc differ
diff --git a/model/__pycache__/utils.cpython-39.pyc b/model/__pycache__/utils.cpython-39.pyc
index 93edc07c5160a1afa59318d3d4fdb640cfb8f3e4..5c79849a20c76952b54e2aac3c0470151e4cc59d 100644
Binary files a/model/__pycache__/utils.cpython-39.pyc and b/model/__pycache__/utils.cpython-39.pyc differ
diff --git a/model/cloth_masker.py b/model/cloth_masker.py
index a829bcbb5a1b08e35467c393575e805bdca1c8e7..098793109a41f0dc18bd38ed8f8b9f4efa63234e 100644
--- a/model/cloth_masker.py
+++ b/model/cloth_masker.py
@@ -6,8 +6,8 @@ import cv2
from diffusers.image_processor import VaeImageProcessor
import torch
-from model.DensePose import DensePose
-from model.segformer_b2 import Segformer # type: ignore
+from model.SCHP import SCHP # type: ignore
+from model.DensePose import DensePose # type: ignore
DENSE_INDEX_MAP = {
"background": [0],
@@ -152,37 +152,43 @@ def hull_mask(mask_area: np.ndarray):
return hull_mask
-class AutoMaskerSeg:
+class AutoMasker:
def __init__(
self,
densepose_ckpt='./Models/DensePose',
- segformer_ckpt='./Models/segformer_b3_clothes',
+ schp_ckpt='./Models/SCHP',
device='cuda'):
np.random.seed(0)
torch.manual_seed(0)
torch.cuda.manual_seed(0)
self.densepose_processor = DensePose(densepose_ckpt, device)
- self.segformer_processor = Segformer(segformer_ckpt, device)
+ self.schp_processor_atr = SCHP(ckpt_path=os.path.join(schp_ckpt, 'exp-schp-201908301523-atr.pth'), device=device)
+ self.schp_processor_lip = SCHP(ckpt_path=os.path.join(schp_ckpt, 'exp-schp-201908261155-lip.pth'), device=device)
self.mask_processor = VaeImageProcessor(vae_scale_factor=8, do_normalize=False, do_binarize=True, do_convert_grayscale=True)
def process_densepose(self, image_or_path):
return self.densepose_processor(image_or_path, resize=1024)
- def process_atr(self, image_or_path):
- return self.segformer_processor(image_or_path)
+ def process_schp_lip(self, image_or_path):
+ return self.schp_processor_lip(image_or_path)
+
+ def process_schp_atr(self, image_or_path):
+ return self.schp_processor_atr(image_or_path)
def preprocess_image(self, image_or_path):
return {
'densepose': self.densepose_processor(image_or_path, resize=1024),
- 'atr': self.process_atr(image_or_path),
+ 'schp_atr': self.schp_processor_atr(image_or_path),
+ 'schp_lip': self.schp_processor_lip(image_or_path)
}
@staticmethod
def cloth_agnostic_mask(
densepose_mask: Image.Image,
- atr_mask: Image.Image,
+ schp_lip_mask: Image.Image,
+ schp_atr_mask: Image.Image,
part: str='overall',
**kwargs
):
@@ -197,30 +203,33 @@ class AutoMaskerSeg:
kernal_size = kernal_size if kernal_size % 2 == 1 else kernal_size + 1
densepose_mask = np.array(densepose_mask)
- # schp_lip_mask = np.array(schp_lip_mask)
- atr_mask = np.array(atr_mask)
+ schp_lip_mask = np.array(schp_lip_mask)
+ schp_atr_mask = np.array(schp_atr_mask)
# Strong Protect Area (Hands, Face, Accessory, Feet)
hands_protect_area = part_mask_of(['hands', 'feet'], densepose_mask, DENSE_INDEX_MAP)
hands_protect_area = cv2.dilate(hands_protect_area, dilate_kernel, iterations=1)
- hands_protect_area = hands_protect_area & (part_mask_of(['Left-arm', 'Right-arm', 'Left-leg', 'Right-leg'], atr_mask, ATR_MAPPING))
- # | part_mask_of(['Left-arm', 'Right-arm', 'Left-leg', 'Right-leg'], schp_lip_mask, LIP_MAPPING))
- face_protect_area = part_mask_of('face', densepose_mask, DENSE_INDEX_MAP) & part_mask_of('Face', atr_mask, ATR_MAPPING)
+ hands_protect_area = hands_protect_area & \
+ (part_mask_of(['Left-arm', 'Right-arm', 'Left-leg', 'Right-leg'], schp_atr_mask, ATR_MAPPING) | \
+ part_mask_of(['Left-arm', 'Right-arm', 'Left-leg', 'Right-leg'], schp_lip_mask, LIP_MAPPING))
+ face_protect_area = part_mask_of('Face', schp_lip_mask, LIP_MAPPING)
strong_protect_area = hands_protect_area | face_protect_area
# Weak Protect Area (Hair, Irrelevant Clothes, Body Parts)
- body_protect_area = part_mask_of(PROTECT_BODY_PARTS[part], atr_mask, ATR_MAPPING) # part_mask_of(PROTECT_BODY_PARTS[part], schp_lip_mask, LIP_MAPPING) |
- hair_protect_area = part_mask_of(['Hair'], atr_mask, ATR_MAPPING)#part_mask_of(['Hair'], schp_lip_mask, LIP_MAPPING) | \
-
- cloth_protect_area = part_mask_of(PROTECT_CLOTH_PARTS[part]['ATR'], atr_mask, ATR_MAPPING) #part_mask_of(PROTECT_CLOTH_PARTS[part]['LIP'], schp_lip_mask, LIP_MAPPING) | \
-
- accessory_protect_area = part_mask_of((accessory_parts := ['Hat', 'Glove', 'Sunglasses', 'Bag', 'Left-shoe', 'Right-shoe', 'Scarf', 'Socks']), atr_mask, ATR_MAPPING)
+ body_protect_area = part_mask_of(PROTECT_BODY_PARTS[part], schp_lip_mask, LIP_MAPPING) | part_mask_of(PROTECT_BODY_PARTS[part], schp_atr_mask, ATR_MAPPING)
+ hair_protect_area = part_mask_of(['Hair'], schp_lip_mask, LIP_MAPPING) | \
+ part_mask_of(['Hair'], schp_atr_mask, ATR_MAPPING)
+ cloth_protect_area = part_mask_of(PROTECT_CLOTH_PARTS[part]['LIP'], schp_lip_mask, LIP_MAPPING) | \
+ part_mask_of(PROTECT_CLOTH_PARTS[part]['ATR'], schp_atr_mask, ATR_MAPPING)
+ accessory_protect_area = part_mask_of((accessory_parts := ['Hat', 'Glove', 'Sunglasses', 'Bag', 'Left-shoe', 'Right-shoe', 'Scarf', 'Socks']), schp_lip_mask, LIP_MAPPING) | \
+ part_mask_of(accessory_parts, schp_atr_mask, ATR_MAPPING)
weak_protect_area = body_protect_area | cloth_protect_area | hair_protect_area | strong_protect_area | accessory_protect_area
# Mask Area
- strong_mask_area = part_mask_of(MASK_CLOTH_PARTS[part], atr_mask, ATR_MAPPING)
- background_area = part_mask_of(['Background'], atr_mask, ATR_MAPPING)
+ strong_mask_area = part_mask_of(MASK_CLOTH_PARTS[part], schp_lip_mask, LIP_MAPPING) | \
+ part_mask_of(MASK_CLOTH_PARTS[part], schp_atr_mask, ATR_MAPPING)
+ background_area = part_mask_of(['Background'], schp_lip_mask, LIP_MAPPING) & part_mask_of(['Background'], schp_atr_mask, ATR_MAPPING)
mask_dense_area = part_mask_of(MASK_DENSE_PARTS[part], densepose_mask, DENSE_INDEX_MAP)
mask_dense_area = cv2.resize(mask_dense_area.astype(np.uint8), None, fx=0.25, fy=0.25, interpolation=cv2.INTER_NEAREST)
mask_dense_area = cv2.dilate(mask_dense_area, dilate_kernel, iterations=2)
@@ -248,17 +257,15 @@ class AutoMaskerSeg:
preprocess_results = self.preprocess_image(image)
mask = self.cloth_agnostic_mask(
preprocess_results['densepose'],
- preprocess_results['atr'],
- # preprocess_results['schp_lip'],
- # preprocess_results['schp_atr'],
+ preprocess_results['schp_lip'],
+ preprocess_results['schp_atr'],
part=mask_type,
)
return {
'mask': mask,
'densepose': preprocess_results['densepose'],
- 'atr': preprocess_results['atr'],
- # 'schp_lip': preprocess_results['schp_lip'],
- # 'schp_atr': preprocess_results['schp_atr']
+ 'schp_lip': preprocess_results['schp_lip'],
+ 'schp_atr': preprocess_results['schp_atr']
}
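
With this change the masker composes DensePose with two SCHP parsers (ATR and LIP) and unions their part maps when building the protect and mask areas. A minimal usage sketch, assuming the default checkpoint layout from `__init__` and that the class is still invoked as a callable taking the person image plus a part name such as 'overall'; the paths and the exact call signature are assumptions, not part of this diff:

```python
from PIL import Image

from model.cloth_masker import AutoMasker

automasker = AutoMasker(
    densepose_ckpt="./Models/DensePose",
    schp_ckpt="./Models/SCHP",  # expected to contain the ATR and LIP .pth checkpoints named in __init__
    device="cuda",
)

person = Image.open("person.png")        # placeholder image path
result = automasker(person, "overall")   # assumed signature: (image, mask_type)

# Returned dict also carries 'densepose', 'schp_lip', and 'schp_atr' per the diff above.
result["mask"].save("agnostic_mask.png")
```
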
diff --git a/model/segformer_b2.py b/model/segformer_b2.py
deleted file mode 100644
index 8ae130f984042242f529e77bb55fa9de49f0a13e..0000000000000000000000000000000000000000
--- a/model/segformer_b2.py
+++ /dev/null
@@ -1,65 +0,0 @@
-from transformers import SegformerImageProcessor, AutoModelForSemanticSegmentation
-from PIL import Image
-import requests
-import matplotlib.pyplot as plt
-import torch.nn as nn
-
-
-FASHION_MAP = {
- "0":"Everything Else", "1": "shirt, blouse", "2": "top, t-shirt, sweatshirt",
- "3": "sweater", "4": "cardigan", "5": "jacket", "6": "vest", "7": "pants",
- "8": "shorts", "9": "skirt", "10": "coat", "11": "dress", "12": "jumpsuit",
- "13": "cape", "14": "glasses", "15": "hat", "16": "headband, head covering, hair accessory",
- "17": "tie", "18": "glove", "19": "watch", "20": "belt", "21": "leg warmer",
- "22": "tights, stockings", "23": "sock", "24": "shoe", "25": "bag, wallet",
- "26": "scarf", "27": "umbrella", "28": "hood", "29": "collar", "30": "lapel",
- "31": "epaulette", "32": "sleeve", "33": "pocket", "34": "neckline", "35": "buckle",
- "36": "zipper", "37": "applique", "38": "bead", "39": "bow", "40": "flower", "41": "fringe",
- "42": "ribbon", "43": "rivet", "44": "ruffle", "45": "sequin", "46": "tassel"
-}
-
-
-HUMAN_MAP = {
- "0":"Background","1":"shirt, blouse","2":"top, t-shirt, sweatshirt","3":"sweater",
- "4":"cardigan","5":"jacket","6":"vest","7":"pants","8":"shorts","9":"skirt",
- "10":"coat","11":"dress","12":"jumpsuit","13":"cape","14":"glasses","15":"hat",
- "16":"headband, head covering, hair accessory","17":"tie","18":"glove","19":"watch",
- "20":"belt","21":"leg warmer","22":"tights, stockings","23":"sock","24":"shoe",
- "25":"bag, wallet","26":"scarf","27":"umbrella","28":"hood","29":"collar","30":"lapel",
- "31":"epaulette","32":"sleeve","33":"pocket","34":"neckline","35":"buckle","36":"zipper",
- "37":"applique","38":"bead","39":"bow","40":"flower","41":"fringe","42":"ribbon",
- "43":"rivet","44":"ruffle","45":"sequin","46":"tassel","47":"Hair","48":"Sunglasses",
- "49":"Upper-clothes","50":"Left-shoe","51":"Right-shoe","52":"Face","53":"Left-leg",
- "54":"Right-leg","55":"Left-arm","56":"Right-arm"
-}
-
-
-
-class Segformer:
- def __init__(self, model_name, device='cuda'):
- self.device = device
- self.processor = SegformerImageProcessor.from_pretrained(model_name)
- self.model = AutoModelForSemanticSegmentation.from_pretrained(model_name).to(device)
-
-
- def predict(self, image: Image):
- inputs = self.processor(images=image, return_tensors="pt").to(self.device)
- outputs = self.model(**inputs)
- logits = outputs.logits.cpu()
-
- upsampled_logits = nn.functional.interpolate(
- logits,
- size=image.size[::-1],
- mode="bilinear",
- align_corners=False,
- )
- pred_seg = upsampled_logits.argmax(dim=1)[0]
- # to PIL image
- pred_seg = Image.fromarray(pred_seg.byte().cpu().numpy())
- return pred_seg
-
- def __call__(self, image: Image):
- return self.predict(image)
-
-
-
diff --git a/playground.py b/playground.py
deleted file mode 100644
index f2d936db364c945e11ab77e98632ed027149d4ce..0000000000000000000000000000000000000000
--- a/playground.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from model.segformer_b2 import Segformer
-from PIL import Image
-from model.cloth_masker import AutoMaskerSeg
-# model = Segformer("/home/chongzheng_p23/data/Projects/CatVTON-main/Models/segformer_b3_clothes")
-image = Image.open("/home/chongzheng_p23/data/Projects/CatVTON-main/resource/demo/example/person/women/1-model_3.png")
-# result = model(image)
-# result.save("a.png")
-
-masker = AutoMaskerSeg(
- densepose_ckpt="/home/chongzheng_p23/data/Projects/CatVTON-main/Models/densepose",
- segformer_ckpt="/home/chongzheng_p23/data/Projects/CatVTON-main/Models/segformer_b3_clothes")
-
-
-
-result = masker(image)['mask']
-result.save("b.png")
-
diff --git a/requirements.txt b/requirements.txt
index 52aa6e60095703733af7d219da99ae96e5174bad..090a050ec33f478ac3ad0bc9af0a18166d696fba 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,6 +13,10 @@ setuptools==51.0.0
scikit-image==0.24.0
tqdm==4.66.4
transformers==4.27.3
-xformers==0.0.23.post1
-Ninja==1.11.1.1
-git+https://github.com/facebookresearch/detectron2@main#subdirectory=projects/DensePose
\ No newline at end of file
+fvcore==0.1.5.post20221221
+cloudpickle==3.0.0
+omegaconf==2.3.0
+pycocotools==2.0.8
+av==12.3.0
+inplace-abn==1.1.0
+gradio==4.41.0
\ No newline at end of file