climateGAN / climategan /eval_metrics.py
vict0rsch's picture
initial commit from `vict0rsch/climateGAN`
ce190ee
import cv2
import numpy as np
import torch
from skimage import filters
from sklearn.metrics.pairwise import euclidean_distances
import matplotlib.pyplot as plt
import seaborn as sns
from copy import deepcopy
# ------------------------------------------------------------------------------
# ----- Evaluation metrics for a pair of binary mask images (pred, target) -----
# ------------------------------------------------------------------------------
def get_accuracy(arr1, arr2):
    """Pixel accuracy: share of positions where both arrays hold the same value.

    Args:
        arr1 (np.array): first array; its ``size`` is used as the denominator
        arr2 (np.array): second array, compared element-wise with ``arr1``

    Returns:
        Number of equal elements divided by ``arr1.size``
    """
    matching = arr1 == arr2
    n_matching = matching.sum()
    return n_matching / arr1.size
def trimap(pred_im, gt_im, thickness=8):
    """Accuracy restricted to a band drawn around the contours of the target,
    for binary images (0-1 values)

    Args:
        pred_im (Image): Prediction
        gt_im (Image): Target; its contours define the evaluated band and its
            ``size`` is unpacked as (width, height)
        thickness (int, optional): Thickness handed to ``cv2.drawContours`` when
            the contours are drawn on the band mask. Defaults to 8.

    Returns:
        ``get_accuracy`` of the prediction vs. the target, computed only on the
        pixels covered by the drawn contours
    """
    width, height = gt_im.size

    # The contour search works on its own array copy of the target
    found_contours, contour_tree = cv2.findContours(
        np.array(gt_im), mode=cv2.RETR_TREE, method=cv2.CHAIN_APPROX_SIMPLE
    )

    # Paint every contour with value 1 on an empty (height, width) canvas
    band = np.zeros((height, width), dtype=np.int32)
    cv2.drawContours(
        band, found_contours, -1, (1), thickness=thickness, hierarchy=contour_tree
    )

    # Keep only the pixels that fall inside the painted band
    in_band = np.where(band > 0)
    gt_in_band = np.array(gt_im)[in_band]
    pred_in_band = np.array(pred_im)[in_band]
    return get_accuracy(pred_in_band, gt_in_band)
def iou(pred_im, gt_im):
    """
    IoU for binary masks (0-1 values)

    Both masks are converted to float64 before any arithmetic. With boolean
    arrays, ``+`` is a logical OR and ``*`` a logical AND, which would make the
    "sum of both masks" already equal to the union and under-count it once the
    intersection is subtracted.

    Args:
        pred_im (array-like): Predicted mask
        gt_im (array-like): Target mask, same shape as the prediction

    Returns:
        float: intersection / union. nan when the union is empty (0 / 0).
    """
    pred = np.asarray(pred_im, dtype=np.float64)
    gt = np.asarray(gt_im, dtype=np.float64)
    intersection = (pred * gt).sum()
    # |A u B| = |A| + |B| - |A n B|
    union = (pred + gt).sum() - intersection
    return intersection / union
def f1_score(pred_im, gt_im):
    """
    F1 (Dice) score for binary masks (0-1 values)

    Computed as 2 * |pred * gt| / (|pred| + |gt|). Both masks are converted to
    float64 first: with boolean arrays ``+`` is a logical OR, so the
    denominator would count overlapping pixels only once.

    Args:
        pred_im (array-like): Predicted mask
        gt_im (array-like): Target mask, same shape as the prediction

    Returns:
        float: the F1 score. nan when both masks are empty (0 / 0).
    """
    pred = np.asarray(pred_im, dtype=np.float64)
    gt = np.asarray(gt_im, dtype=np.float64)
    intersection = (pred * gt).sum()
    return 2 * intersection / (pred + gt).sum()
def accuracy(pred_im, gt_im):
    """
    Pixel accuracy between a prediction and a target

    Args:
        pred_im (array-like): Prediction. When it has more dimensions than the
            (squeezed) target, it is reduced with an argmax over axis 1.
        gt_im (array-like): Target. A 4D target must have a singleton axis 1,
            which is dropped before the comparison.

    Returns:
        float: number of matching elements divided by the target size
    """
    pred = np.asarray(pred_im)
    gt = np.asarray(gt_im)

    # Drop the singleton channel axis on the array that is actually compared,
    # otherwise (N, H, W) == (N, 1, H, W) broadcasts to (N, N, H, W).
    if gt.ndim == 4:
        assert gt.shape[1] == 1
        gt = gt[:, 0, :, :]

    # Per-class scores -> class indices
    if pred.ndim > gt.ndim:
        pred = np.argmax(pred, axis=1)

    return float((pred == gt).sum()) / gt.size
def mIOU(pred, label, average="macro"):
    """
    Adapted from:
    https://stackoverflow.com/questions/62461379/multiclass-semantic-segmentation-model-evaluation

    Mean IoU between a prediction tensor and a label tensor.

    pred is a tensor N x C x H x W of per-class scores (the argmax over dim 1 is
    taken) and label is a N x H x W tensor with int labels per pixel.
    The original authors state that average="macro" matches sklearn's
    jaccard_score.

    When there are more than 2 classes every class index is evaluated;
    otherwise only the largest value present in label is. Classes that appear
    neither in the prediction nor in the label are skipped.

    Args:
        pred (torch.tensor): predicted per-class scores
        label (torch.tensor): labels
        average: "weighted" averages the per-class IoUs weighted by the number
            of pixels *predicted* as each class; any other value (default
            "macro") takes the plain mean

    Returns:
        float: mIOU, can be nan
    """
    n_classes = pred.shape[-3]
    flat_pred = torch.argmax(pred, dim=1).squeeze(1).view(-1)
    flat_label = label.view(-1)

    # Class ids run from 0 to n_classes - 1, so an ignore_index equal to
    # n_classes never enters the IoU computation.
    if n_classes > 2:
        classes = list(range(n_classes))
    else:
        classes = [int(flat_label.max().item())]

    ious = []
    weights = []
    for class_id in classes:
        pred_mask = flat_pred == class_id
        label_mask = flat_label == class_id
        n_pred = pred_mask.long().sum().item()
        n_label = label_mask.long().sum().item()

        # Class absent from both prediction and label: nothing to score
        if n_pred == 0 and n_label == 0:
            continue

        n_intersection = pred_mask[label_mask].long().sum().item()
        n_union = n_pred + n_label - n_intersection
        weights.append(n_pred)
        ious.append(float(n_intersection) / float(n_union))

    if not ious:
        return float("nan")
    if average == "weighted":
        return np.sum(np.multiply(weights, ious) / np.sum(weights))
    return np.mean(ious)
def masker_classification_metrics(pred, label, labels_dict=None):
    """
    Classification metrics for the masker, and the corresponding maps. If the
    predictions are soft, the errors are weighted accordingly. Metrics computed:

        tpr : float
            True positive rate

        tpt : float
            True positive total (divided by total population)

        tnr : float
            True negative rate

        tnt : float
            True negative total (divided by total population)

        fpr : float
            False positive rate: rate of predicted mask on cannot flood

        fpt : float
            False positive total (divided by total population)

        fnr : float
            False negative rate: rate of missed mask on must flood

        fnt : float
            False negative total (divided by total population)

        mnr : float
            "May" negative rate (labeled as "may", predicted as no-mask)

        mpr : float
            "May" positive rate (labeled as "may", predicted as mask)

        accuracy : float
            Accuracy

        error : float
            Error

        precision : float
            Precision, considering only cannot and must flood labels

        f05 : float
            F0.5 score, considering only cannot and must flood labels

        accuracy_must_may : float
            Accuracy considering only the must and may areas

    A rate whose class is absent from ``label`` is a 0 / 0 division and is
    reported as nan (as are the metrics derived from it); this no longer trips
    the internal consistency checks.

    Parameters
    ----------
    pred : array-like
        Mask prediction

    label : array-like
        Mask ground truth labels

    labels_dict : dict, optional
        A dictionary with the identifier of each class (cannot, must, may).
        Defaults to {"cannot": 0, "must": 1, "may": 2}.

    Returns
    -------
    metrics_dict : dict
        A dictionary with metric name and value pairs

    maps_dict : dict
        A dictionary containing the metric maps
    """
    # None sentinel instead of a dict literal shared between calls
    if labels_dict is None:
        labels_dict = {"cannot": 0, "must": 1, "may": 2}

    # One 0/1 mask and one pixel count per class
    must_mask = np.asarray(label == labels_dict["must"], dtype=int)
    cannot_mask = np.asarray(label == labels_dict["cannot"], dtype=int)
    may_mask = np.asarray(label == labels_dict["may"], dtype=int)
    n_must = np.sum(must_mask)
    n_cannot = np.sum(cannot_mask)
    n_may = np.sum(may_mask)
    n_total = np.prod(label.shape)

    tp_map = pred * must_mask
    tn_map = (1.0 - pred) * cannot_mask
    fp_map = pred * cannot_mask
    fn_map = (1.0 - pred) * must_mask
    may_neg_map = (1.0 - pred) * may_mask
    may_pos_map = pred * may_mask

    # An absent class gives 0 / 0: let it become nan without a numpy warning
    with np.errstate(divide="ignore", invalid="ignore"):
        tpr = np.sum(tp_map) / n_must
        tpt = np.sum(tp_map) / n_total

        tnr = np.sum(tn_map) / n_cannot
        tnt = np.sum(tn_map) / n_total

        fpr = np.sum(fp_map) / n_cannot
        fpt = np.sum(fp_map) / n_total

        fnr = np.sum(fn_map) / n_must
        fnt = np.sum(fn_map) / n_total

        mnr = np.sum(may_neg_map) / n_may
        mpr = np.sum(may_pos_map) / n_may

        accuracy = tpt + tnt
        error = fpt + fnt

        # Assertions: complementary rates must sum to one (nan pairs with nan
        # when the class is absent)
        assert np.isclose(
            tpr, 1.0 - fnr, equal_nan=True
        ), "TPR: {:.4f}, FNR: {:.4f}".format(tpr, fnr)
        assert np.isclose(
            tnr, 1.0 - fpr, equal_nan=True
        ), "TNR: {:.4f}, FPR: {:.4f}".format(tnr, fpr)
        assert np.isclose(
            mpr, 1.0 - mnr, equal_nan=True
        ), "MPR: {:.4f}, MNR: {:.4f}".format(mpr, mnr)

        # The 1e-9 terms keep the denominators away from zero
        precision = np.sum(tp_map) / (np.sum(tp_map) + np.sum(fp_map) + 1e-9)
        beta = 0.5
        f05 = ((1 + beta ** 2) * precision * tpr) / (
            beta ** 2 * precision + tpr + 1e-9
        )
        accuracy_must_may = (np.sum(tp_map) + np.sum(may_neg_map)) / (
            n_must + n_may
        )

    metrics_dict = {
        "tpr": tpr,
        "tpt": tpt,
        "tnr": tnr,
        "tnt": tnt,
        "fpr": fpr,
        "fpt": fpt,
        "fnr": fnr,
        "fnt": fnt,
        "mpr": mpr,
        "mnr": mnr,
        "accuracy": accuracy,
        "error": error,
        "precision": precision,
        "f05": f05,
        "accuracy_must_may": accuracy_must_may,
    }
    maps_dict = {
        "tp": tp_map,
        "tn": tn_map,
        "fp": fp_map,
        "fn": fn_map,
        "may_pos": may_pos_map,
        "may_neg": may_neg_map,
    }

    return metrics_dict, maps_dict
def pred_cannot(pred, label, label_cannot=0):
    """
    Masker metric: false positive map and false positive rate. Soft predictions
    contribute to the error proportionally to their value.

    Parameters
    ----------
    pred : array-like
        Mask prediction

    label : array-like
        Mask ground truth labels

    label_cannot : int
        The label index of "cannot flood"

    Returns
    -------
    fp_map : array-like
        The map of false positives: predicted mask on cannot flood

    fpr : float
        False positive rate: sum of ``fp_map`` divided by the number of
        "cannot flood" pixels
    """
    is_cannot = label == label_cannot
    # Keep the prediction only where the ground truth is "cannot flood"
    fp_map = pred * np.asarray(is_cannot, dtype=int)
    n_cannot = np.sum(is_cannot)
    return fp_map, np.sum(fp_map) / n_cannot
def missed_must(pred, label, label_must=1):
    """
    Masker metric: false negative map and false negative rate. Soft predictions
    contribute to the error proportionally to ``1 - pred``.

    Parameters
    ----------
    pred : array-like
        Mask prediction

    label : array-like
        Mask ground truth labels

    label_must : int
        The label index of "must flood"

    Returns
    -------
    fn_map : array-like
        The map of false negatives: missed mask on must flood

    fnr : float
        False negative rate: sum of ``fn_map`` divided by the number of
        "must flood" pixels
    """
    is_must = label == label_must
    not_masked = 1.0 - pred
    # Keep the un-masked amount only where the ground truth is "must flood"
    fn_map = not_masked * np.asarray(is_must, dtype=int)
    n_must = np.sum(is_must)
    return fn_map, np.sum(fn_map) / n_must
def may_flood(pred, label, label_may=2):
    """
    Masker metric: "may" negative / positive maps and rates. Soft predictions
    are split between the two maps proportionally to ``1 - pred`` and ``pred``.

    Parameters
    ----------
    pred : array-like
        Mask prediction

    label : array-like
        Mask ground truth labels

    label_may : int
        The label index of "may flood"

    Returns
    -------
    may_neg_map : array-like
        The map of "may" negatives

    may_pos_map : array-like
        The map of "may" positives

    mnr : float
        "May" negative rate

    mpr : float
        "May" positive rate
    """
    is_may = label == label_may
    may_mask = np.asarray(is_may, dtype=int)
    n_may = np.sum(is_may)

    # Un-masked and masked amounts, restricted to the "may flood" area
    may_neg_map = (1.0 - pred) * may_mask
    may_pos_map = pred * may_mask

    mnr = np.sum(may_neg_map) / n_may
    mpr = np.sum(may_pos_map) / n_may
    return may_neg_map, may_pos_map, mnr, mpr
def masker_metrics(pred, label, label_cannot=0, label_must=1):
    """
    Computes a set of metrics for the masker

    Parameters
    ----------
    pred : array-like
        Mask prediction

    label : array-like
        Mask ground truth labels

    label_must : int
        The label index of "must flood"

    label_cannot : int
        The label index of "cannot flood"

    Returns
    -------
    tpr : float
        True positive rate

    tnr : float
        True negative rate

    precision : float
        Precision, considering only cannot and must flood labels

    f1 : float
        F1 score, considering only cannot and must flood labels
    """
    must_mask = np.asarray(label == label_must, dtype=int)
    cannot_mask = np.asarray(label == label_cannot, dtype=int)

    tp_map = pred * must_mask
    tn_map = (1.0 - pred) * cannot_mask
    fp_map = pred * cannot_mask
    # The false negative map is not needed by any returned metric, so it is
    # not computed here.

    tpr = np.sum(tp_map) / np.sum(label == label_must)
    tnr = np.sum(tn_map) / np.sum(label == label_cannot)

    # No epsilon in the denominators: a zero denominator yields nan
    precision = np.sum(tp_map) / (np.sum(tp_map) + np.sum(fp_map))
    f1 = 2 * (precision * tpr) / (precision + tpr)

    return tpr, tnr, precision, f1
def get_confusion_matrix(tpr, tnr, fpr, fnr, mpr, mnr):
    """
    Builds the confusion matrix of a masker prediction over a set of samples,
    from the per-sample rates.

    Parameters
    ----------
    tpr : vector-like
        True positive rate

    tnr : vector-like
        True negative rate

    fpr : vector-like
        False positive rate

    fnr : vector-like
        False negative rate

    mpr : vector-like
        "May" positive rate

    mnr : vector-like
        "May" negative rate

    Returns
    -------
    confusion_matrix : 3x3 array
        Confusion matrix of the mean rates: [i, j] = [pred, true]
            | tnr fnr mnr |
            | fpr tpr mpr |
            | 0.  0.  0.  |

    confusion_matrix_std : 3x3 array
        Standard deviation of the confusion matrix, same layout
    """
    samples = {
        "tpr": tpr,
        "tnr": tnr,
        "fpr": fpr,
        "fnr": fnr,
        "mpr": mpr,
        "mnr": mnr,
    }

    # Mean and standard deviation of every rate over all samples
    means = {}
    stds = {}
    for name, values in samples.items():
        means[name] = np.mean(values)
        stds[name] = np.std(values)

    # Assertions: complementary mean rates must sum to one
    complementary = (
        ("tpr", "fnr", "TPR: {:.4f}, FNR: {:.4f}"),
        ("tnr", "fpr", "TNR: {:.4f}, FPR: {:.4f}"),
        ("mpr", "mnr", "MPR: {:.4f}, MNR: {:.4f}"),
    )
    for positive, negative, template in complementary:
        assert np.isclose(means[positive], 1.0 - means[negative]), template.format(
            means[positive], means[negative]
        )

    # Rows are predictions, columns are true labels; the last row stays at zero
    layout = (
        ("tnr", "fnr", "mnr"),
        ("fpr", "tpr", "mpr"),
    )
    confusion_matrix = np.zeros((3, 3))
    confusion_matrix_std = np.zeros((3, 3))
    for row, names in enumerate(layout):
        for col, name in enumerate(names):
            confusion_matrix[row, col] = means[name]
            confusion_matrix_std[row, col] = stds[name]

    return confusion_matrix, confusion_matrix_std
def edges_coherence_std_min(pred, label, label_must=1, bin_th=0.5):
    """
    The standard deviation of the minimum distance between the edge of the prediction
    and the edge of the "must flood" label.

    Parameters
    ----------
    pred : array-like
        Mask prediction

    label : array-like
        Mask ground truth labels

    label_must : int
        The label index of "must flood"

    bin_th : float
        The threshold for the binarization of the prediction

    Returns
    -------
    metric : float
        The value of the metric. 1.0 when the binarized prediction has no edge.

    pred_edge : array-like
        The edges images of the prediction, for visualization

    label_edge : array-like
        The edges images of the "must flood" label, for visualization
    """
    # Keep must flood label only: 1.0 where label == label_must, 0.0 elsewhere.
    # A single comparison on a fresh array stays correct for any label_must
    # value and leaves the caller's label untouched.
    label = np.asarray(np.asarray(label) == label_must, dtype=float)

    # Binarize prediction
    pred = np.asarray(pred > bin_th, dtype=float)

    # Compute edges
    pred = filters.sobel(pred)
    label = filters.sobel(label)

    # Location of edges
    pred_coord = np.argwhere(pred > 0)
    label_coord = np.argwhere(label > 0)

    # Handle blank predictions
    if pred_coord.shape[0] == 0:
        return 1.0, pred, label

    # NOTE(review): a label without any "must flood" edge gives an empty
    # label_coord; presumably euclidean_distances / np.min reject it - confirm
    # and decide which value the metric should take in that case.

    # Pairwise distances between pred and label edges, normalized by the number
    # of rows of the edge image
    dist_mat = np.divide(euclidean_distances(pred_coord, label_coord), pred.shape[0])

    # Standard deviation of the minimum distance from pred to label
    edge_coherence = np.std(np.min(dist_mat, axis=1))

    return edge_coherence, pred, label
def boxplot_metric(
    output_filename,
    df,
    metric,
    dict_metrics,
    do_stripplot=False,
    dict_models=None,
    dpi=300,
    **snskwargs
):
    """
    Draws a box plot of one metric per model and saves it to a file.

    Parameters
    ----------
    output_filename : str
        Destination handed to ``Figure.savefig``

    df : DataFrame
        Data for seaborn; it is plotted with ``x="model"`` and ``y=metric``

    metric : str
        Column plotted on the y axis, and key into ``dict_metrics``

    dict_metrics : dict
        Maps a metric key to the text used as y-axis label

    do_stripplot : bool
        If True, a strip plot of the samples is drawn over the box plot (whose
        fliers are then given size 0)

    dict_models : dict, optional
        Maps the text of each x tick label to the label to display. Tick labels
        are left untouched when it is empty or None.

    dpi : int
        Resolution of the figure

    **snskwargs
        Extra keyword arguments forwarded to the seaborn plotting calls
    """
    fig = plt.figure(dpi=dpi)

    if not do_stripplot:
        ax = sns.boxplot(x="model", y=metric, data=df, **snskwargs)
    else:
        # Fliers are hidden since the strip plot already shows every sample
        sns.boxplot(x="model", y=metric, data=df, fliersize=0.0, **snskwargs)
        ax = sns.stripplot(
            x="model", y=metric, data=df, size=2.0, color="gray", **snskwargs
        )

    # Set axes labels
    ax.set_xlabel("Models", rotation=0, fontsize="medium")
    ax.set_ylabel(dict_metrics[metric], rotation=90, fontsize="medium")

    # Spines
    sns.despine(left=True, bottom=True)

    # X-Tick labels
    if dict_models:
        current_ticks = ax.get_xticklabels()
        display_names = [dict_models[tick.get_text()] for tick in current_ticks]
        ax.set_xticklabels(
            display_names,
            rotation=20,
            verticalalignment="top",
            horizontalalignment="right",
            fontsize="xx-small",
        )

    save_options = dict(
        dpi=fig.dpi,
        bbox_inches="tight",
        facecolor="white",
        transparent=False,
    )
    fig.savefig(output_filename, **save_options)

    # Release the figure
    fig.clear()
    plt.close(fig)
def clustermap_metric(
    output_filename,
    df,
    metric,
    dict_metrics,
    method="average",
    cluster_metric="euclidean",
    dict_models=None,
    dpi=300,
    **snskwargs
):
    """
    Draws a seaborn cluster map of ``df`` and saves it to a file.

    Parameters
    ----------
    output_filename : str
        Destination handed to ``Figure.savefig``

    df : DataFrame
        Data passed as is to ``sns.clustermap``

    metric : str
        Key into ``dict_metrics``; only used for the color bar title

    dict_metrics : dict
        Maps a metric key to the text used as color bar title

    method : str
        Forwarded to ``sns.clustermap`` as ``method``

    cluster_metric : str
        Forwarded to ``sns.clustermap`` as ``metric``

    dict_models : dict, optional
        Maps the text of each heatmap x tick label to the label to display.
        Tick labels are left untouched when it is empty or None.

    dpi : int
        Resolution used when saving the figure

    **snskwargs
        Extra keyword arguments forwarded to ``sns.clustermap``
    """
    grid = sns.clustermap(data=df, method=method, metric=cluster_metric, **snskwargs)
    heatmap_ax = grid.ax_heatmap
    cbar_ax = grid.ax_cbar

    # Set axes labels
    heatmap_ax.set_xlabel("Models", rotation=0, fontsize="medium")
    heatmap_ax.set_ylabel("Images", rotation=90, fontsize="medium")

    # Set title
    cbar_ax.set_title(dict_metrics[metric], rotation=0, fontsize="x-large")

    # X-Tick labels
    if dict_models:
        current_ticks = heatmap_ax.get_xticklabels()
        display_names = [dict_models[tick.get_text()] for tick in current_ticks]
        heatmap_ax.set_xticklabels(
            display_names,
            rotation=20,
            verticalalignment="top",
            horizontalalignment="right",
            fontsize="small",
        )

    save_options = dict(
        dpi=dpi,
        bbox_inches="tight",
        facecolor="white",
        transparent=False,
    )
    grid.fig.savefig(output_filename, **save_options)

    # Release the figure
    grid.fig.clear()
    plt.close(grid.fig)