Source code for pytorch3d.implicitron.tools.metric_utils

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# pyre-unsafe

import math
from typing import Optional, Tuple

import torch
from torch.nn import functional as F



[docs]
def eval_depth(
    pred: torch.Tensor,
    gt: torch.Tensor,
    crop: int = 1,
    mask: Optional[torch.Tensor] = None,
    get_best_scale: bool = True,
    mask_thr: float = 0.5,
    best_scale_clamp_thr: float = 1e-4,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Evaluate the depth error between the prediction `pred` and the ground
    truth `gt`.

    Args:
        pred: A tensor of shape (N, 1, H, W) denoting the predicted depth maps.
        gt: A tensor of shape (N, 1, H, W) denoting the ground truth depth maps.
        crop: The number of pixels to crop from the border.
        mask: A mask denoting the valid regions of the gt depth.
        get_best_scale: If `True`, estimates a scaling factor of the predicted depth
            that yields the best mean squared error between `pred` and `gt`.
            This is typically enabled for cases where predicted reconstructions
            are inherently defined up to an arbitrary scaling factor.
        mask_thr: A constant used to threshold the `mask` to specify the valid
            regions.
        best_scale_clamp_thr: The threshold for clamping the divisor in best
            scale estimation.

    Returns:
        mse_depth: Mean squared error between `pred` and `gt`.
        abs_depth: Mean absolute difference between `pred` and `gt`.
    """

    # chuck out the border
    if crop > 0:
        gt = gt[:, :, crop:-crop, crop:-crop]
        pred = pred[:, :, crop:-crop, crop:-crop]

    if mask is not None:
        # mult gt by mask
        if crop > 0:
            mask = mask[:, :, crop:-crop, crop:-crop]
        gt = gt * (mask > mask_thr).float()

    dmask = (gt > 0.0).float()
    dmask_mass = torch.clamp(dmask.sum((1, 2, 3)), 1e-4)

    if get_best_scale:
        # mult preds by a scalar "scale_best"
        # 	s.t. we get best possible mse error
        scale_best = estimate_depth_scale_factor(pred, gt, dmask, best_scale_clamp_thr)
        pred = pred * scale_best[:, None, None, None]

    df = gt - pred

    # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`.
    mse_depth = (dmask * (df**2)).sum((1, 2, 3)) / dmask_mass
    abs_depth = (dmask * df.abs()).sum((1, 2, 3)) / dmask_mass

    return mse_depth, abs_depth




[docs]
def estimate_depth_scale_factor(pred, gt, mask, clamp_thr):
    xy = pred * gt * mask
    xx = pred * pred * mask
    scale_best = xy.mean((1, 2, 3)) / torch.clamp(xx.mean((1, 2, 3)), clamp_thr)
    return scale_best




[docs]
def calc_psnr(
    x: torch.Tensor,
    y: torch.Tensor,
    mask: Optional[torch.Tensor] = None,
) -> torch.Tensor:
    """
    Calculates the Peak-signal-to-noise ratio between tensors `x` and `y`.
    """
    mse = calc_mse(x, y, mask=mask)
    psnr = torch.log10(mse.clamp(1e-10)) * (-10.0)
    return psnr




[docs]
def calc_mse(
    x: torch.Tensor,
    y: torch.Tensor,
    mask: Optional[torch.Tensor] = None,
) -> torch.Tensor:
    """
    Calculates the mean square error between tensors `x` and `y`.
    """
    if mask is None:
        # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`.
        return torch.mean((x - y) ** 2)
    else:
        # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`.
        return (((x - y) ** 2) * mask).sum() / mask.expand_as(x).sum().clamp(1e-5)




[docs]
def calc_bce(
    pred: torch.Tensor,
    gt: torch.Tensor,
    equal_w: bool = True,
    pred_eps: float = 0.01,
    mask: Optional[torch.Tensor] = None,
    lerp_bound: Optional[float] = None,
) -> torch.Tensor:
    """
    Calculates the binary cross entropy.
    """
    if pred_eps > 0.0:
        # up/low bound the predictions
        pred = torch.clamp(pred, pred_eps, 1.0 - pred_eps)

    if mask is None:
        mask = torch.ones_like(gt)

    if equal_w:
        mask_fg = (gt > 0.5).float() * mask
        mask_bg = (1 - mask_fg) * mask
        weight = mask_fg / mask_fg.sum().clamp(1.0) + mask_bg / mask_bg.sum().clamp(1.0)
        # weight sum should be at this point ~2
        # pyre-fixme[58]: `/` is not supported for operand types `int` and `Tensor`.
        weight = weight * (weight.numel() / weight.sum().clamp(1.0))
    else:
        weight = torch.ones_like(gt) * mask

    if lerp_bound is not None:
        return binary_cross_entropy_lerp(pred, gt, weight, lerp_bound)
    else:
        return F.binary_cross_entropy(pred, gt, reduction="mean", weight=weight)




[docs]
def binary_cross_entropy_lerp(
    pred: torch.Tensor,
    gt: torch.Tensor,
    weight: torch.Tensor,
    lerp_bound: float,
):
    """
    Binary cross entropy which avoids exploding gradients by linearly
    extrapolating the log function for log(1-pred) mad log(pred) whenever
    pred or 1-pred is smaller than lerp_bound.
    """
    loss = log_lerp(1 - pred, lerp_bound) * (1 - gt) + log_lerp(pred, lerp_bound) * gt
    loss_reduced = -(loss * weight).sum() / weight.sum().clamp(1e-4)
    return loss_reduced




[docs]
def log_lerp(x: torch.Tensor, b: float):
    """
    Linearly extrapolated log for x < b.
    """
    assert b > 0
    return torch.where(x >= b, x.log(), math.log(b) + (x - b) / b)




[docs]
def rgb_l1(
    pred: torch.Tensor, target: torch.Tensor, mask: Optional[torch.Tensor] = None
) -> torch.Tensor:
    """
    Calculates the mean absolute error between the predicted colors `pred`
    and ground truth colors `target`.
    """
    if mask is None:
        mask = torch.ones_like(pred[:, :1])
    return ((pred - target).abs() * mask).sum(dim=(1, 2, 3)) / mask.sum(
        dim=(1, 2, 3)
    ).clamp(1)




[docs]
def huber(dfsq: torch.Tensor, scaling: float = 0.03) -> torch.Tensor:
    """
    Calculates the huber function of the input squared error `dfsq`.
    The function smoothly transitions from a region with unit gradient
    to a hyperbolic function at `dfsq=scaling`.
    """
    loss = (safe_sqrt(1 + dfsq / (scaling * scaling), eps=1e-4) - 1) * scaling
    return loss




[docs]
def neg_iou_loss(
    predict: torch.Tensor,
    target: torch.Tensor,
    mask: Optional[torch.Tensor] = None,
) -> torch.Tensor:
    """
    This is a great loss because it emphasizes on the active
    regions of the predict and targets
    """
    return 1.0 - iou(predict, target, mask=mask)




[docs]
def safe_sqrt(A: torch.Tensor, eps: float = 1e-4) -> torch.Tensor:
    """
    performs safe differentiable sqrt
    """
    return (torch.clamp(A, float(0)) + eps).sqrt()




[docs]
def iou(
    predict: torch.Tensor,
    target: torch.Tensor,
    mask: Optional[torch.Tensor] = None,
) -> torch.Tensor:
    """
    This is a great loss because it emphasizes on the active
    regions of the predict and targets
    """
    dims = tuple(range(predict.dim())[1:])
    if mask is not None:
        predict = predict * mask
        target = target * mask
    intersect = (predict * target).sum(dims)
    union = (predict + target - predict * target).sum(dims) + 1e-4
    return (intersect / union).sum() / intersect.numel()




[docs]
def beta_prior(pred: torch.Tensor, cap: float = 0.1) -> torch.Tensor:
    if cap <= 0.0:
        raise ValueError("capping should be positive to avoid unbound loss")

    min_value = math.log(cap) + math.log(cap + 1.0)
    return (torch.log(pred + cap) + torch.log(1.0 - pred + cap)).mean() - min_value