Source code for deeptab.metrics.regression

"""Regression metrics (MSE, MAE, RMSE, R2, MAPE, PinballLoss).

All standard metrics delegate to :mod:`sklearn.metrics` internally.
The wrapper classes exist for three reasons:

1. **Uniform interface** -- each class carries ``name``, ``higher_is_better``,
   and ``needs_raw`` so the training loop and registry can inspect them
   without hard-coding metric names.
2. **LSS compatibility** -- ``model.predict()`` returns a 2-D array of shape
   ``(n_samples, n_params)`` for distributional models.  The helper
   :func:`_extract_mean` pulls the first column (predicted mean) so sklearn
   functions receive the expected 1-D array.
3. **Consistent API** -- all metrics share the same
   ``metric(y_true, y_pred) -> float`` call signature regardless of their
   source.

Quick reference
---------------

.. list-table::
   :header-rows: 1
   :widths: 22 12 20 46

   * - Class
     - ``name``
     - ``higher_is_better``
     - Notes
   * - :class:`MeanSquaredError`
     - ``"mse"``
     - ``False``
     - Standard MSE; lower = better
   * - :class:`RootMeanSquaredError`
     - ``"rmse"``
     - ``False``
     - Same units as target; lower = better
   * - :class:`MeanAbsoluteError`
     - ``"mae"``
     - ``False``
     - Robust to outliers; lower = better
   * - :class:`R2Score`
     - ``"r2"``
     - ``True``
     - 1.0 = perfect; **higher = better**
   * - :class:`MeanAbsolutePercentageError`
     - ``"mape"``
     - ``False``
     - Relative error as a fraction (not multiplied by 100); avoid when targets are near zero
   * - :class:`PinballLoss`
     - ``"pinball"``
     - ``False``
     - Quantile regression; lower = better
"""

from __future__ import annotations

import numpy as np
from sklearn.metrics import mean_absolute_error as _mae
from sklearn.metrics import mean_absolute_percentage_error as _mape
from sklearn.metrics import mean_squared_error as _mse
from sklearn.metrics import r2_score as _r2

from .base import DeepTabMetric


def _extract_mean(y_pred: np.ndarray) -> np.ndarray:
    """Reduce a prediction array to the 1-D vector the metrics operate on.

    The input is converted with :func:`numpy.asarray`.  When the result is
    exactly 2-D -- the ``(n_samples, n_params)`` layout produced by LSS
    models, whose first column holds the predicted mean / location
    parameter -- column 0 is returned.  An array of any other rank is
    flattened with ``ravel()``.
    """
    preds = np.asarray(y_pred)
    # Only a rank-2 input is treated as a parameter matrix.
    return preds[:, 0] if preds.ndim == 2 else preds.ravel()



[docs]
class MeanSquaredError(DeepTabMetric):
    """Mean squared error between targets and point predictions.

    Thin wrapper around :func:`sklearn.metrics.mean_squared_error`.  The
    targets are flattened to 1-D and the predictions go through
    :func:`_extract_mean`, so a 2-D parameter array contributes only its
    first column (the predicted mean).
    """

    name = "mse"
    higher_is_better = False

    def __call__(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Return the MSE of ``y_pred`` against ``y_true`` as a Python float."""
        targets = np.asarray(y_true).ravel()
        point_preds = _extract_mean(y_pred)
        return float(_mse(targets, point_preds))




[docs]
class RootMeanSquaredError(DeepTabMetric):
    """Root mean squared error.

    Computed as the square root of
    :func:`sklearn.metrics.mean_squared_error` applied to the flattened
    targets and the output of :func:`_extract_mean`.
    """

    name = "rmse"
    higher_is_better = False

    def __call__(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Return the RMSE of ``y_pred`` against ``y_true`` as a Python float."""
        targets = np.asarray(y_true).ravel()
        point_preds = _extract_mean(y_pred)
        squared_error = _mse(targets, point_preds)
        return float(np.sqrt(squared_error))




[docs]
class MeanAbsoluteError(DeepTabMetric):
    """Mean absolute error.

    Thin wrapper around :func:`sklearn.metrics.mean_absolute_error`; the
    targets are flattened and the predictions are reduced with
    :func:`_extract_mean` before the call.
    """

    name = "mae"
    higher_is_better = False

    def __call__(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Return the MAE of ``y_pred`` against ``y_true`` as a Python float."""
        targets = np.asarray(y_true).ravel()
        point_preds = _extract_mean(y_pred)
        return float(_mae(targets, point_preds))




[docs]
class R2Score(DeepTabMetric):
    """Coefficient of determination (R2).

    Thin wrapper around :func:`sklearn.metrics.r2_score`.  Unlike the error
    metrics in this module, larger values are better; a perfect prediction
    scores 1.
    """

    name = "r2"
    higher_is_better = True

    def __call__(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Return the R2 of ``y_pred`` against ``y_true`` as a Python float."""
        targets = np.asarray(y_true).ravel()
        point_preds = _extract_mean(y_pred)
        return float(_r2(targets, point_preds))




[docs]
class MeanAbsolutePercentageError(DeepTabMetric):
    """Mean absolute percentage error.

    Thin wrapper around
    :func:`sklearn.metrics.mean_absolute_percentage_error`.  sklearn clips
    the denominator to ``np.finfo(np.float64).eps`` internally, so targets at
    or near zero yield very large values rather than a division error.
    sklearn reports the result as a relative value (a fraction), not as a
    percentage in ``[0, 100]``.
    """

    name = "mape"
    higher_is_better = False

    def __call__(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Return the MAPE of ``y_pred`` against ``y_true`` as a Python float."""
        targets = np.asarray(y_true).ravel()
        point_preds = _extract_mean(y_pred)
        return float(_mape(targets, point_preds))




[docs]
class PinballLoss(DeepTabMetric):
    """Pinball (quantile) loss at a single quantile level.

    Thin wrapper around :func:`sklearn.metrics.mean_pinball_loss`, evaluated
    at one level ``tau in (0, 1)``.

    Predictions from the LSS ``quantile`` family arrive as a 2-D array with
    one predicted quantile per column; ``col`` picks the column that is
    scored.  Predictions of any other rank are flattened and ``col`` is not
    used.

    Parameters
    ----------
    quantile : float
        Quantile level to evaluate, e.g. 0.5 for the median.  Must lie
        strictly between 0 and 1.
    col : int
        Column of ``y_pred`` to score when predictions are 2-D.  Default 0.

    Raises
    ------
    ValueError
        If ``quantile`` is not strictly inside ``(0, 1)``.
    """

    name = "pinball"
    higher_is_better = False

    def __init__(self, quantile: float = 0.5, col: int = 0) -> None:
        # Written as a chained comparison so that NaN is rejected as well.
        inside_unit_interval = 0.0 < quantile < 1.0
        if not inside_unit_interval:
            raise ValueError(f"quantile must be in (0, 1), got {quantile}")
        self.quantile = quantile
        self.col = col

    def __call__(self, y_true: np.ndarray, y_pred: np.ndarray) -> float:
        """Return the mean pinball loss at ``self.quantile`` as a Python float."""
        # Imported at call time, as in the original implementation.
        from sklearn.metrics import mean_pinball_loss

        preds = np.asarray(y_pred, dtype=float)
        if preds.ndim == 2:
            q_pred = preds[:, self.col]
        else:
            q_pred = preds.ravel()
        targets = np.asarray(y_true).ravel()
        return float(mean_pinball_loss(targets, q_pred, alpha=self.quantile))

    def __repr__(self) -> str:
        """Return a constructor-style representation with both settings."""
        return f"PinballLoss(quantile={self.quantile}, col={self.col})"