Source code for pybaselines.two_d.polynomial

# -*- coding: utf-8 -*-
"""Polynomial techniques for fitting baselines to experimental data.

Created on April 16, 2023
@author: Donald Erb


The function penalized_poly was adapted from MATLAB code from
https://www.mathworks.com/matlabcentral/fileexchange/27429-background-correction
(accessed March 18, 2021), which was licensed under the BSD-2-clause below.

License: 2-clause BSD

Copyright (c) 2012, Vincent Mazet
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the distribution

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

"""

import numpy as np

from .. import _weighting
from ..utils import _MIN_FLOAT, _convert_coef2d, relative_difference
from ._algorithm_setup import _Algorithm2D


class _Polynomial(_Algorithm2D):
    """A base class for all polynomial algorithms."""


[docs]
    @_Algorithm2D._register(
        sort_keys=('weights',), reshape_baseline=True, reshape_keys=('weights',)
    )
    def poly(self, data, poly_order=2, weights=None, return_coef=False, max_cross=None):
        """
        Computes a polynomial that fits the baseline of the data.

        Parameters
        ----------
        data : array-like, shape (M, N)
            The y-values of the measured data.
        poly_order : int or Sequence[int, int], optional
            The polynomial orders for x and z. If a single value, will use that for both x and
            z. Default is 2.
        weights : array-like, shape (M, N), optional
            The weighting array. If None (default), then will be an array with
            shape equal to (M, N) and all values set to 1.
        return_coef : bool, optional
            If True, will convert the polynomial coefficients for the fit baseline to
            a form that fits the x and z values and return them in the params dictionary.
            Default is False, since the conversion takes time.
        max_cross : int, optional
            The maximum degree for the cross terms. For example, if `max_cross` is 1, then
            `x z**2`, `x**2 z`, and `x**2 z**2` would all be set to 0. Default is None, which
            does not limit the cross terms.

        Returns
        -------
        baseline : numpy.ndarray, shape (M, N)
            The calculated baseline.
        params : dict
            A dictionary with the following items:

            * 'weights': numpy.ndarray, shape (M, N)
                The weight array used for fitting the data.
            * 'coef': numpy.ndarray, shape (``poly_order[0] + 1``, ``poly_order[1] + 1``)
                Only if `return_coef` is True. The array of polynomial parameters
                for the baseline, in increasing order. Can be used to create a
                polynomial using :func:`numpy.polynomial.polynomial.polyval2d`.

        Notes
        -----
        To only fit regions without peaks, supply a weight array with zero values
        at the indices where peaks are located.

        """
        y, weight_array, pseudo_inverse = self._setup_polynomial(
            data, weights, poly_order, calc_vander=True, calc_pinv=True, max_cross=max_cross
        )
        sqrt_w = np.sqrt(weight_array)

        coef = pseudo_inverse @ (sqrt_w * y)
        baseline = self._polynomial.vandermonde @ coef
        params = {'weights': weight_array}
        if return_coef:
            params['coef'] = _convert_coef2d(
                coef, *self._polynomial.poly_order, self.x_domain, self.z_domain
            )

        return baseline, params



[docs]
    @_Algorithm2D._register(
        sort_keys=('weights',), reshape_baseline=True, reshape_keys=('weights',)
    )
    def modpoly(self, data, poly_order=2, tol=1e-3, max_iter=250, weights=None,
                use_original=False, mask_initial_peaks=False, return_coef=False, max_cross=None):
        """
        The modified polynomial (ModPoly) baseline algorithm.

        Parameters
        ----------
        data : array-like, shape (M, N)
            The y-values of the measured data.
        poly_order : int or Sequence[int, int], optional
            The polynomial orders for x and z. If a single value, will use that for both x and
            z. Default is 2.
        tol : float, optional
            The exit criteria. Default is 1e-3.
        max_iter : int, optional
            The maximum number of iterations. Default is 250.
        weights : array-like, shape (M, N), optional
            The weighting array. If None (default), then will be an array with
            shape equal to (M, N) and all values set to 1.
        use_original : bool, optional
            If False (default), will compare the baseline of each iteration with
            the y-values of that iteration [1]_ when choosing minimum values. If True,
            will compare the baseline with the original y-values given by `data` [2]_.
        mask_initial_peaks : bool, optional
            If True, will mask any data where the initial baseline fit + the standard
            deviation of the residual is less than measured data [3]_. Default is False.
        return_coef : bool, optional
            If True, will convert the polynomial coefficients for the fit baseline to
            a form that fits the x and z values and return them in the params dictionary.
            Default is False, since the conversion takes time.
        max_cross : int, optional
            The maximum degree for the cross terms. For example, if `max_cross` is 1, then
            `x z**2`, `x**2 z`, and `x**2 z**2` would all be set to 0. Default is None, which
            does not limit the cross terms.

        Returns
        -------
        baseline : numpy.ndarray, shape (M, N)
            The calculated baseline.
        params : dict
            A dictionary with the following items:

            * 'weights': numpy.ndarray, shape (M, N)
                The weight array used for fitting the data.
            * 'tol_history': numpy.ndarray
                An array containing the calculated tolerance values for
                each iteration. The length of the array is the number of iterations
                completed. If the last value in the array is greater than the input
                `tol` value, then the function did not converge.
            * 'coef': numpy.ndarray, shape (``poly_order[0] + 1``, ``poly_order[1] + 1``)
                Only if `return_coef` is True. The array of polynomial parameters
                for the baseline, in increasing order. Can be used to create a
                polynomial using :func:`numpy.polynomial.polynomial.polyval2d`.

        Notes
        -----
        Algorithm originally developed in [2]_ and then slightly modified in [1]_.

        References
        ----------
        .. [1] Gan, F., et al. Baseline correction by improved iterative polynomial
            fitting with automatic threshold. Chemometrics and Intelligent
            Laboratory Systems, 2006, 82, 59-65.
        .. [2] Lieber, C., et al. Automated method for subtraction of fluorescence
            from biological raman spectra. Applied Spectroscopy, 2003, 57(11),
            1363-1367.
        .. [3] Zhao, J., et al. Automated Autofluorescence Background Subtraction
            Algorithm for Biomedical Raman Spectroscopy, Applied Spectroscopy,
            2007, 61(11), 1225-1232.

        """
        y, weight_array, pseudo_inverse = self._setup_polynomial(
            data, weights, poly_order, calc_vander=True, calc_pinv=True, copy_weights=True,
            max_cross=max_cross
        )
        sqrt_w = np.sqrt(weight_array)
        if use_original:
            y0 = y

        coef = pseudo_inverse @ (sqrt_w * y)
        baseline = self._polynomial.vandermonde @ coef
        if mask_initial_peaks:
            # use baseline + deviation since without deviation, half of y should be above baseline
            weight_array[baseline + np.std(y - baseline) < y] = 0
            sqrt_w = np.sqrt(weight_array)
            pseudo_inverse = np.linalg.pinv(sqrt_w[:, None] * self._polynomial.vandermonde)

        tol_history = np.empty(max_iter)
        for i in range(max_iter):
            baseline_old = baseline
            y = np.minimum(y0 if use_original else y, baseline)
            coef = pseudo_inverse @ (sqrt_w * y)
            baseline = self._polynomial.vandermonde @ coef
            calc_difference = relative_difference(baseline_old, baseline)
            tol_history[i] = calc_difference
            if calc_difference < tol:
                break

        params = {'weights': weight_array, 'tol_history': tol_history[:i + 1]}
        if return_coef:
            params['coef'] = _convert_coef2d(
                coef, *self._polynomial.poly_order, self.x_domain, self.z_domain
            )

        return baseline, params



[docs]
    @_Algorithm2D._register(
        sort_keys=('weights',), reshape_baseline=True, reshape_keys=('weights',)
    )
    def imodpoly(self, data, poly_order=2, tol=1e-3, max_iter=250, weights=None,
                 use_original=False, mask_initial_peaks=True, return_coef=False,
                 num_std=1., max_cross=None):
        """
        The improved modofied polynomial (IModPoly) baseline algorithm.

        Parameters
        ----------
        data : array-like, shape (M, N)
            The y-values of the measured data.
        poly_order : int or Sequence[int, int], optional
            The polynomial orders for x and z. If a single value, will use that for both x and
            z. Default is 2.
        tol : float, optional
            The exit criteria. Default is 1e-3.
        max_iter : int, optional
            The maximum number of iterations. Default is 250.
        weights : array-like, shape (M, N), optional
            The weighting array. If None (default), then will be an array with
            shape equal to (M, N) and all values set to 1.
        use_original : bool, optional
            If False (default), will compare the baseline of each iteration with
            the y-values of that iteration [1]_ when choosing minimum values. If True,
            will compare the baseline with the original y-values given by `data` [2]_.
        mask_initial_peaks : bool, optional
            If True (default), will mask any data where the initial baseline fit +
            the standard deviation of the residual is less than measured data [3]_.
        return_coef : bool, optional
            If True, will convert the polynomial coefficients for the fit baseline to
            a form that fits the x and z values and return them in the params dictionary.
            Default is False, since the conversion takes time.
        num_std : float, optional
            The number of standard deviations to include when thresholding. Default
            is 1. Must be greater or equal to 0.
        max_cross : int, optional
            The maximum degree for the cross terms. For example, if `max_cross` is 1, then
            `x z**2`, `x**2 z`, and `x**2 z**2` would all be set to 0. Default is None, which
            does not limit the cross terms.

        Returns
        -------
        baseline : numpy.ndarray, shape (M, N)
            The calculated baseline.
        params : dict
            A dictionary with the following items:

            * 'weights': numpy.ndarray, shape (M, N)
                The weight array used for fitting the data.
            * 'tol_history': numpy.ndarray
                An array containing the calculated tolerance values for
                each iteration. The length of the array is the number of iterations
                completed. If the last value in the array is greater than the input
                `tol` value, then the function did not converge.
            * 'coef': numpy.ndarray, shape (``poly_order[0] + 1``, ``poly_order[1] + 1``)
                Only if `return_coef` is True. The array of polynomial parameters
                for the baseline, in increasing order. Can be used to create a
                polynomial using :func:`numpy.polynomial.polynomial.polyval2d`.

        Raises
        ------
        ValueError
            Raised if `num_std` is less than 0.

        Notes
        -----
        Algorithm originally developed in [3]_.

        References
        ----------
        .. [1] Gan, F., et al. Baseline correction by improved iterative polynomial
            fitting with automatic threshold. Chemometrics and Intelligent
            Laboratory Systems, 2006, 82, 59-65.
        .. [2] Lieber, C., et al. Automated method for subtraction of fluorescence
            from biological raman spectra. Applied Spectroscopy, 2003, 57(11),
            1363-1367.
        .. [3] Zhao, J., et al. Automated Autofluorescence Background Subtraction
            Algorithm for Biomedical Raman Spectroscopy, Applied Spectroscopy,
            2007, 61(11), 1225-1232.

        """
        if num_std < 0:
            raise ValueError('num_std must be greater than or equal to 0')

        y, weight_array, pseudo_inverse = self._setup_polynomial(
            data, weights, poly_order, calc_vander=True, calc_pinv=True,
            copy_weights=True, max_cross=max_cross
        )
        sqrt_w = np.sqrt(weight_array)
        if use_original:
            y0 = y

        coef = pseudo_inverse @ (sqrt_w * y)
        baseline = self._polynomial.vandermonde @ coef
        deviation = np.std(y - baseline)
        if mask_initial_peaks:
            weight_array[baseline + deviation < y] = 0
            sqrt_w = np.sqrt(weight_array)
            pseudo_inverse = np.linalg.pinv(sqrt_w[:, None] * self._polynomial.vandermonde)

        tol_history = np.empty(max_iter)
        for i in range(max_iter):
            y = np.minimum(y0 if use_original else y, baseline + num_std * deviation)
            coef = pseudo_inverse @ (sqrt_w * y)
            baseline = self._polynomial.vandermonde @ coef
            new_deviation = np.std(y - baseline)
            # use new_deviation as dividing term in relative difference
            calc_difference = relative_difference(new_deviation, deviation)
            tol_history[i] = calc_difference
            if calc_difference < tol:
                break
            deviation = new_deviation

        params = {'weights': weight_array, 'tol_history': tol_history[:i + 1]}
        if return_coef:
            params['coef'] = _convert_coef2d(
                coef, *self._polynomial.poly_order, self.x_domain, self.z_domain
            )

        return baseline, params


    # adapted from
    # https://www.mathworks.com/matlabcentral/fileexchange/27429-background-correction;
    # see license above

[docs]
    @_Algorithm2D._register(
        sort_keys=('weights',), reshape_baseline=True, reshape_keys=('weights',)
    )
    def penalized_poly(self, data, poly_order=2, tol=1e-3, max_iter=250, weights=None,
                       cost_function='asymmetric_truncated_quadratic', threshold=None,
                       alpha_factor=0.99, return_coef=False, max_cross=None):
        """
        Fits a polynomial baseline using a non-quadratic cost function.

        The non-quadratic cost functions penalize residuals with larger values,
        giving a more robust fit compared to normal least-squares.

        Parameters
        ----------
        data : array-like, shape (M, N)
            The y-values of the measured data.
        poly_order : int or Sequence[int, int], optional
            The polynomial orders for x and z. If a single value, will use that for both x and
            z. Default is 2.
        tol : float, optional
            The exit criteria. Default is 1e-3.
        max_iter : int, optional
            The maximum number of iterations. Default is 250.
        weights : array-like, shape (M, N), optional
            The weighting array. If None (default), then will be an array with
            shape equal to (M, N) and all values set to 1.
        cost_function : str, optional
            The non-quadratic cost function to minimize. Must indicate symmetry of the
            method by appending 'a' or 'asymmetric' for asymmetric loss, and 's' or
            'symmetric' for symmetric loss. Default is 'asymmetric_truncated_quadratic'.
            Available methods, and their associated reference, are:

                * 'asymmetric_truncated_quadratic'[1]_
                * 'symmetric_truncated_quadratic'[1]_
                * 'asymmetric_huber'[1]_
                * 'symmetric_huber'[1]_
                * 'asymmetric_indec'[2]_
                * 'symmetric_indec'[2]_

        threshold : float, optional
            The threshold value for the loss method, where the function goes from
            quadratic loss (such as used for least squares) to non-quadratic. For
            symmetric loss methods, residual values with absolute value less than
            threshold will have quadratic loss. For asymmetric loss methods, residual
            values less than the threshold will have quadratic loss. Default is None,
            which sets `threshold` to one-tenth of the standard deviation of the input
            data.
        alpha_factor : float, optional
            A value between 0 and 1 that controls the value of the penalty. Default is
            0.99. Typically should not need to change this value.
        return_coef : bool, optional
            If True, will convert the polynomial coefficients for the fit baseline to
            a form that fits the x and z values and return them in the params dictionary.
            Default is False, since the conversion takes time.
        max_cross : int, optional
            The maximum degree for the cross terms. For example, if `max_cross` is 1, then
            `x z**2`, `x**2 z`, and `x**2 z**2` would all be set to 0. Default is None, which
            does not limit the cross terms.

        Returns
        -------
        baseline : numpy.ndarray, shape (M, N)
            The calculated baseline.
        params : dict
            A dictionary with the following items:

            * 'weights': numpy.ndarray, shape (M, N)
                The weight array used for fitting the data.
            * 'tol_history': numpy.ndarray
                An array containing the calculated tolerance values for
                each iteration. The length of the array is the number of iterations
                completed. If the last value in the array is greater than the input
                `tol` value, then the function did not converge.
            * 'coef': numpy.ndarray, shape (``poly_order[0] + 1``, ``poly_order[1] + 1``)
                Only if `return_coef` is True. The array of polynomial parameters
                for the baseline, in increasing order. Can be used to create a
                polynomial using :func:`numpy.polynomial.polynomial.polyval2d`.

        Raises
        ------
        ValueError
            Raised if `alpha_factor` is not between 0 and 1.

        Notes
        -----
        In baseline literature, this procedure is sometimes called "backcor".

        References
        ----------
        .. [1] Mazet, V., et al. Background removal from spectra by designing and
            minimising a non-quadratic cost function. Chemometrics and Intelligent
            Laboratory Systems, 2005, 76(2), 121-133.
        .. [2] Liu, J., et al. Goldindec: A Novel Algorithm for Raman Spectrum Baseline
            Correction. Applied Spectroscopy, 2015, 69(7), 834-842.

        """
        if not 0 < alpha_factor <= 1:
            raise ValueError('alpha_factor must be between 0 and 1')
        symmetric_loss, method = _identify_loss_method(cost_function)
        loss_function = {
            'huber': _huber_loss,
            'truncated_quadratic': _truncated_quadratic_loss,
            'indec': _indec_loss
        }[method]

        y, weight_array, pseudo_inverse = self._setup_polynomial(
            data, weights, poly_order, calc_vander=True, calc_pinv=True, max_cross=max_cross
        )
        if threshold is None:
            threshold = np.std(y) / 10
        loss_kwargs = {
            'threshold': threshold, 'alpha_factor': alpha_factor, 'symmetric': symmetric_loss
        }

        sqrt_w = np.sqrt(weight_array)
        y = sqrt_w * y

        coef = pseudo_inverse @ y
        baseline = self._polynomial.vandermonde @ coef
        tol_history = np.empty(max_iter)
        for i in range(max_iter):
            baseline_old = baseline
            coef = pseudo_inverse @ (y + loss_function(y - sqrt_w * baseline, **loss_kwargs))
            baseline = self._polynomial.vandermonde @ coef
            calc_difference = relative_difference(baseline_old, baseline)
            tol_history[i] = calc_difference
            if calc_difference < tol:
                break

        params = {'weights': weight_array, 'tol_history': tol_history[:i + 1]}
        if return_coef:
            params['coef'] = _convert_coef2d(
                coef, *self._polynomial.poly_order, self.x_domain, self.z_domain
            )

        return baseline, params



[docs]
    @_Algorithm2D._register(
        sort_keys=('weights',), reshape_baseline=True, reshape_keys=('weights',)
    )
    def quant_reg(self, data, poly_order=2, quantile=0.05, tol=1e-6, max_iter=250,
                  weights=None, eps=None, return_coef=False, max_cross=None):
        """
        Approximates the baseline of the data using quantile regression.

        Parameters
        ----------
        data : array-like, shape (M, N)
            The y-values of the measured data.
        poly_order : int or Sequence[int, int], optional
            The polynomial orders for x and z. If a single value, will use that for both x and
            z. Default is 2.
        quantile : float, optional
            The quantile at which to fit the baseline. Default is 0.05.
        tol : float, optional
            The exit criteria. Default is 1e-6. For extreme quantiles (`quantile` < 0.01
            or `quantile` > 0.99), may need to use a lower value to get a good fit.
        max_iter : int, optional
            The maximum number of iterations. Default is 250. For extreme quantiles
            (`quantile` < 0.01 or `quantile` > 0.99), may need to use a higher value to
            ensure convergence.
        weights : array-like, shape (M, N), optional
            The weighting array. If None (default), then will be an array with
            shape equal to (M, N) and all values set to 1.
        eps : float, optional
            A small value added to the square of the residual to prevent dividing by 0.
            Default is None, which uses the square of the maximum-absolute-value of the
            fit each iteration multiplied by 1e-6.
        return_coef : bool, optional
            If True, will convert the polynomial coefficients for the fit baseline to
            a form that fits the x and z values and return them in the params dictionary.
            Default is False, since the conversion takes time.
        max_cross : int, optional
            The maximum degree for the cross terms. For example, if `max_cross` is 1, then
            `x z**2`, `x**2 z`, and `x**2 z**2` would all be set to 0. Default is None, which
            does not limit the cross terms.

        Returns
        -------
        baseline : numpy.ndarray, shape (M, N)
            The calculated baseline.
        params : dict
            A dictionary with the following items:

            * 'weights': numpy.ndarray, shape (M, N)
                The weight array used for fitting the data.
            * 'tol_history': numpy.ndarray
                An array containing the calculated tolerance values for
                each iteration. The length of the array is the number of iterations
                completed. If the last value in the array is greater than the input
                `tol` value, then the function did not converge.
            * 'coef': numpy.ndarray, shape (``poly_order[0] + 1``, ``poly_order[1] + 1``)
                Only if `return_coef` is True. The array of polynomial parameters
                for the baseline, in increasing order. Can be used to create a
                polynomial using :func:`numpy.polynomial.polynomial.polyval2d`.

        Raises
        ------
        ValueError
            Raised if `quantile` is not between 0 and 1.

        Notes
        -----
        Application of quantile regression for baseline fitting ss described in [1]_.

        Performs quantile regression using iteratively reweighted least squares (IRLS)
        as described in [2]_.

        References
        ----------
        .. [1] Komsta, Ł. Comparison of Several Methods of Chromatographic
                Baseline Removal with a New Approach Based on Quantile Regression.
                Chromatographia, 2011, 73, 721-731.
        .. [2] Schnabel, S., et al. Simultaneous estimation of quantile curves using
                quantile sheets. AStA Advances in Statistical Analysis, 2013, 97, 77-87.

        """
        # TODO provide a way to estimate best poly_order based on AIC like in Komsta? could be
        # useful for all polynomial methods; maybe could be an optimizer function
        if not 0 < quantile < 1:
            raise ValueError('quantile must be between 0 and 1.')

        y, weight_array = self._setup_polynomial(
            data, weights, poly_order, calc_vander=True, max_cross=max_cross
        )
        sqrt_w = np.sqrt(weight_array)
        baseline_old = y
        tol_history = np.empty(max_iter + 1)
        for i in range(max_iter + 1):
            coef = np.linalg.lstsq(
                self._polynomial.vandermonde * sqrt_w[:, None], y * sqrt_w, None
            )[0]
            baseline = self._polynomial.vandermonde @ coef
            # relative_difference(baseline_old, baseline, 1) gives nearly same result and
            # the l2 norm is faster to calculate, so use that instead of l1 norm
            calc_difference = relative_difference(baseline_old, baseline)
            tol_history[i] = calc_difference
            if calc_difference < tol:
                break
            sqrt_w = np.sqrt(_weighting._quantile(y, baseline, quantile, eps))
            baseline_old = baseline

        params = {'weights': sqrt_w**2, 'tol_history': tol_history[:i + 1]}
        if return_coef:
            params['coef'] = _convert_coef2d(
                coef, *self._polynomial.poly_order, self.x_domain, self.z_domain
            )

        return baseline, params



# adapted from (https://www.mathworks.com/matlabcentral/fileexchange/27429-background-correction);
# see license above
def _huber_loss(residual, threshold=1.0, alpha_factor=0.99, symmetric=True):
    """
    The Huber non-quadratic cost function.

    Parameters
    ----------
    residual : numpy.ndarray, shape (N,)
        The residual array.
    threshold : float, optional
        Any residual values below the threshold are given quadratic loss.
        Default is 1.0.
    alpha_factor : float, optional
        The scale between 0 and 1 to multiply the cost function's alpha_max
        value (see Notes below). Default is 0.99.
    symmetric : bool, optional
        If True (default), the cost function is symmetric and applies the same
        weighting for positive and negative values. If False, will apply weights
        asymmetrically so that only positive weights are given the non-quadratic
        weigting and negative weights have normal, quadratic weighting.

    Returns
    -------
    weights : numpy.ndarray, shape (N,)
        The weight array.

    Notes
    -----
    The returned result is

        -residual + alpha_factor * alpha_max * phi'(residual)

    where phi'(x) is the derivative of the huber loss function, phi(x).

    References
    ----------
    Mazet, V., et al. Background removal from spectra by designing and
    minimising a non-quadratic cost function. Chemometrics and Intelligent
    Laboratory Systems, 2005, 76(2), 121-133.

    """
    alpha = alpha_factor * 0.5  # alpha_max for huber is 0.5
    if symmetric:
        mask = (np.abs(residual) < threshold)
        weights = (
            mask * residual * (2 * alpha - 1)
            + (~mask) * 2 * alpha * threshold * np.sign(residual)
        )
    else:
        mask = (residual < threshold)
        weights = (
            mask * residual * (2 * alpha - 1)
            + (~mask) * (2 * alpha * threshold - residual)
        )
    return weights


# adapted from (https://www.mathworks.com/matlabcentral/fileexchange/27429-background-correction);
# see license above
def _truncated_quadratic_loss(residual, threshold=1.0, alpha_factor=0.99, symmetric=True):
    """
    The Truncated-Quadratic non-quadratic cost function.

    Parameters
    ----------
    residual : numpy.ndarray, shape (N,)
        The residual array.
    threshold : float, optional
        Any residual values below the threshold are given quadratic loss.
        Default is 1.0.
    alpha_factor : float, optional
        The scale between 0 and 1 to multiply the cost function's alpha_max
        value (see Notes below). Default is 0.99.
    symmetric : bool, optional
        If True (default), the cost function is symmetric and applies the same
        weighting for positive and negative values. If False, will apply weights
        asymmetrically so that only positive weights are given the non-quadratic
        weigting and negative weights have normal, quadratic weighting.

    Returns
    -------
    weights : numpy.ndarray, shape (N,)
        The weight array.

    Notes
    -----
    The returned result is

        -residual + alpha_factor * alpha_max * phi'(residual)

    where phi'(x) is the derivative of the truncated quadratic function, phi(x).

    References
    ----------
    Mazet, V., et al. Background removal from spectra by designing and
    minimising a non-quadratic cost function. Chemometrics and Intelligent
    Laboratory Systems, 2005, 76(2), 121-133.

    """
    alpha = alpha_factor * 0.5  # alpha_max for truncated quadratic is 0.5
    if symmetric:
        mask = (np.abs(residual) < threshold)
    else:
        mask = (residual < threshold)
    return mask * residual * (2 * alpha - 1) - (~mask) * residual


def _indec_loss(residual, threshold=1.0, alpha_factor=0.99, symmetric=True):
    """
    The Indec non-quadratic cost function.

    Parameters
    ----------
    residual : numpy.ndarray, shape (N,)
        The residual array.
    threshold : float, optional
        Any residual values below the threshold are given quadratic loss.
        Default is 1.0.
    alpha_factor : float, optional
        The scale between 0 and 1 to multiply the cost function's alpha_max
        value (see Notes below). Default is 0.99.
    symmetric : bool, optional
        If True (default), the cost function is symmetric and applies the same
        weighting for positive and negative values. If False, will apply weights
        asymmetrically so that only positive weights are given the non-quadratic
        weigting and negative weights have normal, quadratic weighting.

    Returns
    -------
    weights : numpy.ndarray, shape (N,)
        The weight array.

    Notes
    -----
    The returned result is

        -residual + alpha_factor * alpha_max * phi'(residual)

    where phi'(x) is the derivative of the Indec function, phi(x).

    References
    ----------
    Liu, J., et al. Goldindec: A Novel Algorithm for Raman Spectrum Baseline
    Correction. Applied Spectroscopy, 2015, 69(7), 834-842.

    Mazet, V., et al. Background removal from spectra by designing and
    minimising a non-quadratic cost function. Chemometrics and Intelligent
    Laboratory Systems, 2005, 76(2), 121-133.

    """
    alpha = alpha_factor * 0.5  # alpha_max for indec is 0.5
    if symmetric:
        mask = (np.abs(residual) < threshold)
        multiple = np.sign(residual)
    else:
        mask = (residual < threshold)
        # multiple=1 is same as sign(residual) since residual is always > 0
        # for asymmetric case, but this allows not doing the sign calculation
        multiple = 1
    weights = (
        mask * residual * (2 * alpha - 1)
        - (~mask) * (
            residual + alpha * multiple * threshold**3 / np.maximum(2 * residual**2, _MIN_FLOAT)
        )
    )
    return weights


def _identify_loss_method(loss_method):
    """
    Identifies the symmetry for the given loss method.

    Parameters
    ----------
    loss_method : str
        The loss method to use. Should have the symmetry identifier as
        the prefix.

    Returns
    -------
    symmetric : bool
        True if `loss_method` had 's_' or 'symmetric_' as the prefix, else False.
    str
        The input `loss_method` value without the first section that indicated
        the symmetry.

    Raises
    ------
    ValueError
        Raised if the loss method does not have the correct form.

    """
    prefix, *split_method = loss_method.lower().split('_')
    if prefix not in ('a', 's', 'asymmetric', 'symmetric') or not split_method:
        raise ValueError('must specify loss function symmetry by appending "a_" or "s_"')
    if prefix in ('a', 'asymmetric'):
        symmetric = False
    else:
        symmetric = True
    return symmetric, '_'.join(split_method)