Skip to content

Despiking

Spike removal methods for cosmic rays and electronic artifacts.

spectrakit.despike.despike_whitaker_hayes

despike_whitaker_hayes(
    intensities: ndarray,
    *,
    threshold: float = DEFAULT_THRESHOLD,
    window_size: int | None = None,
) -> np.ndarray

Remove cosmic ray spikes using the Whitaker-Hayes method.

Computes the modified Z-score of the second finite difference to detect outlier points (cosmic rays, electronic spikes). Detected spikes are replaced by linear interpolation from neighboring non-spike points.

Parameters:

Name Type Description Default
intensities ndarray

Spectral intensities, shape (W,) or (N, W).

required
threshold float

Modified Z-score threshold for spike detection. Values of 3.0–5.0 are typical; lower catches more spikes.

DEFAULT_THRESHOLD
window_size int | None

Optional rolling window for local MAD estimation. If None (default), uses global MAD over the full spectrum. If provided, must be odd and >= 3.

None

Returns:

Type Description
ndarray

Despiked intensities, same shape as input.

Raises:

Type Description
SpectrumShapeError

If intensities is not 1-D or 2-D.

EmptySpectrumError

If intensities has zero elements.

ValueError

If threshold <= 0.

ValueError

If spectrum has fewer than 4 points.

ValueError

If window_size is provided but < 3 or even.

Examples:

>>> import numpy as np
>>> from spectrakit import despike_whitaker_hayes
>>> raw = np.sin(np.linspace(0, 4 * np.pi, 200))
>>> raw[100] += 10.0  # cosmic ray spike
>>> clean = despike_whitaker_hayes(raw, threshold=3.0)
>>> abs(clean[100] - np.sin(np.linspace(0, 4 * np.pi, 200))[100]) < 0.5
True
Source code in src/spectrakit/despike/whitaker_hayes.py
def despike_whitaker_hayes(
    intensities: np.ndarray,
    *,
    threshold: float = DEFAULT_THRESHOLD,
    window_size: int | None = None,
) -> np.ndarray:
    """Remove cosmic ray spikes using the Whitaker-Hayes method.

    Computes the modified Z-score of the second finite difference to
    detect outlier points (cosmic rays, electronic spikes).  Detected
    spikes are replaced by linear interpolation from neighboring
    non-spike points.

    Args:
        intensities: Spectral intensities, shape ``(W,)`` or ``(N, W)``.
        threshold: Modified Z-score threshold for spike detection.
            Values of 3.0–5.0 are typical; lower catches more spikes.
        window_size: Optional rolling window for local MAD estimation.
            If ``None`` (default), uses global MAD over the full spectrum.
            If provided, must be odd and >= 3.

    Returns:
        Despiked intensities, same shape as input.

    Raises:
        SpectrumShapeError: If *intensities* is not 1-D or 2-D.
        EmptySpectrumError: If *intensities* has zero elements.
        ValueError: If *threshold* <= 0.
        ValueError: If spectrum has fewer than 4 points.
        ValueError: If *window_size* is provided but < 3 or even.

    Examples:
        >>> import numpy as np
        >>> from spectrakit import despike_whitaker_hayes
        >>> raw = np.sin(np.linspace(0, 4 * np.pi, 200))
        >>> raw[100] += 10.0  # cosmic ray spike
        >>> clean = despike_whitaker_hayes(raw, threshold=3.0)
        >>> abs(clean[100] - np.sin(np.linspace(0, 4 * np.pi, 200))[100]) < 0.5
        True
    """
    intensities = ensure_float64(intensities)
    validate_1d_or_2d(intensities)
    warn_if_not_finite(intensities)

    if threshold <= 0:
        raise ValueError(f"threshold must be positive, got {threshold}")

    width = intensities.shape[-1]
    if width < _MIN_LENGTH:
        raise ValueError(f"Whitaker-Hayes requires at least {_MIN_LENGTH} points, got {width}")

    if window_size is not None:
        if window_size < 3:
            raise ValueError(f"window_size must be >= 3, got {window_size}")
        if window_size % 2 == 0:
            raise ValueError(f"window_size must be odd, got {window_size}")

    return apply_along_spectra(
        _despike_whitaker_hayes_1d,
        intensities,
        threshold=threshold,
        window_size=window_size,
    )

spectrakit.despike.despike_zscore

despike_zscore(
    intensities: ndarray,
    *,
    threshold: float = DEFAULT_THRESHOLD,
    window_size: int = DEFAULT_WINDOW_SIZE,
) -> np.ndarray

Remove spikes using rolling modified Z-score detection.

For each spectral point, a modified Z-score is computed relative to the local median within a rolling window. Points exceeding the threshold are replaced with the local median.

Parameters:

Name Type Description Default
intensities ndarray

Spectral intensities, shape (W,) or (N, W).

required
threshold float

Modified Z-score threshold for spike detection. Values of 3.0–5.0 are typical; lower catches more spikes.

DEFAULT_THRESHOLD
window_size int

Size of the rolling window for local statistics. Must be odd and >= 3.

DEFAULT_WINDOW_SIZE

Returns:

Type Description
ndarray

Despiked intensities, same shape as input.

Raises:

Type Description
SpectrumShapeError

If intensities is not 1-D or 2-D.

EmptySpectrumError

If intensities has zero elements.

ValueError

If threshold <= 0, window_size < 3, or window_size is even.

Examples:

>>> import numpy as np
>>> from spectrakit import despike_zscore
>>> raw = np.ones(100); raw[50] = 100.0  # single spike
>>> clean = despike_zscore(raw, threshold=3.0)
>>> abs(clean[50] - 1.0) < 0.1
True
Source code in src/spectrakit/despike/zscore.py
def despike_zscore(
    intensities: np.ndarray,
    *,
    threshold: float = DEFAULT_THRESHOLD,
    window_size: int = DEFAULT_WINDOW_SIZE,
) -> np.ndarray:
    """Remove spikes using rolling modified Z-score detection.

    For each spectral point, a modified Z-score is computed relative to
    the local median within a rolling window.  Points exceeding the
    threshold are replaced with the local median.

    Args:
        intensities: Spectral intensities, shape ``(W,)`` or ``(N, W)``.
        threshold: Modified Z-score threshold for spike detection.
            Values of 3.0–5.0 are typical; lower catches more spikes.
        window_size: Size of the rolling window for local statistics.
            Must be odd and >= 3.

    Returns:
        Despiked intensities, same shape as input.

    Raises:
        SpectrumShapeError: If *intensities* is not 1-D or 2-D.
        EmptySpectrumError: If *intensities* has zero elements.
        ValueError: If *threshold* <= 0, *window_size* < 3, or
            *window_size* is even.

    Examples:
        >>> import numpy as np
        >>> from spectrakit import despike_zscore
        >>> raw = np.ones(100); raw[50] = 100.0  # single spike
        >>> clean = despike_zscore(raw, threshold=3.0)
        >>> abs(clean[50] - 1.0) < 0.1
        True
    """
    intensities = ensure_float64(intensities)
    validate_1d_or_2d(intensities)
    warn_if_not_finite(intensities)

    if threshold <= 0:
        raise ValueError(f"threshold must be positive, got {threshold}")
    if window_size < 3:
        raise ValueError(f"window_size must be >= 3, got {window_size}")
    if window_size % 2 == 0:
        raise ValueError(f"window_size must be odd, got {window_size}")

    return apply_along_spectra(
        _despike_zscore_1d,
        intensities,
        threshold=threshold,
        window_size=window_size,
    )