Skip to content

Similarity Metrics

spectrakit.similarity.similarity_cosine

similarity_cosine(
    query: ndarray, reference: ndarray
) -> float | np.ndarray

Compute cosine similarity between spectra.

For two single spectra (1-D), returns a scalar. For a single query (1-D) against a library (2-D), or a batch of queries (2-D) against a single reference (1-D), returns a 1-D array of similarities. For a batch of queries (2-D) against a library (2-D), returns a similarity matrix.

Parameters:

Name Type Description Default
query ndarray

Query spectrum, shape (W,) or (M, W).

required
reference ndarray

Reference spectrum of shape (W,), or reference library of shape (N, W).

required

Returns:

Type Description
float | ndarray

Cosine similarity in [-1, 1].

float | ndarray
  • query (W,) + reference (W,) → scalar
float | ndarray
  • query (W,) + reference (N, W) → array (N,)
float | ndarray
  • query (M, W) + reference (W,) → array (M,)
float | ndarray
  • query (M, W) + reference (N, W) → matrix (M, N)

Raises:

Type Description
SpectrumShapeError

If query or reference is not 1-D or 2-D.

EmptySpectrumError

If inputs have zero elements.

Source code in src/spectrakit/similarity/cosine.py
def similarity_cosine(query: np.ndarray, reference: np.ndarray) -> float | np.ndarray:
    """Compute cosine similarity between spectra.

    For single spectra (1-D), returns a scalar. For a query (1-D) against
    a library (2-D), or a batch of queries (2-D) against a single
    reference (1-D), returns an array of similarities. For a batch of
    queries (2-D) against a library (2-D), returns a similarity matrix.

    Args:
        query: Query spectrum, shape ``(W,)`` or ``(M, W)``.
        reference: Reference spectrum of shape ``(W,)``, or library of
            shape ``(N, W)``.

    Returns:
        Cosine similarity in [-1, 1]. Pairs whose norm product is below
        ``EPSILON`` (e.g. an all-zero spectrum) yield exactly ``0.0`` in
        every shape combination.

        - query ``(W,)`` + reference ``(W,)`` → scalar
        - query ``(W,)`` + reference ``(N, W)`` → array ``(N,)``
        - query ``(M, W)`` + reference ``(W,)`` → array ``(M,)``
        - query ``(M, W)`` + reference ``(N, W)`` → matrix ``(M, N)``

    Raises:
        SpectrumShapeError: If *query* or *reference* is not 1-D or 2-D.
        EmptySpectrumError: If inputs have zero elements.
    """
    query = ensure_float64(query)
    reference = ensure_float64(reference)
    validate_1d_or_2d(query, name="query")
    validate_1d_or_2d(reference, name="reference")
    warn_if_not_finite(query, name="query")
    warn_if_not_finite(reference, name="reference")
    validate_matching_width(query, reference)

    # 1D query vs 1D reference → scalar
    if query.ndim == 1 and reference.ndim == 1:
        denom = np.linalg.norm(query) * np.linalg.norm(reference)
        if denom < EPSILON:
            return 0.0
        # Clip: rounding can push dot/denom a few ULPs outside [-1, 1].
        return float(np.clip(np.dot(query, reference) / denom, -1.0, 1.0))

    # Batched cases differ only in how the dot products and the matching
    # norm products are laid out; the normalisation afterwards is shared.
    if query.ndim == 1:
        # 1D query vs 2D reference → (N,)
        dots = reference @ query
        denoms = np.linalg.norm(reference, axis=1) * np.linalg.norm(query)
    elif reference.ndim == 1:
        # 2D query vs 1D reference → (M,)
        dots = query @ reference
        denoms = np.linalg.norm(query, axis=1) * np.linalg.norm(reference)
    else:
        # 2D query vs 2D reference → (M, N)
        dots = query @ reference.T
        denoms = np.outer(
            np.linalg.norm(query, axis=1), np.linalg.norm(reference, axis=1)
        )

    degenerate = denoms < EPSILON
    # Divide by 1.0 at degenerate positions only to avoid 0/0; those entries
    # are overwritten with 0.0 below so they agree with the scalar path.
    safe_denoms = np.where(degenerate, 1.0, denoms)
    similarities = np.clip(dots / safe_denoms, -1.0, 1.0)
    return np.where(degenerate, 0.0, similarities)  # type: ignore[no-any-return]

spectrakit.similarity.similarity_pearson

similarity_pearson(
    query: ndarray, reference: ndarray
) -> float | np.ndarray

Compute Pearson correlation between spectra.

Parameters:

Name Type Description Default
query ndarray

Query spectrum, shape (W,) or (M, W).

required
reference ndarray

Reference spectrum of shape (W,), or reference library of shape (N, W).

required

Returns:

Type Description
float | ndarray

Pearson r in [-1, 1]. Returns 0.0 for constant spectra.

float | ndarray
  • query (W,) + reference (W,) → scalar
float | ndarray
  • query (W,) + reference (N, W) → array (N,)
float | ndarray
  • query (M, W) + reference (W,) → array (M,)
float | ndarray
  • query (M, W) + reference (N, W) → matrix (M, N)

Raises:

Type Description
SpectrumShapeError

If query or reference is not 1-D or 2-D.

EmptySpectrumError

If inputs have zero elements.

Note

The 2-D × 2-D case uses matrix multiplication (O(M*N*W)). For large M or N, consider batching queries to manage memory.

Source code in src/spectrakit/similarity/pearson.py
def similarity_pearson(query: np.ndarray, reference: np.ndarray) -> float | np.ndarray:
    """Compute Pearson correlation between spectra.

    Each spectrum is mean-centred along its last axis; the correlation is
    the cosine of the angle between the centred spectra.

    Args:
        query: Query spectrum, shape ``(W,)`` or ``(M, W)``.
        reference: Reference spectrum of shape ``(W,)``, or library of
            shape ``(N, W)``.

    Returns:
        Pearson *r* in [-1, 1]. Returns exactly ``0.0`` wherever the
        product of the centred norms is below ``EPSILON`` (e.g. constant
        spectra), in every shape combination.

        - query ``(W,)`` + reference ``(W,)`` → scalar
        - query ``(W,)`` + reference ``(N, W)`` → array ``(N,)``
        - query ``(M, W)`` + reference ``(W,)`` → array ``(M,)``
        - query ``(M, W)`` + reference ``(N, W)`` → matrix ``(M, N)``

    Raises:
        SpectrumShapeError: If *query* or *reference* is not 1-D or 2-D.
        EmptySpectrumError: If inputs have zero elements.

    Note:
        The 2-D × 2-D case uses matrix multiplication (``O(M*N*W)``).
        For large *M* or *N*, consider batching queries to manage memory.
    """
    query = ensure_float64(query)
    reference = ensure_float64(reference)
    validate_1d_or_2d(query, name="query")
    validate_1d_or_2d(reference, name="reference")
    warn_if_not_finite(query, name="query")
    warn_if_not_finite(reference, name="reference")
    validate_matching_width(query, reference)

    # Centre every spectrum on its own mean. Reducing over the last axis with
    # keepdims=True handles both the 1-D and the 2-D layout.
    qc = query - np.mean(query, axis=-1, keepdims=True)
    rc = reference - np.mean(reference, axis=-1, keepdims=True)

    # 1D query vs 1D reference → scalar
    if query.ndim == 1 and reference.ndim == 1:
        denom = np.linalg.norm(qc) * np.linalg.norm(rc)
        if denom < EPSILON:
            return 0.0
        # Clip: rounding can push the ratio a few ULPs outside [-1, 1].
        return float(np.clip(np.dot(qc, rc) / denom, -1.0, 1.0))

    if query.ndim == 1:
        # 1D query vs 2D reference → (N,)
        numerator = rc @ qc
        denoms = np.linalg.norm(qc) * np.linalg.norm(rc, axis=1)
    elif reference.ndim == 1:
        # 2D query vs 1D reference → (M,)
        numerator = qc @ rc
        denoms = np.linalg.norm(qc, axis=1) * np.linalg.norm(rc)
    else:
        # 2D query vs 2D reference → (M, N)
        numerator = qc @ rc.T
        denoms = np.outer(np.linalg.norm(qc, axis=1), np.linalg.norm(rc, axis=1))

    degenerate = denoms < EPSILON
    # Divide by 1.0 at degenerate positions only to avoid 0/0; those entries
    # are overwritten with 0.0 below so they agree with the scalar path.
    safe_denoms = np.where(degenerate, 1.0, denoms)
    correlations = np.clip(numerator / safe_denoms, -1.0, 1.0)
    return np.where(degenerate, 0.0, correlations)  # type: ignore[no-any-return]

spectrakit.similarity.similarity_spectral_angle

similarity_spectral_angle(
    query: ndarray, reference: ndarray
) -> float | np.ndarray

Compute Spectral Angle Mapper (SAM) between spectra.

Returns the angle in radians between spectral vectors. Smaller angle means more similar. Range: [0, pi].

Parameters:

Name Type Description Default
query ndarray

Query spectrum, shape (W,) or (M, W).

required
reference ndarray

Reference spectrum of shape (W,), or reference library of shape (N, W).

required

Returns:

Type Description
float | ndarray

Angle in radians in [0, pi].

float | ndarray
  • query (W,) + reference (W,) → scalar
float | ndarray
  • query (W,) + reference (N, W) → array (N,)
float | ndarray
  • query (M, W) + reference (W,) → array (M,)
float | ndarray
  • query (M, W) + reference (N, W) → matrix (M, N)

Raises:

Type Description
SpectrumShapeError

If query or reference is not 1-D or 2-D.

EmptySpectrumError

If inputs have zero elements.

Source code in src/spectrakit/similarity/spectral_angle.py
def similarity_spectral_angle(query: np.ndarray, reference: np.ndarray) -> float | np.ndarray:
    """Compute Spectral Angle Mapper (SAM) between spectra.

    Returns the angle in radians between spectral vectors. Smaller
    angle means more similar. Range: [0, pi].

    Args:
        query: Query spectrum, shape ``(W,)`` or ``(M, W)``.
        reference: Reference spectrum of shape ``(W,)``, or library of
            shape ``(N, W)``.

    Returns:
        Angle in radians in [0, pi]. When the product of the two norms is
        below ``EPSILON`` (e.g. an all-zero spectrum) the angle is
        undefined; ``pi / 2`` is returned in every shape combination, so a
        degenerate spectrum is never reported as a perfect (zero-angle)
        match.

        - query ``(W,)`` + reference ``(W,)`` → scalar
        - query ``(W,)`` + reference ``(N, W)`` → array ``(N,)``
        - query ``(M, W)`` + reference ``(W,)`` → array ``(M,)``
        - query ``(M, W)`` + reference ``(N, W)`` → matrix ``(M, N)``

    Raises:
        SpectrumShapeError: If *query* or *reference* is not 1-D or 2-D.
        EmptySpectrumError: If inputs have zero elements.
    """
    query = ensure_float64(query)
    reference = ensure_float64(reference)
    validate_1d_or_2d(query, name="query")
    validate_1d_or_2d(reference, name="reference")
    warn_if_not_finite(query, name="query")
    warn_if_not_finite(reference, name="reference")
    validate_matching_width(query, reference)

    # 1D query vs 1D reference → scalar
    if query.ndim == 1 and reference.ndim == 1:
        denom = np.linalg.norm(query) * np.linalg.norm(reference)
        if denom < EPSILON:
            # Treat the undefined angle as cos = 0, i.e. pi/2, the same value
            # the batched paths produce for all-zero spectra.
            return float(np.arccos(0.0))
        cos_angle = np.clip(np.dot(query, reference) / denom, -1.0, 1.0)
        return float(np.arccos(cos_angle))

    if query.ndim == 1:
        # 1D query vs 2D reference → (N,)
        dots = reference @ query
        denoms = np.linalg.norm(query) * np.linalg.norm(reference, axis=1)
    elif reference.ndim == 1:
        # 2D query vs 1D reference → (M,)
        dots = query @ reference
        denoms = np.linalg.norm(query, axis=1) * np.linalg.norm(reference)
    else:
        # 2D query vs 2D reference → (M, N)
        dots = query @ reference.T
        denoms = np.outer(
            np.linalg.norm(query, axis=1), np.linalg.norm(reference, axis=1)
        )

    degenerate = denoms < EPSILON
    # Divide by 1.0 at degenerate positions only to avoid 0/0; the cosine is
    # then forced to exactly 0 there so arccos yields pi/2.
    safe_denoms = np.where(degenerate, 1.0, denoms)
    # Clip before arccos: rounding can push the ratio just outside [-1, 1],
    # which would make arccos return NaN.
    cos_angles = np.clip(dots / safe_denoms, -1.0, 1.0)
    cos_angles = np.where(degenerate, 0.0, cos_angles)
    return np.arccos(cos_angles)  # type: ignore[no-any-return]

spectrakit.similarity.similarity_euclidean

similarity_euclidean(
    query: ndarray, reference: ndarray
) -> float | np.ndarray

Compute Euclidean distance between spectra.

Lower values indicate greater similarity.

Parameters:

Name Type Description Default
query ndarray

Query spectrum, shape (W,) or (M, W).

required
reference ndarray

Reference spectrum of shape (W,), or reference library of shape (N, W).

required

Returns:

Type Description
float | ndarray

Euclidean distance in [0, inf).

float | ndarray
  • query (W,) + reference (W,) → scalar
float | ndarray
  • query (W,) + reference (N, W) → array (N,)
float | ndarray
  • query (M, W) + reference (W,) → array (M,)
float | ndarray
  • query (M, W) + reference (N, W) → matrix (M, N)

Raises:

Type Description
SpectrumShapeError

If query or reference is not 1-D or 2-D.

EmptySpectrumError

If inputs have zero elements.

Note

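The 2-D × 2-D case broadcasts to an (M, N, W) intermediate array. When M × N × W exceeds an internal element limit, the distances are instead computed one query row at a time, so only an (N, W) intermediate is needed per row. For very large M or N, the (M, N) result itself may still be large; consider batching queries to manage memory.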

Source code in src/spectrakit/similarity/euclidean.py
def similarity_euclidean(query: np.ndarray, reference: np.ndarray) -> float | np.ndarray:
    """Compute the Euclidean (L2) distance between spectra.

    This is a distance, not a similarity score: the smaller the value,
    the closer the two spectra are.

    Args:
        query: Query spectrum, shape ``(W,)`` or ``(M, W)``.
        reference: Reference spectrum of shape ``(W,)``, or library of
            shape ``(N, W)``.

    Returns:
        Euclidean distance in [0, inf).

        - query ``(W,)`` + reference ``(W,)`` → scalar
        - query ``(W,)`` + reference ``(N, W)`` → array ``(N,)``
        - query ``(M, W)`` + reference ``(W,)`` → array ``(M,)``
        - query ``(M, W)`` + reference ``(N, W)`` → matrix ``(M, N)``

    Raises:
        SpectrumShapeError: If *query* or *reference* is not 1-D or 2-D.
        EmptySpectrumError: If inputs have zero elements.

    Note:
        The 2-D × 2-D case broadcasts to an ``(M, N, W)`` intermediate
        array unless ``M * N * W`` exceeds ``_MAX_BROADCAST_ELEMENTS``, in
        which case the result is filled one query row at a time.
    """
    query = ensure_float64(query)
    reference = ensure_float64(reference)
    # Shape checks run for both inputs before the finiteness warnings.
    for spectra, label in ((query, "query"), (reference, "reference")):
        validate_1d_or_2d(spectra, name=label)
    for spectra, label in ((query, "query"), (reference, "reference")):
        warn_if_not_finite(spectra, name=label)
    validate_matching_width(query, reference)

    shape_case = (query.ndim, reference.ndim)

    if shape_case == (1, 1):
        # Single spectrum vs single spectrum → plain Python float.
        return float(np.linalg.norm(query - reference))

    if shape_case == (1, 2):
        # Single query vs library → one distance per library row, (N,).
        return np.linalg.norm(reference - query, axis=1)  # type: ignore[no-any-return]

    if shape_case == (2, 1):
        # Query batch vs single reference → one distance per query row, (M,).
        return np.linalg.norm(query - reference, axis=1)  # type: ignore[no-any-return]

    # Query batch vs library → (M, N).
    n_queries, width = query.shape
    n_refs = reference.shape[0]

    if n_queries * n_refs * width <= _MAX_BROADCAST_ELEMENTS:
        # Small enough: form every pairwise difference in one broadcast.
        pairwise = query[:, np.newaxis, :] - reference[np.newaxis, :, :]
        return np.linalg.norm(pairwise, axis=2)  # type: ignore[no-any-return]

    # Too large for a single (M, N, W) temporary: handle one query row at a
    # time so each step only needs an (N, W) difference array.
    distances = np.empty((n_queries, n_refs), dtype=np.float64)
    for row, spectrum in enumerate(query):
        distances[row] = np.linalg.norm(reference - spectrum, axis=1)
    return distances