# Source code for ktch.plot._morphospace

"""Morphospace scatter plot with reconstructed shapes."""

# Copyright 2026 Koji Noshita
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import warnings
from collections.abc import Callable, Sequence
from typing import Any

import numpy as np
import numpy.typing as npt

from ._base import require_dependencies
from ._params import (
    _detect_shape_type,
    _get_renderer_and_projection,
    _resolve_descriptor_params,
    _resolve_reducer_params,
    _resolve_xy_hue,
    _validate_components,
)
from ._renderers import _resolve_render_kw



# [docs]
def morphospace_plot(
    data: Any | None = None,
    *,
    x: str | npt.ArrayLike | None = None,
    y: str | npt.ArrayLike | None = None,
    hue: str | npt.ArrayLike | None = None,
    reducer: Any | None = None,
    reducer_inverse_transform: Callable[[np.ndarray], np.ndarray] | None = None,
    n_components: int | None = None,
    descriptor: Any | None = None,
    descriptor_inverse_transform: Callable[[np.ndarray], np.ndarray] | None = None,
    components: tuple[int, int] = (0, 1),
    shape_type: str = "auto",
    render_fn: Callable[..., None] | None = None,
    n_dim: int | None = None,
    links: Sequence[Sequence[int]] | None = None,
    n_shapes: int = 5,
    shape_scale: float = 1.0,
    shape_color: str = "lightgray",
    shape_alpha: float = 0.7,
    palette: str | Sequence | None = None,
    hue_order: Sequence | None = None,
    scatter_kw: dict[str, Any] | None = None,
    ax: object | None = None,
    **render_kw: Any,
) -> object:
    """Scatter plot of specimens in morphospace with shape insets.

    Draws a scatter plot of scores from dimension reduction (reducer)
    and overlays reconstructed shapes at a regular grid of positions
    in the low-dimensional space.

    The function uses the same two-stage inverse transform pipeline as
    :func:`shape_variation_plot`:
    ``scores -> [reducer_inverse_transform] -> coefficients ->
    [descriptor_inverse_transform] -> shape coordinates``.

    Shape insets are parented to ``ax`` via :meth:`matplotlib.axes.Axes.inset_axes`
    with ``transform=ax.transData``, so they follow the parent axes
    automatically under ``tight_layout``, ``subplots_adjust``, and
    ``constrained_layout``.

    Parameters
    ----------
    data : DataFrame, optional
        DataFrame containing scores and metadata. If provided, ``x``, ``y``,
        ``hue`` refer to column names.
    x : str or array-like, optional
        Horizontal axis values (column name or array).
    y : str or array-like, optional
        Vertical axis values (column name or array).
    hue : str or array-like, optional
        Grouping variable for scatter coloring.
    reducer : fitted estimator, optional
        Convenience parameter. Extracts ``reducer_inverse_transform`` via
        ``.inverse_transform`` and ``n_components`` via ``.n_components_``
        (fallback to ``.n_components``).
    reducer_inverse_transform : callable, optional
        Overrides ``reducer.inverse_transform``.
    n_components : int, optional
        Overrides ``reducer.n_components_``.
    descriptor : fitted estimator, optional
        Convenience parameter. Extracts ``descriptor_inverse_transform``
        via ``.inverse_transform``.
    descriptor_inverse_transform : callable, optional
        Overrides ``descriptor.inverse_transform``.
    components : tuple of (int, int)
        0-indexed component indices for (horizontal, vertical) axes.
    shape_type : str
        Shape rendering type. One of ``"auto"``, ``"curve_2d"``,
        ``"curve_3d"``, ``"surface_3d"``, ``"landmarks_2d"``,
        ``"landmarks_3d"``.
    render_fn : callable, optional
        Custom renderer ``(coords, ax, **kw) -> None``.
    n_dim : int, optional
        Spatial dimensionality (for GPA identity case).
    links : sequence of sequence of int, optional
        Landmark link pairs.
    n_shapes : int
        Number of shapes along each axis (total: ``n_shapes * n_shapes``).
        Must be at least 1.
    shape_scale : float
        Scale factor for inset shape size, expressed as a fraction of
        the parent ax's per-cell extent (the data range divided by
        ``n_shapes``). ``1.0`` fills each cell.
    shape_color : str
        Color for reconstructed shapes.
    shape_alpha : float
        Transparency for reconstructed shapes.
    palette : str or sequence, optional
        Forwarded to ``sns.scatterplot``.
    hue_order : sequence, optional
        Forwarded to ``sns.scatterplot``.
    scatter_kw : dict, optional
        Additional kwargs forwarded to ``sns.scatterplot``.
    ax : matplotlib.axes.Axes, optional
        Pre-existing axes. If ``None``, creates new figure and axes.
    **render_kw
        Forwarded to the shape renderer.

    Returns
    -------
    ax : matplotlib.axes.Axes
        The main scatter plot axes.

    Raises
    ------
    ImportError
        If matplotlib or seaborn are not installed.
    ValueError
        If ``n_shapes`` is smaller than 1 (checked before any figure is
        created or anything is drawn), or if required parameters cannot
        be resolved.

    Notes
    -----
    The scatter is drawn only when both ``x`` and ``y`` are given, and
    shape insets are drawn only when ``reducer`` or
    ``reducer_inverse_transform`` is given. The inset grid spans the axes
    limits as they are at the time the insets are placed.

    When ``shape_type="auto"`` (the default), the type is inferred from a
    single specimen (batch dimension removed) of the descriptor inverse
    transform output:

    - ``(m, n, 3)`` (ndim=3) -> ``"surface_3d"``
    - ``(t, 2)`` (ndim=2, last dim 2) -> ``"curve_2d"``
    - ``(t, k)`` (ndim=2, last dim >= 3) -> ``"curve_3d"``
    - No descriptor (identity / GPA case) with ``n_dim=2`` -> ``"landmarks_2d"``
    - No descriptor (identity / GPA case) with ``n_dim=3`` -> ``"landmarks_3d"``

    For per-specimen shapes with ``shape[-1] == 3`` and ndim=2,
    auto-detection chooses ``"curve_3d"``. If the data represents
    landmarks, specify ``shape_type="landmarks_3d"`` explicitly.

    3-D shape types (``"surface_3d"``, ``"curve_3d"``, ``"landmarks_3d"``)
    use matplotlib 3-D projection for each inset, which is significantly
    slower. For 3-D surfaces (e.g., SHA), consider using ``n_shapes <= 3``
    and reducing surface resolution via a
    ``descriptor_inverse_transform`` wrapper.

    For 3-D shape types, ``descriptor.inverse_transform`` typically
    dominates the runtime. ``morphospace_plot`` issues a single batched
    call covering all ``n_shapes ** 2`` grid points, so descriptors that
    parallelize across the batch dimension speed up plotting directly.
    :class:`~ktch.harmonic.SphericalHarmonicAnalysis`,
    :class:`~ktch.harmonic.EllipticFourierAnalysis`, and
    :class:`~ktch.harmonic.DiskHarmonicAnalysis` all accept ``n_jobs``
    at construction time:

    .. code-block:: python

        sha = SphericalHarmonicAnalysis(n_harmonics=20, n_jobs=-1)
        morphospace_plot(..., descriptor=sha)

    See Also
    --------
    shape_variation_plot : Shape grid along component axes.
    explained_variance_ratio_plot : Scree plot of explained variance.

    Examples
    --------
    >>> from ktch.plot import morphospace_plot
    >>> ax = morphospace_plot(  # doctest: +SKIP
    ...     data=df_pca,
    ...     x="PC1", y="PC2", hue="genus",
    ...     reducer=pca,
    ...     descriptor=efa,
    ...     palette="Paired",
    ...     n_shapes=5,
    ...     shape_scale=0.8,
    ... )
    """
    # Reject bad arguments up front: failing later would leave behind a
    # freshly created figure or a half-drawn scatter on the caller's axes.
    if n_shapes < 1:
        raise ValueError(f"n_shapes must be >= 1, got {n_shapes}")

    require_dependencies("matplotlib", "seaborn")
    import matplotlib.pyplot as plt
    import seaborn as sns

    # Create or reuse axes
    if ax is None:
        _, ax = plt.subplots()

    # Draw scatter plot (only when both coordinates are provided)
    if x is not None and y is not None:
        sns.scatterplot(
            data=data,
            x=x,
            y=y,
            hue=hue,
            palette=palette,
            hue_order=hue_order,
            ax=ax,
            **(scatter_kw or {}),
        )

    # Without a reducer there is nothing to reconstruct: scatter only.
    if reducer is None and reducer_inverse_transform is None:
        return ax

    # Resolve reducer/descriptor parameters
    reducer_inverse_transform, _, n_components = _resolve_reducer_params(
        reducer,
        reducer_inverse_transform,
        explained_variance=None,
        n_components=n_components,
        require_variance=False,
    )
    descriptor_inverse_transform, n_dim = _resolve_descriptor_params(
        descriptor,
        descriptor_inverse_transform,
        n_dim,
        shape_type,
    )
    _validate_components(components, n_components)

    # Grid of inset centers spanning the current axes limits.
    comp_h, comp_v = components
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()
    x_range = np.linspace(*xlim, n_shapes)
    y_range = np.linspace(*ylim, n_shapes)

    # "ij" indexing + C-order ravel: horizontal varies slowest, vertical
    # fastest, i.e. the same order as nested ``for h ... for v ...`` loops.
    grid_h, grid_v = np.meshgrid(x_range, y_range, indexing="ij")
    grid = np.column_stack([grid_h.ravel(), grid_v.ravel()])

    # Batch reconstruction: every component other than the two plotted
    # ones is held at zero.
    all_scores = np.zeros((len(grid), n_components))
    all_scores[:, comp_h] = grid[:, 0]
    all_scores[:, comp_v] = grid[:, 1]

    all_coeffs = reducer_inverse_transform(all_scores)
    if descriptor_inverse_transform is not None:
        all_coords = np.asarray(descriptor_inverse_transform(all_coeffs))
    else:
        # Identity case: coefficients are flattened coordinates. Coerce to
        # an ndarray first so array-likes without ``.reshape`` also work.
        all_coords = np.asarray(all_coeffs).reshape(len(grid), -1, n_dim)

    # Auto-detect shape_type if needed
    if shape_type == "auto":
        shape_type = _detect_shape_type(
            all_coords[0],
            descriptor_inverse_transform,
            n_dim,
        )

    renderer, proj = _get_renderer_and_projection(shape_type, render_fn)

    # Inset size in parent ax data units: shape_scale fraction of cell.
    half_w = 0.5 * shape_scale * (xlim[1] - xlim[0]) / n_shapes
    half_h = 0.5 * shape_scale * (ylim[1] - ylim[0]) / n_shapes

    resolved = _resolve_render_kw(
        render_kw,
        color=shape_color,
        alpha=shape_alpha,
        links=links,
    )

    for idx, (score_h, score_v) in enumerate(grid):
        single = all_coords[idx]

        # Anchor inset to parent ax in data coordinates for any subsequent layout
        # (tight_layout, subplots_adjust, ...).
        # zorder below the scatter default (1) keeps markers on top of insets.
        axins = ax.inset_axes(
            bounds=(score_h - half_w, score_v - half_h, 2 * half_w, 2 * half_h),
            transform=ax.transData,
            projection=proj,
            zorder=0,
        )

        renderer(single, axins, **resolved)
        axins.axis("off")

    return ax



#
# Distribution overlays
#


def _iter_overlay_groups(
    x_arr: np.ndarray,
    y_arr: np.ndarray,
    hue_arr: np.ndarray | None,
    categories: list | None,
    *,
    color: Any,
    palette: str | Sequence | dict | None,
    min_points: int,
    overlay_name: str,
):
    """Yield ``(xi, yi, group_color, category_name)`` for each drawable group.

    Handles single-group and per-hue-group iteration, color resolution
    (seaborn palette or explicit ``{category: color}`` mapping), and
    minimum-point-count warnings.

    Parameters
    ----------
    x_arr, y_arr : ndarray
        Coordinate arrays.
    hue_arr : ndarray or None
        Group labels (None for a single group).
    categories : list or None
        Ordered unique hue values. Only read when ``hue_arr`` is given.
    color : color
        Single color for the no-hue case (falls back to ``"C0"``).
    palette : str, sequence, dict, or None
        Colors for hue groups. A dict is used directly as a
        ``{category: color}`` lookup (the form ``seaborn.scatterplot``
        also accepts) and does not require seaborn; anything else is
        passed to ``seaborn.color_palette``.
    min_points : int
        Minimum number of points required to draw the overlay.
    overlay_name : str
        Human-readable name for warning messages (e.g. ``"confidence ellipse"``).

    Yields
    ------
    xi : ndarray
    yi : ndarray
    group_color : color
    category_name : str or None
        ``None`` for the single-group (no-hue) case, otherwise the
        category value.

    Raises
    ------
    ValueError
        If ``palette`` is a dict that lacks an entry for one of
        ``categories``.

    Warns
    -----
    UserWarning
        For every group with fewer than ``min_points`` points; that group
        is skipped.
    """
    # ``stacklevel=3`` below: frame 1 is this generator, frame 2 is the
    # public plotting function iterating it, frame 3 is the user's call.
    if hue_arr is None:
        if len(x_arr) < min_points:
            warnings.warn(
                f"Skipped: need at least {min_points} data points "
                f"for {overlay_name} (got {len(x_arr)})",
                UserWarning,
                stacklevel=3,
            )
            return
        yield x_arr, y_arr, ("C0" if color is None else color), None
        return

    if isinstance(palette, dict):
        # Explicit per-category colors: seaborn.color_palette cannot
        # interpret a mapping, so resolve it here.
        missing = [cat for cat in categories if cat not in palette]
        if missing:
            raise ValueError(f"The palette dictionary is missing keys: {missing!r}")
        color_map = {cat: palette[cat] for cat in categories}
    else:
        require_dependencies("seaborn")
        import seaborn as sns

        colors = sns.color_palette(palette, n_colors=len(categories))
        color_map = dict(zip(categories, colors))

    for cat in categories:
        mask = hue_arr == cat
        xi, yi = x_arr[mask], y_arr[mask]
        if len(xi) < min_points:
            warnings.warn(
                f"Category {cat!r} skipped: need at least {min_points} "
                f"data points for {overlay_name} (got {len(xi)})",
                UserWarning,
                stacklevel=3,
            )
            continue
        yield xi, yi, color_map[cat], cat


def _draw_confidence_ellipse(
    x: np.ndarray,
    y: np.ndarray,
    ax: object,
    *,
    n_std: float,
    fill: bool,
    alpha: float,
    color: Any,
    linewidth: float,
    label: str | None = None,
    **kwargs: Any,
) -> bool:
    """Draw a single confidence ellipse on *ax*.

    The ellipse is built from the sample covariance of *x* and *y*: an
    ellipse with radii ``sqrt(1 + r)`` and ``sqrt(1 - r)`` (``r`` being
    the Pearson correlation) is rotated by 45 degrees, scaled by
    ``n_std`` standard deviations along each axis, and translated to the
    sample mean.

    Parameters
    ----------
    x, y : ndarray
        Coordinates of the points in one group.
    ax : matplotlib.axes.Axes
        Axes receiving the patch.
    n_std : float
        Number of standard deviations used to scale the ellipse.
    fill : bool
        If True the face uses *color*; otherwise the face is ``"none"``.
    alpha : float
        Opacity of the patch.
    color : color
        Edge color (and face color when *fill* is True).
    linewidth : float
        Edge width.
    label : str, optional
        Label attached to the patch.
    **kwargs
        Forwarded to :class:`matplotlib.patches.Ellipse`.

    Returns
    -------
    bool
        True if an ellipse was added. False, with nothing drawn, if the
        variance of *x* or *y* is zero or not finite (e.g. constant
        coordinates or NaN values in the input).
    """
    cov = np.cov(x, y)
    var_x, var_y = cov[0, 0], cov[1, 1]

    # Test each variance on its own rather than their product: the product
    # can under/overflow, and ``product <= 0`` is False for NaN, which would
    # let a NaN covariance through and add a patch with NaN geometry.
    if not (np.isfinite(var_x) and np.isfinite(var_y) and var_x > 0 and var_y > 0):
        return False

    # Imported only once something will actually be drawn.
    import matplotlib.patches as mpatches
    import matplotlib.transforms as transforms

    std_x = np.sqrt(var_x)
    std_y = np.sqrt(var_y)

    # Clip guards against |r| creeping past 1 through rounding, which
    # would make one of the square roots below invalid.
    pearson = np.clip(cov[0, 1] / (std_x * std_y), -1.0, 1.0)

    ell_radius_x = np.sqrt(1 + pearson)
    ell_radius_y = np.sqrt(1 - pearson)

    facecolor = color if fill else "none"

    ellipse = mpatches.Ellipse(
        (0, 0),
        width=ell_radius_x * 2,
        height=ell_radius_y * 2,
        facecolor=facecolor,
        edgecolor=color,
        linewidth=linewidth,
        alpha=alpha,
        label=label,
        **kwargs,
    )

    # Map the normalized ellipse into data space: rotate, stretch by
    # n_std standard deviations per axis, then move to the group mean.
    transf = (
        transforms.Affine2D()
        .rotate_deg(45)
        .scale(std_x * n_std, std_y * n_std)
        .translate(np.mean(x), np.mean(y))
    )
    ellipse.set_transform(transf + ax.transData)
    ax.add_patch(ellipse)
    return True


def confidence_ellipse_plot(
    data: Any | None = None,
    *,
    x: str | npt.ArrayLike | None = None,
    y: str | npt.ArrayLike | None = None,
    hue: str | npt.ArrayLike | None = None,
    confidence: float = 0.95,
    n_std: float | None = None,
    fill: bool = False,
    alpha: float | None = None,
    palette: str | Sequence | None = None,
    hue_order: Sequence | None = None,
    color: Any | None = None,
    linewidth: float = 1.0,
    legend: bool = True,
    ax: object | None = None,
    **kwargs: Any,
) -> object:
    r"""Overlay covariance-based confidence ellipses, one per group.

    Intended as a companion to :func:`morphospace_plot` or
    ``seaborn.scatterplot``: pass the same *data*, *x*, *y* and *hue*
    together with the existing axes to add one ellipse per hue level.

    The ellipse extent comes from *confidence* (default 0.95), which is
    converted to a number of standard deviations via
    :math:`n_{\text{std}} = \sqrt{-2 \ln(1 - p)}`. Supplying *n_std*
    directly skips that conversion.

    Parameters
    ----------
    data : DataFrame, optional
        Table holding the values. When given, *x*, *y*, and *hue* should
        be column names.
    x : str or array-like
        Values for the horizontal axis.
    y : str or array-like
        Values for the vertical axis.
    hue : str or array-like, optional
        Grouping variable; each group gets its own ellipse.
    confidence : float
        Confidence level, strictly between 0 and 1. Sets the ellipse
        size under a bivariate normal assumption. Not used when *n_std*
        is given.
    n_std : float or None
        Ellipse radii in standard deviations. Takes precedence over
        *confidence* when given.
    fill : bool
        Whether to fill the ellipse interiors.
    alpha : float or None
        Opacity. When None, 0.25 is used for filled ellipses and 1.0
        for outlines.
    palette : str or sequence, optional
        Seaborn color palette applied to the hue groups.
    hue_order : sequence, optional
        Order and subset of hue levels to plot.
    color : color, optional
        Color for the single ellipse drawn when *hue* is not set.
        Defaults to ``"C0"``.
    linewidth : float
        Width of the ellipse outline.
    legend : bool
        If True, each per-group ellipse carries its category as *label*
        so that ``ax.legend()`` can list it; otherwise the label is
        ``"_nolegend_"``.
    ax : matplotlib.axes.Axes, optional
        Target axes. Defaults to ``plt.gca()``.
    **kwargs
        Forwarded to :class:`matplotlib.patches.Ellipse`.

    Returns
    -------
    matplotlib.axes.Axes

    Raises
    ------
    ValueError
        If *n_std* is None and *confidence* is not strictly between 0
        and 1.

    Warns
    -----
    UserWarning
        When a group is skipped because x or y has zero variance.

    See Also
    --------
    convex_hull_plot : Convex hull overlay.
    morphospace_plot : Scatter plot with shape insets.

    Examples
    --------
    >>> from ktch.plot import morphospace_plot, confidence_ellipse_plot
    >>> ax = morphospace_plot(  # doctest: +SKIP
    ...     data=df, x="PC1", y="PC2", hue="species",
    ...     reducer=pca, descriptor=efa,
    ... )
    >>> confidence_ellipse_plot(  # doctest: +SKIP
    ...     data=df, x="PC1", y="PC2", hue="species",
    ...     ax=ax,
    ... )
    """
    require_dependencies("matplotlib")
    import matplotlib.pyplot as plt

    # Derive the radius multiplier from the confidence level unless the
    # caller supplied it explicitly.
    if n_std is None:
        if not (0 < confidence < 1):
            raise ValueError(
                f"confidence must be in the open interval (0, 1), got {confidence}"
            )
        n_std = float(np.sqrt(-2.0 * np.log(1.0 - confidence)))

    target_ax = plt.gca() if ax is None else ax

    xs, ys, hues, levels = _resolve_xy_hue(data, x, y, hue, hue_order)

    opacity = alpha if alpha is not None else (0.25 if fill else 1.0)

    groups = _iter_overlay_groups(
        xs,
        ys,
        hues,
        levels,
        color=color,
        palette=palette,
        min_points=2,
        overlay_name="confidence ellipse",
    )
    for group_x, group_y, group_color, level in groups:
        # No label for the single-group case; otherwise the category, or
        # matplotlib's "hide from legend" marker when legend is off.
        if level is None:
            patch_label = None
        elif legend:
            patch_label = level
        else:
            patch_label = "_nolegend_"

        if _draw_confidence_ellipse(
            group_x,
            group_y,
            target_ax,
            n_std=n_std,
            fill=fill,
            alpha=opacity,
            color=group_color,
            linewidth=linewidth,
            label=patch_label,
            **kwargs,
        ):
            continue

        prefix = "Skipped" if level is None else f"Category {level!r} skipped"
        warnings.warn(
            f"{prefix}: zero variance in x or y",
            UserWarning,
            stacklevel=2,
        )

    return target_ax


def _draw_convex_hull(
    x: np.ndarray,
    y: np.ndarray,
    ax: object,
    *,
    fill: bool,
    alpha: float,
    color: Any,
    linewidth: float,
    label: str | None = None,
    **kwargs: Any,
) -> None:
    """Add the convex hull of the points ``(x, y)`` to *ax*.

    With *fill* set, the hull is added as a closed
    :class:`matplotlib.patches.Polygon` whose face and edge both use
    *color*. Otherwise it is drawn with ``ax.plot`` as an outline that
    returns to its first vertex. Extra ``**kwargs`` go to whichever of
    the two is used.

    ``scipy.spatial.ConvexHull`` may raise ``QhullError`` if the points
    are degenerate; the error is not handled here.
    """
    import matplotlib.patches as mpatches
    from scipy.spatial import ConvexHull

    pts = np.column_stack([x, y])
    outline = pts[ConvexHull(pts).vertices]

    if not fill:
        # Repeat the first vertex at the end so the line closes the loop.
        closed = np.vstack([outline, outline[:1]])
        ax.plot(
            closed[:, 0],
            closed[:, 1],
            color=color,
            linewidth=linewidth,
            alpha=alpha,
            label=label,
            **kwargs,
        )
        return

    ax.add_patch(
        mpatches.Polygon(
            outline,
            closed=True,
            facecolor=color,
            edgecolor=color,
            linewidth=linewidth,
            alpha=alpha,
            label=label,
            **kwargs,
        )
    )


def convex_hull_plot(
    data: Any | None = None,
    *,
    x: str | npt.ArrayLike | None = None,
    y: str | npt.ArrayLike | None = None,
    hue: str | npt.ArrayLike | None = None,
    fill: bool = False,
    alpha: float | None = None,
    palette: str | Sequence | None = None,
    hue_order: Sequence | None = None,
    color: Any | None = None,
    linewidth: float = 1.0,
    legend: bool = True,
    ax: object | None = None,
    **kwargs: Any,
) -> object:
    """Overlay convex hulls, one per group, on a scatter plot.

    Intended as a companion to :func:`morphospace_plot` or
    ``seaborn.scatterplot``: pass the same *data*, *x*, *y* and *hue*
    together with the existing axes to outline each hue level.

    Parameters
    ----------
    data : DataFrame, optional
        Table holding the values. When given, *x*, *y*, and *hue* should
        be column names.
    x : str or array-like
        Values for the horizontal axis.
    y : str or array-like
        Values for the vertical axis.
    hue : str or array-like, optional
        Grouping variable; each group gets its own hull.
    fill : bool
        Whether to fill the hull interiors.
    alpha : float or None
        Opacity. When None, 0.2 is used for filled hulls and 1.0 for
        outlines.
    palette : str or sequence, optional
        Seaborn color palette applied to the hue groups.
    hue_order : sequence, optional
        Order and subset of hue levels to plot.
    color : color, optional
        Color for the single hull drawn when *hue* is not set.
        Defaults to ``"C0"``.
    linewidth : float
        Width of the hull outline.
    legend : bool
        If True, each per-group hull carries its category as *label* so
        that ``ax.legend()`` can list it; otherwise the label is
        ``"_nolegend_"``.
    ax : matplotlib.axes.Axes, optional
        Target axes. Defaults to ``plt.gca()``.
    **kwargs
        Forwarded to :meth:`~matplotlib.axes.Axes.plot` (when
        *fill* is False) or :class:`matplotlib.patches.Polygon`
        (when *fill* is True).

    Returns
    -------
    matplotlib.axes.Axes

    Warns
    -----
    UserWarning
        When a group is skipped because its hull computation raises
        ``QhullError``.

    See Also
    --------
    confidence_ellipse_plot : Confidence ellipse overlay.
    morphospace_plot : Scatter plot with shape insets.

    Examples
    --------
    >>> from ktch.plot import morphospace_plot, convex_hull_plot
    >>> ax = morphospace_plot(  # doctest: +SKIP
    ...     data=df, x="PC1", y="PC2", hue="species",
    ...     reducer=pca, descriptor=efa,
    ... )
    >>> convex_hull_plot(  # doctest: +SKIP
    ...     data=df, x="PC1", y="PC2", hue="species",
    ...     ax=ax,
    ... )
    """
    require_dependencies("matplotlib")
    import matplotlib.pyplot as plt
    from scipy.spatial import QhullError

    target_ax = plt.gca() if ax is None else ax

    xs, ys, hues, levels = _resolve_xy_hue(data, x, y, hue, hue_order)

    opacity = alpha if alpha is not None else (0.2 if fill else 1.0)

    groups = _iter_overlay_groups(
        xs,
        ys,
        hues,
        levels,
        color=color,
        palette=palette,
        min_points=3,
        overlay_name="convex hull",
    )
    for group_x, group_y, group_color, level in groups:
        # No label for the single-group case; otherwise the category, or
        # matplotlib's "hide from legend" marker when legend is off.
        if level is None:
            hull_label = None
        elif legend:
            hull_label = level
        else:
            hull_label = "_nolegend_"

        try:
            _draw_convex_hull(
                group_x,
                group_y,
                target_ax,
                fill=fill,
                alpha=opacity,
                color=group_color,
                linewidth=linewidth,
                label=hull_label,
                **kwargs,
            )
        except QhullError:
            # Skip this group with a warning; remaining groups are still drawn.
            prefix = "Skipped" if level is None else f"Category {level!r} skipped"
            warnings.warn(
                f"{prefix}: unable to compute convex hull (points may be collinear)",
                UserWarning,
                stacklevel=2,
            )

    return target_ax