"""Chain code file I/O functions."""
# Copyright 2025 Koji Noshita
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import warnings
from dataclasses import dataclass
from pathlib import Path
import numpy as np
import pandas as pd
from ._protocols import MorphoDataMixin
_DIRECTIONS = np.array(
[
[1, 0], # 0: right
[1, -1], # 1: up-right
[0, -1], # 2: up
[-1, -1], # 3: up-left
[-1, 0], # 4: left
[-1, 1], # 5: down-left
[0, 1], # 6: down
[1, 1], # 7: down-right
]
)
def _chain_code_area(chain_code):
"""Compute enclosed area in pixels from a chain code.
Coordinates represent pixel centers. The polygon area from
the Shoelace formula is converted to pixel count using Pick's theorem:
``pixel_count = polygon_area + boundary_points / 2 + 1``.
Each chain code step (orthogonal or diagonal) has gcd(|dx|, |dy|) = 1,
so there are no intermediate lattice points on edges, and
``boundary_points = len(chain_code)``.
Parameters
----------
chain_code : np.ndarray
Chain code sequence with values from 0 to 7.
Returns
-------
area : int
Enclosed area in pixels.
"""
steps = _DIRECTIONS[chain_code]
coords = np.vstack([np.zeros((1, 2)), np.cumsum(steps, axis=0)])
x, y = coords[:, 0], coords[:, 1]
polygon_area = 0.5 * abs(np.sum(x[:-1] * y[1:] - x[1:] * y[:-1]))
boundary_points = len(chain_code)
return int(round(polygon_area + boundary_points / 2 + 1))
[docs]
@dataclass(repr=False)
class ChainCodeData(MorphoDataMixin):
"""Chain code data class.
Chain codes represent 2D contours using directional codes from 0 to 7::
3 2 1
4 * 0
5 6 7
Parameters
----------
specimen_name : str
Specimen name.
x : float
X coordinate.
y : float
Y coordinate.
area_per_pixel : float
Area (mm2) per pixel.
chain_code : np.ndarray
Chain code sequence with values from 0 to 7 representing directions.
area_pixels : int or None, optional
Area in pixels.
If None, computed from the chain code using the Shoelace formula.
If provided and differs from the computed value,
a warning is issued and the computed value is used.
"""
specimen_name: str
x: float
y: float
area_per_pixel: float
chain_code: np.ndarray
area_pixels: int | None = None
@property
def sample_name(self) -> str:
"""Deprecated. Use ``specimen_name`` instead."""
warnings.warn(
"sample_name is deprecated, use specimen_name",
DeprecationWarning,
stacklevel=2,
)
return self.specimen_name
def _repr_detail(self):
return f"n_points={len(self.chain_code)}"
def __post_init__(self):
if not isinstance(self.chain_code, np.ndarray):
self.chain_code = np.array(self.chain_code)
self._validate_chain_code()
computed_area = _chain_code_area(self.chain_code)
if self.area_pixels is not None and self.area_pixels != computed_area:
warnings.warn(
f"area_pixels ({self.area_pixels}) differs from the value "
f"computed from chain code ({computed_area}). "
f"Using computed value.",
stacklevel=2,
)
self.area_pixels = computed_area
def _validate_chain_code(self):
"""Validate that chain code values are between 0 and 7."""
if not np.all((self.chain_code >= 0) & (self.chain_code <= 7)):
invalid_values = self.chain_code[
(self.chain_code < 0) | (self.chain_code > 7)
]
raise ValueError(
f"Chain code contains invalid values: {invalid_values}. "
f"Values must be between 0 and 7 (inclusive)."
)
[docs]
def get_chain_code(self):
"""Get the raw chain code as a numpy array.
Returns
-------
chain_code : np.ndarray
Raw chain code values (0-7) representing directions.
"""
return self.chain_code
[docs]
def to_numpy(self):
"""Convert chain code to 2D coordinates as a numpy array.
The chain code is converted to a sequence of 2D coordinates,
starting from ``(x, y)`` and applying the directional changes
based on the chain code values. Displacements are scaled by
``sqrt(area_per_pixel)``; when ``area_per_pixel <= 0`` (e.g.
no scale marker was set in SHAPE), pixel units are used
(scale factor = 1).
The returned coordinates are in image coordinates where X
increases rightward and Y increases downward. The starting
point ``(x, y)`` and displacements share the same coordinate
system (pixels when ``area_per_pixel <= 0``, physical units
otherwise).
Chain codes represent 2D contours using directional codes from 0 to 7::
3 2 1
4 * 0
5 6 7
Returns
-------
coords : np.ndarray
2D coordinates with shape (n, 2) where n is the number of points.
The first column is the x-coordinate and the second column is the y-coordinate.
"""
steps = _DIRECTIONS[self.chain_code]
coords = np.vstack([np.zeros((1, 2)), np.cumsum(steps, axis=0)])
if self.area_per_pixel > 0:
scale_factor = np.sqrt(self.area_per_pixel)
coords *= scale_factor
coords[:, 0] += self.x
coords[:, 1] += self.y
return coords
[docs]
def to_dataframe(self):
"""Convert chain code to 2D coordinates as a pandas DataFrame.
The chain code is converted to a sequence of 2D coordinates,
starting from (0, 0) and applying the directional changes
based on the chain code values. The coordinates are scaled
using the area_per_pixel value.
Chain codes represent 2D contours using directional codes from 0 to 7::
3 2 1
4 * 0
5 6 7
Returns
-------
df : pd.DataFrame
DataFrame with x and y columns for the coordinates and chain_code
column for the direction codes. The first point has chain_code=-1
since it has no direction.
"""
coords = self.to_numpy()
chain_code_values = np.zeros(len(coords), dtype=int)
chain_code_values[0] = -1 # First point has no direction
chain_code_values[1:] = self.chain_code # Remaining points have directions
df = pd.DataFrame(
{
"x": coords[:, 0],
"y": coords[:, 1],
"chain_code": chain_code_values,
},
index=pd.MultiIndex.from_tuples(
[[self.specimen_name, i] for i in range(len(coords))],
name=["specimen_id", "coord_id"],
),
)
return df
[docs]
def read_chc(file_path, as_frame=False):
"""Read chain code (.chc) file.
Chain codes represent 2D contours using directional codes from 0 to 7::
3 2 1
4 * 0
5 6 7
The chain code file format is:
[Sample name] [X] [Y] [Area (mm2) per pixel] [Area (pixels)] [Chain code] -1
Parameters
----------
file_path : str
Path to the chain code file.
as_frame : bool, default=False
If True, return a :class:`~pandas.DataFrame`.
Returns
-------
result : ChainCodeData or list of ChainCodeData or pd.DataFrame
If a single record and ``as_frame=False``, returns a
:class:`ChainCodeData`. If multiple records and
``as_frame=False``, returns a list of :class:`ChainCodeData`.
If ``as_frame=True``, returns a concatenated DataFrame.
"""
path = Path(file_path)
if not path.exists():
raise FileNotFoundError(f"{path} does not exist.")
if path.suffix.lower() != ".chc":
raise ValueError(f"{path} is not a chain code file.")
chc_data_list = _read_chc(path)
if as_frame:
dfs = [chc_data.to_dataframe() for chc_data in chc_data_list]
if len(dfs) == 1:
return dfs[0]
return pd.concat(dfs)
if len(chc_data_list) == 1:
return chc_data_list[0]
return chc_data_list
[docs]
def write_chc(
file_path,
chain_codes,
sample_names=None,
xs=None,
ys=None,
area_per_pixels=None,
area_pixels=None,
):
"""Write chain code to .chc file.
Chain codes represent 2D contours using directional codes from 0 to 7::
3 2 1
4 * 0
5 6 7
The chain code file format is:
[Sample name] [X] [Y] [Area (mm2) per pixel] [Area (pixels)] [Chain code] -1
Parameters
----------
file_path : str
Path to the chain code file.
chain_codes : list of np.ndarray or np.ndarray
Chain codes with values from 0 to 7 representing directions.
sample_names : list of str or str, optional
Sample names.
xs : list of float or float, optional
X coordinates.
ys : list of float or float, optional
Y coordinates.
area_per_pixels : list of float or float, optional
Area (mm2) per pixel.
area_pixels : list of int or int, optional
Area in pixels.
"""
path = Path(file_path)
if isinstance(chain_codes, np.ndarray):
if chain_codes.ndim == 1:
chain_codes = [chain_codes]
elif chain_codes.ndim == 2:
chain_codes = [chain_codes[i, :] for i in range(chain_codes.shape[0])]
else:
raise ValueError("chain_codes must be a 1D or 2D array.")
elif not isinstance(chain_codes, list):
raise ValueError("chain_codes must be a list of numpy arrays or a numpy array.")
n_samples = len(chain_codes)
if sample_names is None:
sample_names = ["Sample"] * n_samples
elif isinstance(sample_names, str):
sample_names = [sample_names] * n_samples
if xs is None:
xs = [0] * n_samples
elif isinstance(xs, (int, float)):
xs = [xs] * n_samples
if ys is None:
ys = [0] * n_samples
elif isinstance(ys, (int, float)):
ys = [ys] * n_samples
if area_per_pixels is None:
area_per_pixels = [1.0] * n_samples
elif isinstance(area_per_pixels, (int, float)):
area_per_pixels = [area_per_pixels] * n_samples
if area_pixels is None:
area_pixels = [None] * n_samples
elif isinstance(area_pixels, int):
area_pixels = [area_pixels] * n_samples
for name, lst in [
("sample_names", sample_names),
("xs", xs),
("ys", ys),
("area_per_pixels", area_per_pixels),
("area_pixels", area_pixels),
]:
if len(lst) != n_samples:
raise ValueError(
f"Length of {name} ({len(lst)}) does not match "
f"number of chain codes ({n_samples})."
)
chc_data_list = []
for i in range(n_samples):
chc_data_list.append(
ChainCodeData(
specimen_name=sample_names[i],
x=xs[i],
y=ys[i],
area_per_pixel=area_per_pixels[i],
chain_code=chain_codes[i],
area_pixels=area_pixels[i],
)
)
_write_chc(path, chc_data_list)
def _read_chc(file_path):
"""Read chain code file.
Record consists of a header followed by chain code values terminated by ``-1``.
The header and chain code may appear on a single line
or be split across multiple lines (as documented in the SHAPE manual).
Single-line::
[Sample name] [X] [Y] [Area per pixel] [Area (pixels)] [Chain code...] -1
Multi-line::
[Sample name] [X] [Y] [Area per pixel] [Area (pixels)]
[Chain code...] -1
Parameters
----------
file_path : str or Path
Path to the chain code file.
Returns
-------
chc_data_list : list of ChainCodeData
Chain code data.
"""
chc_data_list = []
with open(file_path, "r") as f:
tokens = []
for line in f:
line = line.strip()
if not line:
continue
tokens.extend(line.split())
# Parse tokens: each record ends with the sentinel "-1"
pos = 0
while pos < len(tokens):
# Need at least header (5 fields) + 1 chain code value + sentinel
if pos + 6 >= len(tokens):
remaining = tokens[pos:]
if remaining:
raise ValueError(
f"Incomplete record at end of {file_path}: {' '.join(remaining)!r}"
)
break
sample_name = tokens[pos]
x = float(tokens[pos + 1])
y = float(tokens[pos + 2])
area_per_pixel = float(tokens[pos + 3])
area_pixels = int(tokens[pos + 4])
pos += 5
# Collect chain code values until sentinel "-1"
cc_values = []
found_sentinel = False
while pos < len(tokens):
if tokens[pos] == "-1":
pos += 1
found_sentinel = True
break
cc_values.append(int(tokens[pos]))
pos += 1
if not found_sentinel:
# No sentinel: treat all remaining values as chain code
pass
if not cc_values:
raise ValueError(
f"Empty chain code for record {sample_name!r} in {file_path}"
)
chc_data = ChainCodeData(
specimen_name=sample_name,
x=x,
y=y,
area_per_pixel=area_per_pixel,
chain_code=np.array(cc_values, dtype=int),
area_pixels=area_pixels,
)
chc_data_list.append(chc_data)
return chc_data_list
def _write_chc(file_path, chc_data_list):
"""Write chain code data to a file.
Parameters
----------
file_path : str or Path
Path to the chain code file.
chc_data_list : list of ChainCodeData
Chain code data.
"""
with open(file_path, "w") as f:
for chc_data in chc_data_list:
f.write(
f"{chc_data.specimen_name} {chc_data.x} {chc_data.y} "
f"{chc_data.area_per_pixel} {chc_data.area_pixels} "
)
f.write(" ".join(map(str, chc_data.chain_code.tolist())))
f.write(" -1\n")