.ipynb

Elliptic Fourier Analysis#

See also

For cases where automatic normalization is not suitable, see 2D Outline Registration.

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.decomposition import PCA

from ktch.datasets import load_outline_mosquito_wings
from ktch.harmonic import EllipticFourierAnalysis
from ktch.plot import explained_variance_ratio_plot

Load mosquito wing outline dataset#

from Rohlf and Archie 1984 Syst. Zool.

data_outline_mosquito_wings = load_outline_mosquito_wings(as_frame=True)
data_outline_mosquito_wings.coords
x y
specimen_id coord_id
1 0 0.99973 0.00000
1 0.81881 0.05151
2 0.70063 0.08851
3 0.61179 0.11670
4 0.53226 0.13666
... ... ... ...
126 95 0.68167 -0.22149
96 0.76135 -0.19548
97 0.90752 -0.17312
98 0.95720 -0.12093
99 0.95964 -0.06038

12600 rows × 2 columns

coords = data_outline_mosquito_wings.coords.to_numpy().reshape(-1, 100, 2)
fig, ax = plt.subplots()
sns.lineplot(x=coords[0][:, 0], y=coords[0][:, 1], sort=False, estimator=None, ax=ax)
ax.set_aspect("equal")
../../_images/83df30501c29d88d38212696bf11e14183bee1d70cdaf27d0ae0dc04f62b0d36.png
fig, ax = plt.subplots(figsize=(10, 10))
sns.lineplot(
    data=data_outline_mosquito_wings.coords,
    x="x",
    y="y",
    hue="specimen_id",
    sort=False,
    estimator=None,
    ax=ax,
)
ax.set_aspect("equal")
../../_images/538a320ffc1fa5edbc47e73a39c471546362fe85035d903b42497defd930e646.png

EFA#

efa = EllipticFourierAnalysis(n_harmonics=20)
coef = efa.fit_transform(coords)

PCA#

pca = PCA(n_components=12)
pcscores = pca.fit_transform(coef)
df_pca = pd.DataFrame(pcscores)
df_pca["specimen_id"] = [i for i in range(1, len(pcscores) + 1)]
df_pca = df_pca.set_index("specimen_id")
df_pca = df_pca.join(data_outline_mosquito_wings.meta)
df_pca = df_pca.rename(columns={i: ("PC" + str(i + 1)) for i in range(12)})
df_pca
PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 PC11 PC12 genus
specimen_id
1 0.004277 -0.019211 0.018697 0.000810 -0.001363 -0.015705 0.007489 -0.000853 -0.004332 0.000064 -0.001233 -0.000509 AN
2 -0.034207 0.019829 0.000914 0.002444 0.004912 0.004622 0.002922 -0.000719 -0.008601 -0.003872 -0.002954 0.000656 AN
3 -0.199464 -0.031161 -0.045073 0.008504 -0.004950 0.006258 0.001273 0.004424 -0.003027 -0.000986 0.002781 -0.001875 AN
4 -0.248697 0.035792 -0.019214 -0.000370 0.007445 -0.009597 0.004925 0.001945 0.001729 -0.001801 0.001553 -0.001683 AN
5 0.151898 -0.057919 -0.009257 0.009022 0.002941 0.009306 0.003443 -0.001652 -0.001026 -0.004647 -0.003179 0.000502 AN
... ... ... ... ... ... ... ... ... ... ... ... ... ...
122 -0.084827 0.032089 -0.023891 0.000607 0.007980 -0.003300 0.000688 0.002950 0.003434 -0.001497 0.002059 0.000181 CX
123 -0.096891 -0.043715 0.000709 -0.007439 -0.005802 -0.003579 0.005781 0.000594 0.006692 0.001221 -0.000017 0.001236 CX
124 -0.119387 0.016518 -0.031522 -0.021842 0.001695 0.002130 0.002902 0.003965 0.000240 -0.001915 -0.001795 0.000932 CX
125 -0.059915 0.046869 0.014093 -0.002840 0.008363 -0.001264 0.001011 0.000412 0.003208 0.004421 -0.000136 0.000643 CX
126 -0.035477 0.031945 0.008483 -0.019398 -0.002612 0.005169 0.004838 -0.000205 0.002391 -0.002302 0.000625 -0.001556 DE

126 rows × 13 columns

fig, ax = plt.subplots()
sns.scatterplot(data=df_pca, x="PC1", y="PC2", hue="genus", ax=ax, palette="Paired")
<Axes: xlabel='PC1', ylabel='PC2'>
../../_images/748eb70f4515519881d6fda7abadecccfa8bf92dbdaaff5acba240828c3fafa4.png

Morphospace#

def get_pc_scores_for_morphospace(ax, num=5):
    xrange = np.linspace(ax.get_xlim()[0], ax.get_xlim()[1], num)
    yrange = np.linspace(ax.get_ylim()[0], ax.get_ylim()[1], num)
    return xrange, yrange


def plot_recon_morphs(
    pca,
    efa,
    fig,
    ax,
    n_PCs_xy=[1, 2],
    morph_num=3,
    morph_scale=1.0,
    morph_color="lightgray",
    morph_alpha=0.7,
):
    pc_scores_h, pc_scores_v = get_pc_scores_for_morphospace(ax, morph_num)
    print("PC_h: ", pc_scores_h, ", PC_v: ", pc_scores_v)
    for pc_score_h in pc_scores_h:
        for pc_score_v in pc_scores_v:
            pc_score = np.zeros(pca.n_components_)
            n_PC_h, n_PC_v = n_PCs_xy
            pc_score[n_PC_h - 1] = pc_score_h
            pc_score[n_PC_v - 1] = pc_score_v

            arr_coef = pca.inverse_transform([pc_score])

            ax_width = ax.get_window_extent().width
            fig_width = fig.get_window_extent().width
            fig_height = fig.get_window_extent().height
            morph_size = morph_scale * ax_width / (fig_width * morph_num)
            loc = ax.transData.transform((pc_score_h, pc_score_v))
            axins = fig.add_axes(
                [
                    loc[0] / fig_width - morph_size / 2,
                    loc[1] / fig_height - morph_size / 2,
                    morph_size,
                    morph_size,
                ],
                anchor="C",
            )

            coords = efa.inverse_transform(arr_coef)
            x = coords[0][:, 0]
            y = coords[0][:, 1]

            axins.plot(
                x.astype(float), y.astype(float), color=morph_color, alpha=morph_alpha
            )
            axins.axis("equal")
            axins.axis("off")
fig = plt.figure(figsize=(16, 16), dpi=200)

ax = fig.add_subplot(2, 2, 1)
sns.scatterplot(
    data=df_pca,
    x="PC1",
    y="PC2",
    hue="genus",
    hue_order=None,
    palette="Paired",
    ax=ax,
    legend=True,
)

plot_recon_morphs(pca, efa, morph_num=5, morph_scale=0.5, fig=fig, ax=ax)
PC_h:  [-0.43098724 -0.23129168 -0.03159612  0.16809944  0.367795  ] , PC_v:  [-0.08749144 -0.0333772   0.02073705  0.0748513   0.12896555]
../../_images/919bb3a357ef7c594056e34ab51c1c49e855adb1c5af135d113e0cbdf28816d4.png
morph_num = 5
morph_scale = 0.8
morph_color = "gray"
morph_alpha = 0.8

hue_order = df_pca["genus"].unique()

fig = plt.figure(figsize=(16, 16), dpi=200)

#########
# PC1
#########
ax = fig.add_subplot(2, 2, 1)
sns.scatterplot(
    data=df_pca,
    x="PC1",
    y="PC2",
    hue="genus",
    hue_order=hue_order,
    palette="Paired",
    ax=ax,
    legend=True,
)

plot_recon_morphs(
    pca,
    efa,
    morph_num=5,
    morph_scale=morph_scale,
    morph_color=morph_color,
    morph_alpha=morph_alpha,
    fig=fig,
    ax=ax,
)

ax.patch.set_alpha(0)
ax.set(xlabel="PC1", ylabel="PC2")

print("PC1-PC2 done")

#########
# PC2
#########
ax = fig.add_subplot(2, 2, 2)
sns.scatterplot(
    data=df_pca,
    x="PC2",
    y="PC3",
    hue="genus",
    hue_order=hue_order,
    palette="Paired",
    ax=ax,
    legend=True,
)

plot_recon_morphs(
    pca,
    efa,
    morph_num=5,
    morph_scale=morph_scale,
    morph_color=morph_color,
    morph_alpha=morph_alpha,
    fig=fig,
    ax=ax,
    n_PCs_xy=[2, 3],
)

ax.patch.set_alpha(0)
ax.set(xlabel="PC2", ylabel="PC3")

print("PC2-PC3 done")

#########
# PC3
#########
ax = fig.add_subplot(2, 2, 3)
sns.scatterplot(
    data=df_pca,
    x="PC3",
    y="PC1",
    hue="genus",
    hue_order=hue_order,
    palette="Paired",
    ax=ax,
    legend=True,
)

plot_recon_morphs(
    pca,
    efa,
    morph_num=5,
    morph_scale=morph_scale,
    morph_color=morph_color,
    morph_alpha=morph_alpha,
    fig=fig,
    ax=ax,
    n_PCs_xy=[3, 1],
)

ax.patch.set_alpha(0)
ax.set(xlabel="PC3", ylabel="PC1")

print("PC3-PC1 done")

#########
# CCR
#########

ax = fig.add_subplot(2, 2, 4)
explained_variance_ratio_plot(pca, ax=ax, verbose=True)
PC_h:  [-0.43098724 -0.23129168 -0.03159612  0.16809944  0.367795  ] , PC_v:  [-0.08749144 -0.0333772   0.02073705  0.0748513   0.12896555]
PC1-PC2 done
PC_h:  [-0.08749144 -0.0333772   0.02073705  0.0748513   0.12896555] , PC_v:  [-0.06424967 -0.0314274   0.00139486  0.03421713  0.0670394 ]
PC2-PC3 done
PC_h:  [-0.06424967 -0.0314274   0.00139486  0.03421713  0.0670394 ] , PC_v:  [-0.43098724 -0.23129168 -0.03159612  0.16809944  0.367795  ]
PC3-PC1 done
Explained variance ratio:
['PC1 0.8973793082876147', 'PC2 0.06795826373299034', 'PC3 0.023626937292117314', 'PC4 0.003773503228718847', 'PC5 0.002928302449990927', 'PC6 0.001256061508173971', 'PC7 0.0007323203596517956', 'PC8 0.0005507118631158467', 'PC9 0.00043565710337472723', 'PC10 0.00030767134202518213', 'PC11 0.00022663508829736778', 'PC12 0.00015100902428697937']
Cumsum of Explained variance ratio:
['PC1 0.8973793082876147', 'PC2 0.965337572020605', 'PC3 0.9889645093127223', 'PC4 0.9927380125414411', 'PC5 0.995666314991432', 'PC6 0.9969223764996059', 'PC7 0.9976546968592577', 'PC8 0.9982054087223735', 'PC9 0.9986410658257482', 'PC10 0.9989487371677733', 'PC11 0.9991753722560707', 'PC12 0.9993263812803577']
<Axes: >
../../_images/aeb8ad98d5fdc00b140584e91c27045b99a22c6994eff853a5266b926afa9c85.png