Elliptic Fourier Analysis#

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.decomposition import PCA

from ktch.datasets import load_outline_mosquito_wings
from ktch.harmonic import EllipticFourierAnalysis
from ktch.plot import plot_explained_variance_ratio

Load mosquito wing outline dataset#

from Rohlf and Archie 1984 Syst. Zool.

data_outline_mosquito_wings = load_outline_mosquito_wings(as_frame=True)
data_outline_mosquito_wings.coords
x y
specimen_id coord_id
1 0 0.99973 0.00000
1 0.81881 0.05151
2 0.70063 0.08851
3 0.61179 0.11670
4 0.53226 0.13666
... ... ... ...
126 95 0.68167 -0.22149
96 0.76135 -0.19548
97 0.90752 -0.17312
98 0.95720 -0.12093
99 0.95964 -0.06038

12600 rows × 2 columns

coords = data_outline_mosquito_wings.coords.to_numpy().reshape(-1, 100, 2)
fig, ax = plt.subplots()
sns.lineplot(x=coords[0][:, 0], y=coords[0][:, 1], sort=False, estimator=None, ax=ax)
ax.set_aspect("equal")
../../_images/49e2420554882e52543f1290a8a33ba397822d42f76cabdefe24de1a10b21f47.png
fig, ax = plt.subplots(figsize=(10, 10))
sns.lineplot(
    data=data_outline_mosquito_wings.coords,
    x="x",
    y="y",
    hue="specimen_id",
    sort=False,
    estimator=None,
    ax=ax,
)
ax.set_aspect("equal")
../../_images/ce1b1a320380b6839998080a85b207f31031cafd221dbc32bbea8b5c0c497c1a.png

EFA#

efa = EllipticFourierAnalysis(n_harmonics=20)
coef = efa.fit_transform(coords)

PCA#

pca = PCA(n_components=12)
pcscores = pca.fit_transform(coef)
df_pca = pd.DataFrame(pcscores)
df_pca["specimen_id"] = [i for i in range(1, len(pcscores) + 1)]
df_pca = df_pca.set_index("specimen_id")
df_pca = df_pca.join(data_outline_mosquito_wings.meta)
df_pca = df_pca.rename(columns={i: ("PC" + str(i + 1)) for i in range(12)})
df_pca
PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 PC11 PC12 genus
specimen_id
1 0.004277 -0.019211 0.018697 0.000810 -0.001363 -0.015705 0.007489 -0.000853 -0.004332 0.000064 -0.001233 -0.000509 AN
2 -0.034207 0.019829 0.000914 0.002444 0.004912 0.004622 0.002922 -0.000719 -0.008601 -0.003872 -0.002954 0.000656 AN
3 -0.199464 -0.031161 -0.045073 0.008504 -0.004950 0.006258 0.001273 0.004424 -0.003027 -0.000986 0.002781 -0.001875 AN
4 -0.248697 0.035792 -0.019214 -0.000370 0.007445 -0.009597 0.004925 0.001945 0.001729 -0.001801 0.001553 -0.001683 AN
5 0.151898 -0.057919 -0.009257 0.009022 0.002941 0.009306 0.003443 -0.001652 -0.001026 -0.004647 -0.003179 0.000502 AN
... ... ... ... ... ... ... ... ... ... ... ... ... ...
122 -0.084827 0.032089 -0.023891 0.000607 0.007980 -0.003300 0.000688 0.002950 0.003434 -0.001497 0.002059 0.000181 CX
123 -0.096891 -0.043715 0.000709 -0.007439 -0.005802 -0.003579 0.005781 0.000594 0.006692 0.001221 -0.000017 0.001236 CX
124 -0.119387 0.016518 -0.031522 -0.021842 0.001695 0.002130 0.002902 0.003965 0.000240 -0.001915 -0.001795 0.000932 CX
125 -0.059915 0.046869 0.014093 -0.002840 0.008363 -0.001264 0.001011 0.000412 0.003208 0.004421 -0.000136 0.000643 CX
126 -0.035477 0.031945 0.008483 -0.019398 -0.002612 0.005169 0.004838 -0.000205 0.002391 -0.002302 0.000625 -0.001556 DE

126 rows × 13 columns

fig, ax = plt.subplots()
sns.scatterplot(data=df_pca, x="PC1", y="PC2", hue="genus", ax=ax, palette="Paired")
<Axes: xlabel='PC1', ylabel='PC2'>
../../_images/ae2cb706d04db4b6eff6432e5af4efe9f18a4b84fac995883e98dc220b1248f3.png

Morphospace#

def get_pc_scores_for_morphospace(ax, num=5):
    xrange = np.linspace(ax.get_xlim()[0], ax.get_xlim()[1], num)
    yrange = np.linspace(ax.get_ylim()[0], ax.get_ylim()[1], num)
    return xrange, yrange


def plot_recon_morphs(
    pca,
    efa,
    fig,
    ax,
    n_PCs_xy=[1, 2],
    morph_num=3,
    morph_scale=1.0,
    morph_color="lightgray",
    morph_alpha=0.7,
):
    pc_scores_h, pc_scores_v = get_pc_scores_for_morphospace(ax, morph_num)
    print("PC_h: ", pc_scores_h, ", PC_v: ", pc_scores_v)
    for pc_score_h in pc_scores_h:
        for pc_score_v in pc_scores_v:
            pc_score = np.zeros(pca.n_components_)
            n_PC_h, n_PC_v = n_PCs_xy
            pc_score[n_PC_h - 1] = pc_score_h
            pc_score[n_PC_v - 1] = pc_score_v

            arr_coef = pca.inverse_transform([pc_score])

            ax_width = ax.get_window_extent().width
            fig_width = fig.get_window_extent().width
            fig_height = fig.get_window_extent().height
            morph_size = morph_scale * ax_width / (fig_width * morph_num)
            loc = ax.transData.transform((pc_score_h, pc_score_v))
            axins = fig.add_axes(
                [
                    loc[0] / fig_width - morph_size / 2,
                    loc[1] / fig_height - morph_size / 2,
                    morph_size,
                    morph_size,
                ],
                anchor="C",
            )

            coords = efa.inverse_transform(arr_coef)
            x = coords[0][:, 0]
            y = coords[0][:, 1]

            axins.plot(
                x.astype(float), y.astype(float), color=morph_color, alpha=morph_alpha
            )
            axins.axis("equal")
            axins.axis("off")
fig = plt.figure(figsize=(16, 16), dpi=200)

ax = fig.add_subplot(2, 2, 1)
sns.scatterplot(
    data=df_pca,
    x="PC1",
    y="PC2",
    hue="genus",
    hue_order=None,
    palette="Paired",
    ax=ax,
    legend=True,
)

plot_recon_morphs(pca, efa, morph_num=5, morph_scale=0.5, fig=fig, ax=ax)
PC_h:  [-0.43098724 -0.23129168 -0.03159612  0.16809944  0.367795  ] , PC_v:  [-0.08749144 -0.0333772   0.02073705  0.0748513   0.12896555]
../../_images/349f47266ef4ee0e997e1a1cc5dd9be0762440bc2221113e63acc1653bd815db.png
morph_num = 5
morph_scale = 0.8
morph_color = "gray"
morph_alpha = 0.8

hue_order = df_pca["genus"].unique()

fig = plt.figure(figsize=(16, 16), dpi=200)

#########
# PC1
#########
ax = fig.add_subplot(2, 2, 1)
sns.scatterplot(
    data=df_pca,
    x="PC1",
    y="PC2",
    hue="genus",
    hue_order=hue_order,
    palette="Paired",
    ax=ax,
    legend=True,
)

plot_recon_morphs(
    pca,
    efa,
    morph_num=5,
    morph_scale=morph_scale,
    morph_color=morph_color,
    morph_alpha=morph_alpha,
    fig=fig,
    ax=ax,
)

ax.patch.set_alpha(0)
ax.set(xlabel="PC1", ylabel="PC2")

print("PC1-PC2 done")

#########
# PC2
#########
ax = fig.add_subplot(2, 2, 2)
sns.scatterplot(
    data=df_pca,
    x="PC2",
    y="PC3",
    hue="genus",
    hue_order=hue_order,
    palette="Paired",
    ax=ax,
    legend=True,
)

plot_recon_morphs(
    pca,
    efa,
    morph_num=5,
    morph_scale=morph_scale,
    morph_color=morph_color,
    morph_alpha=morph_alpha,
    fig=fig,
    ax=ax,
    n_PCs_xy=[2, 3],
)

ax.patch.set_alpha(0)
ax.set(xlabel="PC2", ylabel="PC3")

print("PC2-PC3 done")

#########
# PC3
#########
ax = fig.add_subplot(2, 2, 3)
sns.scatterplot(
    data=df_pca,
    x="PC3",
    y="PC1",
    hue="genus",
    hue_order=hue_order,
    palette="Paired",
    ax=ax,
    legend=True,
)

plot_recon_morphs(
    pca,
    efa,
    morph_num=5,
    morph_scale=morph_scale,
    morph_color=morph_color,
    morph_alpha=morph_alpha,
    fig=fig,
    ax=ax,
    n_PCs_xy=[3, 1],
)

ax.patch.set_alpha(0)
ax.set(xlabel="PC3", ylabel="PC1")

print("PC3-PC1 done")

#########
# CCR
#########

ax = fig.add_subplot(2, 2, 4)
plot_explained_variance_ratio(pca, ax=ax, verbose=True)
PC_h:  [-0.43098724 -0.23129168 -0.03159612  0.16809944  0.367795  ] , PC_v:  [-0.08749144 -0.0333772   0.02073705  0.0748513   0.12896555]
PC1-PC2 done
PC_h:  [-0.08749144 -0.0333772   0.02073705  0.0748513   0.12896555] , PC_v:  [-0.06424967 -0.0314274   0.00139486  0.03421713  0.0670394 ]
PC2-PC3 done
PC_h:  [-0.06424967 -0.0314274   0.00139486  0.03421713  0.0670394 ] , PC_v:  [-0.43098724 -0.23129168 -0.03159612  0.16809944  0.367795  ]
PC3-PC1 done
Explained variance ratio:
['PC1 0.8973793082876147', 'PC2 0.06795826373299034', 'PC3 0.023626937292117314', 'PC4 0.003773503228718847', 'PC5 0.002928302449990927', 'PC6 0.001256061508173971', 'PC7 0.0007323203596517956', 'PC8 0.0005507118631158467', 'PC9 0.00043565710337472723', 'PC10 0.00030767134202518213', 'PC11 0.00022663508829736778', 'PC12 0.00015100902428697937']
Cumsum of Explained variance ratio:
['PC1 0.8973793082876147', 'PC2 0.965337572020605', 'PC3 0.9889645093127223', 'PC4 0.9927380125414411', 'PC5 0.995666314991432', 'PC6 0.9969223764996059', 'PC7 0.9976546968592577', 'PC8 0.9982054087223735', 'PC9 0.9986410658257482', 'PC10 0.9989487371677733', 'PC11 0.9991753722560707', 'PC12 0.9993263812803577']
<Axes: >
../../_images/aa4a2eed6abf21608f7ea09b7ce16637d43341a831065e3e2955d0eae954bf9c.png