Elliptic Fourier Analysis#

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.decomposition import PCA

from ktch.datasets import load_outline_mosquito_wings
from ktch.outline import EllipticFourierAnalysis

Load mosquito wing outline dataset#

from Rohlf and Archie 1984 Syst. Zool.

data_outline_mosquito_wings = load_outline_mosquito_wings(as_frame=True)
data_outline_mosquito_wings.coords
x y
specimen_id coord_id
1 0 0.99973 0.00000
1 0.81881 0.05151
2 0.70063 0.08851
3 0.61179 0.11670
4 0.53226 0.13666
... ... ... ...
126 95 0.68167 -0.22149
96 0.76135 -0.19548
97 0.90752 -0.17312
98 0.95720 -0.12093
99 0.95964 -0.06038

12600 rows × 2 columns

coords = data_outline_mosquito_wings.coords.to_numpy().reshape(-1,100,2)
fig, ax = plt.subplots()
sns.lineplot(x=coords[0][:,0], y=coords[0][:,1], sort= False,estimator=None,ax=ax)
ax.set_aspect('equal')
../../_images/986ae4eeef66bce31d253e9b2bf4e6366c712ffb64f176073e957af5fa28fb20.png
fig, ax = plt.subplots(figsize=(10,10))
sns.lineplot(
    data= data_outline_mosquito_wings.coords,x="x", y="y",
    hue="specimen_id",
    sort= False,estimator=None,ax=ax)
ax.set_aspect('equal')
../../_images/9dbfa7ea3d3a1e77d6b331ce53b6167c4b656da272e2dda427adaeb6d6118947.png

EFA#

efa = EllipticFourierAnalysis(n_harmonics=20)
coef = efa.fit_transform(coords)

PCA#

pca = PCA(n_components=12)
pcscores = pca.fit_transform(coef)
df_pca = pd.DataFrame(pcscores)
df_pca["specimen_id"] = [i for i in range(1,len(pcscores)+1)]
df_pca = df_pca.set_index("specimen_id")
df_pca = df_pca.join(data_outline_mosquito_wings.meta)
df_pca = df_pca.rename(columns={i:("PC"+str(i+1)) for i in range(12)})
df_pca
PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 PC11 PC12 genus
specimen_id
1 0.047296 -0.028614 -0.001061 -0.000310 0.013393 -0.010994 -0.001533 -0.002746 0.004097 -0.000186 0.001503 -0.000810 AN
2 0.028955 0.015756 0.001092 -0.003740 -0.006694 -0.002453 -0.002395 -0.001203 0.008363 -0.003959 0.001362 -0.000196 AN
3 -0.191432 0.001392 0.017447 0.001495 -0.006041 0.000175 0.004568 0.000639 0.003804 0.000880 -0.001539 0.003237 AN
4 -0.078667 0.048088 -0.002377 -0.001929 0.005109 -0.010823 0.002926 0.002686 -0.001388 0.000213 -0.000209 0.002098 AN
5 -0.035074 -0.050274 0.009340 -0.008051 -0.010825 -0.000951 -0.003192 -0.001052 0.001537 -0.005507 0.000756 0.003088 AN
... ... ... ... ... ... ... ... ... ... ... ... ... ...
122 -0.073861 0.045384 -0.002446 -0.004791 0.000477 -0.004412 0.002848 0.003930 -0.002412 0.000114 -0.001704 0.000024 CX
123 -0.041873 -0.038377 0.000938 0.011287 0.001922 -0.006925 0.002899 -0.001107 -0.006483 0.000414 -0.000392 -0.001321 CX
124 -0.152010 0.041032 -0.016443 0.014469 -0.002731 -0.001113 0.004757 -0.001846 0.000681 -0.003672 -0.000681 -0.001829 CX
125 0.049972 0.036880 -0.012326 -0.004738 0.001212 -0.000433 0.003217 -0.002744 -0.003874 0.002662 0.000321 -0.000729 CX
126 0.067099 0.021569 -0.009049 0.017494 -0.008229 -0.005337 -0.000294 0.001003 -0.001838 -0.000874 0.000331 -0.000350 DE

126 rows × 13 columns

fig, ax = plt.subplots()
sns.scatterplot(data=df_pca, x="PC1", y = "PC2", hue="genus", ax=ax, palette="Paired")
<Axes: xlabel='PC1', ylabel='PC2'>
../../_images/b289e3f670262db9e662a29e20dec07db3a313fb9924cb912440e08a3796ef86.png

Morphospace#

def get_pc_scores_for_morphospace(ax, num = 5):
    xrange = np.linspace(ax.get_xlim()[0], ax.get_xlim()[1],num)
    yrange = np.linspace(ax.get_ylim()[0], ax.get_ylim()[1],num)
    return xrange, yrange

def plot_recon_morphs(pca, efa, hue, hue_order, fig, ax, 
             n_PCs_xy = [1, 2], lmax = 20, morph_num = 3, morph_scale = 1.0, morph_color = "lightgray", morph_alpha = 0.7, 
             standardized_by_1st_ellipsoid = False):
    
    pc_scores_h, pc_scores_v = get_pc_scores_for_morphospace(ax, morph_num)
    print("PC_h: ", pc_scores_h, ", PC_v: ", pc_scores_v)
    for pc_score_h in pc_scores_h:
        for pc_score_v in pc_scores_v:
            pc_score = np.zeros(pca.n_components_)
            n_PC_h, n_PC_v = n_PCs_xy
            pc_score[n_PC_h-1] = pc_score_h
            pc_score[n_PC_v-1] = pc_score_v

            arr_coef = pca.inverse_transform([pc_score])

            
            ax_width = ax.get_window_extent().width
            fig_width = fig.get_window_extent().width
            fig_height = fig.get_window_extent().height
            morph_size = morph_scale*ax_width/(fig_width*morph_num)
            loc = ax.transData.transform((pc_score_h, pc_score_v))
            axins = fig.add_axes([loc[0]/fig_width-morph_size/2, loc[1]/fig_height-morph_size/2,
                                       morph_size, morph_size], anchor='C')
            
            coords = efa.inverse_transform(arr_coef)
            x = coords[0][:,0]
            y = coords[0][:,1]
            
            axins.plot(x.astype(float),y.astype(float),color=morph_color, alpha = morph_alpha)
            axins.axis('equal')
            axins.axis('off') 
fig = plt.figure(figsize=(16,16),dpi=200)

ax = fig.add_subplot(2,2,1)
sns.scatterplot(data=df_pca, x="PC1", y="PC2", hue="genus", hue_order=None, 
                palette="Paired", ax = ax, legend = True)

plot_recon_morphs(pca,efa,morph_num=5,morph_scale=0.5,
                  hue=None,hue_order=None, 
                  fig = fig, ax = ax)
PC_h:  [-0.2400097  -0.11657721  0.00685527  0.13028775  0.25372023] , PC_v:  [-0.09298342 -0.03817918  0.01662506  0.07142931  0.12623355]
../../_images/f8bd4c9bf46bbc4a5290c6b6125c613937f3d2a9cf530429ab118b77aa5e67e7.png
morph_num = 5
morph_scale = 0.8
morph_color = "gray"
morph_alpha = 0.8

fig = plt.figure(figsize=(16,16),dpi=200)

#########
# PC1
#########
ax = fig.add_subplot(2,2,1)
sns.scatterplot(data=df_pca, x="PC1", y="PC2", hue="genus", hue_order=None, 
                palette="Paired", ax = ax, legend = True)

plot_recon_morphs(pca,efa,morph_num=5,morph_scale=morph_scale,
                  hue=None,hue_order=None, 
                  morph_color=morph_color, morph_alpha=morph_alpha,
                  fig = fig, ax = ax)

sns.scatterplot(data=df_pca, x="PC1", y="PC2", hue="genus", hue_order=None, 
                palette="Paired", ax = ax)

ax.patch.set_alpha(0)
ax.set(xlabel='PC1', ylabel='PC2')

print("PC1-PC2 done")

#########
# PC2
#########
ax = fig.add_subplot(2,2,2)
sns.scatterplot(data=df_pca, x="PC2", y="PC3", hue="genus", hue_order=None, 
                palette="Paired", ax = ax, legend = True)

plot_recon_morphs(pca,efa,morph_num=5,morph_scale=morph_scale,
                  hue=None,hue_order=None, 
                  morph_color=morph_color, morph_alpha=morph_alpha,
                  fig = fig, ax = ax, n_PCs_xy = [2, 3])

sns.scatterplot(data=df_pca, x="PC2", y="PC3", hue="genus", hue_order=None, 
                palette="Paired", ax = ax)

ax.patch.set_alpha(0)
ax.set(xlabel='PC2', ylabel='PC3')

print("PC2-PC3 done")

#########
# PC3
#########
ax=fig.add_subplot(2,2,3)
sns.scatterplot(data=df_pca, x="PC3", y="PC1", hue="genus", hue_order=None, 
                palette="Paired", ax = ax, legend = True)

plot_recon_morphs(pca,efa,morph_num=5,morph_scale=morph_scale,
                  hue=None,hue_order=None, 
                  morph_color=morph_color, morph_alpha=morph_alpha,
                  fig = fig, ax = ax, n_PCs_xy = [3, 1])

sns.scatterplot(data=df_pca, x="PC3", y="PC1", hue="genus", hue_order=None, 
                palette="Paired", ax = ax)

ax.patch.set_alpha(0)
ax.set(xlabel='PC3', ylabel='PC1')

print("PC3-PC1 done")

#########
# CCR
#########

fig.add_subplot(2,2,4)
sns.barplot(x=["PC"+str(i+1) for i in range(12)], y=pca.explained_variance_ratio_[0:16], color="gray")
PC_h:  [-0.2400097  -0.11657721  0.00685527  0.13028775  0.25372023] , PC_v:  [-0.09298342 -0.03817918  0.01662506  0.07142931  0.12623355]
PC1-PC2 done
PC_h:  [-0.09298342 -0.03817918  0.01662506  0.07142931  0.12623355] , PC_v:  [-0.0252314  -0.01161327  0.00200486  0.01562299  0.02924111]
PC2-PC3 done
PC_h:  [-0.0252314  -0.01161327  0.00200486  0.01562299  0.02924111] , PC_v:  [-0.2400097  -0.11657721  0.00685527  0.13028775  0.25372023]
PC3-PC1 done
<Axes: >
../../_images/928f480bfc1d169dff3261d3b46c5ec1a562f5121767fba73ab56af00fdcc394.png