Elliptic Fourier Analysis#

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.decomposition import PCA

from ktch.datasets import load_outline_mosquito_wings
from ktch.outline import EllipticFourierAnalysis
/tmp/ipykernel_2010/4263885994.py:2: DeprecationWarning: 
Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd

Load mosquito wing outline dataset#

from Rohlf and Archie 1984 Syst. Zool.

data_outline_mosquito_wings = load_outline_mosquito_wings(as_frame=True)
data_outline_mosquito_wings.coords
x y
specimen_id coord_id
1 0 0.99973 0.00000
1 0.81881 0.05151
2 0.70063 0.08851
3 0.61179 0.11670
4 0.53226 0.13666
... ... ... ...
126 95 0.68167 -0.22149
96 0.76135 -0.19548
97 0.90752 -0.17312
98 0.95720 -0.12093
99 0.95964 -0.06038

12600 rows × 2 columns

coords = data_outline_mosquito_wings.coords.to_numpy().reshape(-1,100,2)
fig, ax = plt.subplots()
sns.lineplot(x=coords[0][:,0], y=coords[0][:,1], sort= False,estimator=None,ax=ax)
ax.set_aspect('equal')
../../_images/9cdb1935271f5e094c5728ded8398a4c448081701ee1c532a6f01e1c7b9a307b.png
fig, ax = plt.subplots(figsize=(10,10))
sns.lineplot(
    data= data_outline_mosquito_wings.coords,x="x", y="y",
    hue="specimen_id",
    sort= False,estimator=None,ax=ax)
ax.set_aspect('equal')
../../_images/05c3be25930d6b1f845a326e50a4c528597c987500f7936219e4a3c82a0c3d8c.png

EFA#

efa = EllipticFourierAnalysis(n_harmonics=20)
coef = efa.fit_transform(coords)

PCA#

pca = PCA(n_components=12)
pcscores = pca.fit_transform(coef)
df_pca = pd.DataFrame(pcscores)
df_pca["specimen_id"] = [i for i in range(1,len(pcscores)+1)]
df_pca = df_pca.set_index("specimen_id")
df_pca = df_pca.join(data_outline_mosquito_wings.meta)
df_pca = df_pca.rename(columns={i:("PC"+str(i+1)) for i in range(12)})
df_pca
PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 PC11 PC12 genus
specimen_id
1 0.047296 -0.028614 -0.001061 -0.000310 0.013393 -0.010994 -0.001533 -0.002746 0.004097 -0.000186 0.001503 -0.000810 AN
2 0.028955 0.015756 0.001092 -0.003740 -0.006694 -0.002453 -0.002395 -0.001203 0.008363 -0.003959 0.001362 -0.000196 AN
3 -0.191432 0.001392 0.017447 0.001495 -0.006041 0.000175 0.004568 0.000639 0.003804 0.000880 -0.001539 0.003237 AN
4 -0.078667 0.048088 -0.002377 -0.001929 0.005109 -0.010823 0.002926 0.002686 -0.001388 0.000213 -0.000209 0.002098 AN
5 -0.035074 -0.050274 0.009340 -0.008051 -0.010825 -0.000951 -0.003192 -0.001052 0.001537 -0.005507 0.000756 0.003088 AN
... ... ... ... ... ... ... ... ... ... ... ... ... ...
122 -0.073861 0.045384 -0.002446 -0.004791 0.000477 -0.004412 0.002848 0.003930 -0.002412 0.000114 -0.001704 0.000024 CX
123 -0.041873 -0.038377 0.000938 0.011287 0.001922 -0.006925 0.002899 -0.001107 -0.006483 0.000414 -0.000392 -0.001321 CX
124 -0.152010 0.041032 -0.016443 0.014469 -0.002731 -0.001113 0.004757 -0.001846 0.000681 -0.003672 -0.000681 -0.001829 CX
125 0.049972 0.036880 -0.012326 -0.004738 0.001212 -0.000433 0.003217 -0.002744 -0.003874 0.002662 0.000321 -0.000729 CX
126 0.067099 0.021569 -0.009049 0.017494 -0.008229 -0.005337 -0.000294 0.001003 -0.001838 -0.000874 0.000331 -0.000350 DE

126 rows × 13 columns

fig, ax = plt.subplots()
sns.scatterplot(data=df_pca, x="PC1", y = "PC2", hue="genus", ax=ax, palette="Paired")
<Axes: xlabel='PC1', ylabel='PC2'>
../../_images/524896a27ba3f0427066240e62f9126f571a41305d7187aac10ccbae660da03a.png

Morphospace#

def get_pc_scores_for_morphospace(ax, num = 5):
    xrange = np.linspace(ax.get_xlim()[0], ax.get_xlim()[1],num)
    yrange = np.linspace(ax.get_ylim()[0], ax.get_ylim()[1],num)
    return xrange, yrange

def plot_recon_morphs(pca, efa, hue, hue_order, fig, ax, 
             n_PCs_xy = [1, 2], lmax = 20, morph_num = 3, morph_scale = 1.0, morph_color = "lightgray", morph_alpha = 0.7, 
             standardized_by_1st_ellipsoid = False):
    
    pc_scores_h, pc_scores_v = get_pc_scores_for_morphospace(ax, morph_num)
    print("PC_h: ", pc_scores_h, ", PC_v: ", pc_scores_v)
    for pc_score_h in pc_scores_h:
        for pc_score_v in pc_scores_v:
            pc_score = np.zeros(pca.n_components_)
            n_PC_h, n_PC_v = n_PCs_xy
            pc_score[n_PC_h-1] = pc_score_h
            pc_score[n_PC_v-1] = pc_score_v

            arr_coef = pca.inverse_transform([pc_score])

            
            ax_width = ax.get_window_extent().width
            fig_width = fig.get_window_extent().width
            fig_height = fig.get_window_extent().height
            morph_size = morph_scale*ax_width/(fig_width*morph_num)
            loc = ax.transData.transform((pc_score_h, pc_score_v))
            axins = fig.add_axes([loc[0]/fig_width-morph_size/2, loc[1]/fig_height-morph_size/2,
                                       morph_size, morph_size], anchor='C')
            
            coords = efa.inverse_transform(arr_coef)
            x = coords[0][:,0]
            y = coords[0][:,1]
            
            axins.plot(x.astype(float),y.astype(float),color=morph_color, alpha = morph_alpha)
            axins.axis('equal')
            axins.axis('off') 
fig = plt.figure(figsize=(16,16),dpi=200)

ax = fig.add_subplot(2,2,1)
sns.scatterplot(data=df_pca, x="PC1", y="PC2", hue="genus", hue_order=None, 
                palette="Paired", ax = ax, legend = True)

plot_recon_morphs(pca,efa,morph_num=5,morph_scale=0.5,
                  hue=None,hue_order=None, 
                  fig = fig, ax = ax)
PC_h:  [-0.2400097  -0.11657721  0.00685527  0.13028775  0.25372023] , PC_v:  [-0.09298342 -0.03817918  0.01662506  0.07142931  0.12623355]
../../_images/59aafcfb83e663c0370bacb6f25c9d9f5b20baa59262eaf65f37c243963d6a75.png
morph_num = 5
morph_scale = 0.8
morph_color = "gray"
morph_alpha = 0.8

fig = plt.figure(figsize=(16,16),dpi=200)

#########
# PC1
#########
ax = fig.add_subplot(2,2,1)
sns.scatterplot(data=df_pca, x="PC1", y="PC2", hue="genus", hue_order=None, 
                palette="Paired", ax = ax, legend = True)

plot_recon_morphs(pca,efa,morph_num=5,morph_scale=morph_scale,
                  hue=None,hue_order=None, 
                  morph_color=morph_color, morph_alpha=morph_alpha,
                  fig = fig, ax = ax)

sns.scatterplot(data=df_pca, x="PC1", y="PC2", hue="genus", hue_order=None, 
                palette="Paired", ax = ax)

ax.patch.set_alpha(0)
ax.set(xlabel='PC1', ylabel='PC2')

print("PC1-PC2 done")

#########
# PC2
#########
ax = fig.add_subplot(2,2,2)
sns.scatterplot(data=df_pca, x="PC2", y="PC3", hue="genus", hue_order=None, 
                palette="Paired", ax = ax, legend = True)

plot_recon_morphs(pca,efa,morph_num=5,morph_scale=morph_scale,
                  hue=None,hue_order=None, 
                  morph_color=morph_color, morph_alpha=morph_alpha,
                  fig = fig, ax = ax, n_PCs_xy = [2, 3])

sns.scatterplot(data=df_pca, x="PC2", y="PC3", hue="genus", hue_order=None, 
                palette="Paired", ax = ax)

ax.patch.set_alpha(0)
ax.set(xlabel='PC2', ylabel='PC3')

print("PC2-PC3 done")

#########
# PC3
#########
ax=fig.add_subplot(2,2,3)
sns.scatterplot(data=df_pca, x="PC3", y="PC1", hue="genus", hue_order=None, 
                palette="Paired", ax = ax, legend = True)

plot_recon_morphs(pca,efa,morph_num=5,morph_scale=morph_scale,
                  hue=None,hue_order=None, 
                  morph_color=morph_color, morph_alpha=morph_alpha,
                  fig = fig, ax = ax, n_PCs_xy = [3, 1])

sns.scatterplot(data=df_pca, x="PC3", y="PC1", hue="genus", hue_order=None, 
                palette="Paired", ax = ax)

ax.patch.set_alpha(0)
ax.set(xlabel='PC3', ylabel='PC1')

print("PC3-PC1 done")

#########
# CCR
#########

fig.add_subplot(2,2,4)
sns.barplot(x=["PC"+str(i+1) for i in range(12)], y=pca.explained_variance_ratio_[0:16], color="gray")
PC_h:  [-0.2400097  -0.11657721  0.00685527  0.13028775  0.25372023] , PC_v:  [-0.09298342 -0.03817918  0.01662506  0.07142931  0.12623355]
PC1-PC2 done
PC_h:  [-0.09298342 -0.03817918  0.01662506  0.07142931  0.12623355] , PC_v:  [-0.0252314  -0.01161327  0.00200486  0.01562299  0.02924111]
PC2-PC3 done
PC_h:  [-0.0252314  -0.01161327  0.00200486  0.01562299  0.02924111] , PC_v:  [-0.2400097  -0.11657721  0.00685527  0.13028775  0.25372023]
PC3-PC1 done
<Axes: >
../../_images/fc9b2845a867065a4ac21a7d202e02a71b28bf5b76c846469696ae6bc79a8d51.png