Elliptic Fourier Analysis#
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from ktch.datasets import load_outline_mosquito_wings
from ktch.outline import EllipticFourierAnalysis
Load mosquito wing outline dataset#
from Rohlf and Archie 1984 Syst. Zool.
data_outline_mosquito_wings = load_outline_mosquito_wings(as_frame=True)
data_outline_mosquito_wings.coords
x | y | ||
---|---|---|---|
specimen_id | coord_id | ||
1 | 0 | 0.99973 | 0.00000 |
1 | 0.81881 | 0.05151 | |
2 | 0.70063 | 0.08851 | |
3 | 0.61179 | 0.11670 | |
4 | 0.53226 | 0.13666 | |
... | ... | ... | ... |
126 | 95 | 0.68167 | -0.22149 |
96 | 0.76135 | -0.19548 | |
97 | 0.90752 | -0.17312 | |
98 | 0.95720 | -0.12093 | |
99 | 0.95964 | -0.06038 |
12600 rows × 2 columns
coords = data_outline_mosquito_wings.coords.to_numpy().reshape(-1,100,2)
fig, ax = plt.subplots()
sns.lineplot(x=coords[0][:,0], y=coords[0][:,1], sort= False,estimator=None,ax=ax)
ax.set_aspect('equal')

fig, ax = plt.subplots(figsize=(10,10))
sns.lineplot(
data= data_outline_mosquito_wings.coords,x="x", y="y",
hue="specimen_id",
sort= False,estimator=None,ax=ax)
ax.set_aspect('equal')

EFA#
efa = EllipticFourierAnalysis(n_harmonics=20)
coef = efa.fit_transform(coords)
PCA#
pca = PCA(n_components=12)
pcscores = pca.fit_transform(coef)
df_pca = pd.DataFrame(pcscores)
df_pca["specimen_id"] = [i for i in range(1,len(pcscores)+1)]
df_pca = df_pca.set_index("specimen_id")
df_pca = df_pca.join(data_outline_mosquito_wings.meta)
df_pca = df_pca.rename(columns={i:("PC"+str(i+1)) for i in range(12)})
df_pca
PC1 | PC2 | PC3 | PC4 | PC5 | PC6 | PC7 | PC8 | PC9 | PC10 | PC11 | PC12 | genus | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
specimen_id | |||||||||||||
1 | -0.257313 | -0.093748 | 0.016349 | -0.002731 | 0.001431 | -0.015579 | 0.007754 | -0.000806 | -0.004022 | 0.000015 | -0.001270 | -0.000471 | AN |
2 | 0.084530 | 0.167501 | 0.002138 | 0.003125 | 0.003936 | 0.004930 | 0.002947 | -0.000856 | -0.008540 | -0.004009 | -0.003076 | 0.000560 | AN |
3 | -0.567882 | -0.331899 | -0.053529 | -0.008826 | 0.002041 | 0.006791 | 0.001164 | -0.004781 | -0.002162 | -0.000885 | 0.002780 | -0.001788 | AN |
4 | -0.749645 | 0.763909 | -0.023704 | 0.003482 | 0.009889 | -0.007432 | 0.005178 | -0.001529 | 0.002099 | -0.001644 | 0.001524 | -0.001603 | AN |
5 | 0.141708 | -1.207885 | -0.004927 | 0.000772 | 0.000314 | 0.008138 | 0.003166 | 0.001188 | -0.001228 | -0.004765 | -0.003182 | 0.000463 | AN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
122 | -0.095254 | 0.446578 | -0.022834 | 0.005526 | 0.008614 | -0.001661 | 0.000655 | -0.001871 | 0.003742 | -0.001391 | 0.002097 | 0.000211 | CX |
123 | -0.884751 | -0.120869 | -0.007592 | -0.003100 | -0.007955 | -0.004103 | 0.005769 | 0.000140 | 0.006750 | 0.001332 | 0.000022 | 0.001339 | CX |
124 | -0.231700 | 0.896062 | -0.039666 | 0.010685 | -0.008908 | 0.002171 | 0.002839 | -0.003981 | 0.001010 | -0.001815 | -0.001781 | 0.000888 | CX |
125 | -0.140596 | 0.667735 | 0.016898 | 0.007007 | 0.005828 | -0.000016 | 0.001037 | 0.000602 | 0.002910 | 0.004465 | -0.000094 | 0.000611 | CX |
126 | -0.037013 | 0.778983 | 0.006481 | 0.006582 | -0.012522 | 0.004344 | 0.004916 | 0.000398 | 0.002532 | -0.002234 | 0.000643 | -0.001508 | DE |
126 rows × 13 columns
fig, ax = plt.subplots()
sns.scatterplot(data=df_pca, x="PC1", y = "PC2", hue="genus", ax=ax, palette="Paired")
<Axes: xlabel='PC1', ylabel='PC2'>

Morphospace#
def get_pc_scores_for_morphospace(ax, num = 5):
xrange = np.linspace(ax.get_xlim()[0], ax.get_xlim()[1],num)
yrange = np.linspace(ax.get_ylim()[0], ax.get_ylim()[1],num)
return xrange, yrange
def plot_recon_morphs(pca, efa, hue, hue_order, fig, ax,
n_PCs_xy = [1, 2], lmax = 20, morph_num = 3, morph_scale = 1.0, morph_color = "lightgray", morph_alpha = 0.7,
standardized_by_1st_ellipsoid = False):
pc_scores_h, pc_scores_v = get_pc_scores_for_morphospace(ax, morph_num)
print("PC_h: ", pc_scores_h, ", PC_v: ", pc_scores_v)
for pc_score_h in pc_scores_h:
for pc_score_v in pc_scores_v:
pc_score = np.zeros(pca.n_components_)
n_PC_h, n_PC_v = n_PCs_xy
pc_score[n_PC_h-1] = pc_score_h
pc_score[n_PC_v-1] = pc_score_v
arr_coef = pca.inverse_transform([pc_score])
ax_width = ax.get_window_extent().width
fig_width = fig.get_window_extent().width
fig_height = fig.get_window_extent().height
morph_size = morph_scale*ax_width/(fig_width*morph_num)
loc = ax.transData.transform((pc_score_h, pc_score_v))
axins = fig.add_axes([loc[0]/fig_width-morph_size/2, loc[1]/fig_height-morph_size/2,
morph_size, morph_size], anchor='C')
coords = efa.inverse_transform(arr_coef)
x = coords[0][:,0]
y = coords[0][:,1]
axins.plot(x.astype(float),y.astype(float),color=morph_color, alpha = morph_alpha)
axins.axis('equal')
axins.axis('off')
fig = plt.figure(figsize=(16,16),dpi=200)
ax = fig.add_subplot(2,2,1)
sns.scatterplot(data=df_pca, x="PC1", y="PC2", hue="genus", hue_order=None,
palette="Paired", ax = ax, legend = True)
plot_recon_morphs(pca,efa,morph_num=5,morph_scale=0.5,
hue=None,hue_order=None,
fig = fig, ax = ax)
PC_h: [-1.95970887 -1.015491 -0.07127314 0.87294473 1.81716259] , PC_v: [-1.34149708 -0.60662861 0.12823986 0.86310833 1.5979768 ]

morph_num = 5
morph_scale = 0.8
morph_color = "gray"
morph_alpha = 0.8
fig = plt.figure(figsize=(16,16),dpi=200)
#########
# PC1
#########
ax = fig.add_subplot(2,2,1)
sns.scatterplot(data=df_pca, x="PC1", y="PC2", hue="genus", hue_order=None,
palette="Paired", ax = ax, legend = True)
plot_recon_morphs(pca,efa,morph_num=5,morph_scale=morph_scale,
hue=None,hue_order=None,
morph_color=morph_color, morph_alpha=morph_alpha,
fig = fig, ax = ax)
sns.scatterplot(data=df_pca, x="PC1", y="PC2", hue="genus", hue_order=None,
palette="Paired", ax = ax)
ax.patch.set_alpha(0)
ax.set(xlabel='PC1', ylabel='PC2')
print("PC1-PC2 done")
#########
# PC2
#########
ax = fig.add_subplot(2,2,2)
sns.scatterplot(data=df_pca, x="PC2", y="PC3", hue="genus", hue_order=None,
palette="Paired", ax = ax, legend = True)
plot_recon_morphs(pca,efa,morph_num=5,morph_scale=morph_scale,
hue=None,hue_order=None,
morph_color=morph_color, morph_alpha=morph_alpha,
fig = fig, ax = ax, n_PCs_xy = [2, 3])
sns.scatterplot(data=df_pca, x="PC2", y="PC3", hue="genus", hue_order=None,
palette="Paired", ax = ax)
ax.patch.set_alpha(0)
ax.set(xlabel='PC2', ylabel='PC3')
print("PC2-PC3 done")
#########
# PC3
#########
ax=fig.add_subplot(2,2,3)
sns.scatterplot(data=df_pca, x="PC3", y="PC1", hue="genus", hue_order=None,
palette="Paired", ax = ax, legend = True)
plot_recon_morphs(pca,efa,morph_num=5,morph_scale=morph_scale,
hue=None,hue_order=None,
morph_color=morph_color, morph_alpha=morph_alpha,
fig = fig, ax = ax, n_PCs_xy = [3, 1])
sns.scatterplot(data=df_pca, x="PC3", y="PC1", hue="genus", hue_order=None,
palette="Paired", ax = ax)
ax.patch.set_alpha(0)
ax.set(xlabel='PC3', ylabel='PC1')
print("PC3-PC1 done")
#########
# CCR
#########
fig.add_subplot(2,2,4)
sns.barplot(x=["PC"+str(i+1) for i in range(12)], y=pca.explained_variance_ratio_[0:16], color="gray")
PC_h: [-1.95970887 -1.015491 -0.07127314 0.87294473 1.81716259] , PC_v: [-1.34149708 -0.60662861 0.12823986 0.86310833 1.5979768 ]
PC1-PC2 done
PC_h: [-1.34149708 -0.60662861 0.12823986 0.86310833 1.5979768 ] , PC_v: [-0.06223614 -0.02815604 0.00592407 0.04000417 0.07408428]
PC2-PC3 done
PC_h: [-0.06223614 -0.02815604 0.00592407 0.04000417 0.07408428] , PC_v: [-1.95970887 -1.015491 -0.07127314 0.87294473 1.81716259]
PC3-PC1 done
<Axes: >
