Elliptic Fourier Analysis#
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from ktch.datasets import load_outline_mosquito_wings
from ktch.outline import EllipticFourierAnalysis
Load mosquito wing outline dataset#
from Rohlf and Archie 1984 Syst. Zool.
data_outline_mosquito_wings = load_outline_mosquito_wings(as_frame=True)
data_outline_mosquito_wings.coords
x | y | ||
---|---|---|---|
specimen_id | coord_id | ||
1 | 0 | 0.99973 | 0.00000 |
1 | 0.81881 | 0.05151 | |
2 | 0.70063 | 0.08851 | |
3 | 0.61179 | 0.11670 | |
4 | 0.53226 | 0.13666 | |
... | ... | ... | ... |
126 | 95 | 0.68167 | -0.22149 |
96 | 0.76135 | -0.19548 | |
97 | 0.90752 | -0.17312 | |
98 | 0.95720 | -0.12093 | |
99 | 0.95964 | -0.06038 |
12600 rows × 2 columns
coords = data_outline_mosquito_wings.coords.to_numpy().reshape(-1,100,2)
fig, ax = plt.subplots()
sns.lineplot(x=coords[0][:,0], y=coords[0][:,1], sort= False,estimator=None,ax=ax)
ax.set_aspect('equal')
fig, ax = plt.subplots(figsize=(10,10))
sns.lineplot(
data= data_outline_mosquito_wings.coords,x="x", y="y",
hue="specimen_id",
sort= False,estimator=None,ax=ax)
ax.set_aspect('equal')
EFA#
efa = EllipticFourierAnalysis(n_harmonics=20)
coef = efa.fit_transform(coords)
PCA#
pca = PCA(n_components=12)
pcscores = pca.fit_transform(coef)
df_pca = pd.DataFrame(pcscores)
df_pca["specimen_id"] = [i for i in range(1,len(pcscores)+1)]
df_pca = df_pca.set_index("specimen_id")
df_pca = df_pca.join(data_outline_mosquito_wings.meta)
df_pca = df_pca.rename(columns={i:("PC"+str(i+1)) for i in range(12)})
df_pca
PC1 | PC2 | PC3 | PC4 | PC5 | PC6 | PC7 | PC8 | PC9 | PC10 | PC11 | PC12 | genus | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
specimen_id | |||||||||||||
1 | 0.047296 | -0.028614 | -0.001061 | -0.000310 | 0.013393 | -0.010994 | -0.001533 | -0.002746 | 0.004097 | -0.000186 | 0.001503 | -0.000810 | AN |
2 | 0.028955 | 0.015756 | 0.001092 | -0.003740 | -0.006694 | -0.002453 | -0.002395 | -0.001203 | 0.008363 | -0.003959 | 0.001362 | -0.000196 | AN |
3 | -0.191432 | 0.001392 | 0.017447 | 0.001495 | -0.006041 | 0.000175 | 0.004568 | 0.000639 | 0.003804 | 0.000880 | -0.001539 | 0.003237 | AN |
4 | -0.078667 | 0.048088 | -0.002377 | -0.001929 | 0.005109 | -0.010823 | 0.002926 | 0.002686 | -0.001388 | 0.000213 | -0.000209 | 0.002098 | AN |
5 | -0.035074 | -0.050274 | 0.009340 | -0.008051 | -0.010825 | -0.000951 | -0.003192 | -0.001052 | 0.001537 | -0.005507 | 0.000756 | 0.003088 | AN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
122 | -0.073861 | 0.045384 | -0.002446 | -0.004791 | 0.000477 | -0.004412 | 0.002848 | 0.003930 | -0.002412 | 0.000114 | -0.001704 | 0.000024 | CX |
123 | -0.041873 | -0.038377 | 0.000938 | 0.011287 | 0.001922 | -0.006925 | 0.002899 | -0.001107 | -0.006483 | 0.000414 | -0.000392 | -0.001321 | CX |
124 | -0.152010 | 0.041032 | -0.016443 | 0.014469 | -0.002731 | -0.001113 | 0.004757 | -0.001846 | 0.000681 | -0.003672 | -0.000681 | -0.001829 | CX |
125 | 0.049972 | 0.036880 | -0.012326 | -0.004738 | 0.001212 | -0.000433 | 0.003217 | -0.002744 | -0.003874 | 0.002662 | 0.000321 | -0.000729 | CX |
126 | 0.067099 | 0.021569 | -0.009049 | 0.017494 | -0.008229 | -0.005337 | -0.000294 | 0.001003 | -0.001838 | -0.000874 | 0.000331 | -0.000350 | DE |
126 rows × 13 columns
fig, ax = plt.subplots()
sns.scatterplot(data=df_pca, x="PC1", y = "PC2", hue="genus", ax=ax, palette="Paired")
<Axes: xlabel='PC1', ylabel='PC2'>
Morphospace#
def get_pc_scores_for_morphospace(ax, num = 5):
xrange = np.linspace(ax.get_xlim()[0], ax.get_xlim()[1],num)
yrange = np.linspace(ax.get_ylim()[0], ax.get_ylim()[1],num)
return xrange, yrange
def plot_recon_morphs(pca, efa, hue, hue_order, fig, ax,
n_PCs_xy = [1, 2], lmax = 20, morph_num = 3, morph_scale = 1.0, morph_color = "lightgray", morph_alpha = 0.7,
standardized_by_1st_ellipsoid = False):
pc_scores_h, pc_scores_v = get_pc_scores_for_morphospace(ax, morph_num)
print("PC_h: ", pc_scores_h, ", PC_v: ", pc_scores_v)
for pc_score_h in pc_scores_h:
for pc_score_v in pc_scores_v:
pc_score = np.zeros(pca.n_components_)
n_PC_h, n_PC_v = n_PCs_xy
pc_score[n_PC_h-1] = pc_score_h
pc_score[n_PC_v-1] = pc_score_v
arr_coef = pca.inverse_transform([pc_score])
ax_width = ax.get_window_extent().width
fig_width = fig.get_window_extent().width
fig_height = fig.get_window_extent().height
morph_size = morph_scale*ax_width/(fig_width*morph_num)
loc = ax.transData.transform((pc_score_h, pc_score_v))
axins = fig.add_axes([loc[0]/fig_width-morph_size/2, loc[1]/fig_height-morph_size/2,
morph_size, morph_size], anchor='C')
coords = efa.inverse_transform(arr_coef)
x = coords[0][:,0]
y = coords[0][:,1]
axins.plot(x.astype(float),y.astype(float),color=morph_color, alpha = morph_alpha)
axins.axis('equal')
axins.axis('off')
fig = plt.figure(figsize=(16,16),dpi=200)
ax = fig.add_subplot(2,2,1)
sns.scatterplot(data=df_pca, x="PC1", y="PC2", hue="genus", hue_order=None,
palette="Paired", ax = ax, legend = True)
plot_recon_morphs(pca,efa,morph_num=5,morph_scale=0.5,
hue=None,hue_order=None,
fig = fig, ax = ax)
PC_h: [-0.2400097 -0.11657721 0.00685527 0.13028775 0.25372023] , PC_v: [-0.09298342 -0.03817918 0.01662506 0.07142931 0.12623355]
morph_num = 5
morph_scale = 0.8
morph_color = "gray"
morph_alpha = 0.8
fig = plt.figure(figsize=(16,16),dpi=200)
#########
# PC1
#########
ax = fig.add_subplot(2,2,1)
sns.scatterplot(data=df_pca, x="PC1", y="PC2", hue="genus", hue_order=None,
palette="Paired", ax = ax, legend = True)
plot_recon_morphs(pca,efa,morph_num=5,morph_scale=morph_scale,
hue=None,hue_order=None,
morph_color=morph_color, morph_alpha=morph_alpha,
fig = fig, ax = ax)
sns.scatterplot(data=df_pca, x="PC1", y="PC2", hue="genus", hue_order=None,
palette="Paired", ax = ax)
ax.patch.set_alpha(0)
ax.set(xlabel='PC1', ylabel='PC2')
print("PC1-PC2 done")
#########
# PC2
#########
ax = fig.add_subplot(2,2,2)
sns.scatterplot(data=df_pca, x="PC2", y="PC3", hue="genus", hue_order=None,
palette="Paired", ax = ax, legend = True)
plot_recon_morphs(pca,efa,morph_num=5,morph_scale=morph_scale,
hue=None,hue_order=None,
morph_color=morph_color, morph_alpha=morph_alpha,
fig = fig, ax = ax, n_PCs_xy = [2, 3])
sns.scatterplot(data=df_pca, x="PC2", y="PC3", hue="genus", hue_order=None,
palette="Paired", ax = ax)
ax.patch.set_alpha(0)
ax.set(xlabel='PC2', ylabel='PC3')
print("PC2-PC3 done")
#########
# PC3
#########
ax=fig.add_subplot(2,2,3)
sns.scatterplot(data=df_pca, x="PC3", y="PC1", hue="genus", hue_order=None,
palette="Paired", ax = ax, legend = True)
plot_recon_morphs(pca,efa,morph_num=5,morph_scale=morph_scale,
hue=None,hue_order=None,
morph_color=morph_color, morph_alpha=morph_alpha,
fig = fig, ax = ax, n_PCs_xy = [3, 1])
sns.scatterplot(data=df_pca, x="PC3", y="PC1", hue="genus", hue_order=None,
palette="Paired", ax = ax)
ax.patch.set_alpha(0)
ax.set(xlabel='PC3', ylabel='PC1')
print("PC3-PC1 done")
#########
# CCR
#########
fig.add_subplot(2,2,4)
sns.barplot(x=["PC"+str(i+1) for i in range(12)], y=pca.explained_variance_ratio_[0:16], color="gray")
PC_h: [-0.2400097 -0.11657721 0.00685527 0.13028775 0.25372023] , PC_v: [-0.09298342 -0.03817918 0.01662506 0.07142931 0.12623355]
PC1-PC2 done
PC_h: [-0.09298342 -0.03817918 0.01662506 0.07142931 0.12623355] , PC_v: [-0.0252314 -0.01161327 0.00200486 0.01562299 0.02924111]
PC2-PC3 done
PC_h: [-0.0252314 -0.01161327 0.00200486 0.01562299 0.02924111] , PC_v: [-0.2400097 -0.11657721 0.00685527 0.13028775 0.25372023]
PC3-PC1 done
<Axes: >