{ "cells": [ { "cell_type": "markdown", "id": "608273c6", "metadata": {}, "source": [ "# Elliptic Fourier Analysis" ] }, { "cell_type": "code", "execution_count": null, "id": "8072639e", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "\n", "from sklearn.decomposition import PCA\n", "\n", "from ktch.datasets import load_outline_mosquito_wings\n", "from ktch.harmonic import EllipticFourierAnalysis\n", "from ktch.plot import explained_variance_ratio_plot" ] }, { "cell_type": "markdown", "id": "a0241e4f", "metadata": {}, "source": [ "## Load mosquito wing outline dataset\n", "from Rohlf and Archie 1984 _Syst. Zool._" ] }, { "cell_type": "code", "execution_count": null, "id": "a3e65dda", "metadata": {}, "outputs": [], "source": [ "data_outline_mosquito_wings = load_outline_mosquito_wings(as_frame=True)\n", "data_outline_mosquito_wings.coords" ] }, { "cell_type": "code", "execution_count": null, "id": "1e495eac", "metadata": {}, "outputs": [], "source": [ "coords = data_outline_mosquito_wings.coords.to_numpy().reshape(-1, 100, 2)" ] }, { "cell_type": "code", "execution_count": null, "id": "98536242", "metadata": {}, "outputs": [], "source": [ "fig, ax = plt.subplots()\n", "sns.lineplot(x=coords[0][:, 0], y=coords[0][:, 1], sort=False, estimator=None, ax=ax)\n", "ax.set_aspect(\"equal\")" ] }, { "cell_type": "code", "execution_count": null, "id": "687ee29e", "metadata": {}, "outputs": [], "source": [ "fig, ax = plt.subplots(figsize=(10, 10))\n", "sns.lineplot(\n", " data=data_outline_mosquito_wings.coords,\n", " x=\"x\",\n", " y=\"y\",\n", " hue=\"specimen_id\",\n", " sort=False,\n", " estimator=None,\n", " ax=ax,\n", ")\n", "ax.set_aspect(\"equal\")" ] }, { "cell_type": "markdown", "id": "9b464f81", "metadata": {}, "source": [ "## EFA" ] }, { "cell_type": "code", "execution_count": null, "id": "2161a78e", "metadata": {}, "outputs": [], "source": [ "efa = EllipticFourierAnalysis(n_harmonics=20)" ] }, { "cell_type": "code", "execution_count": null, "id": "65e493e9", "metadata": {}, "outputs": [], "source": [ "coef = efa.fit_transform(coords)" ] }, { "cell_type": "markdown", "id": "2cf40ce5", "metadata": {}, "source": [ "## PCA" ] }, { "cell_type": "code", "execution_count": null, "id": "a0f366fd", "metadata": {}, "outputs": [], "source": [ "pca = PCA(n_components=12)\n", "pcscores = pca.fit_transform(coef)" ] }, { "cell_type": "code", "execution_count": null, "id": "74eafdc9", "metadata": {}, "outputs": [], "source": [ "df_pca = pd.DataFrame(pcscores)\n", "df_pca[\"specimen_id\"] = [i for i in range(1, len(pcscores) + 1)]\n", "df_pca = df_pca.set_index(\"specimen_id\")\n", "df_pca = df_pca.join(data_outline_mosquito_wings.meta)\n", "df_pca = df_pca.rename(columns={i: (\"PC\" + str(i + 1)) for i in range(12)})\n", "df_pca" ] }, { "cell_type": "code", "execution_count": null, "id": "a675251f", "metadata": {}, "outputs": [], "source": [ "fig, ax = plt.subplots()\n", "sns.scatterplot(data=df_pca, x=\"PC1\", y=\"PC2\", hue=\"genus\", ax=ax, palette=\"Paired\")" ] }, { "cell_type": "markdown", "id": "01cbfe31", "metadata": {}, "source": [ "## Morphospace" ] }, { "cell_type": "code", "execution_count": null, "id": "3204e4cd", "metadata": {}, "outputs": [], "source": [ "def get_pc_scores_for_morphospace(ax, num=5):\n", " xrange = np.linspace(ax.get_xlim()[0], ax.get_xlim()[1], num)\n", " yrange = np.linspace(ax.get_ylim()[0], ax.get_ylim()[1], num)\n", " return xrange, yrange\n", "\n", "\n", "def plot_recon_morphs(\n", " pca,\n", " efa,\n", " fig,\n", " ax,\n", " n_PCs_xy=[1, 2],\n", " morph_num=3,\n", " morph_scale=1.0,\n", " morph_color=\"lightgray\",\n", " morph_alpha=0.7,\n", "):\n", " pc_scores_h, pc_scores_v = get_pc_scores_for_morphospace(ax, morph_num)\n", " print(\"PC_h: \", pc_scores_h, \", PC_v: \", pc_scores_v)\n", " for pc_score_h in pc_scores_h:\n", " for pc_score_v in pc_scores_v:\n", " pc_score = np.zeros(pca.n_components_)\n", " n_PC_h, n_PC_v = n_PCs_xy\n", " pc_score[n_PC_h - 1] = pc_score_h\n", " pc_score[n_PC_v - 1] = pc_score_v\n", "\n", " arr_coef = pca.inverse_transform([pc_score])\n", "\n", " ax_width = ax.get_window_extent().width\n", " fig_width = fig.get_window_extent().width\n", " fig_height = fig.get_window_extent().height\n", " morph_size = morph_scale * ax_width / (fig_width * morph_num)\n", " loc = ax.transData.transform((pc_score_h, pc_score_v))\n", " axins = fig.add_axes(\n", " [\n", " loc[0] / fig_width - morph_size / 2,\n", " loc[1] / fig_height - morph_size / 2,\n", " morph_size,\n", " morph_size,\n", " ],\n", " anchor=\"C\",\n", " )\n", "\n", " coords = efa.inverse_transform(arr_coef)\n", " x = coords[0][:, 0]\n", " y = coords[0][:, 1]\n", "\n", " axins.plot(\n", " x.astype(float), y.astype(float), color=morph_color, alpha=morph_alpha\n", " )\n", " axins.axis(\"equal\")\n", " axins.axis(\"off\")" ] }, { "cell_type": "code", "execution_count": null, "id": "b25edbca", "metadata": {}, "outputs": [], "source": [ "fig = plt.figure(figsize=(16, 16), dpi=200)\n", "\n", "ax = fig.add_subplot(2, 2, 1)\n", "sns.scatterplot(\n", " data=df_pca,\n", " x=\"PC1\",\n", " y=\"PC2\",\n", " hue=\"genus\",\n", " hue_order=None,\n", " palette=\"Paired\",\n", " ax=ax,\n", " legend=True,\n", ")\n", "\n", "plot_recon_morphs(pca, efa, morph_num=5, morph_scale=0.5, fig=fig, ax=ax)" ] }, { "cell_type": "code", "execution_count": null, "id": "8904fdf0", "metadata": {}, "outputs": [], "source": [ "morph_num = 5\n", "morph_scale = 0.8\n", "morph_color = \"gray\"\n", "morph_alpha = 0.8\n", "\n", "hue_order = df_pca[\"genus\"].unique()\n", "\n", "fig = plt.figure(figsize=(16, 16), dpi=200)\n", "\n", "#########\n", "# PC1\n", "#########\n", "ax = fig.add_subplot(2, 2, 1)\n", "sns.scatterplot(\n", " data=df_pca,\n", " x=\"PC1\",\n", " y=\"PC2\",\n", " hue=\"genus\",\n", " hue_order=hue_order,\n", " palette=\"Paired\",\n", " ax=ax,\n", " legend=True,\n", ")\n", "\n", "plot_recon_morphs(\n", " pca,\n", " efa,\n", " morph_num=5,\n", " morph_scale=morph_scale,\n", " morph_color=morph_color,\n", " morph_alpha=morph_alpha,\n", " fig=fig,\n", " ax=ax,\n", ")\n", "\n", "ax.patch.set_alpha(0)\n", "ax.set(xlabel=\"PC1\", ylabel=\"PC2\")\n", "\n", "print(\"PC1-PC2 done\")\n", "\n", "#########\n", "# PC2\n", "#########\n", "ax = fig.add_subplot(2, 2, 2)\n", "sns.scatterplot(\n", " data=df_pca,\n", " x=\"PC2\",\n", " y=\"PC3\",\n", " hue=\"genus\",\n", " hue_order=hue_order,\n", " palette=\"Paired\",\n", " ax=ax,\n", " legend=True,\n", ")\n", "\n", "plot_recon_morphs(\n", " pca,\n", " efa,\n", " morph_num=5,\n", " morph_scale=morph_scale,\n", " morph_color=morph_color,\n", " morph_alpha=morph_alpha,\n", " fig=fig,\n", " ax=ax,\n", " n_PCs_xy=[2, 3],\n", ")\n", "\n", "ax.patch.set_alpha(0)\n", "ax.set(xlabel=\"PC2\", ylabel=\"PC3\")\n", "\n", "print(\"PC2-PC3 done\")\n", "\n", "#########\n", "# PC3\n", "#########\n", "ax = fig.add_subplot(2, 2, 3)\n", "sns.scatterplot(\n", " data=df_pca,\n", " x=\"PC3\",\n", " y=\"PC1\",\n", " hue=\"genus\",\n", " hue_order=hue_order,\n", " palette=\"Paired\",\n", " ax=ax,\n", " legend=True,\n", ")\n", "\n", "plot_recon_morphs(\n", " pca,\n", " efa,\n", " morph_num=5,\n", " morph_scale=morph_scale,\n", " morph_color=morph_color,\n", " morph_alpha=morph_alpha,\n", " fig=fig,\n", " ax=ax,\n", " n_PCs_xy=[3, 1],\n", ")\n", "\n", "ax.patch.set_alpha(0)\n", "ax.set(xlabel=\"PC3\", ylabel=\"PC1\")\n", "\n", "print(\"PC3-PC1 done\")\n", "\n", "#########\n", "# CCR\n", "#########\n", "\n", "ax = fig.add_subplot(2, 2, 4)\n", "explained_variance_ratio_plot(pca, ax=ax, verbose=True)" ] }, { "cell_type": "code", "execution_count": null, "id": "c0ecccfc", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "jupytext": { "default_lexer": "ipython3" }, "kernelspec": { "display_name": "ktch", "language": "python", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 5 }