{ "cells": [ { "cell_type": "markdown", "id": "c393312f", "metadata": {}, "source": [ "# Generalized Procrustes analysis" ] }, { "cell_type": "code", "execution_count": null, "id": "ec807bdc", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "\n", "from sklearn.decomposition import PCA\n", "\n", "from ktch.datasets import load_landmark_mosquito_wings\n", "from ktch.landmark import GeneralizedProcrustesAnalysis" ] }, { "cell_type": "markdown", "id": "2bdd0a36", "metadata": {}, "source": [ "## Load mosquito wing landmark dataset\n", "from Rohlf and Slice 1990 _Syst. Zool._" ] }, { "cell_type": "code", "execution_count": null, "id": "3025e308", "metadata": {}, "outputs": [], "source": [ "data_landmark_mosquito_wings = load_landmark_mosquito_wings(as_frame=True)\n", "data_landmark_mosquito_wings.coords" ] }, { "cell_type": "code", "execution_count": null, "id": "bc82bdca", "metadata": {}, "outputs": [], "source": [ "fig, ax = plt.subplots()\n", "sns.scatterplot(\n", " data = data_landmark_mosquito_wings.coords,\n", " x=\"x\",y=\"y\", \n", " hue=\"specimen_id\", style=\"coord_id\",ax=ax\n", " )\n", "ax.set_aspect('equal')\n", "ax.legend(loc=\"upper left\", bbox_to_anchor=(1, 1))" ] }, { "cell_type": "markdown", "id": "34703ec6", "metadata": {}, "source": [ "For applying generalized Procrustes analysis (GPA), \n", "we convert the configuration data into DataFrame of shape n_specimens x (n_landmarks*n_dim)." ] }, { "cell_type": "code", "execution_count": null, "id": "8f25545a", "metadata": {}, "outputs": [], "source": [ "df_coords = data_landmark_mosquito_wings.coords.unstack().swaplevel(1, 0, axis=1).sort_index(axis=1)\n", "df_coords.columns = [dim +\"_\"+ str(landmark_idx) for landmark_idx,dim in df_coords.columns]\n", "df_coords" ] }, { "cell_type": "markdown", "id": "eadf0cf6", "metadata": {}, "source": [ "## GPA" ] }, { "cell_type": "code", "execution_count": null, "id": "5634d9b7", "metadata": {}, "outputs": [], "source": [ "gpa = GeneralizedProcrustesAnalysis().set_output(transform=\"pandas\")" ] }, { "cell_type": "code", "execution_count": null, "id": "c99915c7", "metadata": {}, "outputs": [], "source": [ "df_shapes = gpa.fit_transform(df_coords)\n", "df_shapes" ] }, { "cell_type": "markdown", "id": "2f4f571c", "metadata": {}, "source": [ "We create a DataFrame, called `df_shapes_vis`, of shape (n_specimens*n_landmarks) x n_dim to visualize the aligned shapes." ] }, { "cell_type": "code", "execution_count": null, "id": "9dd30c0d", "metadata": {}, "outputs": [], "source": [ "df_shapes_vis = df_shapes.copy()\n", "df_shapes_vis.columns = pd.MultiIndex.from_tuples([[int(landmark_idx), dim] for dim, landmark_idx in [idx.split(\"_\") for idx in df_shapes_vis.columns]], names=[\"coord_id\",\"dim\"])\n", "df_shapes_vis.sort_index(axis=1, inplace=True)\n", "df_shapes_vis = df_shapes_vis.swaplevel(0,1,axis=1).stack(level=1)\n", "df_shapes_vis" ] }, { "cell_type": "code", "execution_count": null, "id": "5c97e93b", "metadata": {}, "outputs": [], "source": [ "fig, ax = plt.subplots()\n", "sns.scatterplot(\n", " data = df_shapes_vis,\n", " x=\"x\",y=\"y\", \n", " hue=\"specimen_id\", style=\"coord_id\",ax=ax\n", " )\n", "ax.set_aspect('equal')\n", "ax.legend(loc=\"upper left\", bbox_to_anchor=(1, 1))" ] }, { "cell_type": "markdown", "id": "e1fc4e6a", "metadata": {}, "source": [ "## PCA" ] }, { "cell_type": "code", "execution_count": null, "id": "4d20d29c", "metadata": {}, "outputs": [], "source": [ "pca = PCA(n_components=10).set_output(transform=\"pandas\")\n", "df_pca = pca.fit_transform(df_shapes)\n", "\n", "df_pca = df_pca.join(data_landmark_mosquito_wings.meta)\n", "df_pca" ] }, { "cell_type": "code", "execution_count": null, "id": "6b56c822", "metadata": {}, "outputs": [], "source": [ "fig, ax = plt.subplots()\n", "sns.scatterplot(\n", " data=df_pca, x=\"pca0\", y = \"pca1\", \n", " hue=\"genus\",palette=\"Paired\",\n", " ax=ax\n", " )\n", "ax.legend(loc=\"upper left\", bbox_to_anchor=(1, 1))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 5 }