diff --git a/docs/47_clustering/umap.ipynb b/docs/47_clustering/umap.ipynb index 3f98810f..2a5230fa 100644 --- a/docs/47_clustering/umap.ipynb +++ b/docs/47_clustering/umap.ipynb @@ -1101,7 +1101,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.15" + "version": "3.11.9" } }, "nbformat": 4, diff --git a/docs/51_plotting/feature_correlation.ipynb b/docs/51_plotting/feature_correlation.ipynb new file mode 100644 index 00000000..17cab728 --- /dev/null +++ b/docs/51_plotting/feature_correlation.ipynb @@ -0,0 +1,845 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "bea6da31-a5ef-4b44-b96c-7570e8659b34", + "metadata": {}, + "source": [ + "# Feature correlation\n", + "\n", + "When inspecting feature extraction results, it is often important to take relationships between features into account. Therefore, a feature correlation matrix is a useful tool. Visualizing it in color is recommended." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "307300eb-b787-48a7-b7a5-1fbb8b5266e5", + "metadata": {}, + "outputs": [], + "source": [ + "from napari_simpleitk_image_processing import label_statistics\n", + "import numpy as np\n", + "import seaborn\n", + "import pyclesperanto_prototype as cle\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "id": "61828c9a-b6bc-4623-b391-88a35f112ff9", + "metadata": {}, + "source": [ + "### Load data\n", + "We first load the image data that will be used for feature extraction." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "63828c9a-b6bc-4623-b391-88a35f112ff9", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\haase\\miniconda3\\envs\\bio311\\Lib\\site-packages\\pyclesperanto_prototype\\_tier9\\_imread.py:5: UserWarning: cle.imread is deprecated, use skimage.io.imread instead.\n", + " warnings.warn(\"cle.imread is deprecated, use skimage.io.imread instead.\")\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
labelmaximummeanminimumvarianceelongationferet_diameterflatnessroundnessequivalent_spherical_radiusnumber_of_pixelsperimeter
01143.0117.48945193.090.0560321.2286908.7749641.1536180.9656573.839016237191.790349
12113.083.05221965.094.0862711.32509613.1529461.2155720.8189054.505089383311.446414
23130.0108.93040392.057.1091091.56591112.8840991.4344760.8071734.024309273252.130963
34129.094.57699170.0130.7161361.22702714.3527001.3972760.8330065.128456565396.766310
45149.0119.45454589.0144.4313211.42982910.7238051.2691210.8716804.034113275234.611278
.......................................
1195119660.042.11825729.050.2708091.10704611.0905371.3079620.9763064.863917482304.506355
1196119783.047.67326729.0159.3307721.04695112.4096741.2361470.9626025.526416707398.703613
1197119853.041.50289030.028.1231801.0425999.6436511.3309950.9653764.355077346246.890816
1198119972.045.09157029.0106.3162021.11428512.9614811.2691820.9620375.476460688391.758021
1199120066.044.23268229.067.4849091.20323912.2065561.3816010.9616685.122397563342.871234
\n", + "

1200 rows × 12 columns

\n", + "
" + ], + "text/plain": [ + " label maximum mean minimum variance elongation \\\n", + "0 1 143.0 117.489451 93.0 90.056032 1.228690 \n", + "1 2 113.0 83.052219 65.0 94.086271 1.325096 \n", + "2 3 130.0 108.930403 92.0 57.109109 1.565911 \n", + "3 4 129.0 94.576991 70.0 130.716136 1.227027 \n", + "4 5 149.0 119.454545 89.0 144.431321 1.429829 \n", + "... ... ... ... ... ... ... \n", + "1195 1196 60.0 42.118257 29.0 50.270809 1.107046 \n", + "1196 1197 83.0 47.673267 29.0 159.330772 1.046951 \n", + "1197 1198 53.0 41.502890 30.0 28.123180 1.042599 \n", + "1198 1199 72.0 45.091570 29.0 106.316202 1.114285 \n", + "1199 1200 66.0 44.232682 29.0 67.484909 1.203239 \n", + "\n", + " feret_diameter flatness roundness equivalent_spherical_radius \\\n", + "0 8.774964 1.153618 0.965657 3.839016 \n", + "1 13.152946 1.215572 0.818905 4.505089 \n", + "2 12.884099 1.434476 0.807173 4.024309 \n", + "3 14.352700 1.397276 0.833006 5.128456 \n", + "4 10.723805 1.269121 0.871680 4.034113 \n", + "... ... ... ... ... \n", + "1195 11.090537 1.307962 0.976306 4.863917 \n", + "1196 12.409674 1.236147 0.962602 5.526416 \n", + "1197 9.643651 1.330995 0.965376 4.355077 \n", + "1198 12.961481 1.269182 0.962037 5.476460 \n", + "1199 12.206556 1.381601 0.961668 5.122397 \n", + "\n", + " number_of_pixels perimeter \n", + "0 237 191.790349 \n", + "1 383 311.446414 \n", + "2 273 252.130963 \n", + "3 565 396.766310 \n", + "4 275 234.611278 \n", + "... ... ... 
\n", + "1195 482 304.506355 \n", + "1196 707 398.703613 \n", + "1197 346 246.890816 \n", + "1198 688 391.758021 \n", + "1199 563 342.871234 \n", + "\n", + "[1200 rows x 12 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Load data\n", + "image = cle.imread(\"../../data/Lund-25MB.tif\")\n", + "\n", + "# Segment nuclei\n", + "background_subtracted = cle.top_hat_box(image, radius_x=5, radius_y=5)\n", + "labels = cle.voronoi_otsu_labeling(background_subtracted, spot_sigma=1)\n", + "\n", + "# Feature extraction\n", + "nuclei_statistics = label_statistics(image, labels, \n", + " intensity=True, \n", + " size=True, \n", + " shape=True, \n", + " perimeter=True,\n", + " moments=True)\n", + "\n", + "# Feature selection\n", + "selected_table = nuclei_statistics[\n", + " [\n", + " # likely unrelated features\n", + " 'label', \n", + " \n", + " # intensity related features\n", + " 'maximum', 'mean', 'minimum', 'variance',\n", + " \n", + " # shape related features \n", + " 'elongation', 'feret_diameter', 'flatness', 'roundness',\n", + "\n", + " # size related features\n", + " 'equivalent_spherical_radius', 'number_of_pixels', 'perimeter'\n", + " ]\n", + "]\n", + "selected_table" + ] + }, + { + "cell_type": "markdown", + "id": "1efc67e1-3919-4141-b881-0323cfd6f5cb", + "metadata": {}, + "source": [ + "### Correlation matrix\n", + "We examine the correlation between the selected features to understand their relationships." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "5efc67e1-3919-4141-b881-0323cfd6f5cb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 labelmaximummeanminimumvarianceelongationferet_diameterflatnessroundnessequivalent_spherical_radiusnumber_of_pixelsperimeter
label1.000000-0.605035-0.651268-0.581233-0.134539-0.0148570.105859-0.0663840.3812670.2519680.2468690.190365
maximum-0.6050351.0000000.8246530.5777060.563160-0.0280760.1449440.025563-0.485114-0.011892-0.0350780.068570
mean-0.6512680.8246531.0000000.9187500.0528480.122359-0.1738720.112322-0.644827-0.451865-0.478943-0.362011
minimum-0.5812330.5777060.9187501.000000-0.2734890.217240-0.3118680.148296-0.600965-0.615060-0.604247-0.521081
variance-0.1345390.5631600.052848-0.2734891.000000-0.1919630.370870-0.0848410.0690650.4857700.4901670.500228
elongation-0.014857-0.0280760.1223590.217240-0.1919631.0000000.1844450.091196-0.418459-0.152117-0.125144-0.083723
feret_diameter0.1058590.144944-0.173872-0.3118680.3708700.1844451.0000000.083095-0.2017870.8540900.7853600.896780
flatness-0.0663840.0255630.1123220.148296-0.0848410.0911960.0830951.000000-0.438565-0.111196-0.137907-0.085824
roundness0.381267-0.485114-0.644827-0.6009650.069065-0.418459-0.201787-0.4385651.0000000.2252410.3301820.137811
equivalent_spherical_radius0.251968-0.011892-0.451865-0.6150600.485770-0.1521170.854090-0.1111960.2252411.0000000.9483570.976222
number_of_pixels0.246869-0.035078-0.478943-0.6042470.490167-0.1251440.785360-0.1379070.3301820.9483571.0000000.964439
perimeter0.1903650.068570-0.362011-0.5210810.500228-0.0837230.896780-0.0858240.1378110.9762220.9644391.000000
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = selected_table.corr()\n", + "\n", + "def colorize(styler):\n", + " styler.background_gradient(axis=None, cmap=\"coolwarm\")\n", + " return styler\n", + "df.style.pipe(colorize)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f3d87b8d-19b5-44cf-986b-20c01a40b217", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/_toc.yml b/docs/_toc.yml index ddd14861..73c44020 100644 --- a/docs/_toc.yml +++ b/docs/_toc.yml @@ -400,7 +400,7 @@ parts: #- file: 47_clustering/dens_map #- file: 47_clustering/tissue_clustering - file: 47_clustering/interactive_dimensionality_reduction_and_clustering/readme - - file: 47_clustering/feature_selection + #- file: 47_clustering/feature_selection - file: 51_plotting/readme sections: @@ -408,6 +408,7 @@ parts: - file: 51_plotting/plotting_seaborn - file: 51_plotting/plotting_distributions - file: 51_plotting/multivariate_views + - file: 51_plotting/feature_correlation - file: 51_plotting/interpolating_between_feature_spaces # plotly