From 6836672617ea2a95e9f3001632aba7998565a232 Mon Sep 17 00:00:00 2001
From: John Kirkham <kirkhamj@janelia.hhmi.org>
Date: Thu, 26 Apr 2018 16:43:04 -0400
Subject: [PATCH] Rewrite and simplify postprocessing using Dask

---
 nanshe_ipython.ipynb | 126 ++++++++++++++++++-------------------------
 1 file changed, 51 insertions(+), 75 deletions(-)

diff --git a/nanshe_ipython.ipynb b/nanshe_ipython.ipynb
index 632f5b6..3660aec 100644
--- a/nanshe_ipython.ipynb
+++ b/nanshe_ipython.ipynb
@@ -1201,15 +1201,7 @@
         "### Postprocessing\n",
         "\n",
         "* `significance_threshold` (`float`): number of standard deviations below which to include in \"noise\" estimate\n",
-        "* `wavelet_scale` (`int`): scale of wavelet transform to apply (should be the same as the one used above)\n",
-        "* `noise_threshold` (`float`): number of units of \"noise\" above which something needs to be to be significant\n",
-        "* `accepted_region_shape_constraints` (`dict`): if ROIs don't match this, reduce the `wavelet_scale` once.\n",
-        "* `percentage_pixels_below_max` (`float`): upper bound on ratio of ROI pixels not at max intensity vs. all ROI pixels\n",
-        "* `min_local_max_distance` (`float`): minimum allowable euclidean distance between two ROIs maximum intensities\n",
-        "* `accepted_neuron_shape_constraints` (`dict`): shape constraints for ROI to be kept.\n",
-        "\n",
-        "* `alignment_min_threshold` (`float`): similarity measure of the intensity of two ROIs images used for merging.\n",
-        "* `overlap_min_threshold` (`float`): similarity measure of the masks of two ROIs used for merging."
+        "* `noise_threshold` (`float`): number of units of \"noise\" that a value must reach or exceed to be considered significant"
       ]
     },
     {
@@ -1219,87 +1211,71 @@
       "outputs": [],
       "source": [
         "significance_threshold = 3.0\n",
-        "wavelet_scale = 3\n",
-        "noise_threshold = 3.0\n",
-        "percentage_pixels_below_max = 0.8\n",
-        "min_local_max_distance = 16.0\n",
+        "noise_threshold = 2.5\n",
         "\n",
-        "alignment_min_threshold = 0.6\n",
-        "overlap_min_threshold = 0.6\n",
         "\n",
-        "\n",
-        "for k in zarr_store.get(subgroup_post, {}).keys():\n",
+        "for k in [\"post/zscore\", \"post/noise\", \"post/mask\"]:\n",
         "    with suppress(KeyError):\n",
-        "        del dask_store[subgroup_post + \"/\" + k]\n",
+        "        del dask_store[k]\n",
         "with suppress(KeyError):\n",
-        "    del zarr_store[subgroup_post]\n",
-        "zarr_store.require_group(subgroup_post)\n",
-        "\n",
-        "\n",
-        "imgs = dask_store._diskstore[subgroup_dict]\n",
-        "da_imgs = da.from_array(imgs, chunks=((1,) + imgs.shape[1:]))\n",
-        "\n",
-        "result = block_postprocess_data_parallel(client)(da_imgs,\n",
-        "                              **{\n",
-        "                                    \"wavelet_denoising\" : {\n",
-        "                                        \"estimate_noise\" : {\n",
-        "                                            \"significance_threshold\" : significance_threshold\n",
-        "                                        },\n",
-        "                                        \"wavelet.transform\" : {\n",
-        "                                            \"scale\" : wavelet_scale\n",
-        "                                        },\n",
-        "                                        \"significant_mask\" : {\n",
-        "                                            \"noise_threshold\" : noise_threshold\n",
-        "                                        },\n",
-        "                                        \"accepted_region_shape_constraints\" : {\n",
-        "                                            \"major_axis_length\" : {\n",
-        "                                                \"min\" : 0.0,\n",
-        "                                                \"max\" : 25.0\n",
-        "                                            }\n",
-        "                                        },\n",
-        "                                        \"remove_low_intensity_local_maxima\" : {\n",
-        "                                            \"percentage_pixels_below_max\" : percentage_pixels_below_max\n",
-        "                                        },\n",
-        "                                        \"remove_too_close_local_maxima\" : {\n",
-        "                                            \"min_local_max_distance\" : min_local_max_distance\n",
-        "                                        },\n",
-        "                                        \"accepted_neuron_shape_constraints\" : {\n",
-        "                                            \"area\" : {\n",
-        "                                                \"min\" : 25,\n",
-        "                                                \"max\" : 600\n",
-        "                                            },\n",
-        "                                            \"eccentricity\" : {\n",
-        "                                                \"min\" : 0.0,\n",
-        "                                                \"max\" : 0.9\n",
-        "                                            }\n",
-        "                                        }\n",
-        "                                    },\n",
-        "                                    \"merge_neuron_sets\" : {\n",
-        "                                        \"alignment_min_threshold\" : alignment_min_threshold,\n",
-        "                                        \"overlap_min_threshold\" : overlap_min_threshold,\n",
-        "                                        \"fuse_neurons\" : {\n",
-        "                                            \"fraction_mean_neuron_max_threshold\" : 0.01\n",
-        "                                        }\n",
-        "                                    }\n",
-        "                              }\n",
+        "    del zarr_store[\"post\"]\n",
+        "zarr_store.require_group(\"post\")\n",
+        "\n",
+        "\n",
+        "da_imgs = dask_store[subgroup_dict]\n",
+        "\n",
+        "da_imgs_means = da_imgs.mean(axis=tuple(irange(1, da_imgs.ndim)), keepdims=True)\n",
+        "da_imgs_stds = da_imgs.std(axis=tuple(irange(1, da_imgs.ndim)), keepdims=True)\n",
+        "\n",
+        "da_imgs_zscore = (da_imgs - da_imgs_means) / da_imgs_stds\n",
+        "da_imgs_zscore_mag = abs(da_imgs_zscore)\n",
+        "\n",
+        "da_imgs_insignificant_mask = (da_imgs_zscore_mag < significance_threshold)\n",
+        "da_imgs_noise = da.atop(\n",
+        "    lambda a, m: np.stack([np.nan_to_num(np.std(e_a[e_m])) for e_a, e_m in zip(a, m)]), (0,),\n",
+        "    da_imgs, tuple(irange(da_imgs.ndim)),\n",
+        "    da_imgs_insignificant_mask, tuple(irange(da_imgs.ndim)),\n",
+        "    dtype=float,\n",
+        "    concatenate=True\n",
         ")\n",
+        "da_imgs_noise = da_imgs_noise[(slice(None),) + (da_imgs.ndim - 1) * (None,)]\n",
+        "da_imgs_significant_mask = (da_imgs_zscore_mag >= (noise_threshold * da_imgs_noise / da_imgs_stds))\n",
         "\n",
-        "# Store projections\n",
-        "dask_store.update(dict(zip(\n",
-        "    [\"%s/%s\" % (subgroup_post, e) for e in result.dtype.names],\n",
-        "    [result[e] for e in result.dtype.names]\n",
-        ")))\n",
+        "\n",
+        "# Store the noise estimate, z-scores, and significance mask\n",
+        "dask_store.update({\n",
+        "    \"post/noise\": da_imgs_noise,\n",
+        "    \"post/zscore\": da_imgs_zscore,\n",
+        "    \"post/mask\": da_imgs_significant_mask\n",
+        "})\n",
         "\n",
         "dask.distributed.progress(\n",
         "    dask.distributed.futures_of([\n",
-        "        dask_store[\"%s/%s\" % (subgroup_post, e)]\n",
-        "        for e in result.dtype.names\n",
+        "        dask_store[\"post/noise\"],\n",
+        "        dask_store[\"post/zscore\"],\n",
+        "        dask_store[\"post/mask\"],\n",
         "    ]),\n",
         "    notebook=False\n",
         ")\n",
         "print(\"\")"
       ]
     },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "da_imgs = dask_store[\"post/mask\"].astype(np.uint8)\n",
+        "\n",
+        "mplsv = plt.figure(FigureClass=MPLViewer)\n",
+        "mplsv.set_images(\n",
+        "    da_imgs,\n",
+        "    vmin=0,\n",
+        "    vmax=1\n",
+        ")"
+      ]
+    },
     {
       "cell_type": "markdown",
       "metadata": {},