From 6836672617ea2a95e9f3001632aba7998565a232 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Thu, 26 Apr 2018 16:43:04 -0400 Subject: [PATCH] Rewrite and simplify postprocessing using Dask --- nanshe_ipython.ipynb | 126 ++++++++++++++++++------------------------- 1 file changed, 51 insertions(+), 75 deletions(-) diff --git a/nanshe_ipython.ipynb b/nanshe_ipython.ipynb index 632f5b6..3660aec 100644 --- a/nanshe_ipython.ipynb +++ b/nanshe_ipython.ipynb @@ -1201,15 +1201,7 @@ "### Postprocessing\n", "\n", "* `significance_threshold` (`float`): number of standard deviations below which to include in \"noise\" estimate\n", - "* `wavelet_scale` (`int`): scale of wavelet transform to apply (should be the same as the one used above)\n", - "* `noise_threshold` (`float`): number of units of \"noise\" above which something needs to be to be significant\n", - "* `accepted_region_shape_constraints` (`dict`): if ROIs don't match this, reduce the `wavelet_scale` once.\n", - "* `percentage_pixels_below_max` (`float`): upper bound on ratio of ROI pixels not at max intensity vs. all ROI pixels\n", - "* `min_local_max_distance` (`float`): minimum allowable euclidean distance between two ROIs maximum intensities\n", - "* `accepted_neuron_shape_constraints` (`dict`): shape constraints for ROI to be kept.\n", - "\n", - "* `alignment_min_threshold` (`float`): similarity measure of the intensity of two ROIs images used for merging.\n", - "* `overlap_min_threshold` (`float`): similarity measure of the masks of two ROIs used for merging." 
+ "* `noise_threshold` (`float`): number of units of \"noise\" that a value must reach or exceed to be considered significant" ] }, { @@ -1219,87 +1211,71 @@ "outputs": [], "source": [ "significance_threshold = 3.0\n", - "wavelet_scale = 3\n", - "noise_threshold = 3.0\n", - "percentage_pixels_below_max = 0.8\n", - "min_local_max_distance = 16.0\n", + "noise_threshold = 2.5\n", "\n", - "alignment_min_threshold = 0.6\n", - "overlap_min_threshold = 0.6\n", "\n", - "\n", - "for k in zarr_store.get(subgroup_post, {}).keys():\n", + "for k in [\"post/zscore\", \"post/noise\", \"post/mask\"]:\n", " with suppress(KeyError):\n", - " del dask_store[subgroup_post + \"/\" + k]\n", + " del dask_store[k]\n", "with suppress(KeyError):\n", - " del zarr_store[subgroup_post]\n", - "zarr_store.require_group(subgroup_post)\n", - "\n", - "\n", - "imgs = dask_store._diskstore[subgroup_dict]\n", - "da_imgs = da.from_array(imgs, chunks=((1,) + imgs.shape[1:]))\n", - "\n", - "result = block_postprocess_data_parallel(client)(da_imgs,\n", - " **{\n", - " \"wavelet_denoising\" : {\n", - " \"estimate_noise\" : {\n", - " \"significance_threshold\" : significance_threshold\n", - " },\n", - " \"wavelet.transform\" : {\n", - " \"scale\" : wavelet_scale\n", - " },\n", - " \"significant_mask\" : {\n", - " \"noise_threshold\" : noise_threshold\n", - " },\n", - " \"accepted_region_shape_constraints\" : {\n", - " \"major_axis_length\" : {\n", - " \"min\" : 0.0,\n", - " \"max\" : 25.0\n", - " }\n", - " },\n", - " \"remove_low_intensity_local_maxima\" : {\n", - " \"percentage_pixels_below_max\" : percentage_pixels_below_max\n", - " },\n", - " \"remove_too_close_local_maxima\" : {\n", - " \"min_local_max_distance\" : min_local_max_distance\n", - " },\n", - " \"accepted_neuron_shape_constraints\" : {\n", - " \"area\" : {\n", - " \"min\" : 25,\n", - " \"max\" : 600\n", - " },\n", - " \"eccentricity\" : {\n", - " \"min\" : 0.0,\n", - " \"max\" : 0.9\n", - " }\n", - " }\n", - " },\n", - " \"merge_neuron_sets\" : {\n", - 
" \"alignment_min_threshold\" : alignment_min_threshold,\n", - " \"overlap_min_threshold\" : overlap_min_threshold,\n", - " \"fuse_neurons\" : {\n", - " \"fraction_mean_neuron_max_threshold\" : 0.01\n", - " }\n", - " }\n", - " }\n", + " del zarr_store[\"post\"]\n", + "zarr_store.require_group(\"post\")\n", + "\n", + "\n", + "da_imgs = dask_store[subgroup_dict]\n", + "\n", + "da_imgs_means = da_imgs.mean(axis=tuple(irange(1, da_imgs.ndim)), keepdims=True)\n", + "da_imgs_stds = da_imgs.std(axis=tuple(irange(1, da_imgs.ndim)), keepdims=True)\n", + "\n", + "da_imgs_zscore = (da_imgs - da_imgs_means) / da_imgs_stds\n", + "da_imgs_zscore_mag = abs(da_imgs_zscore)\n", + "\n", + "da_imgs_insignificant_mask = (da_imgs_zscore_mag < significance_threshold)\n", + "da_imgs_noise = da.atop(\n", + " lambda a, m: np.stack([np.nan_to_num(np.std(e_a[e_m])) for e_a, e_m in zip(a, m)]), (0,),\n", + " da_imgs, tuple(irange(da_imgs.ndim)),\n", + " da_imgs_insignificant_mask, tuple(irange(da_imgs.ndim)),\n", + " dtype=float,\n", + " concatenate=True\n", ")\n", + "da_imgs_noise = da_imgs_noise[(slice(None),) + (da_imgs.ndim - 1) * (None,)]\n", + "da_imgs_significant_mask = (da_imgs_zscore_mag >= (noise_threshold * da_imgs_noise / da_imgs_stds))\n", "\n", - "# Store projections\n", - "dask_store.update(dict(zip(\n", - " [\"%s/%s\" % (subgroup_post, e) for e in result.dtype.names],\n", - " [result[e] for e in result.dtype.names]\n", - ")))\n", + "\n", + "# dask_store[\"post2_mask\"] = da_imgs_significant_mask\n", + "dask_store.update({\n", + " \"post/noise\": da_imgs_noise,\n", + " \"post/zscore\": da_imgs_zscore,\n", + " \"post/mask\": da_imgs_significant_mask\n", + "})\n", "\n", "dask.distributed.progress(\n", " dask.distributed.futures_of([\n", - " dask_store[\"%s/%s\" % (subgroup_post, e)]\n", - " for e in result.dtype.names\n", + " dask_store[\"post/noise\"],\n", + " dask_store[\"post/zscore\"],\n", + " dask_store[\"post/mask\"],\n", " ]),\n", " notebook=False\n", ")\n", "print(\"\")" 
] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "da_imgs = dask_store[\"post/mask\"].astype(np.uint8)\n", + "\n", + "mplsv = plt.figure(FigureClass=MPLViewer)\n", + "mplsv.set_images(\n", + " da_imgs,\n", + " vmin=0,\n", + " vmax=1\n", + ")" + ] + }, { "cell_type": "markdown", "metadata": {},