diff --git a/docs/user-guide/tree_structures.ipynb b/docs/user-guide/tree_structures.ipynb new file mode 100644 index 000000000..23e5d4aa5 --- /dev/null +++ b/docs/user-guide/tree_structures.ipynb @@ -0,0 +1,433 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "48760981b4ed3d9b", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "panel-layout": { + "height": 60.57500457763672, + "visible": true, + "width": 100 + } + }, + "source": [ + "# Tree Structures" + ] + }, + { + "cell_type": "markdown", + "id": "ac9d0c6c-744e-4be8-a309-aea5b4e03a21", + "metadata": { + "panel-layout": { + "height": 68.2750015258789, + "visible": true, + "width": 100 + } + }, + "source": [ + "UXarry supports two different tree structures, BallTree and KDTree. These trees are tailored for neighbor searches, making them useful for spatial data queries." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "initial_id", + "metadata": {}, + "outputs": [], + "source": [ + "import uxarray as ux" + ] + }, + { + "cell_type": "markdown", + "id": "52865051-d8ba-47ef-af86-615d4cd30910", + "metadata": { + "panel-layout": { + "height": 51.13750076293945, + "visible": true, + "width": 100 + } + }, + "source": [ + "For this example we will be using a UGRID meshfile" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "4032aa2d-5836-47cb-84e4-50f9d103a9ba", + "metadata": {}, + "outputs": [], + "source": [ + "grid_path = \"../../test/meshfiles/ugrid/quad-hexagon/grid.nc\"\n", + "uxgrid = ux.open_grid(grid_path)" + ] + }, + { + "cell_type": "markdown", + "id": "b3e40836-64a6-4e2b-9a26-016107076653", + "metadata": { + "panel-layout": { + "height": 44.07500076293945, + "visible": true, + "width": 100 + } + }, + "source": [ + "### BallTree" + ] + }, + { + "cell_type": "markdown", + "id": "b9519b67-fcdf-42b0-9c02-4ba90bad5b01", + "metadata": { + "panel-layout": { + "height": 86.2125015258789, + "visible": true, + "width": 100 + } + }, + "source": [ + "UXarray `BallTree` is built off of [sklearn.neighbors.BallTree](https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.BallTree.html). A BallTree data structure organizes points in a multi-dimensional space into a tree of spheres. It is highly efficient for higher-dimensional data and for fast queries." + ] + }, + { + "cell_type": "markdown", + "id": "7b7ee4e4-9f98-4a3b-9cd2-94eaabd01d92", + "metadata": { + "panel-layout": { + "height": 43.212501525878906, + "visible": true, + "width": 100 + } + }, + "source": [ + "#### Parameters" + ] + }, + { + "cell_type": "markdown", + "id": "bd8db118-1e2c-488b-b153-4e1920097f42", + "metadata": { + "panel-layout": { + "height": 420.2875061035156, + "visible": true, + "width": 100 + } + }, + "source": [ + "The `BallTree` class can be accessed through the `Grid.get_ball_tree(coordinates, coordinate_system, distance_metric, reconstruct)` method, which takes in the following parameters:\n", + "\n", + "* `coordinates` allows us to specify what nodes to build the tree on. We can choose from `nodes`, `face_centers`, or `edge_centers`. Each one will change how the tree is built. `nodes` builds the tree from the corner nodes of the grid, `face_centers` builds the tree from the face centers of each face, and `edge_centers` builds from the center of each edge in the grid. The default is to build from `nodes`. \n", + "* `coordinate_system` specifies the tree's coordinate type when constructing. We can use either `cartesian`, which uses the `(x, y, z)` coordinate system, or `spherical`, which uses the `(lat, lon)` coordinate system. The default parameter is `spherical`.\n", + "* `distance_metric` relates to the distance computation, typically returned as a distance when querying for neighbors. There are a large number of options for us to use. A list of these options can be found [here](https://uxarray.readthedocs.io/en/latest/user_api/generated/uxarray.Grid.get_ball_tree.html). An important note here is that some distance metrics aren't compatible with some coordinate systems. `BallTree` uses the haversine distance as default, which will only work with spherical coordinates and not with cartesian. The default parameter is `haversine`.\n", + "* `reconstruct` is a bool variable that allows the user to reconstruct the tree. As default for performance, if a user calls `get_ball_tree` and a tree has already been created, it will simply use that one. If `reconstruct` is set to `True`, it will override this and reconstruct the tree. The default parameter is `False`." + ] + }, + { + "cell_type": "markdown", + "id": "bd222e2c-234c-4796-ae21-6fc7d924ed42", + "metadata": { + "panel-layout": { + "height": 43.212501525878906, + "visible": true, + "width": 100 + } + }, + "source": [ + "#### Constructing a BallTree" + ] + }, + { + "cell_type": "markdown", + "id": "7269a8d3-461f-4a3a-95c4-6b2c30397183", + "metadata": { + "panel-layout": { + "height": 68.2750015258789, + "visible": true, + "width": 100 + } + }, + "source": [ + "We can store the BallTree data structure in a variable, which allows us to access the tree in a simple way for queries." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "a9efd88c-e37d-4ace-8deb-9aaff9204dab", + "metadata": {}, + "outputs": [], + "source": [ + "ball_tree = uxgrid.get_ball_tree(\n", + " coordinates=\"nodes\",\n", + " coordinate_system=\"spherical\",\n", + " distance_metric=\"haversine\",\n", + " reconstruct=\"False\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "d7cfe09d-8d47-4dae-9081-664fcc5778d6", + "metadata": { + "panel-layout": { + "height": 43.212501525878906, + "visible": true, + "width": 100 + } + }, + "source": [ + "#### Query" + ] + }, + { + "cell_type": "markdown", + "id": "0cce1546-c551-48f4-a670-d992240e5bf6", + "metadata": { + "panel-layout": { + "height": 103.3499984741211, + "visible": true, + "width": 100 + } + }, + "source": [ + "Now we can use that variable to query for the distance and indexes of the nearest neigbhors. The first parameter is the point from which to do the search. `return_distance` allows us to choose to return the distance of the neighbors, and `k` controls how many neighbors to find." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "1606f1a2-9d79-48f8-9fed-4b74326d1051", + "metadata": {}, + "outputs": [], + "source": [ + "d, ind = ball_tree.query([0.0, 0.0], return_distance=True, k=1)" + ] + }, + { + "cell_type": "markdown", + "id": "855e418c-ffa5-43d2-a24d-d96385ffdd61", + "metadata": { + "panel-layout": { + "height": 68.2750015258789, + "visible": true, + "width": 100 + } + }, + "source": [ + "If we don't plan on using the tree for other things in the future we can skip the extra step and query right away" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "b70a1c4a-9920-4a9a-a680-3cfc439ea221", + "metadata": {}, + "outputs": [], + "source": [ + "d, ind = uxgrid.get_ball_tree(\n", + " coordinates=\"nodes\",\n", + " coordinate_system=\"spherical\",\n", + " distance_metric=\"haversine\",\n", + " reconstruct=\"True\",\n", + ").query([0.0, 0.0], return_distance=True, k=1)" + ] + }, + { + "cell_type": "markdown", + "id": "c42abfeb-722a-420c-b776-4948d9a5f0f6", + "metadata": { + "panel-layout": { + "height": 43.212501525878906, + "visible": true, + "width": 100 + } + }, + "source": [ + "#### Query Radius" + ] + }, + { + "cell_type": "markdown", + "id": "5d59ee3f-4a3e-406d-ab6b-4e15b4a1bd15", + "metadata": { + "panel-layout": { + "height": 68.2750015258789, + "visible": true, + "width": 100 + } + }, + "source": [ + "We can also query the tree using a radius search, instead of a nearest neighbor search. This allows us to get all points within a certain radius of a specific point. For spherical coordinates, the radius is in units of degrees, and for cartesian coordinates, the radius is in meters." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "a3520e67-3a2b-4664-a1c7-2e33ecdde625", + "metadata": {}, + "outputs": [], + "source": [ + "d, ind = ball_tree.query_radius([0.0, 0.0], r=5, return_distance=True)" + ] + }, + { + "cell_type": "markdown", + "id": "10f87d02-960b-4611-931a-07390964c2a9", + "metadata": { + "panel-layout": { + "height": 44.07500076293945, + "visible": true, + "width": 100 + } + }, + "source": [ + "### KDTree" + ] + }, + { + "cell_type": "markdown", + "id": "72e531ae-6e5f-48dc-9ffd-b74a6794f5dd", + "metadata": { + "panel-layout": { + "height": 122.08750915527344, + "visible": true, + "width": 100 + } + }, + "source": [ + "The KDTree structure is a binary search tree useful for low-dimensional data. Its implementation is almost identical to BallTree, and the parameters are identical. An important note is that the different allowed inputs for `distance_metric` change between the trees. For KDTree allowed `distance_metrics` can be found [here](https://uxarray.readthedocs.io/en/latest/user_api/generated/uxarray.Grid.get_kd_tree.html). We can call it using `get_kd_tree()`. Generally, KDTree is going to be slower than BallTree, and it is recommended to use BallTree for most im" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "271536e6-391f-43e6-90c6-30567e5b3198", + "metadata": {}, + "outputs": [], + "source": [ + "kd_tree = uxgrid.get_kd_tree()" + ] + }, + { + "cell_type": "markdown", + "id": "804b1099-4268-4a09-817f-ec1eb4487776", + "metadata": { + "panel-layout": { + "height": 51.13750076293945, + "visible": true, + "width": 100 + } + }, + "source": [ + "`query()` and `query_radius()` work identically to the BallTree." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "ddf5103e-bdd9-4e55-9f3d-02c8eda5a759", + "metadata": { + "panel-layout": { + "height": 27.137500762939453, + "visible": true, + "width": 100 + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(array(0.00104681), array(0, dtype=int64))" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kd_tree.query([1.0, 0.0, 0.0], k=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "74edecae-fdac-49a1-83b6-a0d654aab610", + "metadata": { + "panel-layout": { + "height": 95.6875, + "visible": true, + "width": 100 + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([0.00104681, 0.00335129, 0.00454262, 0.00477313, 0.00355618,\n", + " 0.00179975, 0.00519071, 0.00539414, 0.00562861, 0.00646756,\n", + " 0.0066315 , 0.00730387, 0.00334007, 0.00563384, 0.00612996,\n", + " 0.00436176]),\n", + " array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],\n", + " dtype=int64))" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "kd_tree.query_radius([1.0, 0.0, 0.0], r=5, return_distance=True)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + }, + "panel-cell-order": [ + "48760981b4ed3d9b", + "ac9d0c6c-744e-4be8-a309-aea5b4e03a21", + "b3e40836-64a6-4e2b-9a26-016107076653", + "b9519b67-fcdf-42b0-9c02-4ba90bad5b01", + "7b7ee4e4-9f98-4a3b-9cd2-94eaabd01d92", + "bd8db118-1e2c-488b-b153-4e1920097f42", + "bd222e2c-234c-4796-ae21-6fc7d924ed42", + "52865051-d8ba-47ef-af86-615d4cd30910", + "7269a8d3-461f-4a3a-95c4-6b2c30397183", + "d7cfe09d-8d47-4dae-9081-664fcc5778d6", + "0cce1546-c551-48f4-a670-d992240e5bf6", + "855e418c-ffa5-43d2-a24d-d96385ffdd61", + "c42abfeb-722a-420c-b776-4948d9a5f0f6", + "5d59ee3f-4a3e-406d-ab6b-4e15b4a1bd15", + "10f87d02-960b-4611-931a-07390964c2a9", + "72e531ae-6e5f-48dc-9ffd-b74a6794f5dd", + "804b1099-4268-4a09-817f-ec1eb4487776", + "ddf5103e-bdd9-4e55-9f3d-02c8eda5a759", + "74edecae-fdac-49a1-83b6-a0d654aab610" + ] + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/userguide.rst b/docs/userguide.rst index 1e204f384..5d8797def 100644 --- a/docs/userguide.rst +++ b/docs/userguide.rst @@ -18,3 +18,4 @@ common tasks that you can accomplish with UXarray. user-guide/topological-aggregations.ipynb user-guide/area_calc.ipynb user-guide/remapping.ipynb + user-guide/tree_structures.ipynb diff --git a/uxarray/grid/grid.py b/uxarray/grid/grid.py index 716a1983b..efaee93f4 100644 --- a/uxarray/grid/grid.py +++ b/uxarray/grid/grid.py @@ -902,7 +902,10 @@ def get_ball_tree( Selects which coordinate type to use to create the tree, "cartesian" selecting cartesian coordinates, and "spherical" selecting spherical coordinates. distance_metric : str, default="haversine" - Distance metric used to construct the BallTree + Distance metric used to construct the BallTree, options include: + 'euclidean', 'l2', 'minkowski', 'p','manhattan', 'cityblock', 'l1', 'chebyshev', 'infinity', 'seuclidean', + 'mahalanobis', 'hamming', 'canberra', 'braycurtis', 'jaccard', 'dice', 'rogerstanimoto', 'russellrao', + 'sokalmichener', 'sokalsneath', 'haversine' reconstruct : bool, default=False If true, reconstructs the tree @@ -949,7 +952,8 @@ def get_kd_tree( Selects which coordinate type to use to create the tree, "cartesian" selecting cartesian coordinates, and "spherical" selecting spherical coordinates. distance_metric : str, default="minkowski" - Distance metric used to construct the KDTree + Distance metric used to construct the KDTree, available options include: + 'euclidean', 'l2', 'minkowski', 'p', 'manhattan', 'cityblock', 'l1', 'chebyshev', 'infinity' reconstruct : bool, default=False If true, reconstructs the tree