diff --git a/doc/api/index.rst b/doc/api/index.rst index cbe7a9286..21a56306a 100644 --- a/doc/api/index.rst +++ b/doc/api/index.rst @@ -36,6 +36,7 @@ Composite Estimators Chain Vector + Derivative Model Selection --------------- diff --git a/examples/gradient.py b/examples/gradient.py new file mode 100644 index 000000000..d4666de91 --- /dev/null +++ b/examples/gradient.py @@ -0,0 +1,36 @@ +""" +Derivative calculations +===================== + + + +""" +import matplotlib.pyplot as plt +import numpy as np +import verde as vd + + +data = vd.datasets.CheckerBoard( + region=(0, 5000, 0, 2500), w_east=5000, w_north=2500 +).scatter(size=700, random_state=0) + +spline = vd.Spline().fit((data.easting, data.northing), data.scalars) +east_deriv = vd.Derivative(spline, step=10, direction=(1, 0)).grid(spacing=50) +north_deriv = vd.Derivative(spline, step=10, direction=(0, 1)).grid(spacing=50) + +fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(9, 10), sharex=True) + +ax1.set_title("Original data") +tmp = ax1.scatter(data.easting, data.northing, c=data.scalars, cmap="RdBu_r") +plt.colorbar(tmp, ax=ax1, label="data unit") +ax1.set_ylabel("northing") + +east_deriv.scalars.plot.pcolormesh(ax=ax2, cbar_kwargs=dict(label="data unit / m")) +ax2.set_title("East derivative") +ax2.set_xlabel("") + +north_deriv.scalars.plot.pcolormesh(ax=ax3, cbar_kwargs=dict(label="data unit / m")) +ax3.set_title("North derivative") + +fig.tight_layout() +plt.show() diff --git a/examples/vector_derivatives.py b/examples/vector_derivatives.py new file mode 100644 index 000000000..735051bcc --- /dev/null +++ b/examples/vector_derivatives.py @@ -0,0 +1,77 @@ +""" +Derivatives of vector fields +============================ +""" +import matplotlib.pyplot as plt +import cartopy.crs as ccrs +import numpy as np +import pyproj +import verde as vd + + +# Fetch the wind speed data from Texas. +data = vd.datasets.fetch_texas_wind() +# Separate out some of the data into utility variables +coordinates = (data.longitude.values, data.latitude.values) +region = vd.get_region(coordinates) +# Use a Mercator projection because Spline is a Cartesian gridder +projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean()) + +# Split the data into a training and testing set. We'll fit the gridder on the +# training set and use the testing set to evaluate how well the gridder is +# performing. +train, test = vd.train_test_split( + coordinates=projection(*coordinates), + data=(data.wind_speed_east_knots, data.wind_speed_north_knots), + test_size=0.1, + random_state=1, +) + +estimator = vd.Vector([vd.Spline(damping=1e-6, mindist=500e3) for i in range(2)]) + +# Fit on the training data +estimator.fit(*train) +# And score on the testing data. The best possible score is 1, meaning a perfect +# prediction of the test data. +score = estimator.score(*test) +print("Validation score (R²): {:.2f}".format(score)) + +# Interpolate the wind speed onto a regular geographic grid and mask the data +# that are outside of the convex hull of the data points. +dims = ["latitude", "longitude"] +grid = estimator.grid(region, spacing=20 / 60, projection=projection, dims=dims) +grid = vd.convexhull_mask(coordinates, grid=grid, projection=projection) + +spacing = 10 / 60 +step = 0.5 * spacing * 100e3 + +east_derivs = vd.Derivative(estimator, step=step, direction=(1, 0)).grid( + region, spacing=spacing, projection=projection, dims=dims +) +north_derivs = vd.Derivative(estimator, step=step, direction=(0, 1)).grid( + region, spacing=spacing, projection=projection, dims=dims +) + +divergence = east_derivs.east_component + north_derivs.north_component + +# Make maps of the original and gridded wind speed +plt.figure(figsize=(6, 6)) +ax = plt.axes(projection=ccrs.Mercator()) + +divergence.plot(ax=ax, transform=ccrs.PlateCarree()) + +tmp = ax.quiver( + data.longitude.values, + data.latitude.values, + data.wind_speed_east_knots.values, + data.wind_speed_north_knots.values, + width=0.003, + scale=100, + color="black", + transform=ccrs.PlateCarree(), +) + +# Use an utility function to add tick labels and land and ocean features to the map. +vd.datasets.setup_texas_wind_map(ax) +plt.tight_layout() +plt.show() diff --git a/verde/__init__.py b/verde/__init__.py index f80b3e34b..72681c0ac 100644 --- a/verde/__init__.py +++ b/verde/__init__.py @@ -14,6 +14,7 @@ pad_region, longitude_continuity, ) +from .derivative import Derivative from .mask import distance_mask, convexhull_mask from .utils import variance_to_weights, maxabs, grid_to_table from .io import load_surfer diff --git a/verde/derivative.py b/verde/derivative.py new file mode 100644 index 000000000..f13ed39e0 --- /dev/null +++ b/verde/derivative.py @@ -0,0 +1,94 @@ +""" +Utilities for calculating gradients of input data. +""" +import numpy as np + +from .base import BaseGridder +from .base.utils import check_data + + +class Derivative(BaseGridder): + """ + """ + + def __init__(self, estimator, step, direction): + super().__init__() + self.estimator = estimator + self.step = step + self.direction = direction + + @property + def region_(self): + "The region of the data used to fit this estimator." + return self.estimator.region_ + + def predict(self, coordinates): + """ + """ + direction = normalize_direction(self.direction) + if len(coordinates) != len(direction): + raise ValueError("Wrong number of dimensions") + forward_coords = tuple( + coordinate + component * self.step / 2 + for coordinate, component in zip(coordinates, direction) + ) + backward_coords = tuple( + coordinate - component * self.step / 2 + for coordinate, component in zip(coordinates, direction) + ) + forward = check_data(self.estimator.predict(forward_coords)) + backward = check_data(self.estimator.predict(backward_coords)) + derivative = tuple( + (fwd - back) / self.step for fwd, back in zip(forward, backward) + ) + if len(derivative) == 1: + derivative = derivative[0] + return derivative + + def fit(self, *args, **kwargs): + """ + Fit the estimator to + """ + self.estimator.fit(*args, **kwargs) + return self + + +def normalize_direction(direction): + """ + Transform the direction into a numpy array and normalize it. + + Parameters + ---------- + direction : list, tuple, or array + A 1D array representing a vector. + + Returns + ------- + normalized : array + The array divided by its norm, making it a unit vector. + + Examples + -------- + + >>> print(normalize_direction((1, 0))) + [1. 0.] + >>> print(normalize_direction((1.5, 0))) + [1. 0.] + >>> print(normalize_direction((2, 0))) + [1. 0.] + >>> print(normalize_direction((0, 2))) + [0. 1.] + >>> print(normalize_direction((0, 2, 0))) + [0. 1. 0.] + >>> print("{:.3f} {:.3f}".format(*normalize_direction((1, 1)))) + 0.707 0.707 + >>> print("{:.3f} {:.3f}".format(*normalize_direction((-2, 2)))) + -0.707 0.707 + + """ + # Casting to float is required for the division to work if the original + # numbers are integers. Since this is always small, it doesn't matter + # much. + direction = np.atleast_1d(direction).astype("float64") + direction /= np.linalg.norm(direction) + return direction diff --git a/verde/tests/test_derivative.py b/verde/tests/test_derivative.py new file mode 100644 index 000000000..ef2d3021e --- /dev/null +++ b/verde/tests/test_derivative.py @@ -0,0 +1,125 @@ +# pylint: disable=redefined-outer-name +""" +Test the Derivative estimator +""" +import numpy as np +import numpy.testing as npt +import pytest + +from ..derivative import Derivative +from ..trend import Trend +from ..vector import Vector +from ..chain import Chain +from ..coordinates import grid_coordinates + + +@pytest.fixture() +def polynomial(): + "A second degree polynomial data" + region = (0, 5000, -10000, -5000) + spacing = 50 + coordinates = grid_coordinates(region, spacing=spacing) + baselevel = 100 + east_coef = -0.5 + north_coef = 1.5 + data = ( + baselevel + east_coef * coordinates[0] ** 2 + north_coef * coordinates[1] ** 2 + ) + east_deriv = 2 * east_coef * coordinates[0] + north_deriv = 2 * north_coef * coordinates[1] + return coordinates, data, east_deriv, north_deriv + + +def test_gradient(polynomial): + "Check that the gradient of polynomial trend is estimated correctly" + coordinates, data, true_east_deriv, true_north_deriv = polynomial + + trend = Trend(degree=2).fit(coordinates, data) + east_deriv = Derivative(trend, step=10, direction=(1, 0)).predict(coordinates) + north_deriv = Derivative(trend, step=10, direction=(0, 1)).predict(coordinates) + + npt.assert_allclose(true_east_deriv, east_deriv, atol=1e-2) + npt.assert_allclose(true_north_deriv, north_deriv, atol=1e-2) + + +def test_gradient_fails_wrong_dimensions(polynomial): + "An error should be raised if dimensions of coordinates != directions" + coordinates, data = polynomial[:2] + + trend = Trend(degree=2).fit(coordinates, data) + with pytest.raises(ValueError): + Derivative(trend, step=10, direction=(1, 0, 1)).predict(coordinates) + with pytest.raises(ValueError): + Derivative(trend, step=10, direction=(1, 0)).predict((coordinates[0],)) + + +def test_gradient_grid(polynomial): + "Make sure the grid method works as expected" + coordinates, data, true_east_deriv = polynomial[:3] + + trend = Trend(degree=2).fit(coordinates, data) + deriv = Derivative(trend, step=10, direction=(1, 0)).grid(spacing=50) + + npt.assert_allclose(true_east_deriv, deriv.scalars.values, atol=1e-2) + + +def test_gradient_direction(polynomial): + "Check that the gradient in a range of directions" + coordinates, data, true_east_deriv, true_north_deriv = polynomial + trend = Trend(degree=2).fit(coordinates, data) + for azimuth in np.radians(np.linspace(0, 360, 60)): + direction = (np.sin(azimuth), np.cos(azimuth)) + true_deriv = true_east_deriv * direction[0] + true_north_deriv * direction[1] + deriv = Derivative(trend, step=10, direction=direction).predict(coordinates) + npt.assert_allclose(true_deriv, deriv, atol=1e-2) + + +def test_gradient_fit(polynomial): + "Check that calling fit on the Derivative works as expected" + coordinates, data, true_east_deriv, true_north_deriv = polynomial + + trend = Trend(degree=2) + east_deriv = ( + Derivative(trend, step=10, direction=(1, 0)) + .fit(coordinates, data) + .predict(coordinates) + ) + north_deriv = ( + Derivative(trend, step=10, direction=(0, 1)) + .fit(coordinates, data) + .predict(coordinates) + ) + + npt.assert_allclose(true_east_deriv, east_deriv, atol=1e-2) + npt.assert_allclose(true_north_deriv, north_deriv, atol=1e-2) + + +def test_gradient_vector(polynomial): + "Make sure putting Derivatives in a Vector works" + coordinates, data, true_east_deriv, true_north_deriv = polynomial + + trend = Trend(degree=2) + gradient = Vector( + [ + Derivative(trend, step=10, direction=(1, 0)), + Derivative(trend, step=10, direction=(0, 1)), + ] + ) + # This is wasteful because it fits the same trend twice so should not + # really be done in practice. + gradient.fit(coordinates, (data, data)) + deriv = gradient.grid(spacing=50) + + npt.assert_allclose(true_east_deriv, deriv.east_component.values, atol=1e-2) + npt.assert_allclose(true_north_deriv, deriv.north_component.values, atol=1e-2) + + +def test_gradient_chain(polynomial): + "Make sure putting Derivatives in a Chain works" + coordinates, data, true_east_deriv = polynomial[:3] + + trend = Trend(degree=2) + gradient = Chain([("east", Derivative(trend, step=10, direction=(1, 0)))]) + gradient.fit(coordinates, data) + deriv = gradient.predict(coordinates) + npt.assert_allclose(true_east_deriv, deriv, atol=1e-2)