Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a Derivative metagridder to predict derivatives #245

Draft
wants to merge 12 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions doc/api/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ Composite Estimators

Chain
Vector
Derivative

Model Selection
---------------
Expand Down
36 changes: 36 additions & 0 deletions examples/gradient.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""
Derivative calculations
=====================



"""
import matplotlib.pyplot as plt
import numpy as np
import verde as vd


data = vd.datasets.CheckerBoard(
region=(0, 5000, 0, 2500), w_east=5000, w_north=2500
).scatter(size=700, random_state=0)

spline = vd.Spline().fit((data.easting, data.northing), data.scalars)
east_deriv = vd.Derivative(spline, step=10, direction=(1, 0)).grid(spacing=50)
north_deriv = vd.Derivative(spline, step=10, direction=(0, 1)).grid(spacing=50)

fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(9, 10), sharex=True)

ax1.set_title("Original data")
tmp = ax1.scatter(data.easting, data.northing, c=data.scalars, cmap="RdBu_r")
plt.colorbar(tmp, ax=ax1, label="data unit")
ax1.set_ylabel("northing")

east_deriv.scalars.plot.pcolormesh(ax=ax2, cbar_kwargs=dict(label="data unit / m"))
ax2.set_title("East derivative")
ax2.set_xlabel("")

north_deriv.scalars.plot.pcolormesh(ax=ax3, cbar_kwargs=dict(label="data unit / m"))
ax3.set_title("North derivative")

fig.tight_layout()
plt.show()
77 changes: 77 additions & 0 deletions examples/vector_derivatives.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""
Derivatives of vector fields
============================
"""
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import numpy as np
import pyproj
import verde as vd


# Fetch the wind speed data from Texas.
data = vd.datasets.fetch_texas_wind()
# Separate out some of the data into utility variables
coordinates = (data.longitude.values, data.latitude.values)
region = vd.get_region(coordinates)
# Use a Mercator projection because Spline is a Cartesian gridder
projection = pyproj.Proj(proj="merc", lat_ts=data.latitude.mean())

# Split the data into a training and testing set. We'll fit the gridder on the
# training set and use the testing set to evaluate how well the gridder is
# performing.
train, test = vd.train_test_split(
coordinates=projection(*coordinates),
data=(data.wind_speed_east_knots, data.wind_speed_north_knots),
test_size=0.1,
random_state=1,
)

estimator = vd.Vector([vd.Spline(damping=1e-6, mindist=500e3) for i in range(2)])

# Fit on the training data
estimator.fit(*train)
# And score on the testing data. The best possible score is 1, meaning a perfect
# prediction of the test data.
score = estimator.score(*test)
print("Validation score (R²): {:.2f}".format(score))

# Interpolate the wind speed onto a regular geographic grid and mask the data
# that are outside of the convex hull of the data points.
dims = ["latitude", "longitude"]
grid = estimator.grid(region, spacing=20 / 60, projection=projection, dims=dims)
grid = vd.convexhull_mask(coordinates, grid=grid, projection=projection)

spacing = 10 / 60
step = 0.5 * spacing * 100e3

east_derivs = vd.Derivative(estimator, step=step, direction=(1, 0)).grid(
region, spacing=spacing, projection=projection, dims=dims
)
north_derivs = vd.Derivative(estimator, step=step, direction=(0, 1)).grid(
region, spacing=spacing, projection=projection, dims=dims
)

divergence = east_derivs.east_component + north_derivs.north_component

# Make maps of the original and gridded wind speed
plt.figure(figsize=(6, 6))
ax = plt.axes(projection=ccrs.Mercator())

divergence.plot(ax=ax, transform=ccrs.PlateCarree())

tmp = ax.quiver(
data.longitude.values,
data.latitude.values,
data.wind_speed_east_knots.values,
data.wind_speed_north_knots.values,
width=0.003,
scale=100,
color="black",
transform=ccrs.PlateCarree(),
)

# Use an utility function to add tick labels and land and ocean features to the map.
vd.datasets.setup_texas_wind_map(ax)
plt.tight_layout()
plt.show()
1 change: 1 addition & 0 deletions verde/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
pad_region,
longitude_continuity,
)
from .derivative import Derivative
from .mask import distance_mask, convexhull_mask
from .utils import variance_to_weights, maxabs, grid_to_table
from .io import load_surfer
Expand Down
94 changes: 94 additions & 0 deletions verde/derivative.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
"""
Utilities for calculating gradients of input data.
"""
import numpy as np

from .base import BaseGridder
from .base.utils import check_data


class Derivative(BaseGridder):
"""
"""

def __init__(self, estimator, step, direction):
super().__init__()
self.estimator = estimator
self.step = step
self.direction = direction

@property
def region_(self):
"The region of the data used to fit this estimator."
return self.estimator.region_

def predict(self, coordinates):
"""
"""
direction = normalize_direction(self.direction)
if len(coordinates) != len(direction):
raise ValueError("Wrong number of dimensions")
forward_coords = tuple(
coordinate + component * self.step / 2
for coordinate, component in zip(coordinates, direction)
)
backward_coords = tuple(
coordinate - component * self.step / 2
for coordinate, component in zip(coordinates, direction)
)
forward = check_data(self.estimator.predict(forward_coords))
backward = check_data(self.estimator.predict(backward_coords))
derivative = tuple(
(fwd - back) / self.step for fwd, back in zip(forward, backward)
)
if len(derivative) == 1:
derivative = derivative[0]
return derivative

def fit(self, *args, **kwargs):
"""
Fit the estimator to
"""
self.estimator.fit(*args, **kwargs)
return self


def normalize_direction(direction):
"""
Transform the direction into a numpy array and normalize it.

Parameters
----------
direction : list, tuple, or array
A 1D array representing a vector.

Returns
-------
normalized : array
The array divided by its norm, making it a unit vector.

Examples
--------

>>> print(normalize_direction((1, 0)))
[1. 0.]
>>> print(normalize_direction((1.5, 0)))
[1. 0.]
>>> print(normalize_direction((2, 0)))
[1. 0.]
>>> print(normalize_direction((0, 2)))
[0. 1.]
>>> print(normalize_direction((0, 2, 0)))
[0. 1. 0.]
>>> print("{:.3f} {:.3f}".format(*normalize_direction((1, 1))))
0.707 0.707
>>> print("{:.3f} {:.3f}".format(*normalize_direction((-2, 2))))
-0.707 0.707

"""
# Casting to float is required for the division to work if the original
# numbers are integers. Since this is always small, it doesn't matter
# much.
direction = np.atleast_1d(direction).astype("float64")
direction /= np.linalg.norm(direction)
return direction
125 changes: 125 additions & 0 deletions verde/tests/test_derivative.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
# pylint: disable=redefined-outer-name
"""
Test the Derivative estimator
"""
import numpy as np
import numpy.testing as npt
import pytest

from ..derivative import Derivative
from ..trend import Trend
from ..vector import Vector
from ..chain import Chain
from ..coordinates import grid_coordinates


@pytest.fixture()
def polynomial():
"A second degree polynomial data"
region = (0, 5000, -10000, -5000)
spacing = 50
coordinates = grid_coordinates(region, spacing=spacing)
baselevel = 100
east_coef = -0.5
north_coef = 1.5
data = (
baselevel + east_coef * coordinates[0] ** 2 + north_coef * coordinates[1] ** 2
)
east_deriv = 2 * east_coef * coordinates[0]
north_deriv = 2 * north_coef * coordinates[1]
return coordinates, data, east_deriv, north_deriv


def test_gradient(polynomial):
"Check that the gradient of polynomial trend is estimated correctly"
coordinates, data, true_east_deriv, true_north_deriv = polynomial

trend = Trend(degree=2).fit(coordinates, data)
east_deriv = Derivative(trend, step=10, direction=(1, 0)).predict(coordinates)
north_deriv = Derivative(trend, step=10, direction=(0, 1)).predict(coordinates)

npt.assert_allclose(true_east_deriv, east_deriv, atol=1e-2)
npt.assert_allclose(true_north_deriv, north_deriv, atol=1e-2)


def test_gradient_fails_wrong_dimensions(polynomial):
"An error should be raised if dimensions of coordinates != directions"
coordinates, data = polynomial[:2]

trend = Trend(degree=2).fit(coordinates, data)
with pytest.raises(ValueError):
Derivative(trend, step=10, direction=(1, 0, 1)).predict(coordinates)
with pytest.raises(ValueError):
Derivative(trend, step=10, direction=(1, 0)).predict((coordinates[0],))


def test_gradient_grid(polynomial):
"Make sure the grid method works as expected"
coordinates, data, true_east_deriv = polynomial[:3]

trend = Trend(degree=2).fit(coordinates, data)
deriv = Derivative(trend, step=10, direction=(1, 0)).grid(spacing=50)

npt.assert_allclose(true_east_deriv, deriv.scalars.values, atol=1e-2)


def test_gradient_direction(polynomial):
"Check that the gradient in a range of directions"
coordinates, data, true_east_deriv, true_north_deriv = polynomial
trend = Trend(degree=2).fit(coordinates, data)
for azimuth in np.radians(np.linspace(0, 360, 60)):
direction = (np.sin(azimuth), np.cos(azimuth))
true_deriv = true_east_deriv * direction[0] + true_north_deriv * direction[1]
deriv = Derivative(trend, step=10, direction=direction).predict(coordinates)
npt.assert_allclose(true_deriv, deriv, atol=1e-2)


def test_gradient_fit(polynomial):
"Check that calling fit on the Derivative works as expected"
coordinates, data, true_east_deriv, true_north_deriv = polynomial

trend = Trend(degree=2)
east_deriv = (
Derivative(trend, step=10, direction=(1, 0))
.fit(coordinates, data)
.predict(coordinates)
)
north_deriv = (
Derivative(trend, step=10, direction=(0, 1))
.fit(coordinates, data)
.predict(coordinates)
)

npt.assert_allclose(true_east_deriv, east_deriv, atol=1e-2)
npt.assert_allclose(true_north_deriv, north_deriv, atol=1e-2)


def test_gradient_vector(polynomial):
"Make sure putting Derivatives in a Vector works"
coordinates, data, true_east_deriv, true_north_deriv = polynomial

trend = Trend(degree=2)
gradient = Vector(
[
Derivative(trend, step=10, direction=(1, 0)),
Derivative(trend, step=10, direction=(0, 1)),
]
)
# This is wasteful because it fits the same trend twice so should not
# really be done in practice.
gradient.fit(coordinates, (data, data))
deriv = gradient.grid(spacing=50)

npt.assert_allclose(true_east_deriv, deriv.east_component.values, atol=1e-2)
npt.assert_allclose(true_north_deriv, deriv.north_component.values, atol=1e-2)


def test_gradient_chain(polynomial):
"Make sure putting Derivatives in a Chain works"
coordinates, data, true_east_deriv = polynomial[:3]

trend = Trend(degree=2)
gradient = Chain([("east", Derivative(trend, step=10, direction=(1, 0)))])
gradient.fit(coordinates, data)
deriv = gradient.predict(coordinates)
npt.assert_allclose(true_east_deriv, deriv, atol=1e-2)