From 6903568c372c55f9447ad0d71adb4b06b0f120c0 Mon Sep 17 00:00:00 2001 From: Daniel Wesloh Date: Thu, 20 Sep 2018 15:53:06 -0400 Subject: [PATCH 01/37] First stab at writing functions to put obs op in BSR form. The influence functions are really big; with the savings in the background error covariance elsewhere in this package, they are the largest. For regional inversions, we only need a few weeks of transport before everything leaves the domain. This tries to keep the space savings in the inversion, not just on disk. --- src/inversion/observation_operator.py | 221 ++++++++++++++++++++++++++ src/inversion/tests.py | 123 ++++++++++++++ 2 files changed, 344 insertions(+) create mode 100644 src/inversion/observation_operator.py diff --git a/src/inversion/observation_operator.py b/src/inversion/observation_operator.py new file mode 100644 index 0000000..5c32f7b --- /dev/null +++ b/src/inversion/observation_operator.py @@ -0,0 +1,221 @@ +"""Utilities for working with influence function files. + +Designed for files dimensioned: +`observation_time, site, forecast_period, y, x`. + +Functions here turn these into BSR matrices dimensioned +`observation, time, y, x` or +`observation_time, site, time, y, x` +keeping the memory savings of the first form while still usable for +standard code. +""" +import numpy as np +from numpy import newaxis + +import scipy.sparse +import pandas as pd + +import xarray.core.utils +import xarray.core.variable + +xarray.core.variable.NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( + xarray.core.variable.NON_NUMPY_SUPPORTED_ARRAY_TYPES + + (scipy.sparse.spmatrix,) +) + + +def align_full_obs_op(obs_op): + """Align observation operator on flux_time. + + Parameters + ---------- + obs_op: xarray.DataArray + dims: observation_time, site, time_before_observation, y, x + coords: flux_time + time_before_obs monotone increasing + observation_time monotone decreasing + + Returns + ------- + obs_op_aligned: xarray.DataArray + dims: observation, flux + MultiIndices: + observation_time, site + flux_time, y, x + data backed by scipy.sparse.bsr_matrix + bsr_matrix does not support indexing + """ + flux_time = obs_op.coords["flux_time"] + + column_offset = np.arange(flux_time.shape[1])[::-1] + dt = abs(flux_time[0, 1] - flux_time[0, 0]) + earliest_flux = flux_time.min() + flux_time_index = pd.date_range( + earliest_flux.values, + flux_time.max().values, + freq="{interval:d}S".format( + interval=int( + dt.values / np.timedelta64(1, "s") + ) + )) + + # Find offset of first flux in each row + row_offset_start = np.asarray( + (flux_time[:, -1] - earliest_flux) / dt + ).astype(np.int64) + + # repeat row_offset_start for sites + y_index = obs_op.get_index("dim_y") + x_index = obs_op.get_index("dim_x") + + # obs_op.stack(observation=("observation_time", "site")) + obs_op = obs_op.stack(space=("dim_y", "dim_x")) + data = obs_op.transpose( + "observation_time", "time_before_observation", + "site", "space" + ).stack( + block_dim=("observation_time", "time_before_observation") + ).transpose( + "block_dim", "site", "space" + ) + + aligned_data = scipy.sparse.bsr_matrix( + (data.data, + (row_offset_start[:, newaxis] + + column_offset[newaxis, :]).reshape(-1), + np.arange(flux_time.shape[0] + 1) * flux_time.shape[1])) + # rows: obs_time, site stack + # cols: flux_time, space stack + aligned_data.ndim = 2 + + aligned_ds = xarray.DataArray( + aligned_data, + coords=dict( + observation=( + xarray.core.utils.multiindex_from_product_levels( + [obs_op.indexes["observation_time"], + obs_op.indexes["site"]], + 
names=["forecast_reference_time", "site"] + ) + ), + flux_state_space=( + xarray.core.utils.multiindex_from_product_levels( + [flux_time_index, + y_index, + x_index], + names=["time", "dim_y", "dim_x"] + ) + ), + ), + dims=("observation", "flux_state_space"), + name=obs_op.name, + attrs=obs_op.attrs, + encoding=obs_op.encoding, + ) + + for coord_name in obs_op.coords: + # I already have some coords + # Coords for dimensions don't carry over + # I've already taken care of time dims + if (((coord_name not in aligned_ds.coords and + coord_name not in obs_op.indexes) and + "time" not in coord_name)): + aligned_ds.coords[coord_name] = obs_op.coords[coord_name] + return aligned_ds + + +def align_partial_obs_op(obs_op): + """Align observation operator on flux_time. + + Parameters + ---------- + obs_op: xarray.DataArray + dims: observation, time_before_obs, y, x + coords: flux_time, observation_time + time_before_obs monotone increasing + observation_time monotone decreasing + only one dim each with 'y' and 'x' + obs_op[-1, -1] is earliest influence map + + Returns + ------- + obs_op_aligned: xarray.DataArray + dims: observation, flux + MultiIndices: + observation_time, site + flux_time, y, x + data backed by scipy.sparse.bsr_matrix + bsr_matrix does not support indexing + """ + flux_time = obs_op.coords["flux_time"] + + column_offset = np.arange(flux_time.shape[1])[::-1] + dt = abs(flux_time[0, 1] - flux_time[0, 0]) + earliest_flux = flux_time.min() + + x_index_name = [name for name in obs_op.dims if "x" in name][0] + y_index_name = [name for name in obs_op.dims if "y" in name][0] + flux_time_index = pd.date_range( + earliest_flux.values, + flux_time.max().values, + freq="{interval:d}S".format( + interval=int( + dt.values / np.timedelta64(1, "s") + ) + )) + x_index = obs_op.get_index(x_index_name) + y_index = obs_op.get_index(y_index_name) + + # Find offset of first flux in each row + row_offset_start = np.asarray( + (flux_time[:, -1] - earliest_flux) / dt + ).astype(np.int64) + + # repeat row_offset_start for sites + + # obs_op.stack(observation=("observation_time", "site")) + obs_op = obs_op.stack(space=(y_index_name, x_index_name)) + data = obs_op.transpose( + "observation", "time_before_observation", + "space" + ).stack( + block_dim=("observation", "time_before_observation") + ).expand_dims( + "block_extra_dim", 1 + ).transpose( + "block_dim", "block_extra_dim", "space" + ) + + aligned_data = scipy.sparse.bsr_matrix( + (data.data, + (row_offset_start[:, newaxis] + column_offset[newaxis, :]).flat, + np.arange(flux_time.shape[0] + 1) * flux_time.shape[1])) + # rows: obs_time, site stack + # cols: flux_time, space stack + aligned_data.ndim = 2 + + col_index = xarray.core.utils.multiindex_from_product_levels( + (flux_time_index, + y_index, x_index), + ("flux_time", y_index_name, x_index_name)) + + aligned_ds = xarray.DataArray( + aligned_data, + dict( + observation=obs_op.indexes["observation"], + flux_state_space=col_index, + ), + ("observation", "flux_state_space"), + obs_op.name, + obs_op.attrs, + obs_op.encoding, + ) + for coord_name in obs_op.coords: + # I already have some coords + # Dim coords don't carry over + if ((coord_name not in aligned_ds.coords and + coord_name not in obs_op.indexes and + "time" not in coord_name)): + aligned_ds.coords[coord_name] = obs_op.coords[coord_name] + return aligned_ds + diff --git a/src/inversion/tests.py b/src/inversion/tests.py index b537bae..db99a82 100644 --- a/src/inversion/tests.py +++ b/src/inversion/tests.py @@ -2900,5 +2900,128 @@ def 
test_harder(self):
 [43, 46, 48]])
 
 
+class TestObsOpAligners(unittest2.TestCase):
+ """Test the alignment functions.
+
+ Ideally checks both that it runs and that it gives correct answer.
+
+ Depends on unreleased xarray changes.
+ """
+
+ def test_align_full(self):
+ """Test aligning the full influence function.
+
+ The input is the influence function straight from the file.
+ """
+ import xarray
+ import pandas as pd
+ import inversion.observation_operator
+ N_OBS_TIMES = 20
+ N_SITES = 5
+ FORECAST_LENGTH = 10
+ NY = 5
+ NX = 5
+
+ ds = xarray.Dataset(
+ dict(influence_function=(
+ ("observation_time",
+ "site",
+ "time_before_observation",
+ "dim_y",
+ "dim_x"),
+ np.ones((N_OBS_TIMES, N_SITES,
+ FORECAST_LENGTH, NY, NX)))
+ ),
+ dict(
+ observation_time=pd.date_range(
+ "2010-01-01", periods=N_OBS_TIMES, freq="1H"),
+ site=range(N_SITES),
+ time_before_observation=pd.timedelta_range(
+ 0, periods=FORECAST_LENGTH, freq="6H"),
+ projection_y_coordinate=np.linspace(0, 2100, NY),
+ projection_x_coordinate=np.linspace(0, 2100, NX),
+ ),
+ )
+ flux_time = (
+ np.floor(
+ (ds.coords["observation_time"] -
+ ds.coords["time_before_observation"] -
+ np.array("2010-01-01", dtype="M8[ns]")) /
+ np.array(6, dtype="m8[h]").astype("m8[ns]")) *
+ np.array(6, dtype="m8[h]").astype("m8[ns]") +
+ np.array("2010-01-01", dtype="M8[ns]"))
+ ds.coords["flux_time"] = flux_time
+
+ aligned_ds = (
+ inversion.observation_operator.align_full_obs_op(
+ ds.influence_function))
+
+ aligned_data = aligned_ds.data
+ self.assertIsInstance(aligned_data, scipy.sparse.bsr_matrix)
+ self.assertEqual(aligned_data.blocksize, (N_SITES, NY * NX))
+ np_tst.assert_allclose(aligned_data.data, 1)
+ np_tst.assert_allclose(np.diff(aligned_data.indptr), FORECAST_LENGTH)
+
+ def test_align_partial(self):
+ """Test aligning the full influence function.
+
+ The input is the influence function straight from the file. 
+ """ + import xarray + import pandas as pd + import inversion.observation_operator + N_OBS_TIMES = 20 + N_SITES = 5 + FORECAST_LENGTH = 10 + NY = 5 + NX = 5 + + ds = xarray.Dataset( + dict(influence_function=( + ("observation_time", + "site", + "time_before_observation", + "dim_y", + "dim_x"), + np.ones((N_OBS_TIMES, N_SITES, + FORECAST_LENGTH, NY, NX))) + ), + dict( + observation_time=pd.date_range( + "2010-01-01", periods=N_OBS_TIMES, freq="1H"), + site=range(N_SITES), + time_before_observation=pd.timedelta_range( + 0, periods=FORECAST_LENGTH, freq="6H"), + projection_y_coordinate=np.linspace(0, 2100, NY), + projection_x_coordinate=np.linspace(0, 2100, NX), + ), + ) + flux_time = ( + np.floor( + (ds.coords["observation_time"] - + ds.coords["time_before_observation"] - + np.array("2010-01-01", dtype="M8[ns]")) / + np.array(6, dtype="m8[h]").astype("m8[ns]")) * + np.array(6, dtype="m8[h]").astype("m8[ns]") + + np.array("2010-01-01", dtype="M8[ns]")) + ds.coords["flux_time"] = flux_time + + aligned_ds = ( + inversion.observation_operator.align_partial_obs_op( + ds.influence_function.isel_points( + dim="observation", + site=range(N_SITES), + observation_time=range(0, 3 * N_SITES, 3), + ).set_index(observation=["observation_time", "site"]) + ) + ) + + aligned_data = aligned_ds.data + self.assertIsInstance(aligned_data, scipy.sparse.bsr_matrix) + self.assertEqual(aligned_data.blocksize, (N_SITES, NY * NX)) + np_tst.assert_allclose(aligned_data.data, 1) + np_tst.assert_allclose(np.diff(aligned_data.indptr), FORECAST_LENGTH) + + if __name__ == "__main__": unittest2.main() From b55e4bc29db88a3594512fa5f4aa5294aa5c66b1 Mon Sep 17 00:00:00 2001 From: Daniel Wesloh <22566757+DWesl@users.noreply.github.com> Date: Sun, 18 Nov 2018 23:08:08 -0500 Subject: [PATCH 02/37] Mark tests for new features as expected failures. This feature needs XArray support for passing bsr_matrix and for stacking previously-stacked dimensions. --- src/inversion/tests.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/inversion/tests.py b/src/inversion/tests.py index db99a82..f88a602 100644 --- a/src/inversion/tests.py +++ b/src/inversion/tests.py @@ -2908,6 +2908,8 @@ class TestObsOpAligners(unittest2.TestCase): Depends on unreleased xarray changes. """ + # xarray only supports ndarrays and dask arrays + @unittest2.expectedFailure def test_align_full(self): """Test aligning the full influence function. @@ -2962,6 +2964,8 @@ def test_align_full(self): np_tst.assert_allclose(aligned_data.data, 1) np_tst.assert_allclose(np.diff(aligned_data.indptr), FORECAST_LENGTH) + # xarray doesn't support calling stack on a stacked dimension + @unittest2.expectedFailure def test_align_partial(self): """Test aligning the full influence function. From e795464ace1c373bdaf81efb576cab1a102db4af Mon Sep 17 00:00:00 2001 From: Daniel Wesloh <22566757+DWesl@users.noreply.github.com> Date: Mon, 4 Mar 2019 17:42:31 -0500 Subject: [PATCH 03/37] Return the influence functions as bsr_matrix directly, rather than wrapping. This avoids problems in trying to patch XArray in tox. I also got the tests passing, working around the problems with stacking a stacked dimension. I still have to test that everything's in the right place. 
--- src/inversion/observation_operator.py | 134 ++++++++++++++------------ src/inversion/tests.py | 12 +-- 2 files changed, 75 insertions(+), 71 deletions(-) diff --git a/src/inversion/observation_operator.py b/src/inversion/observation_operator.py index 5c32f7b..705e46f 100644 --- a/src/inversion/observation_operator.py +++ b/src/inversion/observation_operator.py @@ -8,6 +8,12 @@ `observation_time, site, time, y, x` keeping the memory savings of the first form while still usable for standard code. + +I should mention at some point that "influence function" and +"observation operator" refer to the same quantity, represented by +:math:`H` in the equations. This is used to represent which fluxes +influence each observation, as well as what observations would be +expected given a specific set of fluxes. """ import numpy as np from numpy import newaxis @@ -88,40 +94,40 @@ def align_full_obs_op(obs_op): # cols: flux_time, space stack aligned_data.ndim = 2 - aligned_ds = xarray.DataArray( - aligned_data, - coords=dict( - observation=( - xarray.core.utils.multiindex_from_product_levels( - [obs_op.indexes["observation_time"], - obs_op.indexes["site"]], - names=["forecast_reference_time", "site"] - ) - ), - flux_state_space=( - xarray.core.utils.multiindex_from_product_levels( - [flux_time_index, - y_index, - x_index], - names=["time", "dim_y", "dim_x"] - ) - ), - ), - dims=("observation", "flux_state_space"), - name=obs_op.name, - attrs=obs_op.attrs, - encoding=obs_op.encoding, - ) - - for coord_name in obs_op.coords: - # I already have some coords - # Coords for dimensions don't carry over - # I've already taken care of time dims - if (((coord_name not in aligned_ds.coords and - coord_name not in obs_op.indexes) and - "time" not in coord_name)): - aligned_ds.coords[coord_name] = obs_op.coords[coord_name] - return aligned_ds + # aligned_ds = xarray.DataArray( + # aligned_data, + # coords=dict( + # observation=( + # xarray.core.utils.multiindex_from_product_levels( + # [obs_op.indexes["observation_time"], + # obs_op.indexes["site"]], + # names=["forecast_reference_time", "site"] + # ) + # ), + # flux_state_space=( + # xarray.core.utils.multiindex_from_product_levels( + # [flux_time_index, + # y_index, + # x_index], + # names=["time", "dim_y", "dim_x"] + # ) + # ), + # ), + # dims=("observation", "flux_state_space"), + # name=obs_op.name, + # attrs=obs_op.attrs, + # encoding=obs_op.encoding, + # ) + + # for coord_name in obs_op.coords: + # # I already have some coords + # # Coords for dimensions don't carry over + # # I've already taken care of time dims + # if (((coord_name not in aligned_ds.coords and + # coord_name not in obs_op.indexes) and + # "time" not in coord_name)): + # aligned_ds.coords[coord_name] = obs_op.coords[coord_name] + return aligned_data def align_partial_obs_op(obs_op): @@ -178,44 +184,48 @@ def align_partial_obs_op(obs_op): data = obs_op.transpose( "observation", "time_before_observation", "space" - ).stack( - block_dim=("observation", "time_before_observation") ).expand_dims( "block_extra_dim", 1 ).transpose( - "block_dim", "block_extra_dim", "space" + "observation", "time_before_observation", "block_extra_dim", "space" ) + # .stack( + # block_dim=("observation", "time_before_observation") + # ) + n_obs = len(obs_op.indexes["observation"]) + n_times_back = len(obs_op.indexes["time_before_observation"]) + n_space = len(y_index) * len(x_index) aligned_data = scipy.sparse.bsr_matrix( - (data.data, + (data.data.reshape(n_obs * n_times_back, 1, n_space), (row_offset_start[:, 
newaxis] + column_offset[newaxis, :]).flat, np.arange(flux_time.shape[0] + 1) * flux_time.shape[1])) # rows: obs_time, site stack # cols: flux_time, space stack aligned_data.ndim = 2 - col_index = xarray.core.utils.multiindex_from_product_levels( - (flux_time_index, - y_index, x_index), - ("flux_time", y_index_name, x_index_name)) - - aligned_ds = xarray.DataArray( - aligned_data, - dict( - observation=obs_op.indexes["observation"], - flux_state_space=col_index, - ), - ("observation", "flux_state_space"), - obs_op.name, - obs_op.attrs, - obs_op.encoding, - ) - for coord_name in obs_op.coords: - # I already have some coords - # Dim coords don't carry over - if ((coord_name not in aligned_ds.coords and - coord_name not in obs_op.indexes and - "time" not in coord_name)): - aligned_ds.coords[coord_name] = obs_op.coords[coord_name] - return aligned_ds + # col_index = xarray.core.utils.multiindex_from_product_levels( + # (flux_time_index, + # y_index, x_index), + # ("flux_time", y_index_name, x_index_name)) + + # aligned_ds = xarray.DataArray( + # aligned_data, + # dict( + # observation=obs_op.indexes["observation"], + # flux_state_space=col_index, + # ), + # ("observation", "flux_state_space"), + # obs_op.name, + # obs_op.attrs, + # obs_op.encoding, + # ) + # for coord_name in obs_op.coords: + # # I already have some coords + # # Dim coords don't carry over + # if ((coord_name not in aligned_ds.coords and + # coord_name not in obs_op.indexes and + # "time" not in coord_name)): + # aligned_ds.coords[coord_name] = obs_op.coords[coord_name] + return aligned_data diff --git a/src/inversion/tests.py b/src/inversion/tests.py index f88a602..1ab4dcf 100644 --- a/src/inversion/tests.py +++ b/src/inversion/tests.py @@ -2908,8 +2908,6 @@ class TestObsOpAligners(unittest2.TestCase): Depends on unreleased xarray changes. """ - # xarray only supports ndarrays and dask arrays - @unittest2.expectedFailure def test_align_full(self): """Test aligning the full influence function. @@ -2954,18 +2952,15 @@ def test_align_full(self): np.array("2010-01-01", dtype="M8[ns]")) ds.coords["flux_time"] = flux_time - aligned_ds = ( + aligned_data = ( inversion.observation_operator.align_full_obs_op( ds.influence_function)) - aligned_data = aligned_ds.data self.assertIsInstance(aligned_data, scipy.sparse.bsr_matrix) self.assertEqual(aligned_data.blocksize, (N_SITES, NY * NX)) np_tst.assert_allclose(aligned_data.data, 1) np_tst.assert_allclose(np.diff(aligned_data.indptr), FORECAST_LENGTH) - # xarray doesn't support calling stack on a stacked dimension - @unittest2.expectedFailure def test_align_partial(self): """Test aligning the full influence function. 
@@ -3010,7 +3005,7 @@ def test_align_partial(self): np.array("2010-01-01", dtype="M8[ns]")) ds.coords["flux_time"] = flux_time - aligned_ds = ( + aligned_data = ( inversion.observation_operator.align_partial_obs_op( ds.influence_function.isel_points( dim="observation", @@ -3020,9 +3015,8 @@ def test_align_partial(self): ) ) - aligned_data = aligned_ds.data self.assertIsInstance(aligned_data, scipy.sparse.bsr_matrix) - self.assertEqual(aligned_data.blocksize, (N_SITES, NY * NX)) + self.assertEqual(aligned_data.blocksize, (1, NY * NX)) np_tst.assert_allclose(aligned_data.data, 1) np_tst.assert_allclose(np.diff(aligned_data.indptr), FORECAST_LENGTH) From e9fbff8471375fd6eb74542f291fcb3f070c50f2 Mon Sep 17 00:00:00 2001 From: Daniel Wesloh <22566757+DWesl@users.noreply.github.com> Date: Mon, 4 Mar 2019 18:14:20 -0500 Subject: [PATCH 04/37] Test that the bsr matrix represents things in the right places. --- src/inversion/observation_operator.py | 39 +++++++++--------- src/inversion/tests.py | 59 +++++++++++++++++++++++---- 2 files changed, 69 insertions(+), 29 deletions(-) diff --git a/src/inversion/observation_operator.py b/src/inversion/observation_operator.py index 705e46f..953c371 100644 --- a/src/inversion/observation_operator.py +++ b/src/inversion/observation_operator.py @@ -19,7 +19,6 @@ from numpy import newaxis import scipy.sparse -import pandas as pd import xarray.core.utils import xarray.core.variable @@ -56,14 +55,14 @@ def align_full_obs_op(obs_op): column_offset = np.arange(flux_time.shape[1])[::-1] dt = abs(flux_time[0, 1] - flux_time[0, 0]) earliest_flux = flux_time.min() - flux_time_index = pd.date_range( - earliest_flux.values, - flux_time.max().values, - freq="{interval:d}S".format( - interval=int( - dt.values / np.timedelta64(1, "s") - ) - )) + # flux_time_index = pd.date_range( + # earliest_flux.values, + # flux_time.max().values, + # freq="{interval:d}S".format( + # interval=int( + # dt.values / np.timedelta64(1, "s") + # ) + # )) # Find offset of first flux in each row row_offset_start = np.asarray( @@ -71,8 +70,8 @@ def align_full_obs_op(obs_op): ).astype(np.int64) # repeat row_offset_start for sites - y_index = obs_op.get_index("dim_y") - x_index = obs_op.get_index("dim_x") + # y_index = obs_op.get_index("dim_y") + # x_index = obs_op.get_index("dim_x") # obs_op.stack(observation=("observation_time", "site")) obs_op = obs_op.stack(space=("dim_y", "dim_x")) @@ -159,16 +158,16 @@ def align_partial_obs_op(obs_op): dt = abs(flux_time[0, 1] - flux_time[0, 0]) earliest_flux = flux_time.min() - x_index_name = [name for name in obs_op.dims if "x" in name][0] y_index_name = [name for name in obs_op.dims if "y" in name][0] - flux_time_index = pd.date_range( - earliest_flux.values, - flux_time.max().values, - freq="{interval:d}S".format( - interval=int( - dt.values / np.timedelta64(1, "s") - ) - )) + x_index_name = y_index_name.replace("y", "x") + # flux_time_index = pd.date_range( + # earliest_flux.values, + # flux_time.max().values, + # freq="{interval:d}S".format( + # interval=int( + # dt.values / np.timedelta64(1, "s") + # ) + # )) x_index = obs_op.get_index(x_index_name) y_index = obs_op.get_index(y_index_name) diff --git a/src/inversion/tests.py b/src/inversion/tests.py index 1ab4dcf..2ed61fb 100644 --- a/src/inversion/tests.py +++ b/src/inversion/tests.py @@ -2930,8 +2930,8 @@ def test_align_full(self): "dim_y", "dim_x"), np.ones((N_OBS_TIMES, N_SITES, - FORECAST_LENGTH, NY, NX))) - ), + FORECAST_LENGTH, NY, NX)) + )), dict( observation_time=pd.date_range( "2010-01-01", 
periods=N_OBS_TIMES, freq="1H"), @@ -2961,6 +2961,26 @@ def test_align_full(self): np_tst.assert_allclose(aligned_data.data, 1) np_tst.assert_allclose(np.diff(aligned_data.indptr), FORECAST_LENGTH) + xarray_aligned_ds = xarray.concat( + [ + here_infl.set_index( + time_before_observation="flux_time" + ).rename( + dict(time_before_observation="flux_time") + ) + for here_infl in ds["influence_function"] + ], + "observation_time" + ).fillna(0) + + np_tst.assert_allclose( + aligned_data.toarray(), + xarray_aligned_ds.stack( + observation=["observation_time", "site"], + fluxes=["flux_time", "dim_y", "dim_x"] + ).values + ) + def test_align_partial(self): """Test aligning the full influence function. @@ -2983,8 +3003,8 @@ def test_align_partial(self): "dim_y", "dim_x"), np.ones((N_OBS_TIMES, N_SITES, - FORECAST_LENGTH, NY, NX))) - ), + FORECAST_LENGTH, NY, NX)) + )), dict( observation_time=pd.date_range( "2010-01-01", periods=N_OBS_TIMES, freq="1H"), @@ -3005,13 +3025,15 @@ def test_align_partial(self): np.array("2010-01-01", dtype="M8[ns]")) ds.coords["flux_time"] = flux_time + ds_to_test = ds.influence_function.isel_points( + dim="observation", + site=range(N_SITES), + observation_time=range(0, 3 * N_SITES, 3), + ).set_index(observation=["observation_time", "site"]) + aligned_data = ( inversion.observation_operator.align_partial_obs_op( - ds.influence_function.isel_points( - dim="observation", - site=range(N_SITES), - observation_time=range(0, 3 * N_SITES, 3), - ).set_index(observation=["observation_time", "site"]) + ds_to_test ) ) @@ -3020,6 +3042,25 @@ def test_align_partial(self): np_tst.assert_allclose(aligned_data.data, 1) np_tst.assert_allclose(np.diff(aligned_data.indptr), FORECAST_LENGTH) + xarray_aligned_ds = xarray.concat( + [ + here_infl.set_index( + time_before_observation="flux_time" + ).rename( + dict(time_before_observation="flux_time") + ) + for here_infl in ds_to_test + ], + "observation" + ).fillna(0) + + np_tst.assert_allclose( + aligned_data.toarray(), + xarray_aligned_ds.stack( + fluxes=["flux_time", "dim_y", "dim_x"] + ).values + ) + if __name__ == "__main__": unittest2.main() From d9dab0b33c77ab0d7fb821aa2d7a35d5118ed498 Mon Sep 17 00:00:00 2001 From: Daniel Wesloh <22566757+DWesl@users.noreply.github.com> Date: Mon, 4 Mar 2019 18:15:59 -0500 Subject: [PATCH 05/37] Remove code for DataArray, since support for that was dropped. 
--- src/inversion/observation_operator.py | 79 --------------------------- 1 file changed, 79 deletions(-) diff --git a/src/inversion/observation_operator.py b/src/inversion/observation_operator.py index 953c371..741ef4d 100644 --- a/src/inversion/observation_operator.py +++ b/src/inversion/observation_operator.py @@ -55,14 +55,6 @@ def align_full_obs_op(obs_op): column_offset = np.arange(flux_time.shape[1])[::-1] dt = abs(flux_time[0, 1] - flux_time[0, 0]) earliest_flux = flux_time.min() - # flux_time_index = pd.date_range( - # earliest_flux.values, - # flux_time.max().values, - # freq="{interval:d}S".format( - # interval=int( - # dt.values / np.timedelta64(1, "s") - # ) - # )) # Find offset of first flux in each row row_offset_start = np.asarray( @@ -70,10 +62,6 @@ def align_full_obs_op(obs_op): ).astype(np.int64) # repeat row_offset_start for sites - # y_index = obs_op.get_index("dim_y") - # x_index = obs_op.get_index("dim_x") - - # obs_op.stack(observation=("observation_time", "site")) obs_op = obs_op.stack(space=("dim_y", "dim_x")) data = obs_op.transpose( "observation_time", "time_before_observation", @@ -93,39 +81,6 @@ def align_full_obs_op(obs_op): # cols: flux_time, space stack aligned_data.ndim = 2 - # aligned_ds = xarray.DataArray( - # aligned_data, - # coords=dict( - # observation=( - # xarray.core.utils.multiindex_from_product_levels( - # [obs_op.indexes["observation_time"], - # obs_op.indexes["site"]], - # names=["forecast_reference_time", "site"] - # ) - # ), - # flux_state_space=( - # xarray.core.utils.multiindex_from_product_levels( - # [flux_time_index, - # y_index, - # x_index], - # names=["time", "dim_y", "dim_x"] - # ) - # ), - # ), - # dims=("observation", "flux_state_space"), - # name=obs_op.name, - # attrs=obs_op.attrs, - # encoding=obs_op.encoding, - # ) - - # for coord_name in obs_op.coords: - # # I already have some coords - # # Coords for dimensions don't carry over - # # I've already taken care of time dims - # if (((coord_name not in aligned_ds.coords and - # coord_name not in obs_op.indexes) and - # "time" not in coord_name)): - # aligned_ds.coords[coord_name] = obs_op.coords[coord_name] return aligned_data @@ -160,14 +115,6 @@ def align_partial_obs_op(obs_op): y_index_name = [name for name in obs_op.dims if "y" in name][0] x_index_name = y_index_name.replace("y", "x") - # flux_time_index = pd.date_range( - # earliest_flux.values, - # flux_time.max().values, - # freq="{interval:d}S".format( - # interval=int( - # dt.values / np.timedelta64(1, "s") - # ) - # )) x_index = obs_op.get_index(x_index_name) y_index = obs_op.get_index(y_index_name) @@ -188,9 +135,6 @@ def align_partial_obs_op(obs_op): ).transpose( "observation", "time_before_observation", "block_extra_dim", "space" ) - # .stack( - # block_dim=("observation", "time_before_observation") - # ) n_obs = len(obs_op.indexes["observation"]) n_times_back = len(obs_op.indexes["time_before_observation"]) n_space = len(y_index) * len(x_index) @@ -203,28 +147,5 @@ def align_partial_obs_op(obs_op): # cols: flux_time, space stack aligned_data.ndim = 2 - # col_index = xarray.core.utils.multiindex_from_product_levels( - # (flux_time_index, - # y_index, x_index), - # ("flux_time", y_index_name, x_index_name)) - - # aligned_ds = xarray.DataArray( - # aligned_data, - # dict( - # observation=obs_op.indexes["observation"], - # flux_state_space=col_index, - # ), - # ("observation", "flux_state_space"), - # obs_op.name, - # obs_op.attrs, - # obs_op.encoding, - # ) - # for coord_name in obs_op.coords: - # # I already have 
some coords - # # Dim coords don't carry over - # if ((coord_name not in aligned_ds.coords and - # coord_name not in obs_op.indexes and - # "time" not in coord_name)): - # aligned_ds.coords[coord_name] = obs_op.coords[coord_name] return aligned_data From bc9045014d4b79c9236719c63478335761edb27a Mon Sep 17 00:00:00 2001 From: Daniel Wesloh <22566757+DWesl@users.noreply.github.com> Date: Mon, 4 Mar 2019 18:16:46 -0500 Subject: [PATCH 06/37] Ignore additional testing artifacts. --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 76472ad..262bd9d 100644 --- a/.gitignore +++ b/.gitignore @@ -9,7 +9,7 @@ build/ # Test artifacts .tox/ -.coverage +.coverage* *_flymake.py .mypy_cache From 0a471c64e84824ac445ed29714cd30567f8414aa Mon Sep 17 00:00:00 2001 From: Daniel Wesloh <22566757+DWesl@users.noreply.github.com> Date: Mon, 4 Mar 2019 19:43:25 -0500 Subject: [PATCH 07/37] Add a function to calculate the product of a separable covariance and a BSR obs op. Only the most basic of tests at the moment. I don't feel like creating a full bsr_matrix from scratch at the moment. --- src/inversion/tests.py | 28 +++++++++++++++++++ src/inversion/util.py | 62 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 89 insertions(+), 1 deletion(-) diff --git a/src/inversion/tests.py b/src/inversion/tests.py index 2ed61fb..d77ae9f 100644 --- a/src/inversion/tests.py +++ b/src/inversion/tests.py @@ -3062,5 +3062,33 @@ def test_align_partial(self): ) +class TestYMKronBSRQuadraticForm(unittest2.TestCase): + """Test ym_kronecker_quadratic_form_bsr.""" + + def test_empty(self): + """Test on an empty matrix.""" + mat1 = scipy.linalg.toeplitz([1, .5, .25]) + corr_fun = ( + inversion.correlations.ExponentialCorrelation(2)) + corr_op = ( + inversion.correlations.HomogeneousIsotropicCorrelation + .from_function(corr_fun, (4, 5))) + + full_op = inversion.linalg.DaskKroneckerProductOperator( + mat1, corr_op) + + obs_op = scipy.sparse.bsr_matrix( + (12, full_op.shape[0]), + blocksize=(3, corr_op.shape[0]), + dtype=DTYPE) + + result = inversion.util.ym_kronecker_quadratic_form_bsr( + obs_op, full_op) + + np_tst.assert_allclose(result, 0) + self.assertSequenceEqual( + result.shape, (obs_op.shape[0], obs_op.shape[0])) + + if __name__ == "__main__": unittest2.main() diff --git a/src/inversion/util.py b/src/inversion/util.py index 7b23da6..db523d6 100644 --- a/src/inversion/util.py +++ b/src/inversion/util.py @@ -8,7 +8,7 @@ import functools import numpy as np -from numpy import atleast_1d, atleast_2d +from numpy import atleast_1d, atleast_2d, einsum from scipy.sparse.linalg import LinearOperator import dask.array as da @@ -216,3 +216,63 @@ def wrapper(background, background_covariance, return analysis_estimate, analysis_covariance return wrapper + + +def ym_kronecker_quadratic_form_bsr(matrix, operator): + """Calculate matrix @ operator @ matrix.T. 
+ + Parameters + ---------- + matrix: scipy.sparse.bsr_matrix + operator: inversion.linalg.DaskKroneckerProductOperator + + Returns + ------- + array_like + + Raises + ------ + ValueError + If the block sizes do not match + """ + result_side = matrix.shape[0] + block_size = matrix.blocksize[1] + sites_per_block = matrix.blocksize[0] + + left_operator = operator._operator1 + right_operator = operator._operator2 + n_blocks = left_operator.shape[1] + + if right_operator.shape[1] != block_size: + raise ValueError("Block sizes do not match") + + result = np.empty((result_side, result_side), + dtype=matrix.dtype, + order="F") + + row_start = matrix.indptr + columns_for_row = matrix.indices + data = matrix.data + + for i_block in range(result_side // sites_per_block): + i_row = sites_per_block * i_block + this_row_start = row_start[i_block] + next_row_start = row_start[i_block + 1] + + relevant_left_columns = left_operator[ + :, + columns_for_row[this_row_start:next_row_start] + ] + relevant_blocks_of_matrix = data[this_row_start:next_row_start] + result[:, i_row:i_row + sites_per_block] = matrix.dot( + right_operator.dot( + einsum( + "ij,jlk->kil", + relevant_left_columns, + relevant_blocks_of_matrix, + order="F" + ).reshape(block_size, n_blocks * sites_per_block) + ).reshape(block_size * n_blocks, sites_per_block) + ) + + return result From e044a6319ba11639dc3706098dcdd31abec2ffe1 Mon Sep 17 00:00:00 2001 From: Daniel Wesloh <22566757+DWesl@users.noreply.github.com> Date: Tue, 5 Mar 2019 11:39:16 -0500 Subject: [PATCH 08/37] Test that the new quadratic form function is doing the right thing. Matrix of all ones; a reshaped 'arange', and random. Also get the function to actually do the right thing. I trust it a bunch more now. --- src/inversion/tests.py | 110 +++++++++++++++++++++++++++++++++++++++++ src/inversion/util.py | 12 ++++- 2 files changed, 120 insertions(+), 2 deletions(-) diff --git a/src/inversion/tests.py b/src/inversion/tests.py index d77ae9f..05a1ff7 100644 --- a/src/inversion/tests.py +++ b/src/inversion/tests.py @@ -3089,6 +3089,116 @@ def test_empty(self): self.assertSequenceEqual( result.shape, (obs_op.shape[0], obs_op.shape[0])) + def test_simple(self): + """Test on a matrix of ones.""" + mat1 = scipy.linalg.toeplitz([1, .5, .25, .125, .0625]) + corr_fun = ( + inversion.correlations.ExponentialCorrelation(2)) + corr_op = ( + inversion.correlations.HomogeneousIsotropicCorrelation + .from_function(corr_fun, (4, 5))) + + full_op = inversion.linalg.DaskKroneckerProductOperator( + mat1, corr_op) + + data = np.ones((8, 3, corr_op.shape[0]), dtype=DTYPE) + indices = np.array((0, 1, 1, 2, 2, 3, 3, 4), dtype=int) + indptr = np.array((0, 2, 4, 6, 8)) + obs_op = scipy.sparse.bsr_matrix((data, indices, indptr)) + obs_mat = obs_op.toarray() + + result = inversion.util.ym_kronecker_quadratic_form_bsr( + obs_op, full_op) + + np_tst.assert_allclose( + result, + result.T + ) + np_tst.assert_allclose( + result, + obs_op.dot(full_op.dot(obs_mat.T)) + ) + self.assertSequenceEqual( + result.shape, (obs_op.shape[0], obs_op.shape[0])) + + def test_harder(self): + """Test on a more complicated matrix.""" + mat1 = scipy.linalg.toeplitz([1, .5, .25, .125, .0625]) + corr_fun = ( + inversion.correlations.ExponentialCorrelation(2)) + corr_op = ( + inversion.correlations.HomogeneousIsotropicCorrelation + .from_function(corr_fun, (4, 5))) + + full_op = inversion.linalg.DaskKroneckerProductOperator( + mat1, corr_op) + + data = np.arange(24 * corr_op.shape[0], dtype=DTYPE).reshape( + (8, 3, corr_op.shape[0])) 
+ indices = np.array((0, 1, 1, 2, 2, 3, 3, 4), dtype=int) + indptr = np.array((0, 2, 4, 6, 8)) + obs_op = scipy.sparse.bsr_matrix((data, indices, indptr)) + obs_mat = obs_op.toarray() + + result = inversion.util.ym_kronecker_quadratic_form_bsr( + obs_op, full_op) + + np_tst.assert_allclose( + result, + result.T + ) + np_tst.assert_allclose( + result, + obs_op.dot(full_op.dot(obs_mat.T)) + ) + + data = np.arange(24 * corr_op.shape[0], dtype=DTYPE).reshape( + (8, 3, corr_op.shape[0]), order="F") + obs_op = scipy.sparse.bsr_matrix((data, indices, indptr)) + obs_mat = obs_op.toarray() + result = inversion.util.ym_kronecker_quadratic_form_bsr( + obs_op, full_op) + + np_tst.assert_allclose( + result, + result.T + ) + np_tst.assert_allclose( + result, + obs_op.dot(full_op.dot(obs_mat.T)) + ) + + def test_random(self): + """Test on a more complicated matrix.""" + mat1 = scipy.linalg.toeplitz([1, .5, .25, .125, .0625]) + corr_fun = ( + inversion.correlations.ExponentialCorrelation(2)) + corr_op = ( + inversion.correlations.HomogeneousIsotropicCorrelation + .from_function(corr_fun, (4, 5))) + + full_op = inversion.linalg.DaskKroneckerProductOperator( + mat1, corr_op) + + indices = np.array((0, 1, 1, 2, 2, 3, 3, 4), dtype=int) + indptr = np.array((0, 2, 4, 6, 8)) + + data = np.random.standard_normal((8, 3, corr_op.shape[0])) + obs_op = scipy.sparse.bsr_matrix((data, indices, indptr)) + obs_mat = obs_op.toarray() + result = inversion.util.ym_kronecker_quadratic_form_bsr( + obs_op, full_op) + + np_tst.assert_allclose( + result, + result.T + ) + np_tst.assert_allclose( + result, + obs_op.dot(full_op.dot(obs_mat.T)) + ) + cholesky(result) + if __name__ == "__main__": unittest2.main() diff --git a/src/inversion/util.py b/src/inversion/util.py index db523d6..3e36fda 100644 --- a/src/inversion/util.py +++ b/src/inversion/util.py @@ -259,20 +259,28 @@ def ym_kronecker_quadratic_form_bsr(matrix, operator): this_row_start = row_start[i_block] next_row_start = row_start[i_block + 1] + # flux_time x nz_blocks relevant_left_columns = left_operator[ :, columns_for_row[this_row_start:next_row_start] ] + # nz_blocks x site x space relevant_blocks_of_matrix = data[this_row_start:next_row_start] result[:, i_row:i_row + sites_per_block] = matrix.dot( right_operator.dot( + # space x flux_time x site einsum( "ij,jlk->kil", relevant_left_columns, relevant_blocks_of_matrix, order="F" - ).reshape(block_size, n_blocks * sites_per_block) - ).reshape(block_size * n_blocks, sites_per_block) + ).reshape(block_size, n_blocks * sites_per_block, + order="F") + # space x (flux_time * site) + ).reshape(block_size * n_blocks, sites_per_block, + order="F") + # (space * flux_time) x site ) + # (obs_time * site) x site return result From 5527380543dfdb0ca64c9c3afcde11c2c45f4adc Mon Sep 17 00:00:00 2001 From: Daniel Wesloh <22566757+DWesl@users.noreply.github.com> Date: Tue, 5 Mar 2019 11:44:59 -0500 Subject: [PATCH 09/37] Set up the framework to get this used. Having the function to do this is good. Having it be used where applicable is even better. 
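
For reference, the quantity the new fast path produces is H B H^T
with B = kron(A, C).  A dense toy check of that identity, for
illustration only (sizes made up, and the real code never forms B
densely):

    import numpy as np
    import scipy.linalg
    import scipy.sparse

    A = scipy.linalg.toeplitz([1., .5, .25, .125])  # temporal factor
    C = 2. * np.eye(6)                              # spatial factor
    B = np.kron(A, C)
    data = np.ones((4, 2, 6))
    H = scipy.sparse.bsr_matrix(
        (data, np.array([0, 1, 2, 3]), np.array([0, 2, 4])))
    projected = H.dot(B).dot(H.toarray().T)  # what the fast path returns
    print(projected.shape)  # (4, 4)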
--- src/inversion/linalg_interface.py | 3 +++ src/inversion/optimal_interpolation.py | 18 ++++++++++++--- src/inversion/tests.py | 32 ++++++++++++++++++++++++++ src/inversion/util.py | 6 ++++- 4 files changed, 55 insertions(+), 4 deletions(-) diff --git a/src/inversion/linalg_interface.py b/src/inversion/linalg_interface.py index 65ba23d..a4b48b0 100644 --- a/src/inversion/linalg_interface.py +++ b/src/inversion/linalg_interface.py @@ -6,6 +6,7 @@ from numpy import promote_types from numpy import empty, asarray, atleast_2d +from scipy.sparse import spmatrix as _spmatrix from scipy.sparse.linalg import aslinearoperator from scipy.sparse.linalg.interface import ( LinearOperator, @@ -309,6 +310,8 @@ def dot(self, x): return ProductLinearOperator(self, x) elif np.isscalar(x): return _DaskScaledLinearOperator(self, x) + elif isinstance(x, _spmatrix): + return ProductLinearOperator(self, DaskMatrixLinearOperator(x)) else: x = asarray(x) diff --git a/src/inversion/optimal_interpolation.py b/src/inversion/optimal_interpolation.py index 1513a57..3757735 100644 --- a/src/inversion/optimal_interpolation.py +++ b/src/inversion/optimal_interpolation.py @@ -4,10 +4,13 @@ """ import scipy.linalg from scipy.sparse.linalg import LinearOperator +from scipy.sparse import bsr_matrix as _bsr_matrix -from inversion.util import method_common +from inversion.util import method_common, ym_kronecker_quadratic_form_bsr from inversion.linalg import (ProductLinearOperator, ARRAY_TYPES, - solve, tolinearoperator) + solve, tolinearoperator, MatrixLinearOperator) +from inversion.linalg import ( + DaskKroneckerProductOperator as _DaskKroneckerProductOperator) @method_common @@ -333,7 +336,16 @@ def save_sum(background, background_covariance, innovation = (observations - projected_obs) # B_{proj} = HBH^T - if isinstance(observation_operator, ARRAY_TYPES): + if ((isinstance( + background_covariance, + _DaskKroneckerProductOperator) and + ((isinstance(observation_operator, + MatrixLinearOperator) and + isinstance(observation_operator.A, + _bsr_matrix))))): + projected_background_covariance = ym_kronecker_quadratic_form_bsr( + observation_operator.A, background_covariance) + elif isinstance(observation_operator, ARRAY_TYPES): # TODO: test this if hasattr(background_covariance, "quadratic_form"): projected_background_covariance = ( diff --git a/src/inversion/tests.py b/src/inversion/tests.py index 05a1ff7..0b78f67 100644 --- a/src/inversion/tests.py +++ b/src/inversion/tests.py @@ -403,6 +403,38 @@ def test_iterative_failures(self): scipy.optimize.OptimizeResult) self.assertTrue(hasattr(conv_err, "hess_inv")) + def test_bsr_ymkron_handling(self): + """Check that everything still works with BSR and YMKron.""" + mat1 = scipy.linalg.toeplitz([1, .5, .25, .125, .0625]) + corr_fun = ( + inversion.correlations.ExponentialCorrelation(2)) + corr_op = ( + inversion.correlations.HomogeneousIsotropicCorrelation + .from_function(corr_fun, (4, 5))) + + bg_corr = inversion.linalg.DaskKroneckerProductOperator( + mat1, corr_op) + + data = np.ones((8, 3, corr_op.shape[0]), dtype=DTYPE) + indices = np.array((0, 1, 1, 2, 2, 3, 3, 4), dtype=int) + indptr = np.array((0, 2, 4, 6, 8)) + obs_op = scipy.sparse.bsr_matrix((data, indices, indptr)) + + background = np.zeros((bg_corr.shape[1]), dtype=DTYPE) + obs = np.ones(obs_op.shape[0], dtype=DTYPE) + obs_cov = np.eye(obs_op.shape[0]) + + for method in ALL_METHODS: + name = getname(method) + + with self.subTest(method=name): + if "chol" in name.lower(): + raise unittest2.SkipTest( + "Known Failure: " + 
"Cholesky factorization of linear operators") + method(background, bg_corr, + obs, obs_cov, obs_op) + class TestGaussianNoise(unittest2.TestCase): """Test the properties of the gaussian noise.""" diff --git a/src/inversion/util.py b/src/inversion/util.py index 3e36fda..5be5e0f 100644 --- a/src/inversion/util.py +++ b/src/inversion/util.py @@ -10,11 +10,13 @@ import numpy as np from numpy import atleast_1d, atleast_2d, einsum from scipy.sparse.linalg import LinearOperator +from scipy.sparse import bsr_matrix import dask.array as da from .linalg import DaskKroneckerProductOperator, kron, solve from .linalg_interface import ProductLinearOperator, tolinearoperator +from .linalg_interface import DaskMatrixLinearOperator as _MatrixLinearOperator ARRAY_TYPES = (np.ndarray, da.Array) """Array types for determining Kronecker product type. @@ -153,7 +155,9 @@ def wrapper(background, background_covariance, if not isinstance(observation_covariance, _LinearOperator): observation_covariance = atleast_2d(observation_covariance) - if not isinstance(observation_operator, _LinearOperator): + if isinstance(observation_operator, bsr_matrix): + observation_operator = _MatrixLinearOperator(observation_operator) + elif not isinstance(observation_operator, _LinearOperator): observation_operator = atleast_2d(observation_operator) if reduced_background_covariance is not None: From a57cde516018eaf81c881356dc7a74335349cfce Mon Sep 17 00:00:00 2001 From: Daniel Wesloh <22566757+DWesl@users.noreply.github.com> Date: Tue, 5 Mar 2019 15:11:00 -0500 Subject: [PATCH 10/37] Get tests working on python2. `ceil` doesn't return an int, and dicts have no defined order. --- src/inversion/remapper.py | 3 +-- src/inversion/tests.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/inversion/remapper.py b/src/inversion/remapper.py index a5abbdb..b31de47 100644 --- a/src/inversion/remapper.py +++ b/src/inversion/remapper.py @@ -4,7 +4,6 @@ resolution than the fluxes. """ from __future__ import division -import math from math import ceil import numpy as np @@ -33,7 +32,7 @@ def get_remappers(domain_size, block_side=3): In this package, that would be the fluxes. """ domain_size = tuple(domain_size) - reduced_size = tuple(ceil(dim / block_side) for dim in domain_size) + reduced_size = tuple(int(ceil(dim / block_side)) for dim in domain_size) extensive_remapper = zeros(reduced_size + domain_size, dtype=DTYPE) diff --git a/src/inversion/tests.py b/src/inversion/tests.py index 0b78f67..6a1b84d 100644 --- a/src/inversion/tests.py +++ b/src/inversion/tests.py @@ -3010,7 +3010,7 @@ def test_align_full(self): xarray_aligned_ds.stack( observation=["observation_time", "site"], fluxes=["flux_time", "dim_y", "dim_x"] - ).values + ).transpose("observation", "fluxes").values ) def test_align_partial(self): From 1c9f21f4fd54d6ce21fa3a1f9fbda888880b7166 Mon Sep 17 00:00:00 2001 From: Daniel Wesloh <22566757+DWesl@users.noreply.github.com> Date: Tue, 5 Mar 2019 15:12:51 -0500 Subject: [PATCH 11/37] Test that the new quadratic form fails properly when it won't work. 
--- src/inversion/tests.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/inversion/tests.py b/src/inversion/tests.py index 6a1b84d..6d40e55 100644 --- a/src/inversion/tests.py +++ b/src/inversion/tests.py @@ -3231,6 +3231,27 @@ def test_random(self): ) cholesky(result) + def test_bad_shape(self): + """Test failure on mismatched shapes.""" + mat1 = scipy.linalg.toeplitz([1, .5, .25]) + corr_fun = ( + inversion.correlations.ExponentialCorrelation(2)) + corr_op = ( + inversion.correlations.HomogeneousIsotropicCorrelation + .from_function(corr_fun, (4, 5))) + + full_op = inversion.linalg.DaskKroneckerProductOperator( + mat1, corr_op) + + obs_op = scipy.sparse.bsr_matrix( + (12, full_op.shape[0] + 3), + blocksize=(3, corr_op.shape[0] + 1), + dtype=DTYPE) + + with self.assertRaises(ValueError): + inversion.util.ym_kronecker_quadratic_form_bsr( + obs_op, full_op) + if __name__ == "__main__": unittest2.main() From 2ef6b907fbdb5806a4f09f95ad9ed8bfeb3c0f51 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Tue, 5 Mar 2019 17:08:27 -0500 Subject: [PATCH 12/37] Fix a few style issues in the inversion code. --- examples/month_inversion_magic_dask.py | 47 ++++++++++++++------------ 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/examples/month_inversion_magic_dask.py b/examples/month_inversion_magic_dask.py index e1db8be..7d1a834 100644 --- a/examples/month_inversion_magic_dask.py +++ b/examples/month_inversion_magic_dask.py @@ -602,8 +602,10 @@ def flush_output_streams(): # group will each be the number of elements in that group. reduced_temporal_correlation_ds = ( temporal_correlation_ds - .resample(flux_time=UNCERTAINTY_TEMPORAL_RESOLUTION).mean("flux_time") - .resample(flux_time_adjoint=UNCERTAINTY_TEMPORAL_RESOLUTION).mean("flux_time_adjoint") + .resample(flux_time=UNCERTAINTY_TEMPORAL_RESOLUTION) + .mean("flux_time") + .resample(flux_time_adjoint=UNCERTAINTY_TEMPORAL_RESOLUTION) + .mean("flux_time_adjoint") ) print(datetime.datetime.now(UTC).strftime("%c"), "Have temporal correlations") print(reduced_temporal_correlation_ds.values) @@ -656,7 +658,8 @@ def flush_output_streams(): spatial_covariance = inversion.covariances.CorrelationStandardDeviation( spatial_correlations, reduced_flux_stds) -print(datetime.datetime.now(UTC).strftime("%c"), "Have full spatial covariance") +print(datetime.datetime.now(UTC).strftime("%c"), + "Have full spatial covariance") flush_output_streams() reduced_spatial_covariance = spatial_correlation_remapper.reshape( @@ -668,7 +671,8 @@ def flush_output_streams(): ).T ) ) -print(datetime.datetime.now(UTC).strftime("%c"), "Have reduced spatial covariance") +print(datetime.datetime.now(UTC).strftime("%c"), + "Have reduced spatial covariance") flush_output_streams() print(datetime.datetime.now(UTC).strftime("%c"), "Have spatial covariances") @@ -692,24 +696,18 @@ def flush_output_streams(): "dim_x", pd.interval_range( 0, - aligned_influences.indexes["dim_x"][-1] + UNCERTAINTY_FLUX_RESOLUTION, + (aligned_influences.indexes["dim_x"][-1] + + UNCERTAINTY_FLUX_RESOLUTION), freq=UNCERTAINTY_FLUX_RESOLUTION, closed="left") - # np.arange( - # -1, - # aligned_influences.indexes["dim_x"][-1] + UNCERTAINTY_FLUX_RESOLUTION, - # UNCERTAINTY_FLUX_RESOLUTION), ).sum("dim_x") .groupby_bins( "dim_y", pd.interval_range( 0, - aligned_influences.indexes["dim_y"][-1] + UNCERTAINTY_FLUX_RESOLUTION, + (aligned_influences.indexes["dim_y"][-1] + + UNCERTAINTY_FLUX_RESOLUTION), freq=UNCERTAINTY_FLUX_RESOLUTION, 
closed="left") - # np.arange( - # -1, - # aligned_influences.indexes["dim_y"][-1] + UNCERTAINTY_FLUX_RESOLUTION, - # UNCERTAINTY_FLUX_RESOLUTION), ).sum("dim_y") .resample(flux_time=UNCERTAINTY_TEMPORAL_RESOLUTION).sum("flux_time") ).rename(dim_x_bins="reduced_dim_x", dim_y_bins="reduced_dim_y", @@ -850,12 +848,13 @@ def flush_output_streams(): posterior_covariance_ds = xarray.Dataset( dict( reduced_posterior_covariance=( - ("reduced_flux_time_adjoint", - "reduced_dim_y_adjoint", - "reduced_dim_x_adjoint", - "reduced_flux_time", - "reduced_dim_y", - "reduced_dim_x", + ( + "reduced_flux_time_adjoint", + "reduced_dim_y_adjoint", + "reduced_dim_x_adjoint", + "reduced_flux_time", + "reduced_dim_y", + "reduced_dim_x", ), reduced_posterior_covariances.reshape( reduced_temporal_correlation_ds.shape[0], @@ -893,8 +892,12 @@ def flush_output_streams(): ), ), dict( - reduced_flux_time=reduced_temporal_correlation_ds.coords["flux_time"].values, - reduced_flux_time_adjoint=reduced_temporal_correlation_ds.coords["flux_time_adjoint"].values, + reduced_flux_time=( + reduced_temporal_correlation_ds + .coords["flux_time"].values), + reduced_flux_time_adjoint=( + reduced_temporal_correlation_ds + .coords["flux_time_adjoint"].values), wrf_proj=reduced_influences.coords["wrf_proj"] ), posterior_global_atts From 8a52598f1d9493e8eb98ea7c87eb118bb40bab6c Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Tue, 5 Mar 2019 17:12:06 -0500 Subject: [PATCH 13/37] Simplify the handling of reduced-resolution variants in method_common. Use bitwise xor instead of many nested conditionals. --- src/inversion/util.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/inversion/util.py b/src/inversion/util.py index 5be5e0f..753c5ab 100644 --- a/src/inversion/util.py +++ b/src/inversion/util.py @@ -160,18 +160,18 @@ def wrapper(background, background_covariance, elif not isinstance(observation_operator, _LinearOperator): observation_operator = atleast_2d(observation_operator) + if (((reduced_background_covariance is None) ^ + (reduced_observation_operator is None))): + raise ValueError("Need reduced versions of both B and H") + if reduced_background_covariance is not None: if not isinstance(reduced_background_covariance, LinearOperator): reduced_background_covariance = atleast_2d( reduced_background_covariance) - if reduced_observation_operator is None: - raise ValueError("Need reduced versions of both B and H") if not isinstance(reduced_observation_operator, LinearOperator): reduced_observation_operator = atleast_2d( reduced_observation_operator) - elif reduced_observation_operator is not None: - raise ValueError("Need reduced versions of both B and H") analysis_estimate, analysis_covariance = ( inversion_method(background, background_covariance, From 65ce042b6f039d2bd58b00e264bf343f072dc2ef Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Wed, 6 Mar 2019 12:19:36 -0500 Subject: [PATCH 14/37] Allow requesting a shape for the influence function and start using this. I need the returned shape to match that of the fluxes, so make sure that happens. Also start using these new features in the inversion. 
--- examples/month_inversion_magic_dask.py | 66 ++++++++++---------------- src/inversion/observation_operator.py | 9 ++-- src/inversion/tests.py | 11 +++++ 3 files changed, 42 insertions(+), 44 deletions(-) diff --git a/examples/month_inversion_magic_dask.py b/examples/month_inversion_magic_dask.py index 7d1a834..f976542 100644 --- a/examples/month_inversion_magic_dask.py +++ b/examples/month_inversion_magic_dask.py @@ -34,6 +34,7 @@ import inversion.correlations import inversion.covariances from inversion.util import kronecker_product +from inversion.observation_operator import align_partial_obs_op from inversion.linalg import asarray, kron from inversion.noise import gaussian_noise import cf_acdd @@ -465,25 +466,29 @@ def flush_output_streams(): dimension_order.insert(dimension_order.index("observation_time"), "observation") dimension_order = tuple(dimension_order) -aligned_influences = xarray.concat( - [here_infl.set_index( - time_before_observation="flux_time").rename( - dict(time_before_observation="flux_time")) - for here_infl in INFLUENCE_FUNCTIONS.sel_points( - site=site_index, observation_time=pd_obs_index)], - "observation").set_index( - observation=("observation_time", "site")) +INFLUENCE_FUNCTIONS_TO_USE = INFLUENCE_FUNCTIONS.sel_points( + site=site_index, observation_time=pd_obs_index, + dim="observation" +).set_index( + observation=["observation_time", "site"] +).transpose("observation", "time_before_observation", + "dim_y", "dim_x") +aligned_influences = align_partial_obs_op( + INFLUENCE_FUNCTIONS_TO_USE, + (INFLUENCE_FUNCTIONS_TO_USE.shape[0], + len(FLUX_TIMES_INDEX)) +) print(datetime.datetime.now(UTC).strftime("%c"), "Aligned flux times in influence function, " "aligning fluxes with influence function") flush_output_streams() -aligned_influences, aligned_true_fluxes, aligned_prior_fluxes = ( - xarray.align( - aligned_influences, - TRUE_FLUXES_MATCHED[TRUE_FLUX_NAME], - PRIOR_FLUXES_MATCHED[PRIOR_FLUX_NAME], - exclude=("dim_x", "dim_y"), - join="outer", copy=False)) +min_infl_time = INFLUENCE_FUNCTIONS.coords["flux_time"].min() +max_infl_time = INFLUENCE_FUNCTIONS.coords["flux_time"].max() +min_flux_time = PRIOR_FLUXES_MATCHED.coords["flux_time"].min() +max_flux_time = PRIOR_FLUXES_MATCHED.coords["flux_time"].max() + +aligned_true_fluxes = TRUE_FLUXES_MATCHED[TRUE_FLUX_NAME] +aligned_prior_fluxes = PRIOR_FLUXES_MATCHED[PRIOR_FLUX_NAME] print(datetime.datetime.now(UTC).strftime("%c"), "Aligned fluxes and influence function") flush_output_streams() @@ -491,14 +496,10 @@ def flush_output_streams(): "flux_time", "dim_y", "dim_x") aligned_prior_fluxes = aligned_prior_fluxes.transpose( "flux_time", "dim_y", "dim_x", "realization") -aligned_influences = aligned_influences.transpose( - "observation", "flux_time", "dim_y", "dim_x") print(datetime.datetime.now(UTC).strftime("%c"), "Rechunked to square") flush_output_streams() -aligned_influences = aligned_influences.fillna(0) aligned_true_fluxes.load() aligned_prior_fluxes.load() -aligned_influences.load() print(datetime.datetime.now(UTC).strftime("%c"), "Loaded data") flush_output_streams() @@ -689,30 +690,13 @@ def flush_output_streams(): # ("reduced_dim_y", "reduced_dim_x", "dim_y", "dim_x"), # "spatial_obs_op_remapper_ds", # ) +reduced_influences_data = aligned_influences.data.dot( + spatial_obs_op_remapper.reshape(-1, N_GRID_POINTS).T +) reduced_influences = ( - aligned_influences - .groupby_bins( - "dim_x", - pd.interval_range( - 0, - (aligned_influences.indexes["dim_x"][-1] + - UNCERTAINTY_FLUX_RESOLUTION), - 
freq=UNCERTAINTY_FLUX_RESOLUTION,
- closed="left")
- ).sum("dim_x")
- .groupby_bins(
- "dim_y",
- pd.interval_range(
- 0,
- (aligned_influences.indexes["dim_y"][-1] +
- UNCERTAINTY_FLUX_RESOLUTION),
- freq=UNCERTAINTY_FLUX_RESOLUTION, closed="left")
- ).sum("dim_y")
- .resample(flux_time=UNCERTAINTY_TEMPORAL_RESOLUTION).sum("flux_time")
-).rename(dim_x_bins="reduced_dim_x", dim_y_bins="reduced_dim_y",
- flux_time="reduced_flux_time")
-reduced_influences.load()
+ aligned_influences.dot(spatial_obs_op_remapper.reshape(-1, N_GRID_POINTS).T)
+)
 print(datetime.datetime.now(UTC).strftime("%c"),
 "Have influence for monthly average plots")
 flush_output_streams()
diff --git a/src/inversion/observation_operator.py b/src/inversion/observation_operator.py
index 741ef4d..b0a612b 100644
--- a/src/inversion/observation_operator.py
+++ b/src/inversion/observation_operator.py
@@ -84,7 +84,7 @@ def align_full_obs_op(obs_op):
 return aligned_data
 
 
-def align_partial_obs_op(obs_op):
+def align_partial_obs_op(obs_op, required_shape=None):
 """Align observation operator on flux_time.
 
 Parameters
@@ -96,6 +96,9 @@ def align_partial_obs_op(obs_op):
 observation_time monotone decreasing
 only one dim each with 'y' and 'x'
 obs_op[-1, -1] is earliest influence map
+ required_shape: tuple of int, optional
+ The shape required of the returned
+ matrix.
 
 Returns
 -------
@@ -142,10 +145,10 @@ def align_partial_obs_op(obs_op):
 aligned_data = scipy.sparse.bsr_matrix(
 (data.data.reshape(n_obs * n_times_back, 1, n_space),
 (row_offset_start[:, newaxis] + column_offset[newaxis, :]).flat,
- np.arange(flux_time.shape[0] + 1) * flux_time.shape[1]))
+ np.arange(flux_time.shape[0] + 1) * flux_time.shape[1]),
+ shape=required_shape)
 # rows: obs_time, site stack
 # cols: flux_time, space stack
 aligned_data.ndim = 2
 
 return aligned_data
-
diff --git a/src/inversion/tests.py b/src/inversion/tests.py
index 6d40e55..ec18f51 100644
--- a/src/inversion/tests.py
+++ b/src/inversion/tests.py
@@ -3093,6 +3093,17 @@ def test_align_partial(self):
 ).values
 )
 
+ requested_shape = (aligned_data.shape[0],
+ aligned_data.shape[1] + 3 * aligned_data.blocksize[1])
+ bigger_aligned_data = (
+ inversion.observation_operator.align_partial_obs_op(
+ ds_to_test,
+ requested_shape
+ )
+ )
+ self.assertSequenceEqual(bigger_aligned_data.shape,
+ requested_shape)
+
 
 class TestYMKronBSRQuadraticForm(unittest2.TestCase):
 """Test ym_kronecker_quadratic_form_bsr."""
From f8dfcccc617d751203333d53ce3cb11b267ab41c Mon Sep 17 00:00:00 2001
From: DWesl <22566757+DWesl@users.noreply.github.com>
Date: Wed, 6 Mar 2019 13:12:19 -0500
Subject: [PATCH 15/37] Add the remappers and influence function helpers to the documentation.

---
 doc/source/inversion.observation_operator.rst | 7 +++++++
 doc/source/inversion.remapper.rst | 7 +++++++
 doc/source/inversion.rst | 7 +++++++
 src/inversion/observation_operator.py | 15 +++++----------
 4 files changed, 26 insertions(+), 10 deletions(-)
 create mode 100644 doc/source/inversion.observation_operator.rst
 create mode 100644 doc/source/inversion.remapper.rst

diff --git a/doc/source/inversion.observation_operator.rst b/doc/source/inversion.observation_operator.rst
new file mode 100644
index 0000000..d15b470
--- /dev/null
+++ b/doc/source/inversion.observation_operator.rst
@@ -0,0 +1,7 @@
+inversion\.observation\_operator module
+=======================================
+
+.. 
automodule:: inversion.observation_operator + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/inversion.remapper.rst b/doc/source/inversion.remapper.rst new file mode 100644 index 0000000..c3d2f20 --- /dev/null +++ b/doc/source/inversion.remapper.rst @@ -0,0 +1,7 @@ +inversion\.remapper module +========================== + +.. automodule:: inversion.remapper + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/source/inversion.rst b/doc/source/inversion.rst index c7fadce..524c70c 100644 --- a/doc/source/inversion.rst +++ b/doc/source/inversion.rst @@ -34,6 +34,13 @@ High-level wrappers .. toctree:: inversion.wrapper +Helpers for real-data inversions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. toctree:: + inversion.observation_operator + inversion.remapper + Other utilities ~~~~~~~~~~~~~~~ diff --git a/src/inversion/observation_operator.py b/src/inversion/observation_operator.py index b0a612b..12d9a34 100644 --- a/src/inversion/observation_operator.py +++ b/src/inversion/observation_operator.py @@ -42,12 +42,10 @@ def align_full_obs_op(obs_op): Returns ------- - obs_op_aligned: xarray.DataArray + obs_op_aligned: scipy.sparse.bsr_matrix dims: observation, flux - MultiIndices: - observation_time, site - flux_time, y, x - data backed by scipy.sparse.bsr_matrix + observation is composed of observation_time, site + flux is composed of flux_time, y, x bsr_matrix does not support indexing """ flux_time = obs_op.coords["flux_time"] @@ -102,12 +100,9 @@ def align_partial_obs_op(obs_op, required_shape=None): Returns ------- - obs_op_aligned: xarray.DataArray + obs_op_aligned: scipy.sparse.bsr_matrix dims: observation, flux - MultiIndices: - observation_time, site - flux_time, y, x - data backed by scipy.sparse.bsr_matrix + flux is composed of flux_time, y, x bsr_matrix does not support indexing """ flux_time = obs_op.coords["flux_time"] From 2c57bbccba4a1ff7b0a44ceb78ccd512ac879ac4 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Thu, 7 Mar 2019 14:48:32 -0500 Subject: [PATCH 16/37] A first attempt to resample BSR-format data. I have tests for all the failure modes, and they should catch any major problems. --- src/inversion/remapper.py | 99 +++++++++++++++++++++++++++- src/inversion/tests.py | 134 +++++++++++++++++++++++++++++++++++--- 2 files changed, 221 insertions(+), 12 deletions(-) diff --git a/src/inversion/remapper.py b/src/inversion/remapper.py index b31de47..669d2fb 100644 --- a/src/inversion/remapper.py +++ b/src/inversion/remapper.py @@ -7,11 +7,14 @@ from math import ceil import numpy as np -from numpy import zeros, newaxis +from numpy import zeros, newaxis, arange +from numpy import sum as np_sum + +from xarray import DataArray DTYPE = np.float64 -def get_remappers(domain_size, block_side=3): +def get_spatial_remappers(domain_size, block_side=3): """Get matrices to remap from original to coarser resolution. Parameters @@ -50,3 +53,95 @@ def get_remappers(domain_size, block_side=3): intensive_remapper /= n_nz[:, :, newaxis, newaxis] return extensive_remapper, intensive_remapper + + +def remap_bsr_temporal(flux_time_index, new_freq, matrix_to_remap): + """Remap a BSR matrix using space as the blocks. + + It is assumed that the blocks are n by n_flux_points_space, + and that the block column index represents time. + + If you have a full array you want on a different temporal + frequency, attach metadata to make it a :class:`~xarray.DataArray` + and use its :meth:`~xarray.DataArray.resample`. 
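+
+    A minimal sketch of that alternative (``data`` and ``time_index``
+    here are hypothetical stand-ins, not arguments of this function)::
+
+        da = DataArray(data, coords=dict(time=time_index),
+                       dims=("time", "space"))
+        weekly = da.resample(time="7D").sum("time")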
+ + Parameters + ---------- + flux_time_index: pandas.DatetimeIndex + new_freq: str + matrix_to_remap: scipy.sparse.bsr_matrix + + Returns + ------- + remapped_matrix: array_like + + Raises + ------ + ValueError + If new_freq is specified in terms of weeks. "7D" is fine. + "1W" uses different logic. + """ + if new_freq.endswith("W"): + raise ValueError( + "Weekly frequencies are weird. I don't support them here.") + + row_starts = matrix_to_remap.indptr + columns = matrix_to_remap.indices + data = matrix_to_remap.data + blocksize = matrix_to_remap.blocksize + n_flux_times = len(flux_time_index) + + if matrix_to_remap.shape[1] / blocksize[1] != n_flux_times: + raise ValueError("Index does not correspond to structure passed.") + + test_series = DataArray( + data=arange(len(flux_time_index)), + coords=dict(time=flux_time_index.values), + dims="time", + name="temp_array", + ) + resampled = test_series.resample(time=new_freq).sum("time") + resampled_index = resampled.indexes["time"] + + result = zeros((matrix_to_remap.shape[0], + len(resampled), + blocksize[1]), + order="F", + dtype=matrix_to_remap.dtype) + + # columns[new_i] is list of old_i that get summed to make + # result[:, new_i] + columns_old_to_new = [ + [old_i for old_i in range(len(test_series)) + if (resampled_index[new_i] <= + flux_time_index[old_i] < + resampled_index[new_i + 1])] + for new_i in range(0, len(resampled_index) - 1)] + if columns_old_to_new: + columns_old_to_new.append( + range(columns_old_to_new[-1][-1] + 1, len(flux_time_index))) + else: + columns_old_to_new.append( + range(len(flux_time_index))) + + for j in range(len(resampled)): + for block_i in range(result.shape[0] // blocksize[0]): + list_of_columns = columns[ + row_starts[block_i]:row_starts[block_i + 1] + ] + columns_in_block = [ + old_j for old_j in list_of_columns + if old_j in columns_old_to_new[j] + ] + result_i = block_i * blocksize[0] + np_sum( + data[columns_in_block, :, :], + axis=0, + out=result[result_i:result_i + blocksize[0], + j, :], + ) + + return result.reshape( + matrix_to_remap.shape[0], + len(resampled) * blocksize[1] + ) diff --git a/src/inversion/tests.py b/src/inversion/tests.py index ec18f51..dcf030f 100644 --- a/src/inversion/tests.py +++ b/src/inversion/tests.py @@ -35,6 +35,7 @@ import xarray import inversion.optimal_interpolation +import inversion.observation_operator import inversion.correlations import inversion.covariances import inversion.variational @@ -2889,7 +2890,7 @@ class TestRemapper(unittest2.TestCase): def test_simple(self): """Test for the simplest possible case.""" - extensive, intensive = inversion.remapper.get_remappers( + extensive, intensive = inversion.remapper.get_spatial_remappers( (6, 6), 3) old_data = np.arange(36, dtype=float).reshape(6, 6) @@ -2910,7 +2911,7 @@ def test_simple(self): def test_harder(self): """Test for domains that do not divide evenly.""" - extensive, intensive = inversion.remapper.get_remappers( + extensive, intensive = inversion.remapper.get_spatial_remappers( (7, 7), 3) old_data = np.arange(49, dtype=float).reshape(7, 7) @@ -2931,6 +2932,123 @@ def test_harder(self): [29, 32, 34], [43, 46, 48]]) + def test_remap_time(self): + """Test the function for remapping in time.""" + N_OBS_TIMES = 10 + N_SITES = 2 + FORECAST_LENGTH = 40 + NY = 3 + NX = 4 + + ds = xarray.Dataset( + dict(influence_function=( + ("observation_time", + "site", + "time_before_observation", + "dim_y", + "dim_x"), + np.ones((N_OBS_TIMES, N_SITES, + FORECAST_LENGTH, NY, NX)) + )), + dict( + observation_time=pd.date_range( 
+                "2010-01-01", periods=N_OBS_TIMES, freq="2H"),
+                site=range(N_SITES),
+                time_before_observation=pd.timedelta_range(
+                    0, periods=FORECAST_LENGTH, freq="6H"),
+                dim_y=np.linspace(0, 2100, NY),
+                dim_x=np.linspace(0, 2100, NX),
+            ),
+        )
+        flux_time = (
+            np.floor(
+                (ds.coords["observation_time"] -
+                 ds.coords["time_before_observation"] -
+                 np.array("2010-01-01", dtype="M8[ns]")) /
+                np.array(6, dtype="m8[h]").astype("m8[ns]")) *
+            np.array(6, dtype="m8[h]").astype("m8[ns]") +
+            np.array("2010-01-01", dtype="M8[ns]"))
+        ds.coords["flux_time"] = flux_time
+
+        aligned_data = (
+            inversion.observation_operator.align_full_obs_op(
+                ds.influence_function))
+
+        xarray_aligned_ds = xarray.concat(
+            [
+                here_infl.set_index(
+                    time_before_observation="flux_time"
+                ).rename(
+                    dict(time_before_observation="flux_time")
+                )
+                for here_infl in ds["influence_function"]
+            ],
+            "observation_time"
+        ).fillna(0)
+
+        downsampled_matrix = inversion.remapper.remap_bsr_temporal(
+            xarray_aligned_ds.indexes["flux_time"],
+            "3D",
+            aligned_data)
+
+        xarray_resampled_ds = xarray_aligned_ds.resample(
+            flux_time="3D"
+        ).sum("flux_time")
+
+        self.assertIsInstance(downsampled_matrix, np.ndarray)
+        self.assertEqual(len(downsampled_matrix.shape), 2)
+        np_tst.assert_allclose(
+            downsampled_matrix,
+            xarray_resampled_ds.stack(
+                observation=["observation_time", "site"],
+                fluxes=["flux_time", "dim_y", "dim_x"]
+            ).transpose("observation", "fluxes").values
+        )
+
+    def test_temporal_remap_short_empty(self):
+        """Test remapping of a dataset that fits into one period."""
+        result = inversion.remapper.remap_bsr_temporal(
+            pd.date_range("2010-07-28 06:00", "2010-07-31", freq="6H"),
+            "7D",
+            scipy.sparse.bsr_matrix(
+                (20, 120),
+                blocksize=(4, 10)
+            )
+        )
+
+        np_tst.assert_allclose(
+            result,
+            np.zeros((20, 10))
+        )
+
+    def test_temporal_remap_fail(self):
+        """Test failure modes of the temporal remapping code."""
+        with self.assertRaises(
+                ValueError,
+                msg="Weekly frequencies are weird. I don't support them here."
+        ):
+            inversion.remapper.remap_bsr_temporal(
+                pd.date_range("2010-01-01", "2010-01-10", freq="6H"),
+                "1W",
+                scipy.sparse.bsr_matrix(
+                    (20, 200),
+                    blocksize=(4, 5)
+                )
+            )
+
+        with self.assertRaises(
+                ValueError,
+                msg="Index does not correspond to structure passed."
+        ):
+            inversion.remapper.remap_bsr_temporal(
+                pd.date_range("2010-01-01", "2010-01-10", freq="6H"),
+                "7D",
+                scipy.sparse.bsr_matrix(
+                    (20, 160),
+                    blocksize=(4, 20)
+                )
+            )
+

 class TestObsOpAligners(unittest2.TestCase):
     """Test the alignment functions.

@@ -2945,9 +3063,6 @@ def test_align_full(self):
         """Test aligning the full influence function.

         The input is the influence function straight from the file.
         """
-        import xarray
-        import pandas as pd
-        import inversion.observation_operator
         N_OBS_TIMES = 20
         N_SITES = 5
         FORECAST_LENGTH = 10
@@ -3018,9 +3133,6 @@ def test_align_partial(self):
         """Test aligning the partial influence function.

         The input is the influence function straight from the file.
""" - import xarray - import pandas as pd - import inversion.observation_operator N_OBS_TIMES = 20 N_SITES = 5 FORECAST_LENGTH = 10 @@ -3093,8 +3205,10 @@ def test_align_partial(self): ).values ) - requested_shape = (aligned_data.shape[0], - aligned_data.shape[1] + 3 * aligned_data.blocksize[1]) + requested_shape = ( + aligned_data.shape[0], + aligned_data.shape[1] + 3 * aligned_data.blocksize[1] + ) bigger_aligned_data = ( inversion.observation_operator.align_partial_obs_op( ds_to_test, From fcdec4106154b9b9c2c751fc33f7c0a376996b22 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Fri, 8 Mar 2019 11:27:57 -0500 Subject: [PATCH 17/37] Get the BSR observation operators used in the inversion. The previous version was incomplete, in particular, it highlighted the need for temporal remapping then stopped. This implements the temporal remapping and fixes a few typos in the code. --- examples/month_inversion_magic_dask.py | 79 +++++++++++++++++--------- 1 file changed, 53 insertions(+), 26 deletions(-) diff --git a/examples/month_inversion_magic_dask.py b/examples/month_inversion_magic_dask.py index f976542..15396b7 100644 --- a/examples/month_inversion_magic_dask.py +++ b/examples/month_inversion_magic_dask.py @@ -12,6 +12,7 @@ import glob import sys +import scipy.sparse import pandas as pd import dateutil.tz import numpy as np @@ -476,7 +477,8 @@ def flush_output_streams(): aligned_influences = align_partial_obs_op( INFLUENCE_FUNCTIONS_TO_USE, (INFLUENCE_FUNCTIONS_TO_USE.shape[0], - len(FLUX_TIMES_INDEX)) + len(FLUX_TIMES_INDEX) * + N_GRID_POINTS) ) print(datetime.datetime.now(UTC).strftime("%c"), "Aligned flux times in influence function, " @@ -553,9 +555,10 @@ def flush_output_streams(): # # reduced_spatial_correlations = ( # # spatial_correlation_remapper.dot( # # spatial_correlations.dot(spatial_correlation_remapper))) +print(datetime.datetime.now(UTC).strftime("%c"), "Getting spatial remappers") import inversion.remapper spatial_obs_op_remapper, spatial_correlation_remapper = ( - inversion.remapper.get_remappers( + inversion.remapper.get_spatial_remappers( (len(TRUE_FLUXES_MATCHED.coords["dim_y"]), len(TRUE_FLUXES_MATCHED.coords["dim_x"])), UNCERTAINTY_RESOLUTION_REDUCTION_FACTOR)) @@ -694,8 +697,22 @@ def flush_output_streams(): spatial_obs_op_remapper.reshape(-1, N_GRID_POINTS).T ) -reduced_influences = ( - aligned_influences.dot(spatial_obs_op_remapper.reshape(-1, N_GRID_POINTS).T) +reduced_influences = inversion.remapper.remap_bsr_temporal( + aligned_prior_fluxes.indexes["flux_time"], + UNCERTAINTY_TEMPORAL_RESOLUTION, + scipy.sparse.bsr_matrix( + ( + reduced_influences_data, + aligned_influences.indices, + aligned_influences.indptr + ), + shape=( + aligned_influences.shape[0], + aligned_influences.shape[1] // + aligned_influences.blocksize[1] * + reduced_influences_data.shape[-1] + ) + ) ) print(datetime.datetime.now(UTC).strftime("%c"), "Have influence for monthly average plots") @@ -780,15 +797,9 @@ def flush_output_streams(): prior_covariance, used_observations.values, observation_covariance, - aligned_influences.stack( - fluxes=("flux_time", "dim_y", "dim_x") - ).values, - # .reshape(aligned_influences.shape[0], - # np.prod(aligned_influences.shape[-3:]))), + aligned_influences, reduced_prior_covariance, - reduced_influences.stack( - fluxes=("reduced_flux_time", "reduced_dim_y", - "reduced_dim_x")).values + reduced_influences ) ) print(datetime.datetime.now(UTC).strftime("%c"), @@ -842,11 +853,11 @@ def flush_output_streams(): ), 
reduced_posterior_covariances.reshape( reduced_temporal_correlation_ds.shape[0], - reduced_influences.shape[2], - reduced_influences.shape[3], + REDUCED_NY, + REDUCED_NX, reduced_temporal_correlation_ds.shape[1], - reduced_influences.shape[2], - reduced_influences.shape[3] + REDUCED_NY, + REDUCED_NX ), dict( long_name="reduced_covariance_matrix_for_posterior_fluxes", @@ -863,11 +874,11 @@ def flush_output_streams(): ), reduced_prior_covariance.reshape( reduced_temporal_correlation_ds.shape[0], - reduced_influences.shape[2], - reduced_influences.shape[3], + REDUCED_NY, + REDUCED_NX, reduced_temporal_correlation_ds.shape[1], - reduced_influences.shape[2], - reduced_influences.shape[3] + REDUCED_NY, + REDUCED_NX, ), dict( long_name="reduced_covariance_matrix_for_prior_fluxes", @@ -882,12 +893,17 @@ def flush_output_streams(): reduced_flux_time_adjoint=( reduced_temporal_correlation_ds .coords["flux_time_adjoint"].values), - wrf_proj=reduced_influences.coords["wrf_proj"] + wrf_proj=INFLUENCE_FUNCTIONS_TO_USE.coords["wrf_proj"] ), posterior_global_atts ) -RED_DIM_Y = reduced_influences.indexes["reduced_dim_y"] +RED_DIM_Y = pd.interval_range( + start=INFLUENCE_FUNCTIONS_TO_USE.indexes["dim_y"][0], + end=INFLUENCE_FUNCTIONS_TO_USE.indexes["dim_y"][-1] + UNCERTAINTY_FLUX_RESOLUTION, + freq=UNCERTAINTY_FLUX_RESOLUTION, + closed="left" +) posterior_covariance_ds.coords["reduced_dim_y"] = RED_DIM_Y.left posterior_covariance_ds.coords["reduced_dim_y_bnds"] = ( ("reduced_dim_y", "bnds2"), @@ -895,19 +911,24 @@ def flush_output_streams(): dict(closed="left") ) posterior_covariance_ds.coords["reduced_dim_y"].attrs.update( - aligned_influences.coords["dim_y"].attrs) + INFLUENCE_FUNCTIONS_TO_USE.coords["dim_y"].attrs) posterior_covariance_ds.coords["reduced_dim_y"].attrs.update( dict(bounds="reduced_dim_y_bnds", adjoint="reduced_dim_y_adjoint")) -RED_DIM_X = reduced_influences.indexes["reduced_dim_x"] +RED_DIM_X = pd.interval_range( + start=INFLUENCE_FUNCTIONS_TO_USE.indexes["dim_x"][0], + end=INFLUENCE_FUNCTIONS_TO_USE.indexes["dim_x"][-1] + UNCERTAINTY_FLUX_RESOLUTION, + freq=UNCERTAINTY_FLUX_RESOLUTION, + closed="left" +) posterior_covariance_ds.coords["reduced_dim_x"] = RED_DIM_X.left posterior_covariance_ds.coords["reduced_dim_x_bnds"] = ( ("reduced_dim_x", "bnds2"), np.vstack([RED_DIM_X.left, RED_DIM_X.right]).T, dict(closed="left")) posterior_covariance_ds.coords["reduced_dim_x"].attrs.update( - aligned_influences.coords["dim_x"].attrs) + INFLUENCE_FUNCTIONS_TO_USE.coords["dim_x"].attrs) posterior_covariance_ds.coords["reduced_dim_x"].attrs.update( dict(bounds="reduced_dim_x_bnds", adjoint="reduced_dim_x")) @@ -920,7 +941,7 @@ def flush_output_streams(): dict(closed="left") ) posterior_covariance_ds.coords["reduced_dim_y_adjoint"].attrs.update( - aligned_influences.coords["dim_y"].attrs) + INFLUENCE_FUNCTIONS_TO_USE.coords["dim_y"].attrs) posterior_covariance_ds.coords["reduced_dim_y_adjoint"].attrs.update( dict(bounds="reduced_dim_y_adjoint_bnds", adjoint="reduced_dim_y")) @@ -931,6 +952,12 @@ def flush_output_streams(): ("reduced_dim_x", "bnds2"), np.vstack([RED_DIM_X.left, RED_DIM_X.right]).T, dict(closed="left")) +posterior_covariance_ds.coords["reduced_dim_x_adjoint"].attrs.update( + INFLUENCE_FUNCTIONS_TO_USE.coords["dim_x"].attrs) +posterior_covariance_ds.coords["reduced_dim_x_adjoint"].attrs.update( + dict(bounds="reduced_dim_x_adjoint_bnds", + adjoint="reduced_dim_x")) + posterior_covariance_ds.attrs.update( dict(title="posterior_flux_uncertainty", summary="Covariance matrices for prior and 
posterior fluxes.", From 97a279a1c1a92d768ee60db6e240c6c862f7226f Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Fri, 8 Mar 2019 11:30:21 -0500 Subject: [PATCH 18/37] Use a lower spatial and higher temporal resolution for uncertainties. Also avoid overwriting old results so I can keep using those if this doesn't work. --- examples/month_inversion_magic_dask.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/month_inversion_magic_dask.py b/examples/month_inversion_magic_dask.py index 15396b7..1f853f0 100644 --- a/examples/month_inversion_magic_dask.py +++ b/examples/month_inversion_magic_dask.py @@ -62,7 +62,7 @@ """ FLUX_RESOLUTION = 27 """Resolution of fluxes and influence functions in kilometers.""" -UNCERTAINTY_RESOLUTION_REDUCTION_FACTOR = 3 +UNCERTAINTY_RESOLUTION_REDUCTION_FACTOR = 4 """How much coarser uncertainty is than mean estimate in the x direction. If we compute the uncertainty at full resolution, the resulting file @@ -75,7 +75,7 @@ UNCERTAINTY_FLUX_RESOLUTION = (UNCERTAINTY_RESOLUTION_REDUCTION_FACTOR * FLUX_RESOLUTION * 1e3) """Resolution of posterior uncertainties in meters.""" -UNCERTAINTY_TEMPORAL_RESOLUTION = "1W" +UNCERTAINTY_TEMPORAL_RESOLUTION = "5D" """The resolution at which the uncertainty is calculated and saved. Higher resolution means the uncertainties will be more accurate. @@ -831,7 +831,7 @@ def flush_output_streams(): ("2010-07_monthly_inversion_{flux_interval:02d}h_027km_" "noise{ncorr_fun:s}{ncorr_len:d}km{ncorr_fun_time:s}{ncorr_len_time:d}d_" "icov{icorr_fun:s}{icorr_len:d}km{icorr_fun_time:s}{icorr_len_time:d}d_" - "output.nc4") + "output_bsr.nc4") .format(flux_interval=FLUX_INTERVAL, ncorr_fun=CORR_FUN, ncorr_len=CORR_LEN, icorr_len=CORRELATION_LENGTH, icorr_fun="exp", icorr_len_time=DAILY_FLUX_TIMESCALE, icorr_fun_time=DAILY_FLUX_FUN, @@ -973,7 +973,7 @@ def flush_output_streams(): ("2010-07_monthly_inversion_{flux_interval:02d}h_027km_" "noise{ncorr_fun:s}{ncorr_len:d}km{ncorr_fun_time:s}{ncorr_len_time:d}d_" "icov{icorr_fun:s}{icorr_len:d}km{icorr_fun_time:s}{icorr_len_time:d}d_" - "covariance_output.nc4") + "covariance_output_bsr.nc4") .format(flux_interval=FLUX_INTERVAL, ncorr_fun=CORR_FUN, ncorr_len=CORR_LEN, icorr_len=CORRELATION_LENGTH, icorr_fun="exp", icorr_len_time=DAILY_FLUX_TIMESCALE, icorr_fun_time=DAILY_FLUX_FUN, From 516bde64db8c038dccc6db8f957ae0d7e0433b82 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Sat, 9 Mar 2019 19:02:21 -0500 Subject: [PATCH 19/37] Add more print statements to try to narrow down where the slowdown occurs. 
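
For reference, every checkpoint in the script follows the same
pattern (a sketch; UTC and flush_output_streams are names the script
already defines):

    print(datetime.datetime.now(UTC).strftime("%c"),
          "Reducing influence function spatial resolution")
    flush_output_streams()

Comparing the timestamps of successive checkpoints in the logs should
isolate the slow step.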
--- examples/month_inversion_magic_dask.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/examples/month_inversion_magic_dask.py b/examples/month_inversion_magic_dask.py
index 1f853f0..b641b97 100644
--- a/examples/month_inversion_magic_dask.py
+++ b/examples/month_inversion_magic_dask.py
@@ -680,7 +680,6 @@ def flush_output_streams():
 flush_output_streams()

 print(datetime.datetime.now(UTC).strftime("%c"), "Have spatial covariances")
-print(reduced_spatial_covariance)
 flush_output_streams()

 ######################################################################
@@ -693,10 +692,21 @@ def flush_output_streams():
 #     ("reduced_dim_y", "reduced_dim_x", "dim_y", "dim_x"),
 #     "spatial_obs_op_remapper_ds",
 # )
+spatial_obs_op_remap_matrix = (
+    spatial_obs_op_remapper.reshape(
+        REDUCED_N_GRID_POINTS, N_GRID_POINTS
+    )
+)
+print(datetime.datetime.now(UTC).strftime("%c"),
+      "Reducing influence function spatial resolution")
+flush_output_streams()
 reduced_influences_data = aligned_influences.data.dot(
-    spatial_obs_op_remapper.reshape(-1, N_GRID_POINTS).T
+    spatial_obs_op_remap_matrix.T
 )

+print(datetime.datetime.now(UTC).strftime("%c"),
+      "Reduced influence function spatial resolution, reducing temporal resolution")
+flush_output_streams()
 reduced_influences = inversion.remapper.remap_bsr_temporal(
     aligned_prior_fluxes.indexes["flux_time"],
     UNCERTAINTY_TEMPORAL_RESOLUTION,
     scipy.sparse.bsr_matrix(
         (
             reduced_influences_data,
             aligned_influences.indices,
             aligned_influences.indptr
         ),
         shape=(
             aligned_influences.shape[0],
             aligned_influences.shape[1] //
             aligned_influences.blocksize[1] *
             reduced_influences_data.shape[-1]
         )
     )
 )
+print(datetime.datetime.now(UTC).strftime("%c"),
+      "Reduced influence function temporal resolution")
+flush_output_streams()
+
 print(datetime.datetime.now(UTC).strftime("%c"),
       "Have influence for monthly average plots")
 flush_output_streams()

From 33b500d130459a53a83b800f95b150e1763f9b56 Mon Sep 17 00:00:00 2001
From: DWesl <22566757+DWesl@users.noreply.github.com>
Date: Sat, 9 Mar 2019 19:03:48 -0500
Subject: [PATCH 20/37] Explain reasoning for ordering decisions in temporal
 remapping code.

Also try to fix issues found therein.  Namely, accessing a
row-major-ish array in column-major order.  Since that was the larger
array, the savings from that should dominate over the loss from
writing to a column-major array in row-major order.

---
 src/inversion/remapper.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/src/inversion/remapper.py b/src/inversion/remapper.py
index 669d2fb..206549f 100644
--- a/src/inversion/remapper.py
+++ b/src/inversion/remapper.py
@@ -106,6 +106,11 @@ def remap_bsr_temporal(flux_time_index, new_freq, matrix_to_remap):
     result = zeros((matrix_to_remap.shape[0],
                     len(resampled),
                     blocksize[1]),
+                   # The reduced observation operator is used in:
+                   # H.T @ vec
+                   # la.solve(mat, H)
+                   # In both cases H being F-contiguous is an
+                   # advantage.
                    order="F",
                    dtype=matrix_to_remap.dtype)

@@ -124,11 +129,14 @@ def remap_bsr_temporal(flux_time_index, new_freq, matrix_to_remap):
         columns_old_to_new.append(
             range(len(flux_time_index)))

-    for j in range(len(resampled)):
-        for block_i in range(result.shape[0] // blocksize[0]):
-            list_of_columns = columns[
-                row_starts[block_i]:row_starts[block_i + 1]
-            ]
+    # The full observation operator, however, would almost certainly
+    # be C-contiguous.  Since that would likely dominate the time,
+    # have the outer loop be over rows here.
+ for block_i in range(result.shape[0] // blocksize[0]): + list_of_columns = columns[ + row_starts[block_i]:row_starts[block_i + 1] + ] + for j in range(len(resampled)): columns_in_block = [ old_j for old_j in list_of_columns if old_j in columns_old_to_new[j] From 03c073f4349cd5a34c06979b5b79a63b22ac74d8 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Sat, 9 Mar 2019 19:06:11 -0500 Subject: [PATCH 21/37] Be more explicit about the shape tests. It turns out the extra tests are unnecessary, but it's better to double-check than not check at all. Prompted by accidentally creating a BSR matrix with shape smaller than blocksize. I should probably report that to scipy. --- src/inversion/tests.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/inversion/tests.py b/src/inversion/tests.py index dcf030f..a9e2d1c 100644 --- a/src/inversion/tests.py +++ b/src/inversion/tests.py @@ -3215,8 +3215,14 @@ def test_align_partial(self): requested_shape ) ) - self.assertSequenceEqual(bigger_aligned_data.shape, - requested_shape) + self.assertSequenceEqual( + bigger_aligned_data.shape, + requested_shape + ) + self.assertSequenceEqual( + bigger_aligned_data.toarray().shape, + requested_shape + ) class TestYMKronBSRQuadraticForm(unittest2.TestCase): From bbae6868aec2955e1a17082045c91b0682647670 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Sat, 9 Mar 2019 19:09:51 -0500 Subject: [PATCH 22/37] Add a few more checks to the code style test run. --- tox.ini | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tox.ini b/tox.ini index bb82aef..f8edbe1 100644 --- a/tox.ini +++ b/tox.ini @@ -47,6 +47,8 @@ deps= flake8-rst-docstrings flake8-bugbear flake8-builtins + flake8-todo + pep8-naming {[testenv]deps} commands= {envpython} setup.py flake8 From 794d3534e0514b69b78627522907ea9d582cc878 Mon Sep 17 00:00:00 2001 From: Daniel Wesloh <22566757+DWesl@users.noreply.github.com> Date: Sat, 9 Mar 2019 19:28:26 -0500 Subject: [PATCH 23/37] Include a description for 'Helpers for real-data inversions'. Just including that title doesn't actually help anyone who doesn't already know what's going on. This isn't a complete description of what they're for and why someone might want to use them, but it's a start. The project really needs a better introduction to how to use the code than "just look at the three thousand lines of tests, each of which does a piece of what you want to do, but without explanation, and the thousand lines of extremely messy code I've been tinkering with for a year and a half through a couple of big changes in how the code works, which occasionally has some explanation that might still be valid." API documentation only really works once you know what's going on. That or make a main script that reads a config file and performs the inversion described. The wrapper code is a first step in that direction, but I don't like the interface terribly much. --- doc/source/inversion.rst | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/doc/source/inversion.rst b/doc/source/inversion.rst index 524c70c..2d727aa 100644 --- a/doc/source/inversion.rst +++ b/doc/source/inversion.rst @@ -19,9 +19,9 @@ Inversion functions All inversion functions have the same signature and give similar answers: the difference is how they get there. PSAS and Variational -methods use iterative solvers. Optimal Interpolation uses a -Gauss-Jordan solver. 
Variational methods use a different but
-equivalent formulation of the problem.
+methods use iterative solvers for linear systems, whereas Optimal
+Interpolation uses Gauss-Jordan elimination.  Variational methods use
+a different but equivalent formulation of the problem.

 .. toctree::
     inversion.optimal_interpolation
@@ -37,6 +37,11 @@ High-level wrappers
 Helpers for real-data inversions
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

+Helper functions to calculate the prior error covariance and
+observation operator at reduced resolution so the inversion routines
+can calculate the posterior error covariance in reasonable time and in
+a reasonable amount of storage.
+
 .. toctree::
     inversion.observation_operator
     inversion.remapper

From 34183fd7be70c25ba55d55a246a079eb3b414680 Mon Sep 17 00:00:00 2001
From: Daniel Wesloh <22566757+DWesl@users.noreply.github.com>
Date: Sat, 9 Mar 2019 19:39:30 -0500
Subject: [PATCH 24/37] Wrap a few overlong lines in the pseudo-data inversion.

---
 examples/month_inversion_magic_dask.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/examples/month_inversion_magic_dask.py b/examples/month_inversion_magic_dask.py
index b641b97..0fb7a74 100644
--- a/examples/month_inversion_magic_dask.py
+++ b/examples/month_inversion_magic_dask.py
@@ -705,7 +705,8 @@ def flush_output_streams():

 print(datetime.datetime.now(UTC).strftime("%c"),
-      "Reduced influence function spatial resolution, reducing temporal resolution")
+      "Reduced influence function spatial resolution, "
+      "reducing temporal resolution")
 flush_output_streams()
 reduced_influences = inversion.remapper.remap_bsr_temporal(
@@ -914,7 +915,8 @@ def flush_output_streams():

 RED_DIM_Y = pd.interval_range(
     start=INFLUENCE_FUNCTIONS_TO_USE.indexes["dim_y"][0],
-    end=INFLUENCE_FUNCTIONS_TO_USE.indexes["dim_y"][-1] + UNCERTAINTY_FLUX_RESOLUTION,
+    end=(INFLUENCE_FUNCTIONS_TO_USE.indexes["dim_y"][-1] +
+         UNCERTAINTY_FLUX_RESOLUTION),
     freq=UNCERTAINTY_FLUX_RESOLUTION,
     closed="left"
 )
@@ -932,7 +934,8 @@ def flush_output_streams():

 RED_DIM_X = pd.interval_range(
     start=INFLUENCE_FUNCTIONS_TO_USE.indexes["dim_x"][0],
-    end=INFLUENCE_FUNCTIONS_TO_USE.indexes["dim_x"][-1] + UNCERTAINTY_FLUX_RESOLUTION,
+    end=(INFLUENCE_FUNCTIONS_TO_USE.indexes["dim_x"][-1] +
+         UNCERTAINTY_FLUX_RESOLUTION),
     freq=UNCERTAINTY_FLUX_RESOLUTION,
     closed="left"
 )

From 455bf644598135e6974e164798c63c0c9d26c3cf Mon Sep 17 00:00:00 2001
From: DWesl <22566757+DWesl@users.noreply.github.com>
Date: Wed, 20 Mar 2019 09:38:17 -0400
Subject: [PATCH 25/37] Use sparse matrices for the influence function
 resolution coarsening.

This is much faster than a dense matrix multiply.
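
A toy comparison of the two approaches (a sketch, not code from this
repository; the sizes and names are made up):

    import numpy as np
    import scipy.sparse

    n_coarse, n_fine = 100, 8100
    # Each fine cell contributes to exactly one coarse cell, so most
    # of the dense remapper is zeros.
    remapper = np.zeros((n_coarse, n_fine))
    remapper[np.arange(n_fine) // (n_fine // n_coarse),
             np.arange(n_fine)] = 1.0
    remapper_csr = scipy.sparse.csr_matrix(remapper)

    influence = np.random.randn(500, n_fine)
    dense_result = influence.dot(remapper.T)
    # The sparse product only touches the stored entries.
    sparse_result = remapper_csr.dot(influence.T).T
    assert np.allclose(dense_result, sparse_result)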
--- examples/month_inversion_magic_dask.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/examples/month_inversion_magic_dask.py b/examples/month_inversion_magic_dask.py index b641b97..e35ef85 100644 --- a/examples/month_inversion_magic_dask.py +++ b/examples/month_inversion_magic_dask.py @@ -692,17 +692,22 @@ def flush_output_streams(): # ("reduced_dim_y", "reduced_dim_x", "dim_y", "dim_x"), # "spatial_obs_op_remapper_ds", # ) -spatial_obs_op_remap_matrix = ( +spatial_obs_op_remap_matrix = scipy.sparse.csr_matrix( spatial_obs_op_remapper.reshape( REDUCED_N_GRID_POINTS, N_GRID_POINTS ) ) +aligned_influence_data = aligned_influences.data.reshape( + aligned_influences.data.shape[0], N_GRID_POINTS +) print(datetime.datetime.now(UTC).strftime("%c"), "Reducing influence function spatial resolution") flush_output_streams() -reduced_influences_data = aligned_influences.data.dot( - spatial_obs_op_remap_matrix.T -) +reduced_influences_data = ( + spatial_obs_op_remap_matrix.dot( + aligned_influence_data.T + ) +).T.reshape(aligned_influences.data.shape[0], 1, REDUCED_N_GRID_POINTS) print(datetime.datetime.now(UTC).strftime("%c"), "Reduced influence function spatial resolution, reducing temporal resolution") @@ -727,6 +732,8 @@ def flush_output_streams(): print(datetime.datetime.now(UTC).strftime("%c"), "Reduced influence function temporal resolution") flush_output_streams() +del spatial_obs_op_remap_matrix, aligned_influence_data +del reduced_influences_data print(datetime.datetime.now(UTC).strftime("%c"), "Have influence for monthly average plots") From 3bb9526e60e41936dd1b61fffa080e7decb63415 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Wed, 20 Mar 2019 13:04:04 -0400 Subject: [PATCH 26/37] Remove several now-unneeded print statements. This should help reduce clutter in the logs. --- examples/month_inversion_magic_dask.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/examples/month_inversion_magic_dask.py b/examples/month_inversion_magic_dask.py index e35ef85..2f3db76 100644 --- a/examples/month_inversion_magic_dask.py +++ b/examples/month_inversion_magic_dask.py @@ -357,7 +357,6 @@ def flush_output_streams(): wrf_times = OBS_DATASET.indexes["forecast_reference_time"].round("S") OBS_DATASET.coords["forecast_reference_time"] = wrf_times -print(OBS_DATASET.dims, OBS_DATASET.coords) OBS_DATASET.coords["site"] = list( map(lambda x: x.decode("ascii"), OBS_DATASET["name_of_observation_site"].values)) @@ -367,7 +366,6 @@ def flush_output_streams(): .rename(dict(dim1="site")) ) del OBS_DATASET.coords["name_of_observation_site"] -print(OBS_DATASET.dims, OBS_DATASET.coords) # Assign a few more coords and pull out only the fluxes we need. 
FLUX_DATASET = FLUX_DATASET.sel(flux_time=FLUX_TIMES_INDEX) N_REALIZATIONS = len(FLUX_DATASET.indexes["realization"]) @@ -406,7 +404,6 @@ def flush_output_streams(): WRF_OBS_MATCHED = WRF_OBS.rename(dict( forecast_reference_time="observation_time")) WRF_OBS_SITE = WRF_OBS_MATCHED -print(WRF_OBS_MATCHED.dims, WRF_OBS_MATCHED.coords) WRF_OBS_START = WRF_OBS_MATCHED.indexes["observation_time"][0] WRF_OBS_INTERVAL = (WRF_OBS_START - @@ -612,7 +609,6 @@ def flush_output_streams(): .mean("flux_time_adjoint") ) print(datetime.datetime.now(UTC).strftime("%c"), "Have temporal correlations") -print(reduced_temporal_correlation_ds.values) flush_output_streams() full_correlations = kronecker_product( From a05d9b1f5a9765d2e97c358f7c6988db3800a7e6 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Wed, 20 Mar 2019 13:04:50 -0400 Subject: [PATCH 27/37] Update the timings in the run script. --- examples/month_inversion_magic_dask.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/examples/month_inversion_magic_dask.py b/examples/month_inversion_magic_dask.py index 2f3db76..ed8cf82 100644 --- a/examples/month_inversion_magic_dask.py +++ b/examples/month_inversion_magic_dask.py @@ -151,6 +151,16 @@ N_REALIZATIONS = 80 # 1 9m55 (80 realizations) 9m46 # 30 2h48m40 (80 realizations) 3h56m41 +## Sparse +# clock user + sys +# 1 4m40 ( 1 realization ) 5m31 +# 2 8m54 ( 1 realization ) 10m42 +# 4 10m07 ( 1 realization ) 13m04 +# 8 22m19 ( 1 realization ) 28m37 +# 16 58m05 ( 1 realization ) 1h13 +# 30 2h43m05 ( 1 realization ) 3h07m12 +# 30 2h40m30 ( 2 realizations) 3h04m18 +# 30 3h20m57 (80 realizations) 3h42m30 """Number of days of obs to use.""" OBS_WINDOW = OBS_DAYS * OBS_TIMES_PER_DAY """Number of observation times.""" From ef3140ba682a5c4701075c78f19d4c66d1b681fe Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Wed, 20 Mar 2019 13:06:06 -0400 Subject: [PATCH 28/37] Wrap some long lines and otherwise make flake8 happy. I apparently forgot to run the style checks on the new tests. Fix problems there and a few other places. --- src/inversion/remapper.py | 1 + src/inversion/tests.py | 37 +++++++++++++++++++++++-------------- src/inversion/wrapper.py | 2 ++ 3 files changed, 26 insertions(+), 14 deletions(-) diff --git a/src/inversion/remapper.py b/src/inversion/remapper.py index 206549f..bf0a544 100644 --- a/src/inversion/remapper.py +++ b/src/inversion/remapper.py @@ -14,6 +14,7 @@ DTYPE = np.float64 + def get_spatial_remappers(domain_size, block_side=3): """Get matrices to remap from original to coarser resolution. diff --git a/src/inversion/tests.py b/src/inversion/tests.py index a9e2d1c..0889da9 100644 --- a/src/inversion/tests.py +++ b/src/inversion/tests.py @@ -2541,20 +2541,24 @@ def test_multi_dim_correlated(self): times_in_second_group = bg.shape[0] - times_in_first_group test_bg = np.arange(bg.size, dtype=DTYPE).reshape(bg.shape) - temp_cov = scipy.linalg.toeplitz((1 - 1./14) ** np.arange(bg.shape[0])) - spatial_cov = (inversion.correlations.HomogeneousIsotropicCorrelation - .from_function(inversion.correlations.ExponentialCorrelation(3.), - bg.shape[1:], - False)) + temp_cov = scipy.linalg.toeplitz( + (1 - 1. 
/ 14) ** np.arange(bg.shape[0])) + spatial_cov = ( + inversion.correlations.HomogeneousIsotropicCorrelation + .from_function(inversion.correlations.ExponentialCorrelation(3.), + bg.shape[1:], + False)) bg_cov = inversion.linalg.DaskKroneckerProductOperator( temp_cov, spatial_cov) - same_tower_corr = scipy.linalg.toeplitz(np.exp(-np.arange(obs.shape[0], dtype=DTYPE))) + same_tower_corr = scipy.linalg.toeplitz( + np.exp(-np.arange(obs.shape[0], dtype=DTYPE))) other_tower_corr = np.zeros_like(same_tower_corr, dtype=DTYPE) - obs_corr = np.block([[same_tower_corr, other_tower_corr, other_tower_corr], - [other_tower_corr, same_tower_corr, other_tower_corr], - [other_tower_corr, other_tower_corr, same_tower_corr]]) + obs_corr = np.block( + [[same_tower_corr, other_tower_corr, other_tower_corr], + [other_tower_corr, same_tower_corr, other_tower_corr], + [other_tower_corr, other_tower_corr, same_tower_corr]]) obs_op = np.zeros(obs.shape + bg.shape, dtype=DTYPE) for i in range(obs.shape[0]): @@ -2566,7 +2570,8 @@ def test_multi_dim_correlated(self): 1. / np.product(bg.shape[1:]), dtype=DTYPE ).reshape(-1) - spatial_cov_reduced = spatial_remapper.dot(spatial_cov.dot(spatial_remapper)) + spatial_cov_reduced = spatial_remapper.dot( + spatial_cov.dot(spatial_remapper)) temp_cov_reduced = np.block( [[temp_cov[:times_in_first_group, :times_in_first_group].mean(), @@ -2582,13 +2587,17 @@ def test_multi_dim_correlated(self): obs_op_part_red[:, :, times_in_first_group:].sum(axis=-1)], axis=2) - fluxes_in_first_group = spatial_remapper.shape[0] * times_in_first_group - fluxes_in_second_group = spatial_remapper.shape[0] * times_in_second_group + fluxes_in_first_group = ( + spatial_remapper.shape[0] * times_in_first_group) + fluxes_in_second_group = ( + spatial_remapper.shape[0] * times_in_second_group) cov_remapper = np.block( - [[np.full(fluxes_in_first_group, 1. / fluxes_in_first_group, dtype=DTYPE), + [[np.full(fluxes_in_first_group, + 1. / fluxes_in_first_group, dtype=DTYPE), np.zeros(fluxes_in_second_group, dtype=DTYPE)], [np.zeros(fluxes_in_first_group, dtype=DTYPE), - np.full(fluxes_in_second_group, 1. / fluxes_in_second_group, dtype=DTYPE)]] + np.full(fluxes_in_second_group, + 1. / fluxes_in_second_group, dtype=DTYPE)]] ) np_tst.assert_allclose(cov_remapper.dot(bg_cov.dot(cov_remapper.T)), bg_cov_red) diff --git a/src/inversion/wrapper.py b/src/inversion/wrapper.py index 4580db8..db7cd58 100644 --- a/src/inversion/wrapper.py +++ b/src/inversion/wrapper.py @@ -361,6 +361,8 @@ def get_installed_modules(): python_versions = [pair[1] for pair in package_info if pair[0] == "python"] + # Only return information for the currently-running version of + # python. if ((LooseVersion(python_versions[0]) == LooseVersion(sys.version.split()[0]))): _PACKAGE_INFO = package_info From 127293e1226d73347da183838444650686b2f15c Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Wed, 20 Mar 2019 17:30:58 -0400 Subject: [PATCH 29/37] Check that the returned matrix stores flux times in increasing order. 
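
The new assertion only compares the first two flux times; an
equivalent whole-index check would be (a sketch, not part of this
patch):

    flux_index = xarray_aligned_ds.indexes["flux_time"]
    assert flux_index.is_monotonic_increasing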
--- src/inversion/tests.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/inversion/tests.py b/src/inversion/tests.py index a7d8a09..647b908 100644 --- a/src/inversion/tests.py +++ b/src/inversion/tests.py @@ -3220,6 +3220,8 @@ def test_align_full(self): fluxes=["flux_time", "dim_y", "dim_x"] ).transpose("observation", "fluxes").values ) + self.assertLess(xarray_aligned_ds.coords["flux_time"][0], + xarray_aligned_ds.coords["flux_time"][1]) def test_align_partial(self): """Test aligning the full influence function. @@ -3297,6 +3299,8 @@ def test_align_partial(self): fluxes=["flux_time", "dim_y", "dim_x"] ).values ) + self.assertLess(xarray_aligned_ds.coords["flux_time"][0], + xarray_aligned_ds.coords["flux_time"][1]) requested_shape = ( aligned_data.shape[0], From 92479584f20bfc4e346453f97a4580634bf02a5d Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Fri, 22 Mar 2019 19:12:40 -0400 Subject: [PATCH 30/37] Raise an error on unsupported inputs to cholesky. Cholesky doesn't understand linear operators. --- src/inversion/optimal_interpolation.py | 3 +-- src/inversion/tests.py | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/src/inversion/optimal_interpolation.py b/src/inversion/optimal_interpolation.py index 3757735..469f2f0 100644 --- a/src/inversion/optimal_interpolation.py +++ b/src/inversion/optimal_interpolation.py @@ -484,8 +484,7 @@ def scipy_chol(background, background_covariance, B_HT) if isinstance(observation_covariance, LinearOperator): - projected_background_covariance = tolinearoperator( - projected_background_covariance) + raise TypeError("Observation covariance must be array") covariance_sum = projected_background_covariance + observation_covariance cov_sum_chol_up = scipy.linalg.cho_factor(covariance_sum, overwrite_a=True) del covariance_sum diff --git a/src/inversion/tests.py b/src/inversion/tests.py index 647b908..4b638d8 100644 --- a/src/inversion/tests.py +++ b/src/inversion/tests.py @@ -437,6 +437,29 @@ def test_bsr_ymkron_handling(self): method(background, bg_corr, obs, obs_cov, obs_op) + def test_scipy_chol_fails(self): + """Test that trying cholesky on linear operators fails nicely.""" + bg = np.zeros((10,), dtype=DTYPE) + obs = np.ones((5,), dtype=DTYPE) + + bg_cov = np.eye(10, dtype=DTYPE) + obs_cov = inversion.linalg.DiagonalOperator( + np.ones((5,), dtype=DTYPE)) + + obs_op = np.eye(5, 10, dtype=DTYPE) + + for method in ALL_METHODS: + name = getname(method) + if "chol" not in name.lower(): + continue + + with self.subTest(method=name): + with self.assertRaises( + TypeError, + msg="Observation covariance must be array" + ): + mean, cov = method(bg, bg_cov, obs, obs_cov, obs_op) + class TestGaussianNoise(unittest2.TestCase): """Test the properties of the gaussian noise.""" From 6bb0fe619b43dd76e9fd5a5c9d08fcb800fe6479 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Fri, 22 Mar 2019 19:14:26 -0400 Subject: [PATCH 31/37] Store the variational descent methods in constants. Clean up the code a bit, so future changes only have to touch the one area. 
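
The distinction matters because only some scipy optimizers provide an
inverse-Hessian estimate to reuse as a posterior covariance.  A
minimal illustration (a sketch, unrelated to this package's cost
functions):

    import numpy as np
    import scipy.optimize

    result = scipy.optimize.minimize(
        lambda x: (x ** 2).sum(), np.ones(3), method="BFGS")
    # BFGS-family methods expose result.hess_inv; Newton-CG does not,
    # which is fine when only the mean state is needed.
    covariance_estimate = result.hess_inv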
--- src/inversion/variational.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/inversion/variational.py b/src/inversion/variational.py index c19cd9d..fb814fb 100644 --- a/src/inversion/variational.py +++ b/src/inversion/variational.py @@ -20,6 +20,12 @@ from inversion import ConvergenceError, MAX_ITERATIONS, GRAD_TOL from inversion.util import solve, method_common +# Scipy optimizers to use when requesting: +# A full covariance matrix +FULL_COVARIANCE = "BFGS" +# No covariance matrix at all +NO_COVARIANCE = "Newton-CG" + @method_common def simple(background, background_covariance, @@ -160,9 +166,9 @@ def cost_jacobian(test_state): # return bg_prod + obs_prod if reduced_background_covariance is None: - method = "BFGS" + method = FULL_COVARIANCE else: - method = "Newton-CG" + method = NO_COVARIANCE result = scipy.optimize.minimize( cost_function, background, @@ -323,9 +329,9 @@ def cost_jacobian(test_change): # return bg_prod + obs_prod if reduced_background_covariance is None: - method = "BFGS" + method = FULL_COVARIANCE else: - method = "Newton-CG" + method = NO_COVARIANCE result = scipy.optimize.minimize( cost_function, asarray(zeros_like(background)), @@ -497,9 +503,9 @@ def cost_jacobian(test_change): # return bg_prod + obs_prod if reduced_background_covariance is None: - method = "BFGS" + method = FULL_COVARIANCE else: - method = "Newton-CG" + method = NO_COVARIANCE result = scipy.optimize.minimize( cost_function, asarray(zeros_like(background)), From ca58b84cf767c1ab5954856397de2385bba052b2 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Fri, 22 Mar 2019 19:15:18 -0400 Subject: [PATCH 32/37] Test the output with a BSR matrix against that with a dense matrix. This currently fails. No idea why, especially for the iterative methods. --- src/inversion/tests.py | 46 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/src/inversion/tests.py b/src/inversion/tests.py index 4b638d8..9acb8d5 100644 --- a/src/inversion/tests.py +++ b/src/inversion/tests.py @@ -426,6 +426,8 @@ def test_bsr_ymkron_handling(self): obs = np.ones(obs_op.shape[0], dtype=DTYPE) obs_cov = np.eye(obs_op.shape[0]) + obs_op_matrix = obs_op.toarray() + for method in ALL_METHODS: name = getname(method) @@ -434,8 +436,48 @@ def test_bsr_ymkron_handling(self): raise unittest2.SkipTest( "Known Failure: " "Cholesky factorization of linear operators") - method(background, bg_corr, - obs, obs_cov, obs_op) + bsr_mean, bsr_cov = method( + background, bg_corr, + obs, obs_cov, obs_op) + + # Test against the dense matrix + # There should be no changes whatsoever + # This includes a bunch of 0*something in a sum + # The one above does not. + expected_mean, expected_cov = method( + background, bg_corr, + obs, obs_cov, obs_op_matrix) + + # Set a slightly higher tolerance for variational + # methods. As near as I can tell, H is only used in + # products with explicit vectors in var methods, so + # the difference would be from adding a bunch of + # zeros. 
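+                # (Floating-point addition is not associative, so
+                # summing the extra explicit zeros can change the
+                # last few bits of the result.)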
+ if "var" in name.lower(): + atol = 7e-8 + + self.assertIsInstance( + bsr_cov, + np.ndarray + ) + else: + self.assertIsInstance( + bsr_cov, + scipy.sparse.linalg.LinearOperator + ) + + if "psas" in name.lower(): + atol = 1.7e-7 + else: + atol = 0 + + np_tst.assert_allclose(bsr_mean, expected_mean, + atol=atol) + np_tst.assert_allclose( + bsr_cov.dot(np.eye(bsr_mean.shape[0])), + expected_cov, + atol=atol + ) def test_scipy_chol_fails(self): """Test that trying cholesky on linear operators fails nicely.""" From 19799225d6f8c6839b44bcdd241f2a16b68ab14c Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Mon, 25 Mar 2019 09:40:50 -0400 Subject: [PATCH 33/37] Decrease default planner effort to speed tests. The tests take over ten minutes each on Travis. They should take maybe five, even given extraneous load on the machines. --- src/inversion/correlations.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/inversion/correlations.py b/src/inversion/correlations.py index eb13adc..ee39e00 100644 --- a/src/inversion/correlations.py +++ b/src/inversion/correlations.py @@ -34,8 +34,23 @@ NUM_THREADS = 8 """Number of threads :mod:`pyfftw` should use for each transform.""" +ADVANCE_PLANNER_EFFORT = "FFTW_MEASURE" +"""Amount of advance planning FFTW should do. + +This is done when the :class:`HomogeneousIsotropicCorrelation` +instance is created. It is set to "FFTW_MEASURE" to keep test runs +fast, but applications should probably set this to do `a more +extensive exploration of the possibilities +`_. +""" PLANNER_EFFORT = "FFTW_ESTIMATE" -ADVANCE_PLANNER_EFFORT = "FFTW_EXHAUSTIVE" +"""How much effort to expend planning for subsequent inversions. + +Used by :meth:`HomogeneousIsotropicCorrelation.matmat`. This is used +more frequently than :data:`ADVANCE_PLANNER_EFFORT` and should +probably stay set to "FFTW_ESTIMATE". Increasing this can increase +application runtime by a factor of two. +""" pyfftw.interfaces.cache.enable() ROUNDOFF = 1e-13 From 766938fe365a5af06d89d049ee74b007763f8806 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Thu, 4 Apr 2019 09:12:40 -0400 Subject: [PATCH 34/37] Stop reversing the shape of a square matrix. --- src/inversion/linalg.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/inversion/linalg.py b/src/inversion/linalg.py index 67c5356..5733468 100644 --- a/src/inversion/linalg.py +++ b/src/inversion/linalg.py @@ -98,7 +98,6 @@ def solve(arr1, arr2): if `arr1` is not square """ if arr1.shape[0] != arr2.shape[0]: - print(arr1.shape[1], arr2.shape[0]) raise ValueError("Dimension mismatch") if arr1.shape[0] != arr1.shape[1]: raise LinAlgError("arr1 is not square") @@ -121,7 +120,7 @@ def solver(vec): The solution of the linear equation """ return solve(arr1, vec) - inverse = DaskLinearOperator(matvec=solver, shape=arr1.shape[::-1]) + inverse = DaskLinearOperator(matvec=solver, shape=arr1.shape) return inverse.dot(arr2) # arr2 is an array From af312fa97efb2387d7050c9d5c0b57800c5446e5 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Thu, 4 Apr 2019 09:18:41 -0400 Subject: [PATCH 35/37] Test a few more postconditions of the covariance. I should be able to do things with the returned covariance. This should also work if the thing inside the MatrixLinearOperator is a sparse matrix. Trying to narrow down the unrelated failures from the BSR obs op tests. 
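
The postcondition exercised here is just that whatever type the
inversion returns for the covariance must at least act on a vector (a
sketch of the intent):

    probe = np.zeros(post_cov.shape[0])
    post_cov.dot(probe)  # works for ndarray and LinearOperator alike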
--- src/inversion/tests.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/inversion/tests.py b/src/inversion/tests.py index 9acb8d5..f7672f2 100644 --- a/src/inversion/tests.py +++ b/src/inversion/tests.py @@ -1890,6 +1890,7 @@ def setUp(self): self.bg_corr = (bg_corr, bg_corr.dot(np.eye(*bg_corr.shape))) self.obs_corr = (obs_corr, obs_corr.dot(np.eye(*obs_corr.shape))) self.obs_op = (inversion.linalg.tolinearoperator(obs_op.toarray()), + inversion.linalg.tolinearoperator(obs_op) # Dask requires subscripting; diagonal sparse # matrices don't do this. obs_op.toarray()) @@ -1910,6 +1911,7 @@ def test_combinations_produce_answer(self): self.bg_vals, bg_corr, self.obs_vals, obs_corr, obs_op) + post_cov.dot(np.zeros(post_cov.shape[0])) with self.subTest(method=getname(inversion_method), bg_corr=getname(type(bg_corr)), obs_corr=getname(type(obs_corr)), @@ -1919,6 +1921,7 @@ def test_combinations_produce_answer(self): self.bg_vals, bg_corr, self.obs_vals, obs_corr, obs_op, bg_corr, obs_op) + post_cov.dot(np.zeros(post_cov.shape[0])) class TestKroneckerQuadraticForm(unittest2.TestCase): From a8dc1c68f8b5d8220b97c13eedf5fd737516f030 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Mon, 8 Apr 2019 09:39:46 -0400 Subject: [PATCH 36/37] Include a transpose for a _SumLinearOperator. --- src/inversion/linalg_interface.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/inversion/linalg_interface.py b/src/inversion/linalg_interface.py index 7f683fa..fd4a67b 100644 --- a/src/inversion/linalg_interface.py +++ b/src/inversion/linalg_interface.py @@ -393,7 +393,8 @@ def _adjoint(self): class _DaskSumLinearOperator(_SumLinearOperator, DaskLinearOperator): """Sum of two DaskLinearOperators.""" - pass + def _transpose(self): + return _DaskSumLinearOperator(self.args[1].T, self.args[0].T) class _DaskScaledLinearOperator(_ScaledLinearOperator, DaskLinearOperator): From a4b58009a7c6e65b5d2ef64e03ff2fc7f0933910 Mon Sep 17 00:00:00 2001 From: DWesl <22566757+DWesl@users.noreply.github.com> Date: Mon, 8 Apr 2019 09:42:25 -0400 Subject: [PATCH 37/37] Fix a syntax error in the tests. --- src/inversion/tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/inversion/tests.py b/src/inversion/tests.py index f7672f2..d8a8758 100644 --- a/src/inversion/tests.py +++ b/src/inversion/tests.py @@ -1890,7 +1890,7 @@ def setUp(self): self.bg_corr = (bg_corr, bg_corr.dot(np.eye(*bg_corr.shape))) self.obs_corr = (obs_corr, obs_corr.dot(np.eye(*obs_corr.shape))) self.obs_op = (inversion.linalg.tolinearoperator(obs_op.toarray()), - inversion.linalg.tolinearoperator(obs_op) + inversion.linalg.tolinearoperator(obs_op), # Dask requires subscripting; diagonal sparse # matrices don't do this. obs_op.toarray())