From 6735f87c9935b3f832d7f13527a71832de3f04b4 Mon Sep 17 00:00:00 2001 From: lucas-wilkins Date: Thu, 10 Oct 2024 13:04:08 +0100 Subject: [PATCH 1/8] Added matrix operations, needs tests --- sasdata/quantities/operations.py | 112 ++++++++++++++++++++++++++++++- 1 file changed, 111 insertions(+), 1 deletion(-) diff --git a/sasdata/quantities/operations.py b/sasdata/quantities/operations.py index 724e55d..67cb636 100644 --- a/sasdata/quantities/operations.py +++ b/sasdata/quantities/operations.py @@ -702,9 +702,119 @@ def __eq__(self, other): if isinstance(other, Pow): return self.a == other.a and self.power == other.power + + +# +# Matrix operations +# + +class Transpose(UnaryOperation): + """ Transpose operation - as per numpy""" + + serialisation_name = "transpose" + + def evaluate(self, variables: dict[int, T]) -> T: + return np.transpose(self.a.evaluate(variables)) + + def _derivative(self, hash_value: int) -> Operation: + return Transpose(self.a.derivative(hash_value)) # TODO: Check! + + def _clean(self): + clean_a = self.a._clean() + return Transpose(clean_a) + + @staticmethod + def _deserialise(parameters: dict) -> "Operation": + return Transpose(Operation.deserialise_json(parameters["a"])) + + def _summary_open(self): + return "Transpose" + + def __eq__(self, other): + if isinstance(other, Transpose): + return other.a == self.a + + +class Dot(BinaryOperation): + """ Dot product - backed by numpy's dot method""" + + serialisation_name = "dot" + + def _self_cls(self) -> type: + return Dot + + def evaluate(self, variables: dict[int, T]) -> T: + return np.dot(self.a.evaluate(variables) + self.b.evaluate(variables)) + + def _derivative(self, hash_value: int) -> Operation: + return Add( + Dot(self.a, + self.b._derivative(hash_value)), + Dot(self.a._derivative(hash_value), + self.b)) + + def _clean_ab(self, a, b): + return Dot(a, b) # Do nothing for now + + @staticmethod + def _deserialise(parameters: dict) -> "Operation": + return Dot(*BinaryOperation._deserialise_ab(parameters)) + + def _summary_open(self): + return "Dot" + + +# TODO: Add to base operation class, and to quantities +class MatMul(BinaryOperation): + """ Matrix multiplication, using __matmul__ dunder""" + + serialisation_name = "matmul" + + def _self_cls(self) -> type: + return MatMul + + def evaluate(self, variables: dict[int, T]) -> T: + return self.a.evaluate(variables) @ self.b.evaluate(variables) + + def _derivative(self, hash_value: int) -> Operation: + return Add( + MatMul(self.a, + self.b._derivative(hash_value)), + MatMul(self.a._derivative(hash_value), + self.b)) + + def _clean_ab(self, a, b): + + if isinstance(a, AdditiveIdentity) or isinstance(b, AdditiveIdentity): + # Convert 0*b or a*0 to 0 + return AdditiveIdentity() + + elif isinstance(a, ConstantBase) and isinstance(b, ConstantBase): + # Convert constant "a"@"b" to "a@b" + return Constant(a.evaluate({}) @ b.evaluate({}))._clean() + + elif isinstance(a, Neg): + return Neg(Mul(a.a, b)) + + elif isinstance(b, Neg): + return Neg(Mul(a, b.a)) + + return MatMul(a, b) + + + @staticmethod + def _deserialise(parameters: dict) -> "Operation": + return MatMul(*BinaryOperation._deserialise_ab(parameters)) + + def _summary_open(self): + return "MatMul" + + + _serialisable_classes = [AdditiveIdentity, MultiplicativeIdentity, Constant, Variable, Neg, Inv, - Add, Sub, Mul, Div, Pow] + Add, Sub, Mul, Div, Pow, + Transpose, Dot, MatMul] _serialisation_lookup = {cls.serialisation_name: cls for cls in _serialisable_classes} From 96f9b86fd7e8b0b570689abebea71de35d93bd26 Mon Sep 17 00:00:00 2001 From: lucas-wilkins Date: Thu, 10 Oct 2024 13:05:22 +0100 Subject: [PATCH 2/8] Numpy import --- sasdata/quantities/operations.py | 1 + sasdata/quantities/quantity.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/sasdata/quantities/operations.py b/sasdata/quantities/operations.py index 67cb636..35f6fc7 100644 --- a/sasdata/quantities/operations.py +++ b/sasdata/quantities/operations.py @@ -1,4 +1,5 @@ from typing import Any, TypeVar, Union +import numpy as np import json diff --git a/sasdata/quantities/quantity.py b/sasdata/quantities/quantity.py index df70830..149beb6 100644 --- a/sasdata/quantities/quantity.py +++ b/sasdata/quantities/quantity.py @@ -233,6 +233,9 @@ def __rmul__(self: Self, other: ArrayLike | Self): self.history.operation_tree), self.history.references)) + + + def __truediv__(self: Self, other: float | Self) -> Self: if isinstance(other, Quantity): return DerivedQuantity( From 9fca154afaee7c7ae0a702e963d5ed96663d356a Mon Sep 17 00:00:00 2001 From: lucas-wilkins Date: Thu, 10 Oct 2024 13:12:27 +0100 Subject: [PATCH 3/8] Added __matmul__ and __rmatmul__ to quantities --- sasdata/quantities/quantity.py | 36 ++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/sasdata/quantities/quantity.py b/sasdata/quantities/quantity.py index 149beb6..cb1194d 100644 --- a/sasdata/quantities/quantity.py +++ b/sasdata/quantities/quantity.py @@ -234,6 +234,42 @@ def __rmul__(self: Self, other: ArrayLike | Self): self.history.references)) + def __matmul__(self, other: ArrayLike | Self): + if isinstance(other, Quantity): + return DerivedQuantity( + self.value @ other.value, + self.units * other.units, + history=QuantityHistory.apply_operation( + operations.MatMul, + self.history, + other.history)) + else: + return DerivedQuantity( + self.value @ other, + self.units, + QuantityHistory( + operations.MatMul( + self.history.operation_tree, + operations.Constant(other)), + self.history.references)) + + def __rmatmul__(self, other: ArrayLike | Self): + if isinstance(other, Quantity): + return DerivedQuantity( + other.value @ self.value, + other.units * self.units, + history=QuantityHistory.apply_operation( + operations.MatMul, + other.history, + self.history)) + + else: + return DerivedQuantity(other @ self.value, self.units, + QuantityHistory( + operations.MatMul( + operations.Constant(other), + self.history.operation_tree), + self.history.references)) def __truediv__(self: Self, other: float | Self) -> Self: From 9061715e600d51f8192cb47e8a43b77433d40816 Mon Sep 17 00:00:00 2001 From: lucas-wilkins Date: Thu, 10 Oct 2024 14:36:27 +0100 Subject: [PATCH 4/8] Entrypoint for rebinning --- sasdata/model_requirements.py | 2 +- sasdata/transforms/operation.py | 19 ------------------- sasdata/transforms/post_process.py | 0 sasdata/transforms/rebinning.py | 9 +++++++++ 4 files changed, 10 insertions(+), 20 deletions(-) delete mode 100644 sasdata/transforms/operation.py create mode 100644 sasdata/transforms/post_process.py create mode 100644 sasdata/transforms/rebinning.py diff --git a/sasdata/model_requirements.py b/sasdata/model_requirements.py index f186d2d..d043b2c 100644 --- a/sasdata/model_requirements.py +++ b/sasdata/model_requirements.py @@ -3,7 +3,7 @@ import numpy as np from sasdata.metadata import Metadata -from transforms.operation import Operation +from sasdata.quantities.operations import Operation @dataclass diff --git a/sasdata/transforms/operation.py b/sasdata/transforms/operation.py deleted file mode 100644 index 5912188..0000000 --- a/sasdata/transforms/operation.py +++ /dev/null @@ -1,19 +0,0 @@ -import numpy as np -from sasdata.quantities.quantity import Quantity - -class Operation: - """ Sketch of what model post-processing classes might look like """ - - children: list["Operation"] - named_children: dict[str, "Operation"] - - @property - def name(self) -> str: - raise NotImplementedError("No name for transform") - - def evaluate(self) -> Quantity[np.ndarray]: - pass - - def __call__(self, *children, **named_children): - self.children = children - self.named_children = named_children \ No newline at end of file diff --git a/sasdata/transforms/post_process.py b/sasdata/transforms/post_process.py new file mode 100644 index 0000000..e69de29 diff --git a/sasdata/transforms/rebinning.py b/sasdata/transforms/rebinning.py new file mode 100644 index 0000000..58d1912 --- /dev/null +++ b/sasdata/transforms/rebinning.py @@ -0,0 +1,9 @@ +""" Algorithms for interpolation and rebinning """ +from typing import TypeVar + +from numpy._typing import ArrayLike + +from sasdata.quantities.quantity import Quantity + +def rebin(data: Quantity[ArrayLike], axes: list[Quantity[ArrayLike]], new_axes: list[Quantity[ArrayLike]], interpolation_order=1): + pass \ No newline at end of file From 5b3561451538592b235e4335a81b48c952aad861 Mon Sep 17 00:00:00 2001 From: lucas-wilkins Date: Thu, 10 Oct 2024 14:42:42 +0100 Subject: [PATCH 5/8] Some better commenting on Quantity --- sasdata/quantities/quantity.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sasdata/quantities/quantity.py b/sasdata/quantities/quantity.py index cb1194d..6ee80d8 100644 --- a/sasdata/quantities/quantity.py +++ b/sasdata/quantities/quantity.py @@ -132,10 +132,10 @@ def __init__(self, self.hash_value = -1 """ Hash based on value and uncertainty for data, -1 if it is a derived hash value """ - """ Contains the variance if it is data driven, else it is """ + self._variance = None + """ Contains the variance if it is data driven """ if standard_error is None: - self._variance = None self.hash_value = hash_data_via_numpy(hash_seed, value) else: self._variance = standard_error ** 2 From 090808e8e89e1bfba53ee6aeb3d77101674bfe47 Mon Sep 17 00:00:00 2001 From: lucas-wilkins Date: Thu, 10 Oct 2024 18:10:36 +0100 Subject: [PATCH 6/8] Work towards rebinning methods --- sasdata/data.py | 21 ++++++----- sasdata/transforms/rebinning.py | 66 ++++++++++++++++++++++++++++++++- 2 files changed, 77 insertions(+), 10 deletions(-) diff --git a/sasdata/data.py b/sasdata/data.py index 7f0cbfb..544ba27 100644 --- a/sasdata/data.py +++ b/sasdata/data.py @@ -2,6 +2,8 @@ from typing import TypeVar, Any, Self from dataclasses import dataclass +import numpy as np + from quantities.quantity import NamedQuantity from sasdata.metadata import Metadata from sasdata.quantities.accessors import AccessorTarget @@ -9,7 +11,11 @@ class SasData: - def __init__(self, name: str, data_contents: list[NamedQuantity], raw_metadata: Group, verbose: bool=False): + def __init__(self, name: str, + data_contents: list[NamedQuantity], + raw_metadata: Group, + verbose: bool=False): + self.name = name self._data_contents = data_contents self._raw_metadata = raw_metadata @@ -17,14 +23,11 @@ def __init__(self, name: str, data_contents: list[NamedQuantity], raw_metadata: self.metadata = Metadata(AccessorTarget(raw_metadata, verbose=verbose)) - # TO IMPLEMENT - - # abscissae: list[NamedQuantity[np.ndarray]] - # ordinate: NamedQuantity[np.ndarray] - # other: list[NamedQuantity[np.ndarray]] - # - # metadata: Metadata - # model_requirements: ModellingRequirements + # Components that need to be organised after creation + self.ordinate: NamedQuantity[np.ndarray] = None # TODO: fill out + self.abscissae: list[NamedQuantity[np.ndarray]] = None # TODO: fill out + self.mask = None # TODO: fill out + self.model_requirements = None # TODO: fill out def summary(self, indent = " ", include_raw=False): s = f"{self.name}\n" diff --git a/sasdata/transforms/rebinning.py b/sasdata/transforms/rebinning.py index 58d1912..75f5376 100644 --- a/sasdata/transforms/rebinning.py +++ b/sasdata/transforms/rebinning.py @@ -1,9 +1,73 @@ """ Algorithms for interpolation and rebinning """ from typing import TypeVar +import numpy as np from numpy._typing import ArrayLike from sasdata.quantities.quantity import Quantity +from scipy.sparse import coo_matrix + +from enum import Enum + +class InterpolationOptions(Enum): + NEAREST_NEIGHBOUR = 0 + LINEAR = 1 + + + +def calculate_interpolation_matrix_1d(input_axis: Quantity[ArrayLike], + output_axis: Quantity[ArrayLike], + mask: ArrayLike | None = None, + order: InterpolationOptions = InterpolationOptions.NEAREST_NEIGHBOUR, + is_density=False): + + # We want the input values in terms of the output units, will implicitly check compatability + + working_units = output_axis.units + + input_x = input_axis.in_units_of(working_units) + output_x = output_axis.in_units_of(working_units) + + # Get the array indices that will map the array to a sorted one + input_sort = np.argsort(input_x) + output_sort = np.argsort(output_x) + + output_unsort = np.arange(len(input_x), dtype=int)[output_sort] + sorted_in = input_x[input_sort] + sorted_out = output_x[output_sort] + + match order: + case InterpolationOptions.NEAREST_NEIGHBOUR: + + # COO Sparse matrix definition data + values = [] + j_entries = [] + i_entries = [] + + # Find the output values nearest to each of the input values + for x_in in sorted_in: + + + case _: + raise ValueError(f"Unsupported interpolation order: {order}") + +def calculate_interpolation_matrix(input_axes: list[Quantity[ArrayLike]], + output_axes: list[Quantity[ArrayLike]], + data: ArrayLike | None = None, + mask: ArrayLike | None = None): + + pass + + + +def rebin(data: Quantity[ArrayLike], + axes: list[Quantity[ArrayLike]], + new_axes: list[Quantity[ArrayLike]], + mask: ArrayLike | None = None, + interpolation_order: int = 1): + + """ This algorithm is only for operations that preserve dimensionality, + i.e. non-projective rebinning. + """ -def rebin(data: Quantity[ArrayLike], axes: list[Quantity[ArrayLike]], new_axes: list[Quantity[ArrayLike]], interpolation_order=1): pass \ No newline at end of file From 8b95885f08e3e1117a0c0f7c9cef82fbce542632 Mon Sep 17 00:00:00 2001 From: lucas-wilkins Date: Fri, 11 Oct 2024 11:27:53 +0100 Subject: [PATCH 7/8] Zeroth order rebinning sketch --- sasdata/transforms/rebinning.py | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/sasdata/transforms/rebinning.py b/sasdata/transforms/rebinning.py index 75f5376..c490f82 100644 --- a/sasdata/transforms/rebinning.py +++ b/sasdata/transforms/rebinning.py @@ -32,7 +32,9 @@ def calculate_interpolation_matrix_1d(input_axis: Quantity[ArrayLike], input_sort = np.argsort(input_x) output_sort = np.argsort(output_x) + input_unsort = np.arange(len(output_x), dtype=int)[input_sort] output_unsort = np.arange(len(input_x), dtype=int)[output_sort] + sorted_in = input_x[input_sort] sorted_out = output_x[output_sort] @@ -40,13 +42,33 @@ def calculate_interpolation_matrix_1d(input_axis: Quantity[ArrayLike], case InterpolationOptions.NEAREST_NEIGHBOUR: # COO Sparse matrix definition data - values = [] - j_entries = [] i_entries = [] + j_entries = [] + + crossing_points = 0.5*(sorted_out[1:] + sorted_out[:-1]) # Find the output values nearest to each of the input values - for x_in in sorted_in: - + n_i = len(sorted_in) + n_j = len(sorted_out) + i=0 + for k, crossing_point in enumerate(crossing_points): + while i < n_i and sorted_in[i] < crossing_point: + i_entries.append(i) + j_entries.append(k) + i += 1 + + # All the rest in the last bin + while i < n_i: + i_entries.append(i) + j_entries.append(n_j-1) + i += 1 + + i_entries = input_unsort[np.array(i_entries, dtype=int)] + j_entries = output_unsort[np.array(j_entries, dtype=int)] + values = np.ones_like(i_entries, dtype=float) + + return coo_matrix((values, (i_entries, j_entries)), shape=(n_i, n_j)) + case _: raise ValueError(f"Unsupported interpolation order: {order}") From a1db35f71d18563cf668c6881bfdae1ff8dc04c3 Mon Sep 17 00:00:00 2001 From: lucas-wilkins Date: Fri, 11 Oct 2024 12:08:04 +0100 Subject: [PATCH 8/8] First order rebinning --- sasdata/transforms/rebinning.py | 59 +++++++++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 6 deletions(-) diff --git a/sasdata/transforms/rebinning.py b/sasdata/transforms/rebinning.py index c490f82..cd05cfc 100644 --- a/sasdata/transforms/rebinning.py +++ b/sasdata/transforms/rebinning.py @@ -3,6 +3,7 @@ import numpy as np from numpy._typing import ArrayLike +from scipy.interpolate import interp1d from sasdata.quantities.quantity import Quantity from scipy.sparse import coo_matrix @@ -38,6 +39,11 @@ def calculate_interpolation_matrix_1d(input_axis: Quantity[ArrayLike], sorted_in = input_x[input_sort] sorted_out = output_x[output_sort] + n_in = len(sorted_in) + n_out = len(sorted_out) + + conversion_matrix = None # output + match order: case InterpolationOptions.NEAREST_NEIGHBOUR: @@ -48,31 +54,72 @@ def calculate_interpolation_matrix_1d(input_axis: Quantity[ArrayLike], crossing_points = 0.5*(sorted_out[1:] + sorted_out[:-1]) # Find the output values nearest to each of the input values - n_i = len(sorted_in) - n_j = len(sorted_out) i=0 for k, crossing_point in enumerate(crossing_points): - while i < n_i and sorted_in[i] < crossing_point: + while i < n_in and sorted_in[i] < crossing_point: i_entries.append(i) j_entries.append(k) i += 1 # All the rest in the last bin - while i < n_i: + while i < n_in: i_entries.append(i) - j_entries.append(n_j-1) + j_entries.append(n_out-1) i += 1 i_entries = input_unsort[np.array(i_entries, dtype=int)] j_entries = output_unsort[np.array(j_entries, dtype=int)] values = np.ones_like(i_entries, dtype=float) - return coo_matrix((values, (i_entries, j_entries)), shape=(n_i, n_j)) + conversion_matrix = coo_matrix((values, (i_entries, j_entries)), shape=(n_in, n_out)) + + case InterpolationOptions.LINEAR: + + # Leverage existing linear interpolation methods to get the mapping + # do a linear interpolation on indices + # the floor should give the left bin + # the ceil should give the right bin + # the fractional part should give the relative weightings + + input_indices = np.arange(n_in, dtype=int) + output_indices = np.arange(n_out, dtype=int) + + fractional = np.interp(x=sorted_out, xp=sorted_in, fp=input_indices, left=0, right=n_in-1) + + left_bins = np.floor(fractional, dtype=int) + right_bins = np.ceil(fractional, dtype=int) + + right_weight = fractional % 1 + left_weight = 1 - right_weight + + # There *should* be no repeated entries for both i and j in the main part, but maybe at the ends + # If left bin is the same as right bin, then we only want one entry, and the weight should be 1 + same = left_bins == right_bins + not_same = ~same + + same_bins = left_bins[same] # could equally be right bins, they're the same + + same_indices = output_indices[same] + not_same_indices = output_indices[not_same] + + j_entries_sorted = np.concatenate((same_indices, not_same_indices, not_same_indices)) + i_entries_sorted = np.concatenate((same_bins, left_bins[not_same], right_bins[not_same])) + + i_entries = input_unsort[i_entries_sorted] + j_entries = output_unsort[j_entries_sorted] + + # weights don't need to be unsorted # TODO: check this is right, it should become obvious if we use unsorted data + weights = np.concatenate((np.ones_like(same_bins, dtype=float), left_weight[not_same], right_weight[not_same])) + + conversion_matrix = coo_matrix((weights, (i_entries, j_entries)), shape=(n_in, n_out)) case _: raise ValueError(f"Unsupported interpolation order: {order}") + + return conversion_matrix + def calculate_interpolation_matrix(input_axes: list[Quantity[ArrayLike]], output_axes: list[Quantity[ArrayLike]], data: ArrayLike | None = None,