Skip to content

Commit

Permalink
Merge branch 'refactor_24' into 88-make-readers-return-sasdata-objects
Browse files Browse the repository at this point in the history
  • Loading branch information
jamescrake-merani committed Jan 15, 2025
2 parents 44b9f56 + 2251f4a commit d54ee7a
Show file tree
Hide file tree
Showing 54 changed files with 19,250 additions and 17,210 deletions.
4 changes: 4 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,14 @@ lxml

# Calculation
numpy==1.*
scipy

# Unit testing
pytest
unittest-xml-reporting

# Documentation
sphinx

# Other stuff
matplotlib
21 changes: 11 additions & 10 deletions sasdata/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,29 @@
from dataclasses import dataclass

from sasdata.quantities.quantity import NamedQuantity
import numpy as np
from sasdata.metadata import Metadata
from sasdata.quantities.accessors import AccessorTarget
from sasdata.data_backing import Group, key_tree


class SasData:
def __init__(self, name: str, data_contents: list[NamedQuantity], raw_metadata: Group, verbose: bool=False):
def __init__(self, name: str,
data_contents: list[NamedQuantity],
raw_metadata: Group,
verbose: bool=False):

self.name = name
self._data_contents = data_contents
self._raw_metadata = raw_metadata
self._verbose = verbose

self.metadata = Metadata(AccessorTarget(raw_metadata, verbose=verbose))

# TO IMPLEMENT

# abscissae: list[NamedQuantity[np.ndarray]]
# ordinate: NamedQuantity[np.ndarray]
# other: list[NamedQuantity[np.ndarray]]
#
# metadata: Metadata
# model_requirements: ModellingRequirements
# Components that need to be organised after creation
self.ordinate: NamedQuantity[np.ndarray] = None # TODO: fill out
self.abscissae: list[NamedQuantity[np.ndarray]] = None # TODO: fill out
self.mask = None # TODO: fill out
self.model_requirements = None # TODO: fill out

def summary(self, indent = " ", include_raw=False):
s = f"{self.name}\n"
Expand Down
250 changes: 125 additions & 125 deletions sasdata/data_backing.py
Original file line number Diff line number Diff line change
@@ -1,126 +1,126 @@
from typing import TypeVar, Self
from dataclasses import dataclass
from enum import Enum

from sasdata.quantities.quantity import NamedQuantity

DataType = TypeVar("DataType")

""" Sasdata metadata tree """

def shorten_string(string):
    """ Collapse a multi-line string into a single short line

    A string without any newline is returned unchanged (whatever its length).
    Otherwise the result is the first 30 characters of the first line,
    the separator " ... ", and the last 30 characters of the last line.
    """
    first, *remaining = string.split("\n")

    # No newline present: nothing to collapse
    if not remaining:
        return string

    return first[:30] + " ... " + remaining[-1][-30:]

@dataclass
class Dataset[DataType]:
name: str
data: DataType
attributes: dict[str, Self | str]

def summary(self, indent_amount: int = 0, indent: str = " ") -> str:

s = f"{indent*indent_amount}{self.name.split("/")[-1]}:\n"
s += f"{indent*(indent_amount+1)}{shorten_string(str(self.data))}\n"
for key in self.attributes:
value = self.attributes[key]
if isinstance(value, (Group, Dataset)):
value_string = value.summary(indent_amount+1, indent)
else:
value_string = f"{indent * (indent_amount+1)}{key}: {shorten_string(repr(value))}\n"

s += value_string

return s

@dataclass
class Group:
name: str
children: dict[str, Self | Dataset]

def summary(self, indent_amount: int=0, indent=" "):
s = f"{indent*indent_amount}{self.name.split("/")[-1]}:\n"
for key in self.children:
s += self.children[key].summary(indent_amount+1, indent)

return s

class Function:
    """ Representation of a (data driven) function, such as I vs Q

    Holds the independent variables (abscissae) and the dependent variable (ordinate)
    exactly as they are passed in; no copying or validation is performed.
    """

    def __init__(self, abscissae: list[NamedQuantity], ordinate: NamedQuantity):
        # Stored as given, attributes share the names of the constructor arguments
        self.abscissae, self.ordinate = abscissae, ordinate


class FunctionType(Enum):
    """ Label for the kind of function a set of data represents

    This should not be relied upon to be perfectly descriptive:
    the functions might be parametrised by more variables than the specification.
    """
    UNKNOWN = 0

    # 1-4: scattering intensity
    SCATTERING_INTENSITY_VS_Q = 1
    SCATTERING_INTENSITY_VS_Q_2D = 2
    SCATTERING_INTENSITY_VS_Q_3D = 3
    SCATTERING_INTENSITY_VS_ANGLE = 4

    # 20s: block headed by UNKNOWN_METADATA (grouping inferred from the member names)
    UNKNOWN_METADATA = 20
    TRANSMISSION = 21
    POLARISATION_EFFICIENCY = 22

    # 30s: block headed by UNKNOWN_REALSPACE (grouping inferred from the member names)
    UNKNOWN_REALSPACE = 30
    SESANS = 31
    CORRELATION_FUNCTION_1D = 32
    CORRELATION_FUNCTION_2D = 33
    CORRELATION_FUNCTION_3D = 34
    INTERFACE_DISTRIBUTION_FUNCTION = 35

    # 40s: probability
    PROBABILITY_DISTRIBUTION = 40
    PROBABILITY_DENSITY = 41

def function_type_identification_key(names):
    """ Create a key from the names of data objects that can be used to assign a function type

    The key is the lower-cased names, sorted, joined by ":". Names are lower-cased
    *before* sorting so that the key does not depend on the casing or the order of
    the input (sorting first would order upper case before lower case, and the same
    set of names written with different casing could then give different keys).

    :param names: iterable of name strings
    :return: the key string, "" for an empty input
    """
    return ":".join(sorted(name.lower() for name in names))

# Specification of the known function types: (abscissa names, ordinate name, function type)
function_fields_to_type = [
    (["Q"],              "I", FunctionType.SCATTERING_INTENSITY_VS_Q),
    (["Qx", "Qy"],       "I", FunctionType.SCATTERING_INTENSITY_VS_Q_2D),
    (["Qx", "Qy", "Qz"], "I", FunctionType.SCATTERING_INTENSITY_VS_Q_3D),
    (["Z"],              "G", FunctionType.SESANS),
    (["lambda"],         "T", FunctionType.TRANSMISSION),
]

# Identification key (built from all the names of an entry, abscissae and ordinate) -> function type
function_fields_lookup = {
    function_type_identification_key([*abscissa_names, ordinate_name]): kind
    for abscissa_names, ordinate_name, kind in function_fields_to_type
}

def build_main_data(data: list[NamedQuantity]) -> Function:
    """ Work out the function type described by a list of named quantities

    The names of the quantities are turned into an identification key which is
    looked up in `function_fields_lookup`; names that are not found are treated
    as FunctionType.UNKNOWN.

    Current behaviour: nothing is built yet. For an unknown type the function
    falls through and returns None, for any recognised type it raises.

    NOTE(review): the error message for the recognised-but-unimplemented case reads
    "Unknown " - presumably a placeholder, confirm the intended wording.

    :param data: quantities, each providing a `name` attribute
    :raises NotImplementedError: when the names match a known function type
    """
    identifier = function_type_identification_key([datum.name for datum in data])
    function_type = function_fields_lookup.get(identifier, FunctionType.UNKNOWN)

    if function_type != FunctionType.UNKNOWN:
        raise NotImplementedError("Unknown ")

    # Unknown type: nothing to do (implicitly returns None)

def key_tree(data: Group | Dataset, indent_amount=0, indent: str = " ") -> str:
    """ Show a metadata tree, showing the names of the keys used to access them

    For a Group every child key is listed, followed by the tree of that child one
    level deeper. For a Dataset a "[data]" line is written, then every attribute
    key followed by the tree of the attribute value one level deeper. Anything
    else (e.g. a plain string attribute) contributes nothing.

    :param data: node to describe
    :param indent_amount: number of times `indent` is repeated in front of each line
    :param indent: string used for one level of indentation
    :return: the tree as a string ("" when there is nothing to show)
    """
    s = ""
    if isinstance(data, Group):
        for key in data.children:
            s += indent*indent_amount + key + "\n"
            s += key_tree(data.children[key], indent_amount=indent_amount+1, indent=indent)

    if isinstance(data, Dataset):
        s += indent*indent_amount + "[data]\n"
        for key in data.attributes:
            s += indent*indent_amount + key + "\n"
            s += key_tree(data.attributes[key], indent_amount=indent_amount+1, indent=indent)

    # The accumulated text must be returned: without it the function yields None,
    # contradicting the declared return type and breaking the recursive `s += ...`
    return s

from typing import TypeVar, Self
from dataclasses import dataclass
from enum import Enum

from sasdata.quantities.quantity import NamedQuantity

DataType = TypeVar("DataType")

""" Sasdata metadata tree """

def shorten_string(string):
    """ Collapse a multi-line string into a single short line

    A string without any newline is returned unchanged (whatever its length).
    Otherwise the result is the first 30 characters of the first line,
    the separator " ... ", and the last 30 characters of the last line.
    """
    first, *remaining = string.split("\n")

    # No newline present: nothing to collapse
    if not remaining:
        return string

    return first[:30] + " ... " + remaining[-1][-30:]

@dataclass
class Dataset[DataType]:
name: str
data: DataType
attributes: dict[str, Self | str]

def summary(self, indent_amount: int = 0, indent: str = " ") -> str:

s = f"{indent*indent_amount}{self.name.split("/")[-1]}:\n"
s += f"{indent*(indent_amount+1)}{shorten_string(str(self.data))}\n"
for key in self.attributes:
value = self.attributes[key]
if isinstance(value, (Group, Dataset)):
value_string = value.summary(indent_amount+1, indent)
else:
value_string = f"{indent * (indent_amount+1)}{key}: {shorten_string(repr(value))}\n"

s += value_string

return s

@dataclass
class Group:
name: str
children: dict[str, Self | Dataset]

def summary(self, indent_amount: int=0, indent=" "):
s = f"{indent*indent_amount}{self.name.split("/")[-1]}:\n"
for key in self.children:
s += self.children[key].summary(indent_amount+1, indent)

return s

class Function:
    """ Representation of a (data driven) function, such as I vs Q

    Holds the independent variables (abscissae) and the dependent variable (ordinate)
    exactly as they are passed in; no copying or validation is performed.
    """

    def __init__(self, abscissae: list[NamedQuantity], ordinate: NamedQuantity):
        # Stored as given, attributes share the names of the constructor arguments
        self.abscissae, self.ordinate = abscissae, ordinate


class FunctionType(Enum):
    """ Label for the kind of function a set of data represents

    This should not be relied upon to be perfectly descriptive:
    the functions might be parametrised by more variables than the specification.
    """
    UNKNOWN = 0

    # 1-4: scattering intensity
    SCATTERING_INTENSITY_VS_Q = 1
    SCATTERING_INTENSITY_VS_Q_2D = 2
    SCATTERING_INTENSITY_VS_Q_3D = 3
    SCATTERING_INTENSITY_VS_ANGLE = 4

    # 20s: block headed by UNKNOWN_METADATA (grouping inferred from the member names)
    UNKNOWN_METADATA = 20
    TRANSMISSION = 21
    POLARISATION_EFFICIENCY = 22

    # 30s: block headed by UNKNOWN_REALSPACE (grouping inferred from the member names)
    UNKNOWN_REALSPACE = 30
    SESANS = 31
    CORRELATION_FUNCTION_1D = 32
    CORRELATION_FUNCTION_2D = 33
    CORRELATION_FUNCTION_3D = 34
    INTERFACE_DISTRIBUTION_FUNCTION = 35

    # 40s: probability
    PROBABILITY_DISTRIBUTION = 40
    PROBABILITY_DENSITY = 41

def function_type_identification_key(names):
    """ Create a key from the names of data objects that can be used to assign a function type

    The key is the lower-cased names, sorted, joined by ":". Names are lower-cased
    *before* sorting so that the key does not depend on the casing or the order of
    the input (sorting first would order upper case before lower case, and the same
    set of names written with different casing could then give different keys).

    :param names: iterable of name strings
    :return: the key string, "" for an empty input
    """
    return ":".join(sorted(name.lower() for name in names))

# Specification of the known function types: (abscissa names, ordinate name, function type)
function_fields_to_type = [
    (["Q"],              "I", FunctionType.SCATTERING_INTENSITY_VS_Q),
    (["Qx", "Qy"],       "I", FunctionType.SCATTERING_INTENSITY_VS_Q_2D),
    (["Qx", "Qy", "Qz"], "I", FunctionType.SCATTERING_INTENSITY_VS_Q_3D),
    (["Z"],              "G", FunctionType.SESANS),
    (["lambda"],         "T", FunctionType.TRANSMISSION),
]

# Identification key (built from all the names of an entry, abscissae and ordinate) -> function type
function_fields_lookup = {
    function_type_identification_key([*abscissa_names, ordinate_name]): kind
    for abscissa_names, ordinate_name, kind in function_fields_to_type
}

def build_main_data(data: list[NamedQuantity]) -> Function:
    """ Work out the function type described by a list of named quantities

    The names of the quantities are turned into an identification key which is
    looked up in `function_fields_lookup`; names that are not found are treated
    as FunctionType.UNKNOWN.

    Current behaviour: nothing is built yet. For an unknown type the function
    falls through and returns None, for any recognised type it raises.

    NOTE(review): the error message for the recognised-but-unimplemented case reads
    "Unknown " - presumably a placeholder, confirm the intended wording.

    :param data: quantities, each providing a `name` attribute
    :raises NotImplementedError: when the names match a known function type
    """
    identifier = function_type_identification_key([datum.name for datum in data])
    function_type = function_fields_lookup.get(identifier, FunctionType.UNKNOWN)

    if function_type != FunctionType.UNKNOWN:
        raise NotImplementedError("Unknown ")

    # Unknown type: nothing to do (implicitly returns None)

def key_tree(data: Group | Dataset, indent_amount=0, indent: str = " ") -> str:
    """ Show a metadata tree, showing the names of the keys used to access them

    For a Group every child key is listed, followed by the tree of that child one
    level deeper. For a Dataset a "[data]" line is written, then every attribute
    key followed by the tree of the attribute value one level deeper. Anything
    else (e.g. a plain string attribute) contributes nothing.

    :param data: node to describe
    :param indent_amount: number of times `indent` is repeated in front of each line
    :param indent: string used for one level of indentation
    :return: the tree as a string ("" when there is nothing to show)
    """
    prefix = indent * indent_amount
    pieces = []

    if isinstance(data, Group):
        for key, child in data.children.items():
            pieces.append(prefix + key + "\n")
            pieces.append(key_tree(child, indent_amount=indent_amount + 1, indent=indent))

    if isinstance(data, Dataset):
        pieces.append(prefix + "[data]\n")
        for key, attribute in data.attributes.items():
            pieces.append(prefix + key + "\n")
            pieces.append(key_tree(attribute, indent_amount=indent_amount + 1, indent=indent))

    return "".join(pieces)
Loading

0 comments on commit d54ee7a

Please sign in to comment.