Skip to content

Commit

Permalink
Move DistributionDataFrameHistogram for typing purposes
Browse files Browse the repository at this point in the history
  • Loading branch information
ndaelman committed Jun 3, 2024
1 parent 78378df commit 156adf2
Showing 1 changed file with 61 additions and 61 deletions.
122 changes: 61 additions & 61 deletions src/nomad_simulations/model_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,67 @@ def normalize(self, archive, logger: BoundLogger):
)


class DistributionDataFrameHistogram:
    """Per-mode histograms built from a data frame of geometric distributions.

    The selected rows of the input frame are grouped by every column except
    the last; each group key is referred to as a *mode*. For every mode the
    `'value'` column is discretized and counted, and the matching row of
    `cutoffs` is stored alongside the histogram.
    """

    def __init__(
        self,
        el_distrs: pd.DataFrame,
        cutoffs: pd.DataFrame,
        ll: int,
        bins: pint.Quantity,
    ) -> None:
        """Build one histogram per mode.

        Args:
            el_distrs: Distribution frame. Must contain a `'value'` column; all
                columns but the last are used as grouping keys
                (presumably `'value'` is the last column -- confirm with caller).
            cutoffs: Frame indexed such that `cutoffs.loc[mode]` returns the
                cutoffs belonging to a mode.
            ll: Number of non-NA entries a row must have to be selected. Also
                the key used in `to_nomad` (2, 3 or 4) to pick the section class.
            bins: A scalar quantity is used as the precision for `_sparsify`;
                an array quantity is used as bin edges for `_bin`.
        """
        self._ll = ll
        self._hists: dict[Mode, pd.DataFrame] = {}
        self._cutoffs: dict[Mode, pint.Quantity] = {}

        # `np.ndim` treats Python numbers, NumPy scalars and 0-d arrays alike,
        # whereas an `isinstance(..., (int, float))` test misses e.g. np.int64.
        protocol = self._sparsify if np.ndim(bins.magnitude) == 0 else self._bin

        distr = self._sel_distr_cols(el_distrs)
        # `groupby` needs a *list* of column labels: a pandas Index would be
        # interpreted as a single array-like grouper aligned with the rows.
        group_keys = list(distr.columns[:-1])
        for mode, df in distr.groupby(group_keys):
            self._hists[mode] = self._cmp_hist(protocol(df, bins))
            self._cutoffs[mode] = cutoffs.loc[mode]

    def _sel_distr_cols(self, df: pd.DataFrame) -> pd.DataFrame:
        """Select the appropriate elemental combinations.

        Keeps the rows holding exactly `self._ll` non-NA entries and then drops
        every remaining row that contains an NA.
        """
        # NOTE(review): `count(axis=1)` counts *all* columns of the row,
        # including `'value'` -- confirm this matches the intended selection.
        return df[df.count(axis=1) == self._ll].dropna(axis=0)

    def _cmp_hist(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a histogram of the distribution.

        Only strictly positive values are counted. The result has the columns
        `'value'`, `'count'` (occurrences) and `'freq'` (count divided by the
        smallest count in the histogram).
        """
        positive = df[df['value'] > 0]
        hist = positive.groupby('value').size().reset_index(name='count')
        # Vectorized; the minimum is computed once instead of once per row.
        hist['freq'] = hist['count'] / hist['count'].min()
        return hist

    def _sparsify(self, df: pd.DataFrame, prec: pint.Quantity) -> pd.DataFrame:
        """Sparsify the distribution by flooring values to a multiple of `prec`.

        Returns a copy of `df` whose `'value'` column has been replaced; the
        input frame is left untouched.
        """
        sparse = df.copy()
        # `Series.map` has no `inplace` argument, so assign the result instead.
        # NOTE(review): `x / prec` must be something `math.floor` accepts --
        # confirm the units of the `'value'` column against `prec`.
        sparse['value'] = sparse['value'].map(
            lambda x: math.floor(x / prec) * prec
        )
        return sparse

    def _bin(self, df: pd.DataFrame, binning: pint.Quantity) -> pd.DataFrame:
        """Bin the distribution by the `binning` values.

        Every value is replaced by the last edge in `binning` that lies
        strictly below it (the lower bin edge when `binning` is ascending).
        Returns a copy of `df`; the input frame is left untouched.

        Raises:
            ValueError: If a value is not greater than any edge in `binning`.
        """

        def lower_edge(x):
            # `binning` is an array quantity and must be indexed, not called.
            # The largest matching index gives the lower bin edge; taking the
            # smallest would map every value onto the first edge.
            return binning[np.max(np.where(x > binning)[0])]

        binned = df.copy()
        binned['value'] = binned['value'].map(lower_edge)
        return binned

    def get(self, mode: Mode) -> tuple[pint.Quantity, pd.DataFrame]:
        """Return the `(cutoffs, histogram)` pair stored for `mode`."""
        return self._cutoffs[mode], self._hists[mode]

    def to_nomad(self) -> list[GeometryDistribution]:
        """Convert every stored histogram into a NOMAD distribution section.

        The section class is chosen by `self._ll`: 2 -> distance, 3 -> angle,
        4 -> dihedral.
        """
        constructor_map = {
            2: DistanceGeometryDistribution,
            3: AngleGeometryDistribution,
            4: DihedralGeometryDistribution,
        }
        return [
            constructor_map[self._ll](
                element_cutoff_selection=mode,
                distance_cutoffs=self._cutoffs[mode],
                bins=hist['value'],
                frequencies=hist['freq'],
            )
            for mode, hist in self._hists.items()
        ]


class DistributionDataFrame:
def __init__(
self,
Expand Down Expand Up @@ -619,67 +680,6 @@ def to_hist(self, ll: int, bins: pint.Quantity) -> DistributionDataFrameHistogra
)


class DistributionDataFrameHistogram:
    """Per-mode histograms built from a data frame of geometric distributions.

    The selected rows of the input frame are grouped by every column except
    the last; each group key is referred to as a *mode*. For every mode the
    `'value'` column is discretized and counted, and the matching row of
    `cutoffs` is stored alongside the histogram.
    """

    def __init__(
        self,
        el_distrs: pd.DataFrame,
        cutoffs: pd.DataFrame,
        ll: int,
        bins: pint.Quantity,
    ) -> None:
        """Build one histogram per mode.

        Args:
            el_distrs: Distribution frame. Must contain a `'value'` column; all
                columns but the last are used as grouping keys
                (presumably `'value'` is the last column -- confirm with caller).
            cutoffs: Frame indexed such that `cutoffs.loc[mode]` returns the
                cutoffs belonging to a mode.
            ll: Number of non-NA entries a row must have to be selected. Also
                the key used in `to_nomad` (2, 3 or 4) to pick the section class.
            bins: A scalar quantity is used as the precision for `_sparsify`;
                an array quantity is used as bin edges for `_bin`.
        """
        self._ll = ll
        self._hists: dict[Mode, pd.DataFrame] = {}
        self._cutoffs: dict[Mode, pint.Quantity] = {}

        # `np.ndim` treats Python numbers, NumPy scalars and 0-d arrays alike,
        # whereas an `isinstance(..., (int, float))` test misses e.g. np.int64.
        protocol = self._sparsify if np.ndim(bins.magnitude) == 0 else self._bin

        distr = self._sel_distr_cols(el_distrs)
        # `groupby` needs a *list* of column labels: a pandas Index would be
        # interpreted as a single array-like grouper aligned with the rows.
        group_keys = list(distr.columns[:-1])
        for mode, df in distr.groupby(group_keys):
            self._hists[mode] = self._cmp_hist(protocol(df, bins))
            self._cutoffs[mode] = cutoffs.loc[mode]

    def _sel_distr_cols(self, df: pd.DataFrame) -> pd.DataFrame:
        """Select the appropriate elemental combinations.

        Keeps the rows holding exactly `self._ll` non-NA entries and then drops
        every remaining row that contains an NA.
        """
        # NOTE(review): `count(axis=1)` counts *all* columns of the row,
        # including `'value'` -- confirm this matches the intended selection.
        return df[df.count(axis=1) == self._ll].dropna(axis=0)

    def _cmp_hist(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a histogram of the distribution.

        Only strictly positive values are counted. The result has the columns
        `'value'`, `'count'` (occurrences) and `'freq'` (count divided by the
        smallest count in the histogram).
        """
        positive = df[df['value'] > 0]
        hist = positive.groupby('value').size().reset_index(name='count')
        # Vectorized; the minimum is computed once instead of once per row.
        hist['freq'] = hist['count'] / hist['count'].min()
        return hist

    def _sparsify(self, df: pd.DataFrame, prec: pint.Quantity) -> pd.DataFrame:
        """Sparsify the distribution by flooring values to a multiple of `prec`.

        Returns a copy of `df` whose `'value'` column has been replaced; the
        input frame is left untouched.
        """
        sparse = df.copy()
        # `Series.map` has no `inplace` argument, so assign the result instead.
        # NOTE(review): `x / prec` must be something `math.floor` accepts --
        # confirm the units of the `'value'` column against `prec`.
        sparse['value'] = sparse['value'].map(
            lambda x: math.floor(x / prec) * prec
        )
        return sparse

    def _bin(self, df: pd.DataFrame, binning: pint.Quantity) -> pd.DataFrame:
        """Bin the distribution by the `binning` values.

        Every value is replaced by the last edge in `binning` that lies
        strictly below it (the lower bin edge when `binning` is ascending).
        Returns a copy of `df`; the input frame is left untouched.

        Raises:
            ValueError: If a value is not greater than any edge in `binning`.
        """

        def lower_edge(x):
            # `binning` is an array quantity and must be indexed, not called.
            # The largest matching index gives the lower bin edge; taking the
            # smallest would map every value onto the first edge.
            return binning[np.max(np.where(x > binning)[0])]

        binned = df.copy()
        binned['value'] = binned['value'].map(lower_edge)
        return binned

    def get(self, mode: Mode) -> tuple[pint.Quantity, pd.DataFrame]:
        """Return the `(cutoffs, histogram)` pair stored for `mode`."""
        return self._cutoffs[mode], self._hists[mode]

    def to_nomad(self) -> list[GeometryDistribution]:
        """Convert every stored histogram into a NOMAD distribution section.

        The section class is chosen by `self._ll`: 2 -> distance, 3 -> angle,
        4 -> dihedral.
        """
        constructor_map = {
            2: DistanceGeometryDistribution,
            3: AngleGeometryDistribution,
            4: DihedralGeometryDistribution,
        }
        return [
            constructor_map[self._ll](
                element_cutoff_selection=mode,
                distance_cutoffs=self._cutoffs[mode],
                bins=hist['value'],
                frequencies=hist['freq'],
            )
            for mode, hist in self._hists.items()
        ]


class AtomicCell(Cell):
"""
A base section used to specify the atomic cell information of a system.
Expand Down

0 comments on commit 156adf2

Please sign in to comment.