Release 2.3.2 (#47)
* improve marginal checking, add some more tests
* update CI
* fix coverage
* tidy code
virgesmith authored Oct 23, 2024
1 parent 12f9cb6 commit 22c1582
Showing 15 changed files with 498 additions and 459 deletions.
12 changes: 8 additions & 4 deletions .github/workflows/coverage.yml
@@ -1,4 +1,5 @@
name: Python coverage

name: Code coverage

on:
push:
@@ -13,9 +14,11 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: [ "3.11" ]
python-version: [ "3.12" ]
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: "pip: Python ${{ matrix.python-version }} coverage"
uses: actions/setup-python@v5
with:
@@ -26,11 +29,12 @@ jobs:
python -m pip install pybind11 pytest
- name: Build
run: |
CFLAGS=-coverage python -m pip install .
CXXFLAGS=-coverage python -m pip install .
- name: Test
run: |
python -m pytest
- name: Upload
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
run: |
bash <(curl -s https://codecov.io/bash) -Z
2 changes: 1 addition & 1 deletion .github/workflows/python-test.yml
@@ -19,7 +19,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.10", "3.11", "3.12"]
python-version: ["3.11", "3.12", "3.13"]
os: [ubuntu-latest, windows-latest, macos-latest]
steps:
- uses: actions/checkout@v4
9 changes: 9 additions & 0 deletions codecov.yml
@@ -0,0 +1,9 @@
coverage:
status:
project:
default:
target: 90%
threshold: 1% # leeway
patch:
default:
target: 75%
63 changes: 45 additions & 18 deletions humanleague/__init__.pyi
@@ -1,6 +1,7 @@
"""
Microsynthesis using quasirandom sampling and IPF, plus related functionality
Microsynthesis using quasirandom sampling and IPF, plus related functionality
"""

from __future__ import annotations
import typing
import numpy as np
@@ -9,17 +10,9 @@ import numpy.typing as npt
FloatArray1d = npt.NDArray[np.float64] | list[float]
IntArray1d = typing.Sequence[int]

__all__ = [
"SobolSequence",
"flatten",
"integerise",
"ipf",
"qis",
"qisi"
]

__all__ = ["SobolSequence", "flatten", "integerise", "ipf", "qis", "qisi"]

class SobolSequence():
class SobolSequence:
@typing.overload
def __init__(self, dim: int) -> None:
"""
@@ -57,10 +50,12 @@ class SobolSequence():
__next__ dunder
"""
pass

def _unittest() -> dict:
"""
For developers. Runs the C++ unit tests.
"""

def flatten(pop: npt.NDArray[np.int64]) -> list:
"""
Converts an n-dimensional array of counts into an n-column table with a row for each unit
@@ -73,8 +68,11 @@ def flatten(pop: npt.NDArray[np.int64]) -> list:
A 2-d array of size n by sum(pop).
"""

@typing.overload
def integerise(frac: FloatArray1d, pop: int) -> tuple[npt.NDArray[np.int64], dict[str, typing.Any]]:
def integerise(
frac: FloatArray1d, pop: int
) -> tuple[npt.NDArray[np.int64], dict[str, typing.Any]]:
"""
Computes the closest integer frequencies given fractional counts and a total population.
@@ -88,8 +86,11 @@ def integerise(frac: FloatArray1d, pop: int) -> tuple[npt.NDArray[np.int64], dic
A tuple containing the result and summary statistics
"""

@typing.overload
def integerise(pop: npt.NDArray[np.float64]) -> tuple[npt.NDArray[np.int64], dict[str, typing.Any]]:
def integerise(
pop: npt.NDArray[np.float64],
) -> tuple[npt.NDArray[np.int64], dict[str, typing.Any]]:
"""
Tries to construct an integer multidimensional array that has identical marginal sums to the fractional input array (which of course must have
integer marginal sums). The algorithm may not always find a solution and will return an approximate array in this case.
@@ -102,7 +103,12 @@ def integerise(pop: npt.NDArray[np.float64]) -> tuple[npt.NDArray[np.int64], dic
A tuple containing the result and summary statistics
"""
def ipf(seed: npt.NDArray[np.float64], indices: typing.Sequence[IntArray1d], marginals: typing.Sequence[npt.NDArray[np.float64]]) -> tuple[npt.NDArray[np.float64], dict[str, typing.Any]]:

def ipf(
seed: npt.NDArray[np.float64],
indices: typing.Sequence[IntArray1d],
marginals: typing.Sequence[npt.NDArray[np.float64]],
) -> tuple[npt.NDArray[np.float64], dict[str, typing.Any]]:
"""
Uses iterative proportional fitting to construct an n-dimensional array from a seed population that matches the specified marginal sums.
@@ -116,8 +122,12 @@ def ipf(seed: npt.NDArray[np.float64], indices: typing.Sequence[IntArray1d], mar
A tuple containing the result and summary statistics
"""

@typing.overload
def qis(indices: typing.Sequence[IntArray1d], marginals: typing.Sequence[npt.NDArray[np.int64]]) -> tuple[npt.NDArray[np.int64], dict[str, typing.Any]]:
def qis(
indices: typing.Sequence[IntArray1d],
marginals: typing.Sequence[npt.NDArray[np.int64]],
) -> tuple[npt.NDArray[np.int64], dict[str, typing.Any]]:
"""
Uses quasirandom integer sampling to construct an n-dimensional population array that matches the specified marginal sums.
@@ -129,8 +139,13 @@ def qis(indices: typing.Sequence[IntArray1d], marginals: typing.Sequence[npt.NDA
A tuple containing the result and summary statistics
"""

@typing.overload
def qis(indices: typing.Sequence[IntArray1d], marginals: typing.Sequence[npt.NDArray[np.int64]], skips: int) -> tuple[npt.NDArray[np.int64], dict[str, typing.Any]]:
def qis(
indices: typing.Sequence[IntArray1d],
marginals: typing.Sequence[npt.NDArray[np.int64]],
skips: int,
) -> tuple[npt.NDArray[np.int64], dict[str, typing.Any]]:
"""
Uses quasirandom integer sampling to construct an n-dimensional population array that matches the specified marginal sums.
@@ -144,8 +159,13 @@ def qis(indices: typing.Sequence[IntArray1d], marginals: typing.Sequence[npt.NDA
A tuple containing the result and summary statistics
"""

@typing.overload
def qisi(seed: npt.NDArray[np.float64], indices: typing.Sequence[IntArray1d], marginals: typing.Sequence[npt.NDArray[np.int64]]) -> tuple[npt.NDArray[np.int64], dict[str, typing.Any]]:
def qisi(
seed: npt.NDArray[np.float64],
indices: typing.Sequence[IntArray1d],
marginals: typing.Sequence[npt.NDArray[np.int64]],
) -> tuple[npt.NDArray[np.int64], dict[str, typing.Any]]:
"""
Uses quasirandom integer sampling to construct an n-dimensional population array that matches the specified marginal sums.
@@ -159,8 +179,14 @@ def qisi(seed: npt.NDArray[np.float64], indices: typing.Sequence[IntArray1d], ma
A tuple containing the result and summary statistics
"""

@typing.overload
def qisi(seed: npt.NDArray[np.float64], indices: list[IntArray1d], marginals: list[npt.NDArray[np.int64]], skips: int) -> tuple[npt.NDArray[np.int64], dict[str, typing.Any]]:
def qisi(
seed: npt.NDArray[np.float64],
indices: list[IntArray1d],
marginals: list[npt.NDArray[np.int64]],
skips: int,
) -> tuple[npt.NDArray[np.int64], dict[str, typing.Any]]:
"""
Uses quasirandom integer sampling to construct an n-dimensional population array that matches the specified marginal sums.
@@ -176,4 +202,5 @@ def qisi(seed: npt.NDArray[np.float64], indices: list[IntArray1d], marginals: li
A tuple containing the result and summary statistics
"""

__version__: str
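
For orientation, here is a minimal usage sketch based on the signatures declared in the stub above. The marginal values, index layout and variable names are illustrative assumptions, not taken from the repository:

import numpy as np
import humanleague as hl

# two 1-d marginals with equal totals; each maps to one dimension of the result
marginal_a = np.array([20, 30, 50])  # totals 100
marginal_b = np.array([60, 40])      # also totals 100

# quasirandom integer sampling: returns the synthesised array plus summary statistics
population, stats = hl.qis([[0], [1]], [marginal_a, marginal_b])
print(population.shape)  # expected (3, 2), summing to 100

# ipf takes a float seed and float marginals and returns a fractional array
seed = np.ones((3, 2))
fractional, ipf_stats = hl.ipf(seed, [[0], [1]], [marginal_a.astype(float), marginal_b.astype(float)])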
6 changes: 3 additions & 3 deletions pyproject.toml
@@ -19,9 +19,9 @@ description = "Microsynthesis using quasirandom sampling and/or IPF"
readme = "README.md"
requires-python = ">=3.10"
classifiers = [
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
]
@@ -45,9 +45,9 @@ testpaths = [
"tests"
]

[tool.ruff]
[tool.ruff.lint]
select = ["E", "F"]
ignore = ["E501"]

[tool.ruff.per-file-ignores]
[tool.ruff.lint.per-file-ignores]
"**/__init__.py" = ["F401", "F403"]
44 changes: 21 additions & 23 deletions setup.py
@@ -6,43 +6,41 @@


def source_files():
sources = glob.glob("src/*.cpp")
# can't use compile skips as some files are auto-generated
skip = ["RcppExports.cpp", "rcpp_api.cpp"]
for s in skip:
sources = [f for f in sources if s not in f]
sources = glob.glob("src/*.cpp")
# can't use compile skips as some files are auto-generated
skip = ["RcppExports.cpp", "rcpp_api.cpp"]
for s in skip:
sources = [f for f in sources if s not in f]

return sources
return sources


def header_files():
return glob.glob("src/*.h")
return glob.glob("src/*.h")


def defines():
return [
("PYTHON_MODULE", None)
]
return [("PYTHON_MODULE", None)]


ext_modules = [
Pybind11Extension(
'_humanleague',
sources=source_files(),
include_dirs=["src"],
define_macros=defines(),
depends=["setup.py", "src/docstr.inl"] + header_files(),
cxx_std=20,
)
Pybind11Extension(
"_humanleague",
sources=source_files(),
include_dirs=["src"],
define_macros=defines(),
depends=["setup.py", "src/docstr.inl"] + header_files(),
cxx_std=20,
)
]


ParallelCompile().install()

setup(
name='humanleague',
packages=["humanleague"],
package_data={"humanleague": ["py.typed", "*.pyi"]},
ext_modules=ext_modules,
zip_safe=False,
name="humanleague",
packages=["humanleague"],
package_data={"humanleague": ["py.typed", "*.pyi"]},
ext_modules=ext_modules,
zip_safe=False,
)
1 change: 0 additions & 1 deletion src/Integerise.cpp
@@ -81,7 +81,6 @@ Integeriser::Integeriser(const NDArray<double>& seed) : m_seed(seed)
// TODO check (close to) integers
m_indices[d] = {(int64_t)d};
m_marginals[d].resize({(int64_t)mf.size()});
//std::cout << "%%: %% %% %%" % m_indices[d] % m_marginals[d].dim() % m_marginals[d].sizes() % mf << std::endl;
for (size_t i = 0; i < mf.size(); ++i)
{
*(m_marginals[d].begin() + i) = checked_round(mf[i]);
21 changes: 9 additions & 12 deletions src/Microsynthesis.h
@@ -40,7 +40,6 @@ class Microsynthesis
{
if (m_indices[k].size() != m_marginals[k].dim())
throw std::runtime_error("index/marginal dimension mismatch %% vs %%"s % m_indices[k].size() % m_marginals[k].dim());
//std::cout << "index " << k << std::endl;
for (size_t j = 0; j < m_indices[k].size(); ++j)
{
int64_t dim = m_indices[k][j];
@@ -136,7 +135,6 @@ class Microsynthesis
// TODO move to a more appropriate place
std::vector<int64_t> invert(size_t max, const std::vector<int64_t>& excluded)
{
//std::cout << "invert " << max << std::endl;
//print(excluded);
std::vector<int64_t> included;
included.reserve(max - excluded.size());
@@ -174,16 +172,13 @@
for (size_t k = 0; k < m_indices.size(); ++k)
{
const NDArray<double>& r = reduce<double>(m_array, m_indices[k]);
// std::cout << k << ":";
// print(r.rawData(), r.storageSize());

Index main_index(m_array.sizes());
//std::cout << m_array.sizes()[m_indices[1-k][0]] << std::endl;
for (MappedIndex oindex(main_index, invert(m_array.dim(), m_indices[k])); !oindex.end(); ++oindex)
{
for (MappedIndex index(main_index, m_indices[k]); !index.end(); ++index)
{
//print((std::vector<int64_t>)main_index);
#ifndef NDEBUG
if (r[index] == 0.0 && m_marginals[k][index] != 0.0)
throw std::runtime_error("div0 in rScale with m>0");
@@ -215,12 +210,13 @@

// more validation

// check marginal sums all the same
m_population = static_cast<int64_t>(sum(m_marginals[0]));
// check marginal sums all the same (round to nearest)
m_population = static_cast<int64_t>(sum(m_marginals[0]) + 0.5);
for (size_t i = 1; i < m_marginals.size(); ++i)
{
if (static_cast<int64_t>(sum(m_marginals[i])) != m_population)
throw std::runtime_error("marginal sum mismatch at index %%: %% vs %%"s % i % sum(m_marginals[i]) % m_population);
auto marginal_sum = static_cast<int64_t>(sum(m_marginals[i]) + 0.5);
if (marginal_sum != m_population)
throw std::runtime_error("marginal sum mismatch at index %%: %% vs %%"s % i % marginal_sum % m_population);
}

// check that for each dimension included in more than one marginal, the partial sums in that dimension are equal
@@ -231,11 +227,12 @@
if (mi.size() < 2)
continue;
// marginal index marginal dimension
const std::vector<M>& ms = reduce(m_marginals[mi[0].first], mi[0].second);
const std::vector<M>& ms0 = reduce(m_marginals[mi[0].first], mi[0].second);
for (size_t i = 1; i < mi.size(); ++i)
{
if (reduce(m_marginals[mi[i].first], mi[i].second) != ms)
throw std::runtime_error("marginal partial sum mismatch");
const auto& msi = reduce(m_marginals[mi[i].first], mi[i].second);
if (!allclose(msi, ms0))
throw std::runtime_error("marginal partial sum mismatch in dimension %% index %%: %% vs %%"s % d % i % msi % ms0);
}
}
}
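
The block above is the core of the "improve marginal checking" change: whole-marginal sums are now rounded to the nearest integer before being compared, and partial sums over shared dimensions are checked within a small absolute tolerance (via the new allclose helper below) rather than with exact equality, with a more descriptive error message. A minimal sketch of the user-visible behaviour, assuming the C++ exception surfaces as a Python RuntimeError and using illustrative marginal values:

import numpy as np
import humanleague as hl

# marginals with mismatched totals (60+39=99 vs 50+50=100) should be rejected by the checks
try:
    hl.qis([[0], [1]], [np.array([50, 50]), np.array([60, 39])])
except RuntimeError as e:
    print(e)  # roughly: "marginal sum mismatch at index 1: 99 vs 100"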
19 changes: 19 additions & 0 deletions src/NDArrayUtils.h
@@ -226,3 +226,22 @@ std::vector<std::vector<int>> listify(const size_t pop, const NDArray<T>& t, int
return list;
}

template<typename T1, typename T2>
bool allclose(const std::vector<T1>& a, const std::vector<T2>& b, double abstol = 1e-8)
{
return a == b;
}

template<>
inline bool allclose(const std::vector<double>& a, const std::vector<double>& b, double abstol)
{
if (a.size() != b.size()) {
return false;
}
for (size_t i = 0; i < a.size() ; ++i) {
if (abs(a[i] - b[i]) > abstol) {
return false;
}
}
return true;
}
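
For intuition, the double specialisation of allclose above performs an element-wise comparison within an absolute tolerance; a rough numpy analogy (not code from the repository):

import numpy as np

a = np.array([10.0, 20.000000001, 30.0])
b = np.array([10.0, 20.0, 30.0])

# element-wise check within an absolute tolerance, mirroring the C++ helper's semantics
print(np.allclose(a, b, rtol=0.0, atol=1e-8))  # True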