Release 2.3.2 (#47)
* improve marginal checking, add some more tests
* update CI
* fix coverage
* tidy code
virgesmith authored Oct 23, 2024
1 parent 12f9cb6 commit 22c1582
Showing 15 changed files with 498 additions and 459 deletions.
12 changes: 8 additions & 4 deletions .github/workflows/coverage.yml
@@ -1,4 +1,5 @@
name: Python coverage

name: Code coverage

on:
push:
@@ -13,9 +14,11 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: [ "3.11" ]
python-version: [ "3.12" ]
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: "pip: Python ${{ matrix.python-version }} coverage"
uses: actions/setup-python@v5
with:
@@ -26,11 +29,12 @@ jobs:
python -m pip install pybind11 pytest
- name: Build
run: |
CFLAGS=-coverage python -m pip install .
CXXFLAGS=-coverage python -m pip install .
- name: Test
run: |
python -m pytest
- name: Upload
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
run: |
bash <(curl -s https://codecov.io/bash) -Z
2 changes: 1 addition & 1 deletion .github/workflows/python-test.yml
@@ -19,7 +19,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: ["3.10", "3.11", "3.12"]
python-version: ["3.11", "3.12", "3.13"]
os: [ubuntu-latest, windows-latest, macos-latest]
steps:
- uses: actions/checkout@v4
9 changes: 9 additions & 0 deletions codecov.yml
@@ -0,0 +1,9 @@
coverage:
status:
project:
default:
target: 90%
threshold: 1% # leeway
patch:
default:
target: 75%
63 changes: 45 additions & 18 deletions humanleague/__init__.pyi
@@ -1,6 +1,7 @@
"""
Microsynthesis using quasirandom sampling and IPF, plus related functionality
Microsynthesis using quasirandom sampling and IPF, plus related functionality
"""

from __future__ import annotations
import typing
import numpy as np
@@ -9,17 +10,9 @@ import numpy.typing as npt
FloatArray1d = npt.NDArray[np.float64] | list[float]
IntArray1d = typing.Sequence[int]

__all__ = [
"SobolSequence",
"flatten",
"integerise",
"ipf",
"qis",
"qisi"
]

__all__ = ["SobolSequence", "flatten", "integerise", "ipf", "qis", "qisi"]

class SobolSequence():
class SobolSequence:
@typing.overload
def __init__(self, dim: int) -> None:
"""
@@ -57,10 +50,12 @@ class SobolSequence():
__next__ dunder
"""
pass

def _unittest() -> dict:
"""
For developers. Runs the C++ unit tests.
"""

def flatten(pop: npt.NDArray[np.int64]) -> list:
"""
Converts an n-dimensional array of counts into an n-column table with a row for each unit
@@ -73,8 +68,11 @@ def flatten(pop: npt.NDArray[np.int64]) -> list:
A 2-d array of size n by sum(pop).
"""

@typing.overload
def integerise(frac: FloatArray1d, pop: int) -> tuple[npt.NDArray[np.int64], dict[str, typing.Any]]:
def integerise(
frac: FloatArray1d, pop: int
) -> tuple[npt.NDArray[np.int64], dict[str, typing.Any]]:
"""
Computes the closest integer frequencies given fractional counts and a total population.
@@ -88,8 +86,11 @@ def integerise(frac: FloatArray1d, pop: int) -> tuple[npt.NDArray[np.int64], dic
A tuple containing the result and summary statistics
"""

@typing.overload
def integerise(pop: npt.NDArray[np.float64]) -> tuple[npt.NDArray[np.int64], dict[str, typing.Any]]:
def integerise(
pop: npt.NDArray[np.float64],
) -> tuple[npt.NDArray[np.int64], dict[str, typing.Any]]:
"""
Tries to construct an integer multidimensional array that has identical marginal sums to the fractional input array (which of course must have
integer marginal sums). The algorithm may not always find a solution and will return an approximate array in this case.
@@ -102,7 +103,12 @@ def integerise(pop: npt.NDArray[np.float64]) -> tuple[npt.NDArray[np.int64], dic
A tuple containing the result and summary statistics
"""
def ipf(seed: npt.NDArray[np.float64], indices: typing.Sequence[IntArray1d], marginals: typing.Sequence[npt.NDArray[np.float64]]) -> tuple[npt.NDArray[np.float64], dict[str, typing.Any]]:

def ipf(
seed: npt.NDArray[np.float64],
indices: typing.Sequence[IntArray1d],
marginals: typing.Sequence[npt.NDArray[np.float64]],
) -> tuple[npt.NDArray[np.float64], dict[str, typing.Any]]:
"""
Uses iterative proportional fitting to construct an n-dimensional array from a seed population that matches the specified marginal sums.
@@ -116,8 +122,12 @@ def ipf(seed: npt.NDArray[np.float64], indices: typing.Sequence[IntArray1d], mar
A tuple containing the result and summary statistics
"""

@typing.overload
def qis(indices: typing.Sequence[IntArray1d], marginals: typing.Sequence[npt.NDArray[np.int64]]) -> tuple[npt.NDArray[np.int64], dict[str, typing.Any]]:
def qis(
indices: typing.Sequence[IntArray1d],
marginals: typing.Sequence[npt.NDArray[np.int64]],
) -> tuple[npt.NDArray[np.int64], dict[str, typing.Any]]:
"""
Uses quasirandom integer sampling to construct an n-dimensional population array that matches the specified marginal sums.
@@ -129,8 +139,13 @@ def qis(indices: typing.Sequence[IntArray1d], marginals: typing.Sequence[npt.NDA
A tuple containing the result and summary statistics
"""

@typing.overload
def qis(indices: typing.Sequence[IntArray1d], marginals: typing.Sequence[npt.NDArray[np.int64]], skips: int) -> tuple[npt.NDArray[np.int64], dict[str, typing.Any]]:
def qis(
indices: typing.Sequence[IntArray1d],
marginals: typing.Sequence[npt.NDArray[np.int64]],
skips: int,
) -> tuple[npt.NDArray[np.int64], dict[str, typing.Any]]:
"""
Uses quasirandom integer sampling to construct an n-dimensional population array that matches the specified marginal sums.
@@ -144,8 +159,13 @@ def qis(indices: typing.Sequence[IntArray1d], marginals: typing.Sequence[npt.NDA
A tuple containing the result and summary statistics
"""

@typing.overload
def qisi(seed: npt.NDArray[np.float64], indices: typing.Sequence[IntArray1d], marginals: typing.Sequence[npt.NDArray[np.int64]]) -> tuple[npt.NDArray[np.int64], dict[str, typing.Any]]:
def qisi(
seed: npt.NDArray[np.float64],
indices: typing.Sequence[IntArray1d],
marginals: typing.Sequence[npt.NDArray[np.int64]],
) -> tuple[npt.NDArray[np.int64], dict[str, typing.Any]]:
"""
Uses quasirandom integer sampling to construct an n-dimensional population array that matches the specified marginal sums.
@@ -159,8 +179,14 @@ def qisi(seed: npt.NDArray[np.float64], indices: typing.Sequence[IntArray1d], ma
A tuple containing the result and summary statistics
"""

@typing.overload
def qisi(seed: npt.NDArray[np.float64], indices: list[IntArray1d], marginals: list[npt.NDArray[np.int64]], skips: int) -> tuple[npt.NDArray[np.int64], dict[str, typing.Any]]:
def qisi(
seed: npt.NDArray[np.float64],
indices: list[IntArray1d],
marginals: list[npt.NDArray[np.int64]],
skips: int,
) -> tuple[npt.NDArray[np.int64], dict[str, typing.Any]]:
"""
Uses quasirandom integer sampling to construct an n-dimensional population array that matches the specified marginal sums.
@@ -176,4 +202,5 @@ def qisi(seed: npt.NDArray[np.float64], indices: list[IntArray1d], marginals: li
A tuple containing the result and summary statistics
"""

__version__: str
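
For orientation, here is a minimal usage sketch based on the signatures declared in the stub above. The marginal values, index layout and variable names are illustrative assumptions, not taken from the repository:

import numpy as np
import humanleague as hl

# two 1-d marginals with equal totals; each maps to one dimension of the result
marginal_a = np.array([20, 30, 50])  # totals 100
marginal_b = np.array([60, 40])      # also totals 100

# quasirandom integer sampling: returns the synthesised array plus summary statistics
population, stats = hl.qis([[0], [1]], [marginal_a, marginal_b])
print(population.shape)  # expected (3, 2), summing to 100

# ipf takes a float seed and float marginals and returns a fractional array
seed = np.ones((3, 2))
fractional, ipf_stats = hl.ipf(seed, [[0], [1]], [marginal_a.astype(float), marginal_b.astype(float)])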
6 changes: 3 additions & 3 deletions pyproject.toml
@@ -19,9 +19,9 @@ description = "Microsynthesis using quasirandom sampling and/or IPF"
readme = "README.md"
requires-python = ">=3.10"
classifiers = [
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
]
@@ -45,9 +45,9 @@ testpaths = [
"tests"
]

[tool.ruff]
[tool.ruff.lint]
select = ["E", "F"]
ignore = ["E501"]

[tool.ruff.per-file-ignores]
[tool.ruff.lint.per-file-ignores]
"**/__init__.py" = ["F401", "F403"]
44 changes: 21 additions & 23 deletions setup.py
@@ -6,43 +6,41 @@


def source_files():
sources = glob.glob("src/*.cpp")
# can't use compile skips as some files are auto-generated
skip = ["RcppExports.cpp", "rcpp_api.cpp"]
for s in skip:
sources = [f for f in sources if s not in f]
sources = glob.glob("src/*.cpp")
# can't use compile skips as some files are auto-generated
skip = ["RcppExports.cpp", "rcpp_api.cpp"]
for s in skip:
sources = [f for f in sources if s not in f]

return sources
return sources


def header_files():
return glob.glob("src/*.h")
return glob.glob("src/*.h")


def defines():
return [
("PYTHON_MODULE", None)
]
return [("PYTHON_MODULE", None)]


ext_modules = [
Pybind11Extension(
'_humanleague',
sources=source_files(),
include_dirs=["src"],
define_macros=defines(),
depends=["setup.py", "src/docstr.inl"] + header_files(),
cxx_std=20,
)
Pybind11Extension(
"_humanleague",
sources=source_files(),
include_dirs=["src"],
define_macros=defines(),
depends=["setup.py", "src/docstr.inl"] + header_files(),
cxx_std=20,
)
]


ParallelCompile().install()

setup(
name='humanleague',
packages=["humanleague"],
package_data={"humanleague": ["py.typed", "*.pyi"]},
ext_modules=ext_modules,
zip_safe=False,
name="humanleague",
packages=["humanleague"],
package_data={"humanleague": ["py.typed", "*.pyi"]},
ext_modules=ext_modules,
zip_safe=False,
)
1 change: 0 additions & 1 deletion src/Integerise.cpp
@@ -81,7 +81,6 @@ Integeriser::Integeriser(const NDArray<double>& seed) : m_seed(seed)
// TODO check (close to) integers
m_indices[d] = {(int64_t)d};
m_marginals[d].resize({(int64_t)mf.size()});
//std::cout << "%%: %% %% %%" % m_indices[d] % m_marginals[d].dim() % m_marginals[d].sizes() % mf << std::endl;
for (size_t i = 0; i < mf.size(); ++i)
{
*(m_marginals[d].begin() + i) = checked_round(mf[i]);
21 changes: 9 additions & 12 deletions src/Microsynthesis.h
@@ -40,7 +40,6 @@ class Microsynthesis
{
if (m_indices[k].size() != m_marginals[k].dim())
throw std::runtime_error("index/marginal dimension mismatch %% vs %%"s % m_indices[k].size() % m_marginals[k].dim());
//std::cout << "index " << k << std::endl;
for (size_t j = 0; j < m_indices[k].size(); ++j)
{
int64_t dim = m_indices[k][j];
@@ -136,7 +135,6 @@ class Microsynthesis
// TODO move to a more appropriate place
std::vector<int64_t> invert(size_t max, const std::vector<int64_t>& excluded)
{
//std::cout << "invert " << max << std::endl;
//print(excluded);
std::vector<int64_t> included;
included.reserve(max - excluded.size());
@@ -174,16 +172,13 @@
for (size_t k = 0; k < m_indices.size(); ++k)
{
const NDArray<double>& r = reduce<double>(m_array, m_indices[k]);
// std::cout << k << ":";
// print(r.rawData(), r.storageSize());

Index main_index(m_array.sizes());
//std::cout << m_array.sizes()[m_indices[1-k][0]] << std::endl;
for (MappedIndex oindex(main_index, invert(m_array.dim(), m_indices[k])); !oindex.end(); ++oindex)
{
for (MappedIndex index(main_index, m_indices[k]); !index.end(); ++index)
{
//print((std::vector<int64_t>)main_index);
#ifndef NDEBUG
if (r[index] == 0.0 && m_marginals[k][index] != 0.0)
throw std::runtime_error("div0 in rScale with m>0");
@@ -215,12 +210,13 @@

// more validation

// check marginal sums all the same
m_population = static_cast<int64_t>(sum(m_marginals[0]));
// check marginal sums all the same (round to nearest)
m_population = static_cast<int64_t>(sum(m_marginals[0]) + 0.5);
for (size_t i = 1; i < m_marginals.size(); ++i)
{
if (static_cast<int64_t>(sum(m_marginals[i])) != m_population)
throw std::runtime_error("marginal sum mismatch at index %%: %% vs %%"s % i % sum(m_marginals[i]) % m_population);
auto marginal_sum = static_cast<int64_t>(sum(m_marginals[i]) + 0.5);
if (marginal_sum != m_population)
throw std::runtime_error("marginal sum mismatch at index %%: %% vs %%"s % i % marginal_sum % m_population);
}

// check that for each dimension included in more than one marginal, the partial sums in that dimension are equal
@@ -231,11 +227,12 @@
if (mi.size() < 2)
continue;
// marginal index marginal dimension
const std::vector<M>& ms = reduce(m_marginals[mi[0].first], mi[0].second);
const std::vector<M>& ms0 = reduce(m_marginals[mi[0].first], mi[0].second);
for (size_t i = 1; i < mi.size(); ++i)
{
if (reduce(m_marginals[mi[i].first], mi[i].second) != ms)
throw std::runtime_error("marginal partial sum mismatch");
const auto& msi = reduce(m_marginals[mi[i].first], mi[i].second);
if (!allclose(msi, ms0))
throw std::runtime_error("marginal partial sum mismatch in dimension %% index %%: %% vs %%"s % d % i % msi % ms0);
}
}
}
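
The block above is the core of the "improve marginal checking" change: whole-marginal sums are now rounded to the nearest integer before being compared, and partial sums over shared dimensions are checked within a small absolute tolerance (via the new allclose helper below) rather than with exact equality, with a more descriptive error message. A minimal sketch of the user-visible behaviour, assuming the C++ exception surfaces as a Python RuntimeError and using illustrative marginal values:

import numpy as np
import humanleague as hl

# marginals with mismatched totals (60+39=99 vs 50+50=100) should be rejected by the checks
try:
    hl.qis([[0], [1]], [np.array([50, 50]), np.array([60, 39])])
except RuntimeError as e:
    print(e)  # roughly: "marginal sum mismatch at index 1: 99 vs 100"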
19 changes: 19 additions & 0 deletions src/NDArrayUtils.h
@@ -226,3 +226,22 @@ std::vector<std::vector<int>> listify(const size_t pop, const NDArray<T>& t, int
return list;
}

template<typename T1, typename T2>
bool allclose(const std::vector<T1>& a, const std::vector<T2>& b, double abstol = 1e-8)
{
return a == b;
}

template<>
inline bool allclose(const std::vector<double>& a, const std::vector<double>& b, double abstol)
{
if (a.size() != b.size()) {
return false;
}
for (size_t i = 0; i < a.size() ; ++i) {
if (abs(a[i] - b[i]) > abstol) {
return false;
}
}
return true;
}
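
For intuition, the double specialisation of allclose above performs an element-wise comparison within an absolute tolerance; a rough numpy analogy (not code from the repository):

import numpy as np

a = np.array([10.0, 20.000000001, 30.0])
b = np.array([10.0, 20.0, 30.0])

# element-wise check within an absolute tolerance, mirroring the C++ helper's semantics
print(np.allclose(a, b, rtol=0.0, atol=1e-8))  # True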