From 9a71776d68ef168e0e8375b46ee66498cd748f43 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Fri, 2 Aug 2024 14:25:32 -0400 Subject: [PATCH 01/58] Split `ci.yml` into separate actions Split the "unit-tests" action into multiple actions, currently one for each package contained within the `flepiMoP` repo. Also updated checkout from v3 to v4 to address node16 deprecation warnings and swapped ubuntu 20.04 for ubuntu latest. Changed the gempyor ci to not print stdout and exit on first failure. --- .github/workflows/ci.yml | 70 ---------------------------- .github/workflows/flepicommon-ci.yml | 40 ++++++++++++++++ .github/workflows/gempyor-ci.yml | 45 ++++++++++++++++++ .github/workflows/inference-ci.yml | 40 ++++++++++++++++ 4 files changed, 125 insertions(+), 70 deletions(-) delete mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/flepicommon-ci.yml create mode 100644 .github/workflows/gempyor-ci.yml create mode 100644 .github/workflows/inference-ci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index 29e9a186d..000000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,70 +0,0 @@ -name: unit-tests - -on: - workflow_dispatch: - push: - branches: - - main - - dev - pull_request: - branches: - - main - - dev - - breaking-improvments - -jobs: - unit-tests: - runs-on: ubuntu-20.04 - container: - image: hopkinsidd/flepimop:latest-dev - options: --user root - steps: - - name: Checkout - uses: actions/checkout@v3 - with: - lfs: true - - name: Set up Rprofile - run: | - cp build/docker/Docker.Rprofile $HOME/.Rprofile - cp /home/app/.bashrc $HOME/.bashrc - shell: bash - - name: Install the gempyor package - run: | - source /var/python/3.10/virtualenv/bin/activate - python -m pip install --upgrade pip - python -m pip install "flepimop/gempyor_pkg[test]" - shell: bash - - name: Install local R packages - run: Rscript build/local_install.R - shell: bash - - name: Run gempyor tests - run: | - source /var/python/3.10/virtualenv/bin/activate - cd flepimop/gempyor_pkg - pytest -s - shell: bash - - name: Run gempyor-cli integration tests from examples - run: | - source /var/python/3.10/virtualenv/bin/activate - cd examples - pytest -s - shell: bash - - name: Run flepicommon tests - run: | - setwd("flepimop/R_packages/flepicommon") - devtools::test(stop_on_failure=TRUE) - shell: Rscript {0} - - name: Run inference tests - run: | - setwd("flepimop/R_packages/inference") - devtools::test(stop_on_failure=TRUE) - shell: Rscript {0} -# - name: Run integration tests -# env: -# CENSUS_API_KEY: ${{ secrets.CENSUS_API_KEY }} -# run: | -# Rscript build/local_install.R -# cd test -# source /var/python/3.10/virtualenv/bin/activate -# pytest run_tests.py -# shell: bash diff --git a/.github/workflows/flepicommon-ci.yml b/.github/workflows/flepicommon-ci.yml new file mode 100644 index 000000000..b7c0cc5f3 --- /dev/null +++ b/.github/workflows/flepicommon-ci.yml @@ -0,0 +1,40 @@ +name: flepicommon-ci + +on: + workflow_dispatch: + push: + paths: + - flepimop/R_packages/flepicommon/**/* + branches: + - main + - dev + pull_request: + branches: + - main + - dev + - breaking-improvements + +jobs: + unit-tests: + runs-on: ubuntu-latest + container: + image: hopkinsidd/flepimop:latest-dev + options: --user root + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + lfs: true + - name: Set up Rprofile + run: | + cp build/docker/Docker.Rprofile $HOME/.Rprofile + cp /home/app/.bashrc $HOME/.bashrc 
+ shell: bash + - name: Install local R packages + run: Rscript build/local_install.R + shell: bash + - name: Run flepicommon tests + run: | + setwd("flepimop/R_packages/flepicommon") + devtools::test(stop_on_failure=TRUE) + shell: Rscript {0} diff --git a/.github/workflows/gempyor-ci.yml b/.github/workflows/gempyor-ci.yml new file mode 100644 index 000000000..d9c9b1133 --- /dev/null +++ b/.github/workflows/gempyor-ci.yml @@ -0,0 +1,45 @@ +name: gempyor-ci + +on: + workflow_dispatch: + push: + paths: + - examples/**/* + - flepimop/gempyor_pkg/**/* + branches: + - main + - dev + pull_request: + branches: + - main + - dev + - breaking-improvements + +jobs: + unit-tests: + runs-on: ubuntu-latest + container: + image: hopkinsidd/flepimop:latest-dev + options: --user root + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + lfs: true + - name: Install the gempyor package + run: | + source /var/python/3.10/virtualenv/bin/activate + python -m pip install --upgrade pip + python -m pip install "flepimop/gempyor_pkg[test]" + shell: bash + - name: Run gempyor tests + run: | + source /var/python/3.10/virtualenv/bin/activate + cd flepimop/gempyor_pkg + pytest --exitfirst + shell: bash + - name: Run gempyor-cli integration tests from examples + run: | + source /var/python/3.10/virtualenv/bin/activate + cd examples + pytest --exitfirst diff --git a/.github/workflows/inference-ci.yml b/.github/workflows/inference-ci.yml new file mode 100644 index 000000000..f04e34594 --- /dev/null +++ b/.github/workflows/inference-ci.yml @@ -0,0 +1,40 @@ +name: inference-ci + +on: + workflow_dispatch: + push: + paths: + - flepimop/R_packages/inference/**/* + branches: + - main + - dev + pull_request: + branches: + - main + - dev + - breaking-improvements + +jobs: + unit-tests: + runs-on: ubuntu-latest + container: + image: hopkinsidd/flepimop:latest-dev + options: --user root + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + lfs: true + - name: Set up Rprofile + run: | + cp build/docker/Docker.Rprofile $HOME/.Rprofile + cp /home/app/.bashrc $HOME/.bashrc + shell: bash + - name: Install local R packages + run: Rscript build/local_install.R + shell: bash + - name: Run inference tests + run: | + setwd("inference/R_packages/inference") + devtools::test(stop_on_failure=TRUE) + shell: Rscript {0} From e3adcbab3416a4aaf98fc24eb63564f78d8f4a1b Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Fri, 2 Aug 2024 14:44:43 -0400 Subject: [PATCH 02/58] Correct working dir in `inference-ci.yml` Typo in `setwd` call causes error about not being able to change to directory that doesn't exist. --- .github/workflows/inference-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/inference-ci.yml b/.github/workflows/inference-ci.yml index f04e34594..ed6698dde 100644 --- a/.github/workflows/inference-ci.yml +++ b/.github/workflows/inference-ci.yml @@ -35,6 +35,6 @@ jobs: shell: bash - name: Run inference tests run: | - setwd("inference/R_packages/inference") + setwd("flepimop/R_packages/inference") devtools::test(stop_on_failure=TRUE) shell: Rscript {0} From 240b25773a1205400e836811df9cbe7a84a286c3 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Fri, 2 Aug 2024 14:52:17 -0400 Subject: [PATCH 03/58] Set `gempyor` integration tests shell Set the shell to bash so the `source` function is available. 
--- .github/workflows/gempyor-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/gempyor-ci.yml b/.github/workflows/gempyor-ci.yml index d9c9b1133..ce26a5750 100644 --- a/.github/workflows/gempyor-ci.yml +++ b/.github/workflows/gempyor-ci.yml @@ -43,3 +43,4 @@ jobs: source /var/python/3.10/virtualenv/bin/activate cd examples pytest --exitfirst + shell: bash From e49109acdd6cfa99fdadaa260c34bfaee6bd6ea9 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Fri, 2 Aug 2024 14:58:29 -0400 Subject: [PATCH 04/58] Limit paths for PRs in actions Add the same path related limits from the on push to on pull_requests as well. --- .github/workflows/flepicommon-ci.yml | 2 ++ .github/workflows/gempyor-ci.yml | 3 +++ .github/workflows/inference-ci.yml | 2 ++ 3 files changed, 7 insertions(+) diff --git a/.github/workflows/flepicommon-ci.yml b/.github/workflows/flepicommon-ci.yml index b7c0cc5f3..da1f07ba6 100644 --- a/.github/workflows/flepicommon-ci.yml +++ b/.github/workflows/flepicommon-ci.yml @@ -9,6 +9,8 @@ on: - main - dev pull_request: + paths: + - flepimop/R_packages/flepicommon/**/* branches: - main - dev diff --git a/.github/workflows/gempyor-ci.yml b/.github/workflows/gempyor-ci.yml index ce26a5750..4f93d7250 100644 --- a/.github/workflows/gempyor-ci.yml +++ b/.github/workflows/gempyor-ci.yml @@ -10,6 +10,9 @@ on: - main - dev pull_request: + paths: + - examples/**/* + - flepimop/gempyor_pkg/**/* branches: - main - dev diff --git a/.github/workflows/inference-ci.yml b/.github/workflows/inference-ci.yml index ed6698dde..c708b8a4b 100644 --- a/.github/workflows/inference-ci.yml +++ b/.github/workflows/inference-ci.yml @@ -9,6 +9,8 @@ on: - main - dev pull_request: + paths: + - flepimop/R_packages/inference/**/* branches: - main - dev From 49475afa8627b49adf5fe36275b2aaf34d11fc4d Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Tue, 6 Aug 2024 17:07:29 -0400 Subject: [PATCH 05/58] Remove 'breaking-improvements' branch Removed the `breaking-improvements` branch from special consideration in GitHub actions. --- .github/workflows/flepicommon-ci.yml | 1 - .github/workflows/gempyor-ci.yml | 1 - .github/workflows/inference-ci.yml | 1 - 3 files changed, 3 deletions(-) diff --git a/.github/workflows/flepicommon-ci.yml b/.github/workflows/flepicommon-ci.yml index da1f07ba6..5314c1b4f 100644 --- a/.github/workflows/flepicommon-ci.yml +++ b/.github/workflows/flepicommon-ci.yml @@ -14,7 +14,6 @@ on: branches: - main - dev - - breaking-improvements jobs: unit-tests: diff --git a/.github/workflows/gempyor-ci.yml b/.github/workflows/gempyor-ci.yml index 4f93d7250..a2cb6e313 100644 --- a/.github/workflows/gempyor-ci.yml +++ b/.github/workflows/gempyor-ci.yml @@ -16,7 +16,6 @@ on: branches: - main - dev - - breaking-improvements jobs: unit-tests: diff --git a/.github/workflows/inference-ci.yml b/.github/workflows/inference-ci.yml index c708b8a4b..2ca3d4897 100644 --- a/.github/workflows/inference-ci.yml +++ b/.github/workflows/inference-ci.yml @@ -14,7 +14,6 @@ on: branches: - main - dev - - breaking-improvements jobs: unit-tests: From c2f423e737492ae1781653abcd7b828cd1851d71 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Mon, 19 Aug 2024 12:20:07 -0400 Subject: [PATCH 06/58] Draft documentation for `gempyor.statistics` * Wrote draft documentation for the `statistics` module in Google style guide. 
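For reference, the Google style named here structures each docstring as a one-line summary followed by Args/Returns/Raises sections. A minimal, self-contained illustration of the layout (not code from the module itself):

    def scale_counts(counts: list[int], factor: float) -> list[float]:
        """
        Scale a list of counts by a constant factor.

        Args:
            counts: Raw counts to rescale.
            factor: Multiplier applied to every count.

        Returns:
            A new list with each count multiplied by `factor`.

        Raises:
            ValueError: If `factor` is negative.
        """
        if factor < 0:
            raise ValueError("factor must be non-negative")
        return [factor * c for c in counts]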
--- .../gempyor_pkg/src/gempyor/statistics.py | 157 +++++++++++++++--- 1 file changed, 135 insertions(+), 22 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/statistics.py b/flepimop/gempyor_pkg/src/gempyor/statistics.py index ea2cc72a3..7591a91fc 100644 --- a/flepimop/gempyor_pkg/src/gempyor/statistics.py +++ b/flepimop/gempyor_pkg/src/gempyor/statistics.py @@ -1,29 +1,58 @@ -import xarray as xr -import pandas as pd -import numpy as np +""" +Abstractions for interacting with output statistic configurations. + +This module provides the `Statistic` class which represents a entry in the inference -> +statistics section. +""" + +__all__ = ["Statistic"] + + import confuse +import numpy as np import scipy.stats +import xarray as xr class Statistic: """ - A statistic is a function that takes two time series and returns a scalar value. - It applies resample, scale, and regularization to the data before computing the statistic's log-loss. - Configuration: - - sim_var: the variable in the simulation data - - data_var: the variable in the ground truth data - - resample: resample the data before computing the statistic - - freq: the frequency to resample the data to - - aggregator: the aggregation function to use - - skipna: whether to skip NA values - - regularize: apply a regularization term to the data before computing the statistic - - # SkipNA is False by default, which results in NA values broadcasting when resampling (e.g a NA withing a sum makes the whole sum a NA) - # if True, then NA are replaced with 0 (for sum), 1 for product, ... - # In doubt, plot stat.plot_transformed() to see the effect of the resampling + Encapsulates logic for representing/implementing output statistic configurations. + + A statistic is a function that takes two time series and returns a scalar value. It + applies resample, scale, and regularization to the data before computing the + statistic's log-loss. + + Attributes: + data_var: The variable in the ground truth data. + dist: The name of the distribution to use for calculating log-likelihood. + name: The human readable name for the statistic given during instantiation. + params: Distribution parameters used in the log-likelihood calculation and + dependent on `dist`. + regularizations: Regularization functions that are added to the log loss of this + statistic. + resample: If the data should be resampled before computing the statistic. + resample_aggregator_name: The name of the aggregation function to use. + resample_freq: The frequency to resample the data to if the `resample` attribute + is `True`. + resample_skipna: If NAs should be skipped when aggregating. `False` by default. + scale: If the data should be rescaled before computing the statistic. + scale_func: The function to use when rescaling the data. Can be any function + exported by `numpy`. + sim_var: The variable in the simulation data. + zero_to_one: Should non-zero values be coerced to 1 when calculating + log-likelihood. """ - - def __init__(self, name, statistic_config: confuse.ConfigView): + + def __init__(self, name: str, statistic_config: confuse.ConfigView) -> None: + """ + Create an `Statistic` instance from a confuse config view. + + Args: + name: A human readable name for the statistic, mostly used for error + messages. + statistic_config: A confuse configuration view object describing an output + statistic. 
+ """ self.sim_var = statistic_config["sim_var"].as_str() self.data_var = statistic_config["data_var"].as_str() self.name = name @@ -62,12 +91,28 @@ def __init__(self, name, statistic_config: confuse.ConfigView): self.params = {} self.zero_to_one = False - # TODO: this should be set_zeros_to and only do it for the probabilily + # TODO: this should be set_zeros_to and only do it for the probability if statistic_config["zero_to_one"].exists(): self.zero_to_one = statistic_config["zero_to_one"].get() def _forecast_regularize(self, model_data, gt_data, **kwargs): - # scale the data so that the lastest X items are more important + """ + Regularization function to add weight to more recent forecasts. + + Args: + model_data: An xarray Dataset of the model data with date and subpop + dimensions. + gt_data: An xarray Dataset of the ground truth data with date and subpop + dimensions. + **kwargs: Optional keyword arguments that influence regularization. + Currently uses `last_n` for the number of observations to up weight and + `mult` for the coefficient of the regularization value. + + Returns: + The log-likelihood of the `last_n` observation up weighted by a factor of + `mult`. + """ + # scale the data so that the latest X items are more important last_n = kwargs.get("last_n", 4) mult = kwargs.get("mult", 2) @@ -76,7 +121,20 @@ def _forecast_regularize(self, model_data, gt_data, **kwargs): return mult * last_n_llik.sum().sum().values def _allsubpop_regularize(self, model_data, gt_data, **kwargs): - """add a regularization term that is the sum of all subpopulations""" + """ + Regularization function to add the sum of all subpopulations. + + Args: + model_data: An xarray Dataset of the model data with date and subpop + dimensions. + gt_data: An xarray Dataset of the ground truth data with date and subpop + dimensions. + **kwargs: Optional keyword arguments that influence regularization. + Currently uses `mult` for the coefficient of the regularization value. + + Returns: + The sum of the subpopulations multiplied by `mult`. + """ mult = kwargs.get("mult", 1) llik_total = self.llik(model_data.sum("subpop"), gt_data.sum("subpop")) return mult * llik_total.sum().sum().values @@ -88,6 +146,15 @@ def __repr__(self) -> str: return f"A Statistic(): {self.__str__()}" def apply_resample(self, data): + """ + Resample a data set to the given frequency using the specified aggregation. + + Args: + data: An xarray dataset with "date" and "subpop" dimensions. + + Returns: + A resample dataset with similar dimensions to `data`. + """ if self.resample: aggregator_method = getattr(data.resample(date=self.resample_freq), self.resample_aggregator_name) return aggregator_method(skipna=self.resample_skipna) @@ -95,16 +162,49 @@ def apply_resample(self, data): return data def apply_scale(self, data): + """ + Scale a data set using the specified scaling function. + + Args: + data: An xarray dataset with "date" and "subpop" dimensions. + + Returns: + An xarray dataset of the same shape and dimensions as `data` with the + `scale_func` attribute applied. + """ if self.scale: return self.scale_func(data) else: return data def apply_transforms(self, data): + """ + Convenient wrapper for resampling and scaling a data set. + + The resampling is applied *before* scaling which can affect the log-likelihood. + + Args: + data: An xarray dataset with "date" and "subpop" dimensions. + + Returns: + An scaled and resampled dataset with similar dimensions to `data`. 
+ """ data_scaled_resampled = self.apply_scale(self.apply_resample(data)) return data_scaled_resampled def llik(self, model_data: xr.DataArray, gt_data: xr.DataArray): + """ + Compute the log-likelihood of observing the ground truth given model output. + + Args: + model_data: An xarray Dataset of the model data with date and subpop + dimensions. + gt_data: An xarray Dataset of the ground truth data with date and subpop + dimensions. + + Returns: + The log-likelihood of observing `gt_data` from the model `model_data`. + """ dist_map = { "pois": scipy.stats.poisson.logpmf, "norm": lambda x, loc, scale: scipy.stats.norm.logpdf( @@ -137,6 +237,19 @@ def llik(self, model_data: xr.DataArray, gt_data: xr.DataArray): return likelihood def compute_logloss(self, model_data, gt_data): + """ + Compute the logistic loss of observing the ground truth given model output. + + Args: + model_data: An xarray Dataset of the model data with date and subpop + dimensions. + gt_data: An xarray Dataset of the ground truth data with date and subpop + dimensions. + + Returns: + The logistic loss of observing `gt_data` from the model `model_data` + decomposed into the log-likelihood and regularizations. + """ model_data = self.apply_transforms(model_data[self.sim_var]) gt_data = self.apply_transforms(gt_data[self.data_var]) From cc3a691e55eeba0ef78088a128853997933aecee Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Mon, 19 Aug 2024 16:52:34 -0400 Subject: [PATCH 07/58] Applied black formatter --- .../gempyor_pkg/src/gempyor/statistics.py | 84 +++++++++++-------- 1 file changed, 48 insertions(+), 36 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/statistics.py b/flepimop/gempyor_pkg/src/gempyor/statistics.py index 7591a91fc..2ed7d496a 100644 --- a/flepimop/gempyor_pkg/src/gempyor/statistics.py +++ b/flepimop/gempyor_pkg/src/gempyor/statistics.py @@ -17,11 +17,11 @@ class Statistic: """ Encapsulates logic for representing/implementing output statistic configurations. - - A statistic is a function that takes two time series and returns a scalar value. It - applies resample, scale, and regularization to the data before computing the + + A statistic is a function that takes two time series and returns a scalar value. It + applies resample, scale, and regularization to the data before computing the statistic's log-loss. - + Attributes: data_var: The variable in the ground truth data. dist: The name of the distribution to use for calculating log-likelihood. @@ -36,19 +36,19 @@ class Statistic: is `True`. resample_skipna: If NAs should be skipped when aggregating. `False` by default. scale: If the data should be rescaled before computing the statistic. - scale_func: The function to use when rescaling the data. Can be any function + scale_func: The function to use when rescaling the data. Can be any function exported by `numpy`. sim_var: The variable in the simulation data. - zero_to_one: Should non-zero values be coerced to 1 when calculating + zero_to_one: Should non-zero values be coerced to 1 when calculating log-likelihood. """ - + def __init__(self, name: str, statistic_config: confuse.ConfigView) -> None: """ Create an `Statistic` instance from a confuse config view. - + Args: - name: A human readable name for the statistic, mostly used for error + name: A human readable name for the statistic, mostly used for error messages. statistic_config: A confuse configuration view object describing an output statistic. 
@@ -77,7 +77,10 @@ def __init__(self, name: str, statistic_config: confuse.ConfigView) -> None: if resample_config["aggregator"].exists(): self.resample_aggregator_name = resample_config["aggregator"].get() self.resample_skipna = False # TODO - if resample_config["aggregator"].exists() and resample_config["skipna"].exists(): + if ( + resample_config["aggregator"].exists() + and resample_config["skipna"].exists() + ): self.resample_skipna = resample_config["skipna"].get() self.scale = False @@ -98,42 +101,45 @@ def __init__(self, name: str, statistic_config: confuse.ConfigView) -> None: def _forecast_regularize(self, model_data, gt_data, **kwargs): """ Regularization function to add weight to more recent forecasts. - + Args: model_data: An xarray Dataset of the model data with date and subpop dimensions. gt_data: An xarray Dataset of the ground truth data with date and subpop dimensions. - **kwargs: Optional keyword arguments that influence regularization. + **kwargs: Optional keyword arguments that influence regularization. Currently uses `last_n` for the number of observations to up weight and - `mult` for the coefficient of the regularization value. - + `mult` for the coefficient of the regularization value. + Returns: - The log-likelihood of the `last_n` observation up weighted by a factor of + The log-likelihood of the `last_n` observation up weighted by a factor of `mult`. """ # scale the data so that the latest X items are more important last_n = kwargs.get("last_n", 4) mult = kwargs.get("mult", 2) - last_n_llik = self.llik(model_data.isel(date=slice(-last_n, None)), gt_data.isel(date=slice(-last_n, None))) + last_n_llik = self.llik( + model_data.isel(date=slice(-last_n, None)), + gt_data.isel(date=slice(-last_n, None)), + ) return mult * last_n_llik.sum().sum().values def _allsubpop_regularize(self, model_data, gt_data, **kwargs): """ Regularization function to add the sum of all subpopulations. - + Args: model_data: An xarray Dataset of the model data with date and subpop dimensions. gt_data: An xarray Dataset of the ground truth data with date and subpop dimensions. - **kwargs: Optional keyword arguments that influence regularization. + **kwargs: Optional keyword arguments that influence regularization. Currently uses `mult` for the coefficient of the regularization value. - + Returns: - The sum of the subpopulations multiplied by `mult`. + The sum of the subpopulations multiplied by `mult`. """ mult = kwargs.get("mult", 1) llik_total = self.llik(model_data.sum("subpop"), gt_data.sum("subpop")) @@ -148,15 +154,17 @@ def __repr__(self) -> str: def apply_resample(self, data): """ Resample a data set to the given frequency using the specified aggregation. - + Args: data: An xarray dataset with "date" and "subpop" dimensions. - + Returns: A resample dataset with similar dimensions to `data`. """ if self.resample: - aggregator_method = getattr(data.resample(date=self.resample_freq), self.resample_aggregator_name) + aggregator_method = getattr( + data.resample(date=self.resample_freq), self.resample_aggregator_name + ) return aggregator_method(skipna=self.resample_skipna) else: return data @@ -164,12 +172,12 @@ def apply_resample(self, data): def apply_scale(self, data): """ Scale a data set using the specified scaling function. - + Args: data: An xarray dataset with "date" and "subpop" dimensions. - + Returns: - An xarray dataset of the same shape and dimensions as `data` with the + An xarray dataset of the same shape and dimensions as `data` with the `scale_func` attribute applied. 
""" if self.scale: @@ -180,12 +188,12 @@ def apply_scale(self, data): def apply_transforms(self, data): """ Convenient wrapper for resampling and scaling a data set. - + The resampling is applied *before* scaling which can affect the log-likelihood. - + Args: data: An xarray dataset with "date" and "subpop" dimensions. - + Returns: An scaled and resampled dataset with similar dimensions to `data`. """ @@ -195,13 +203,13 @@ def apply_transforms(self, data): def llik(self, model_data: xr.DataArray, gt_data: xr.DataArray): """ Compute the log-likelihood of observing the ground truth given model output. - + Args: model_data: An xarray Dataset of the model data with date and subpop dimensions. gt_data: An xarray Dataset of the ground truth data with date and subpop dimensions. - + Returns: The log-likelihood of observing `gt_data` from the model `model_data`. """ @@ -213,7 +221,9 @@ def llik(self, model_data: xr.DataArray, gt_data: xr.DataArray): "norm_cov": lambda x, loc, scale: scipy.stats.norm.logpdf( x, loc=loc, scale=scale * loc.where(loc > 5, 5) ), # TODO: check, that it's really the loc - "nbinom": lambda x, n, p: scipy.stats.nbinom.logpmf(x, n=self.params.get("n"), p=model_data), + "nbinom": lambda x, n, p: scipy.stats.nbinom.logpmf( + x, n=self.params.get("n"), p=model_data + ), "rmse": lambda x, y: -np.log(np.nansum(np.sqrt((x - y) ** 2))), "absolute_error": lambda x, y: -np.log(np.nansum(np.abs(x - y))), } @@ -239,15 +249,15 @@ def llik(self, model_data: xr.DataArray, gt_data: xr.DataArray): def compute_logloss(self, model_data, gt_data): """ Compute the logistic loss of observing the ground truth given model output. - + Args: model_data: An xarray Dataset of the model data with date and subpop dimensions. gt_data: An xarray Dataset of the ground truth data with date and subpop dimensions. - + Returns: - The logistic loss of observing `gt_data` from the model `model_data` + The logistic loss of observing `gt_data` from the model `model_data` decomposed into the log-likelihood and regularizations. """ model_data = self.apply_transforms(model_data[self.sim_var]) @@ -260,6 +270,8 @@ def compute_logloss(self, model_data, gt_data): regularization = 0 for reg_func, reg_config in self.regularizations: - regularization += reg_func(model_data=model_data, gt_data=gt_data, **reg_config) # Pass config parameters + regularization += reg_func( + model_data=model_data, gt_data=gt_data, **reg_config + ) # Pass config parameters return self.llik(model_data, gt_data).sum("date"), regularization From 867924a29a1c6701b8b9cbdcb127a6a1b924351d Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Tue, 20 Aug 2024 10:24:05 -0400 Subject: [PATCH 08/58] Type annotations, black formatter * Added missing type annotations and corrected already existing ones. * Applied black formatter to the file, including manually correcting some line-length issues. * Rearranged dunder methods. 
--- .../gempyor_pkg/src/gempyor/statistics.py | 47 +++++++++++++------ 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/statistics.py b/flepimop/gempyor_pkg/src/gempyor/statistics.py index 2ed7d496a..1f52516e4 100644 --- a/flepimop/gempyor_pkg/src/gempyor/statistics.py +++ b/flepimop/gempyor_pkg/src/gempyor/statistics.py @@ -98,7 +98,21 @@ def __init__(self, name: str, statistic_config: confuse.ConfigView) -> None: if statistic_config["zero_to_one"].exists(): self.zero_to_one = statistic_config["zero_to_one"].get() - def _forecast_regularize(self, model_data, gt_data, **kwargs): + def __str__(self) -> str: + return ( + f"{self.name}: {self.dist} between {self.sim_var} " + f"(sim) and {self.data_var} (data)." + ) + + def __repr__(self) -> str: + return f"A Statistic(): {self.__str__()}" + + def _forecast_regularize( + self, + model_data: xr.DataArray, + gt_data: xr.DataArray, + **kwargs: dict[str, int | float], + ) -> float: """ Regularization function to add weight to more recent forecasts. @@ -126,7 +140,12 @@ def _forecast_regularize(self, model_data, gt_data, **kwargs): return mult * last_n_llik.sum().sum().values - def _allsubpop_regularize(self, model_data, gt_data, **kwargs): + def _allsubpop_regularize( + self, + model_data: xr.DataArray, + gt_data: xr.DataArray, + **kwargs: dict[str, int | float], + ) -> float: """ Regularization function to add the sum of all subpopulations. @@ -145,13 +164,7 @@ def _allsubpop_regularize(self, model_data, gt_data, **kwargs): llik_total = self.llik(model_data.sum("subpop"), gt_data.sum("subpop")) return mult * llik_total.sum().sum().values - def __str__(self) -> str: - return f"{self.name}: {self.dist} between {self.sim_var} (sim) and {self.data_var} (data)." - - def __repr__(self) -> str: - return f"A Statistic(): {self.__str__()}" - - def apply_resample(self, data): + def apply_resample(self, data: xr.DataArray) -> xr.DataArray: """ Resample a data set to the given frequency using the specified aggregation. @@ -169,7 +182,7 @@ def apply_resample(self, data): else: return data - def apply_scale(self, data): + def apply_scale(self, data: xr.DataArray) -> xr.DataArray: """ Scale a data set using the specified scaling function. @@ -185,7 +198,7 @@ def apply_scale(self, data): else: return data - def apply_transforms(self, data): + def apply_transforms(self, data: xr.DataArray): """ Convenient wrapper for resampling and scaling a data set. @@ -200,7 +213,7 @@ def apply_transforms(self, data): data_scaled_resampled = self.apply_scale(self.apply_resample(data)) return data_scaled_resampled - def llik(self, model_data: xr.DataArray, gt_data: xr.DataArray): + def llik(self, model_data: xr.DataArray, gt_data: xr.DataArray) -> float: """ Compute the log-likelihood of observing the ground truth given model output. @@ -246,7 +259,9 @@ def llik(self, model_data: xr.DataArray, gt_data: xr.DataArray): # TODO: check the order of the arguments return likelihood - def compute_logloss(self, model_data, gt_data): + def compute_logloss( + self, model_data: xr.DataArray, gt_data: xr.DataArray + ) -> tuple[float, float]: """ Compute the logistic loss of observing the ground truth given model output. 
@@ -265,7 +280,11 @@ def compute_logloss(self, model_data, gt_data): if not model_data.shape == gt_data.shape: raise ValueError( - f"{self.name} Statistic error: data and groundtruth do not have the same shape: model_data.shape={model_data.shape} != gt_data.shape={gt_data.shape}" + ( + f"{self.name} Statistic error: data and groundtruth do not have " + f"the same shape: model_data.shape={model_data.shape} != " + f"gt_data.shape={gt_data.shape}" + ) ) regularization = 0 From 8fb81cdce36118124ed3b8d5d5b2d21ab66067e5 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Tue, 20 Aug 2024 13:22:34 -0400 Subject: [PATCH 09/58] Initial unit test infra for `Statistic` class * Created initial unit testing infrastructure for the `Statistic` class from `gempyor.statistics`, starting with invalid regularization name value error. * Added default to `getattr` call to make unsupported regularization value error reachable. Should obsolete with better documentation. --- .../gempyor_pkg/src/gempyor/statistics.py | 2 +- .../tests/statistics/test_statistic_class.py | 72 +++++++++++++++++++ 2 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py diff --git a/flepimop/gempyor_pkg/src/gempyor/statistics.py b/flepimop/gempyor_pkg/src/gempyor/statistics.py index 1f52516e4..53e2241ca 100644 --- a/flepimop/gempyor_pkg/src/gempyor/statistics.py +++ b/flepimop/gempyor_pkg/src/gempyor/statistics.py @@ -61,7 +61,7 @@ def __init__(self, name: str, statistic_config: confuse.ConfigView) -> None: if statistic_config["regularize"].exists(): for reg_config in statistic_config["regularize"]: # Iterate over the list reg_name = reg_config["name"].get() - reg_func = getattr(self, f"_{reg_name}_regularize") + reg_func = getattr(self, f"_{reg_name}_regularize", None) if reg_func is None: raise ValueError(f"Unsupported regularization: {reg_name}") self.regularizations.append((reg_func, reg_config.get())) diff --git a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py new file mode 100644 index 000000000..9347cd936 --- /dev/null +++ b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py @@ -0,0 +1,72 @@ +import pathlib +from typing import Any, Callable + +import confuse +import pytest +import xarray as xr + +from gempyor.statistics import Statistic +from gempyor.testing import create_confuse_configview_from_dict + + +class MockStatisticInput: + def __init__( + self, + name: str, + config: dict[str, Any], + model_data: xr.DataArray | None = None, + gt_data: xr.DataArray | None = None, + ) -> None: + self.name = name + self.config = config + self.model_data = model_data + self.gt_data = gt_data + self._confuse_subview = None + + def create_confuse_subview(self) -> confuse.Subview: + if self._confuse_subview is None: + self._confuse_subview = create_confuse_configview_from_dict( + self.config, name=self.name + ) + return self._confuse_subview + + def create_statistic_instance(self) -> Statistic: + return Statistic(self.name, self.create_confuse_subview()) + + +def invalid_regularization_factory(tmp_path: pathlib.Path) -> MockStatisticInput: + return MockStatisticInput( + "total_hospitalizations", + { + "name": "sum_hospitalizations", + "aggregator": "sum", + "period": "1 months", + "sim_var": "incidH", + "data_var": "incidH", + "remove_na": True, + "add_one": True, + "likelihood": {"dist": "pois"}, + "regularize": [{"name": 
"forecast"}, {"name": "invalid"}], + }, + ) + + +class TestStatistic: + @pytest.mark.parametrize("factory", [(invalid_regularization_factory)]) + def test_unsupported_regularizations_value_error( + self, + tmp_path: pathlib.Path, + factory: Callable[[pathlib.Path], MockStatisticInput], + ) -> None: + mock_inputs = factory(tmp_path) + unsupported_name = next( + reg_name + for reg_name in [ + reg["name"] for reg in mock_inputs.config.get("regularize", []) + ] + if reg_name not in ["forecast", "allsubpop"] + ) + with pytest.raises( + ValueError, match=rf"^Unsupported regularization\: {unsupported_name}$" + ): + mock_inputs.create_statistic_instance() From b27ec5269160d88afded4e6ac0bc3917ba2d0680 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Tue, 20 Aug 2024 14:38:26 -0400 Subject: [PATCH 10/58] Add `Statistic` attributes test fixture * Added a test fixture to test the attributes of the `Statistic` class. * Removed unnecessary `tmp_path` pytest fixture dependency. * Improved documentation on the `Statistic` class' attributes and added a raises section for the constructor. --- .../gempyor_pkg/src/gempyor/statistics.py | 19 ++++- .../tests/statistics/test_statistic_class.py | 82 +++++++++++++++++-- 2 files changed, 92 insertions(+), 9 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/statistics.py b/flepimop/gempyor_pkg/src/gempyor/statistics.py index 53e2241ca..461b5dd41 100644 --- a/flepimop/gempyor_pkg/src/gempyor/statistics.py +++ b/flepimop/gempyor_pkg/src/gempyor/statistics.py @@ -31,14 +31,20 @@ class Statistic: regularizations: Regularization functions that are added to the log loss of this statistic. resample: If the data should be resampled before computing the statistic. - resample_aggregator_name: The name of the aggregation function to use. + Defaults to `False`. + resample_aggregator_name: The name of the aggregation function to use. This + attribute is not set when a "resample" section is not defined in the + `statistic_config` arg. resample_freq: The frequency to resample the data to if the `resample` attribute - is `True`. + is `True`. This attribute is not set when a "resample" section is not + defined in the `statistic_config` arg. resample_skipna: If NAs should be skipped when aggregating. `False` by default. + This attribute is not set when a "resample" section is not defined in the + `statistic_config` arg. scale: If the data should be rescaled before computing the statistic. scale_func: The function to use when rescaling the data. Can be any function - exported by `numpy`. - sim_var: The variable in the simulation data. + exported by `numpy`. This attribute is not set when a "scale" value is not + defined in the `statistic_config` arg. zero_to_one: Should non-zero values be coerced to 1 when calculating log-likelihood. """ @@ -52,6 +58,11 @@ def __init__(self, name: str, statistic_config: confuse.ConfigView) -> None: messages. statistic_config: A confuse configuration view object describing an output statistic. + + Raises: + ValueError: If an unsupported regularization name is provided via the + `statistic_config` arg. Currently only 'forecast' and 'allsubpop' are + supported. 
""" self.sim_var = statistic_config["sim_var"].as_str() self.data_var = statistic_config["data_var"].as_str() diff --git a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py index 9347cd936..94a129cf0 100644 --- a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py +++ b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py @@ -34,7 +34,7 @@ def create_statistic_instance(self) -> Statistic: return Statistic(self.name, self.create_confuse_subview()) -def invalid_regularization_factory(tmp_path: pathlib.Path) -> MockStatisticInput: +def invalid_regularization_factory() -> MockStatisticInput: return MockStatisticInput( "total_hospitalizations", { @@ -51,14 +51,28 @@ def invalid_regularization_factory(tmp_path: pathlib.Path) -> MockStatisticInput ) +def simple_valid_factory() -> MockStatisticInput: + return MockStatisticInput( + "total_hospitalizations", + { + "name": "sum_hospitalizations", + "aggregator": "sum", + "period": "1 months", + "sim_var": "incidH", + "data_var": "incidH", + "remove_na": True, + "add_one": True, + "likelihood": {"dist": "pois"}, + }, + ) + + class TestStatistic: @pytest.mark.parametrize("factory", [(invalid_regularization_factory)]) def test_unsupported_regularizations_value_error( - self, - tmp_path: pathlib.Path, - factory: Callable[[pathlib.Path], MockStatisticInput], + self, factory: Callable[[], MockStatisticInput] ) -> None: - mock_inputs = factory(tmp_path) + mock_inputs = factory() unsupported_name = next( reg_name for reg_name in [ @@ -70,3 +84,61 @@ def test_unsupported_regularizations_value_error( ValueError, match=rf"^Unsupported regularization\: {unsupported_name}$" ): mock_inputs.create_statistic_instance() + + @pytest.mark.parametrize("factory", [(simple_valid_factory)]) + def test_statistic_instance_attributes( + self, factory: Callable[[], MockStatisticInput] + ) -> None: + # Setup + mock_inputs = factory() + statistic = mock_inputs.create_statistic_instance() + + # `data_var` attribute + assert statistic.data_var == mock_inputs.config["data_var"] + + # `dist` attribute + assert statistic.dist == mock_inputs.config["likelihood"]["dist"] + + # `name` attribute + assert statistic.name == mock_inputs.name + + # `params` attribute + assert statistic.params == mock_inputs.config["likelihood"].get("params", {}) + + # `regularizations` attribute + assert statistic.regularizations == [ + (r["name"], r) for r in mock_inputs.config.get("regularize", []) + ] + + # `resample` attribute + resample_config = mock_inputs.config.get("resample", {}) + assert statistic.resample == (resample_config != {}) + + if resample_config: + # `resample_aggregator_name` attribute + assert statistic.resample_aggregator_name == resample_config.get( + "aggregator", "" + ) + + # `resample_freq` attribute + assert statistic.resample_freq == resample_config.get("freq", "") + + # `resample_skipna` attribute + assert ( + statistic.resample_skipna == resample_config.get("skipna", False) + if resample_config.get("aggregator") is not None + else False + ) + + # `scale` attribute + assert statistic.scale == (mock_inputs.config.get("scale") is not None) + + # `scale_func` attribute + if scale_func := mock_inputs.config.get("scale") is not None: + assert statistic.scale_func == scale_func + + # `sim_var` attribute + assert statistic.sim_var == mock_inputs.config["sim_var"] + + # `zero_to_one` attribute + assert statistic.zero_to_one == mock_inputs.config.get("zero_to_one", False) From 
f1b559c59a98866d144fec93b233da651b92de0d Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Tue, 20 Aug 2024 14:53:44 -0400 Subject: [PATCH 11/58] Add fixture for `str` and `repr` of `Statistic` Added a test fixture for the result of calling `str` and `repr` on an instance of the `Statistic` class. --- .../tests/statistics/test_statistic_class.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py index 94a129cf0..544d89f0d 100644 --- a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py +++ b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py @@ -142,3 +142,20 @@ def test_statistic_instance_attributes( # `zero_to_one` attribute assert statistic.zero_to_one == mock_inputs.config.get("zero_to_one", False) + + @pytest.mark.parametrize("factory", [(simple_valid_factory)]) + def test_statistic_str_and_repr( + self, factory: Callable[[], MockStatisticInput] + ) -> None: + # Setup + mock_inputs = factory() + statistic = mock_inputs.create_statistic_instance() + + # Tests + statistic_str = ( + f"{mock_inputs.name}: {mock_inputs.config['likelihood']['dist']} between " + f"{mock_inputs.config['sim_var']} (sim) and " + f"{mock_inputs.config['data_var']} (data)." + ) + assert str(statistic) == statistic_str + assert repr(statistic) == f"A Statistic(): {statistic_str}" From b8891be4f0e02539fe76559a61e00a0af214da28 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Tue, 20 Aug 2024 16:48:59 -0400 Subject: [PATCH 12/58] Corrected `llik` return type hint The return of `Statistic.llik` is actually an xarray DataArray instead of a float, but summed along the date dimension. --- flepimop/gempyor_pkg/src/gempyor/statistics.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/statistics.py b/flepimop/gempyor_pkg/src/gempyor/statistics.py index 461b5dd41..1659e6558 100644 --- a/flepimop/gempyor_pkg/src/gempyor/statistics.py +++ b/flepimop/gempyor_pkg/src/gempyor/statistics.py @@ -58,7 +58,7 @@ def __init__(self, name: str, statistic_config: confuse.ConfigView) -> None: messages. statistic_config: A confuse configuration view object describing an output statistic. - + Raises: ValueError: If an unsupported regularization name is provided via the `statistic_config` arg. Currently only 'forecast' and 'allsubpop' are @@ -224,7 +224,7 @@ def apply_transforms(self, data: xr.DataArray): data_scaled_resampled = self.apply_scale(self.apply_resample(data)) return data_scaled_resampled - def llik(self, model_data: xr.DataArray, gt_data: xr.DataArray) -> float: + def llik(self, model_data: xr.DataArray, gt_data: xr.DataArray) -> xr.DataArray: """ Compute the log-likelihood of observing the ground truth given model output. @@ -235,7 +235,8 @@ def llik(self, model_data: xr.DataArray, gt_data: xr.DataArray) -> float: dimensions. Returns: - The log-likelihood of observing `gt_data` from the model `model_data`. + The log-likelihood of observing `gt_data` from the model `model_data` as an + xarray DataArray with a "subpop" dimension. 
""" dist_map = { "pois": scipy.stats.poisson.logpmf, @@ -272,7 +273,7 @@ def llik(self, model_data: xr.DataArray, gt_data: xr.DataArray) -> float: def compute_logloss( self, model_data: xr.DataArray, gt_data: xr.DataArray - ) -> tuple[float, float]: + ) -> tuple[xr.DataArray, float]: """ Compute the logistic loss of observing the ground truth given model output. @@ -284,7 +285,8 @@ def compute_logloss( Returns: The logistic loss of observing `gt_data` from the model `model_data` - decomposed into the log-likelihood and regularizations. + decomposed into the log-likelihood along the "subpop" dimension and + regularizations. """ model_data = self.apply_transforms(model_data[self.sim_var]) gt_data = self.apply_transforms(gt_data[self.data_var]) From 239ced62e7a725f96919c9ace57e9b9a8c80ced7 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Wed, 21 Aug 2024 09:31:07 -0400 Subject: [PATCH 13/58] Move TODO comments to GH-300 In particular see https://github.com/HopkinsIDD/flepiMoP/issues/300#issuecomment-2302065476. --- flepimop/gempyor_pkg/src/gempyor/statistics.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/statistics.py b/flepimop/gempyor_pkg/src/gempyor/statistics.py index 1659e6558..aed46357e 100644 --- a/flepimop/gempyor_pkg/src/gempyor/statistics.py +++ b/flepimop/gempyor_pkg/src/gempyor/statistics.py @@ -87,7 +87,7 @@ def __init__(self, name: str, statistic_config: confuse.ConfigView) -> None: self.resample_aggregator = "" if resample_config["aggregator"].exists(): self.resample_aggregator_name = resample_config["aggregator"].get() - self.resample_skipna = False # TODO + self.resample_skipna = False if ( resample_config["aggregator"].exists() and resample_config["skipna"].exists() @@ -105,7 +105,6 @@ def __init__(self, name: str, statistic_config: confuse.ConfigView) -> None: self.params = {} self.zero_to_one = False - # TODO: this should be set_zeros_to and only do it for the probability if statistic_config["zero_to_one"].exists(): self.zero_to_one = statistic_config["zero_to_one"].get() @@ -245,7 +244,7 @@ def llik(self, model_data: xr.DataArray, gt_data: xr.DataArray) -> xr.DataArray: ), # wrong: "norm_cov": lambda x, loc, scale: scipy.stats.norm.logpdf( x, loc=loc, scale=scale * loc.where(loc > 5, 5) - ), # TODO: check, that it's really the loc + ), "nbinom": lambda x, n, p: scipy.stats.nbinom.logpmf( x, n=self.params.get("n"), p=model_data ), @@ -268,7 +267,6 @@ def llik(self, model_data: xr.DataArray, gt_data: xr.DataArray) -> xr.DataArray: likelihood = xr.DataArray(likelihood, coords=gt_data.coords, dims=gt_data.dims) - # TODO: check the order of the arguments return likelihood def compute_logloss( @@ -285,7 +283,7 @@ def compute_logloss( Returns: The logistic loss of observing `gt_data` from the model `model_data` - decomposed into the log-likelihood along the "subpop" dimension and + decomposed into the log-likelihood along the "subpop" dimension and regularizations. """ model_data = self.apply_transforms(model_data[self.sim_var]) From f5a9cb9c81439356cc7bcf14e474aa26fde75bca Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Wed, 21 Aug 2024 10:46:30 -0400 Subject: [PATCH 14/58] Initial tests for Statistic regularization Added the initial unit tests for the regularization methods, `_forecast_regularize` and `_allsubpop_regularize`, of the `Statistic` class. 
The tests are general and do not make claims about correctness for now. --- .../tests/statistics/test_statistic_class.py | 50 ++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py index 544d89f0d..ceb7ea87a 100644 --- a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py +++ b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py @@ -1,7 +1,9 @@ -import pathlib +from datetime import date from typing import Any, Callable import confuse +import numpy as np +import pandas as pd import pytest import xarray as xr @@ -52,6 +54,22 @@ def invalid_regularization_factory() -> MockStatisticInput: def simple_valid_factory() -> MockStatisticInput: + model_data = xr.DataArray( + data=np.random.randn(10, 3), + dims=("date", "subpop"), + coords={ + "date": pd.date_range(date(2024, 1, 1), date(2024, 1, 10)), + "subpop": ["01", "02", "03"], + }, + ) + gt_data = xr.DataArray( + data=np.random.randn(10, 3), + dims=("date", "subpop"), + coords={ + "date": pd.date_range(date(2024, 1, 1), date(2024, 1, 10)), + "subpop": ["01", "02", "03"], + }, + ) return MockStatisticInput( "total_hospitalizations", { @@ -64,6 +82,8 @@ def simple_valid_factory() -> MockStatisticInput: "add_one": True, "likelihood": {"dist": "pois"}, }, + model_data=model_data, + gt_data=gt_data, ) @@ -159,3 +179,31 @@ def test_statistic_str_and_repr( ) assert str(statistic) == statistic_str assert repr(statistic) == f"A Statistic(): {statistic_str}" + + @pytest.mark.parametrize("factory,last_n,mult", [(simple_valid_factory, 4, 2.0)]) + def test_forecast_regularize( + self, factory: Callable[[], MockStatisticInput], last_n: int, mult: int | float + ) -> None: + # Setup + mock_inputs = factory() + statistic = mock_inputs.create_statistic_instance() + + # Tests + forecast_regularization = statistic._forecast_regularize( + mock_inputs.model_data, mock_inputs.gt_data, last_n=last_n, mult=mult + ) + assert isinstance(forecast_regularization, float) + + @pytest.mark.parametrize("factory,mult", [(simple_valid_factory, 2.0)]) + def test_allsubpop_regularize( + self, factory: Callable[[], MockStatisticInput], mult: int | float + ) -> None: + # Setup + mock_inputs = factory() + statistic = mock_inputs.create_statistic_instance() + + # Tests + forecast_regularization = statistic._allsubpop_regularize( + mock_inputs.model_data, mock_inputs.gt_data, mult=mult + ) + assert isinstance(forecast_regularization, float) From 878c5392267aefffc58cde32d36ca4824f7ade81 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Wed, 21 Aug 2024 11:46:44 -0400 Subject: [PATCH 15/58] Unit tests for `Statistic.apply_resample` Added unit tests for the `apply_resample` method, including creating a new factory, `simple_valid_resample_factory`, which hits the "resample config present" of the code path. 
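The resample branch being exercised follows xarray's resample-then-aggregate pattern, the same shape as `Statistic.apply_resample`. A standalone sketch with made-up data (not the test code itself):

    import numpy as np
    import pandas as pd
    import xarray as xr

    # Daily values for two subpopulations over a leap year.
    data = xr.DataArray(
        np.random.randn(366, 2),
        dims=("date", "subpop"),
        coords={
            "date": pd.date_range("2024-01-01", "2024-12-31"),
            "subpop": ["01", "02"],
        },
    )
    # Bin dates by month start, then reduce each bin with the configured
    # aggregator name, as apply_resample does via getattr.
    monthly = getattr(data.resample(date="MS"), "sum")(skipna=False)
    print(monthly.sizes)  # 12 monthly bins x 2 subpops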
--- .../tests/statistics/test_statistic_class.py | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py index ceb7ea87a..3865a4657 100644 --- a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py +++ b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py @@ -87,6 +87,44 @@ def simple_valid_factory() -> MockStatisticInput: ) +def simple_valid_resample_factory() -> MockStatisticInput: + date_coords = pd.date_range(date(2024, 1, 1), date(2024, 12, 31)) + subpop_coords = ["01", "02", "03", "04"] + dim = (len(date_coords), len(subpop_coords)) + model_data = xr.DataArray( + data=np.random.randn(*dim), + dims=("date", "subpop"), + coords={ + "date": date_coords, + "subpop": subpop_coords, + }, + ) + gt_data = xr.DataArray( + data=np.random.randn(*dim), + dims=("date", "subpop"), + coords={ + "date": date_coords, + "subpop": subpop_coords, + }, + ) + return MockStatisticInput( + "total_hospitalizations", + { + "name": "sum_hospitalizations", + "aggregator": "sum", + "period": "1 months", + "sim_var": "incidH", + "data_var": "incidH", + "remove_na": True, + "add_one": True, + "likelihood": {"dist": "pois"}, + "resample": {"freq": "MS", "aggregator": "sum"}, + }, + model_data=model_data, + gt_data=gt_data, + ) + + class TestStatistic: @pytest.mark.parametrize("factory", [(invalid_regularization_factory)]) def test_unsupported_regularizations_value_error( @@ -207,3 +245,33 @@ def test_allsubpop_regularize( mock_inputs.model_data, mock_inputs.gt_data, mult=mult ) assert isinstance(forecast_regularization, float) + + @pytest.mark.parametrize( + "factory", [(simple_valid_factory), (simple_valid_resample_factory)] + ) + def test_apply_resample(self, factory: Callable[[], MockStatisticInput]) -> None: + # Setup + mock_inputs = factory() + statistic = mock_inputs.create_statistic_instance() + + # Tests + resampled_data = statistic.apply_resample(mock_inputs.model_data) + if resample_config := mock_inputs.config.get("resample", {}): + # Resample config + expected_resampled_data = mock_inputs.model_data.resample( + date=resample_config.get("freq", "") + ) + aggregation_func = getattr( + expected_resampled_data, resample_config.get("aggregator", "") + ) + expected_resampled_data = aggregation_func( + skipna=( + resample_config.get("skipna", False) + if resample_config.get("aggregator") is not None + else False + ) + ) + assert resampled_data.identical(expected_resampled_data) + else: + # No resample config, `apply_resample` returns our input + assert resampled_data.identical(mock_inputs.model_data) From 31368be36cd5aeec56f73b3668349d98b2e09d79 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Wed, 21 Aug 2024 13:35:18 -0400 Subject: [PATCH 16/58] Added unit tests for `Statistic.apply_scale` Added unit tests for `apply_scale` method including a new factory that produces an input set with a 'scale' config. Fixed a bug where the scale function was not applied even if provided. This is a *breaking* change, but doesn't affect currently existing test suite, need to see if this affects any currently existing config files. 
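Condensed illustration of the pre-fix behaviour (a toy class, not the real `Statistic`): the scale function was stored but the `scale` flag stayed `False`, so `apply_scale` always took the no-op branch.

    import numpy as np

    class BuggyScaler:
        def __init__(self, scale_name=None):
            self.scale = False
            if scale_name is not None:
                # The function was stored here, but self.scale was never set to True...
                self.scale_func = getattr(np, scale_name)

        def apply_scale(self, data):
            # ...so this always returned the data unchanged.
            return self.scale_func(data) if self.scale else data

    print(BuggyScaler("exp").apply_scale(np.array([0.0, 1.0])))  # still [0. 1.]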
--- .../gempyor_pkg/src/gempyor/statistics.py | 1 + .../tests/statistics/test_statistic_class.py | 56 +++++++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/flepimop/gempyor_pkg/src/gempyor/statistics.py b/flepimop/gempyor_pkg/src/gempyor/statistics.py index aed46357e..fd1dd1a43 100644 --- a/flepimop/gempyor_pkg/src/gempyor/statistics.py +++ b/flepimop/gempyor_pkg/src/gempyor/statistics.py @@ -96,6 +96,7 @@ def __init__(self, name: str, statistic_config: confuse.ConfigView) -> None: self.scale = False if statistic_config["scale"].exists(): + self.scale = True self.scale_func = getattr(np, statistic_config["scale"].get()) self.dist = statistic_config["likelihood"]["dist"].get() diff --git a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py index 3865a4657..8f51d1716 100644 --- a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py +++ b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py @@ -125,6 +125,44 @@ def simple_valid_resample_factory() -> MockStatisticInput: ) +def simple_valid_scale_factory() -> MockStatisticInput: + date_coords = pd.date_range(date(2024, 1, 1), date(2024, 12, 31)) + subpop_coords = ["01", "02", "03", "04"] + dim = (len(date_coords), len(subpop_coords)) + model_data = xr.DataArray( + data=np.random.randn(*dim), + dims=("date", "subpop"), + coords={ + "date": date_coords, + "subpop": subpop_coords, + }, + ) + gt_data = xr.DataArray( + data=np.random.randn(*dim), + dims=("date", "subpop"), + coords={ + "date": date_coords, + "subpop": subpop_coords, + }, + ) + return MockStatisticInput( + "total_hospitalizations", + { + "name": "sum_hospitalizations", + "aggregator": "sum", + "period": "1 months", + "sim_var": "incidH", + "data_var": "incidH", + "remove_na": True, + "add_one": True, + "likelihood": {"dist": "pois"}, + "scale": "exp", + }, + model_data=model_data, + gt_data=gt_data, + ) + + class TestStatistic: @pytest.mark.parametrize("factory", [(invalid_regularization_factory)]) def test_unsupported_regularizations_value_error( @@ -275,3 +313,21 @@ def test_apply_resample(self, factory: Callable[[], MockStatisticInput]) -> None else: # No resample config, `apply_resample` returns our input assert resampled_data.identical(mock_inputs.model_data) + + @pytest.mark.parametrize( + "factory", [(simple_valid_factory), (simple_valid_scale_factory)] + ) + def test_apply_scale(self, factory: Callable[[], MockStatisticInput]) -> None: + # Setup + mock_inputs = factory() + statistic = mock_inputs.create_statistic_instance() + + # Tests + scaled_data = statistic.apply_scale(mock_inputs.model_data) + if (scale_func := mock_inputs.config.get("scale")) is not None: + # Scale config + expected_scaled_data = getattr(np, scale_func)(mock_inputs.model_data) + assert scaled_data.identical(expected_scaled_data) + else: + # No scale config, `apply_scale` is a no-op + assert scaled_data.identical(mock_inputs.model_data) From 84f36325b6639ce17d3e4223c7f7db34c53f275a Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Wed, 21 Aug 2024 15:42:20 -0400 Subject: [PATCH 17/58] Added unit test for `Statistic.apply_transforms` Added unit tests for the `apply_transforms` method of `Statistic`, including making a new factory that includes both resampling and scaling configuration. 
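The combined factory matters because resampling (an aggregation) and a nonlinear scale do not commute, which is why `apply_transforms` pins the order to resample-then-scale. A quick standalone check with illustrative numbers:

    import numpy as np

    daily = np.array([1.0, 2.0])
    print(np.exp(daily.sum()))  # aggregate first, then scale: e**3 ~= 20.09
    print(np.exp(daily).sum())  # scale first, then aggregate: e + e**2 ~= 10.11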
--- .../tests/statistics/test_statistic_class.py | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py index 8f51d1716..041206f6f 100644 --- a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py +++ b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py @@ -163,6 +163,45 @@ def simple_valid_scale_factory() -> MockStatisticInput: ) +def simple_valid_resample_and_scale_factory() -> MockStatisticInput: + date_coords = pd.date_range(date(2024, 1, 1), date(2024, 12, 31)) + subpop_coords = ["01", "02", "03", "04"] + dim = (len(date_coords), len(subpop_coords)) + model_data = xr.DataArray( + data=np.random.randn(*dim), + dims=("date", "subpop"), + coords={ + "date": date_coords, + "subpop": subpop_coords, + }, + ) + gt_data = xr.DataArray( + data=np.random.randn(*dim), + dims=("date", "subpop"), + coords={ + "date": date_coords, + "subpop": subpop_coords, + }, + ) + return MockStatisticInput( + "total_hospitalizations", + { + "name": "sum_hospitalizations", + "aggregator": "sum", + "period": "1 months", + "sim_var": "incidH", + "data_var": "incidH", + "remove_na": True, + "add_one": True, + "likelihood": {"dist": "pois"}, + "resample": {"freq": "W", "aggregator": "max"}, + "scale": "sin", + }, + model_data=model_data, + gt_data=gt_data, + ) + + class TestStatistic: @pytest.mark.parametrize("factory", [(invalid_regularization_factory)]) def test_unsupported_regularizations_value_error( @@ -331,3 +370,42 @@ def test_apply_scale(self, factory: Callable[[], MockStatisticInput]) -> None: else: # No scale config, `apply_scale` is a no-op assert scaled_data.identical(mock_inputs.model_data) + + @pytest.mark.parametrize( + "factory", + [ + (simple_valid_factory), + (simple_valid_resample_factory), + (simple_valid_scale_factory), + (simple_valid_resample_and_scale_factory), + ], + ) + def test_apply_transforms(self, factory: Callable[[], MockStatisticInput]) -> None: + # Setup + mock_inputs = factory() + statistic = mock_inputs.create_statistic_instance() + + # Tests + transformed_data = statistic.apply_transforms(mock_inputs.model_data) + expected_transformed_data = mock_inputs.model_data.copy() + if resample_config := mock_inputs.config.get("resample", {}): + # Resample config + expected_transformed_data = expected_transformed_data.resample( + date=resample_config.get("freq", "") + ) + aggregation_func = getattr( + expected_transformed_data, resample_config.get("aggregator", "") + ) + expected_transformed_data = aggregation_func( + skipna=( + resample_config.get("skipna", False) + if resample_config.get("aggregator") is not None + else False + ) + ) + if (scale_func := mock_inputs.config.get("scale")) is not None: + # Scale config + expected_transformed_data = getattr(np, scale_func)( + expected_transformed_data + ) + assert transformed_data.identical(expected_transformed_data) From a518f36b13b0858a7aa205c6c13739eae4c24c15 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Wed, 21 Aug 2024 15:54:00 -0400 Subject: [PATCH 18/58] Consolidate valid factories into global var Created global `all_valid_factories` that can be passed directly to the `pytest.mark.parametrize` decorator to test methods of the `Statistic` class against many configurations. 
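The pattern consolidated here is a single module-level list handed to `pytest.mark.parametrize`, so each `Statistic` method test runs once per mock configuration instead of repeating the factory list in every decorator. As it would read in this test module (the factory functions are the ones defined above, one entry per distinct factory; `test_some_method` is an illustrative placeholder):

    # Sketch of the shared-parametrization pattern; the factories are the
    # module-level functions defined earlier in this file.
    import pytest

    all_valid_factories = [
        simple_valid_factory,
        simple_valid_resample_factory,
        simple_valid_scale_factory,
        simple_valid_resample_and_scale_factory,
    ]

    @pytest.mark.parametrize("factory", all_valid_factories)
    def test_some_method(factory) -> None:
        mock_inputs = factory()  # build the config and data for this case
        statistic = mock_inputs.create_statistic_instance()
        assert statistic is not None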
--- .../tests/statistics/test_statistic_class.py | 34 ++++++++----------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py index 041206f6f..e466af0a5 100644 --- a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py +++ b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py @@ -202,6 +202,14 @@ def simple_valid_resample_and_scale_factory() -> MockStatisticInput: ) +all_valid_factories = [ + (simple_valid_factory), + (simple_valid_resample_factory), + (simple_valid_resample_factory), + (simple_valid_resample_and_scale_factory), +] + + class TestStatistic: @pytest.mark.parametrize("factory", [(invalid_regularization_factory)]) def test_unsupported_regularizations_value_error( @@ -220,7 +228,7 @@ def test_unsupported_regularizations_value_error( ): mock_inputs.create_statistic_instance() - @pytest.mark.parametrize("factory", [(simple_valid_factory)]) + @pytest.mark.parametrize("factory", all_valid_factories) def test_statistic_instance_attributes( self, factory: Callable[[], MockStatisticInput] ) -> None: @@ -269,8 +277,8 @@ def test_statistic_instance_attributes( assert statistic.scale == (mock_inputs.config.get("scale") is not None) # `scale_func` attribute - if scale_func := mock_inputs.config.get("scale") is not None: - assert statistic.scale_func == scale_func + if (scale_func := mock_inputs.config.get("scale")) is not None: + assert statistic.scale_func == getattr(np, scale_func) # `sim_var` attribute assert statistic.sim_var == mock_inputs.config["sim_var"] @@ -278,7 +286,7 @@ def test_statistic_instance_attributes( # `zero_to_one` attribute assert statistic.zero_to_one == mock_inputs.config.get("zero_to_one", False) - @pytest.mark.parametrize("factory", [(simple_valid_factory)]) + @pytest.mark.parametrize("factory", all_valid_factories) def test_statistic_str_and_repr( self, factory: Callable[[], MockStatisticInput] ) -> None: @@ -323,9 +331,7 @@ def test_allsubpop_regularize( ) assert isinstance(forecast_regularization, float) - @pytest.mark.parametrize( - "factory", [(simple_valid_factory), (simple_valid_resample_factory)] - ) + @pytest.mark.parametrize("factory", all_valid_factories) def test_apply_resample(self, factory: Callable[[], MockStatisticInput]) -> None: # Setup mock_inputs = factory() @@ -353,9 +359,7 @@ def test_apply_resample(self, factory: Callable[[], MockStatisticInput]) -> None # No resample config, `apply_resample` returns our input assert resampled_data.identical(mock_inputs.model_data) - @pytest.mark.parametrize( - "factory", [(simple_valid_factory), (simple_valid_scale_factory)] - ) + @pytest.mark.parametrize("factory", all_valid_factories) def test_apply_scale(self, factory: Callable[[], MockStatisticInput]) -> None: # Setup mock_inputs = factory() @@ -371,15 +375,7 @@ def test_apply_scale(self, factory: Callable[[], MockStatisticInput]) -> None: # No scale config, `apply_scale` is a no-op assert scaled_data.identical(mock_inputs.model_data) - @pytest.mark.parametrize( - "factory", - [ - (simple_valid_factory), - (simple_valid_resample_factory), - (simple_valid_scale_factory), - (simple_valid_resample_and_scale_factory), - ], - ) + @pytest.mark.parametrize("factory", all_valid_factories) def test_apply_transforms(self, factory: Callable[[], MockStatisticInput]) -> None: # Setup mock_inputs = factory() From f94ba0f12534110a3d577094123fb0386743d959 Mon Sep 17 00:00:00 2001 From: Timothy Willard 
<9395586+TimothyWillard@users.noreply.github.com> Date: Thu, 22 Aug 2024 11:39:35 -0400 Subject: [PATCH 19/58] Add unit tests for `Statistic.llik` Added unit tests for the `llik` method of the `Statistic` class. Had to change factories to use RMSE by default for likelihood distribution since the poisson distribution only has integer support. --- .../tests/statistics/test_statistic_class.py | 61 +++++++++++++++++-- 1 file changed, 56 insertions(+), 5 deletions(-) diff --git a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py index e466af0a5..489dde515 100644 --- a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py +++ b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py @@ -5,6 +5,7 @@ import numpy as np import pandas as pd import pytest +import scipy import xarray as xr from gempyor.statistics import Statistic @@ -47,7 +48,7 @@ def invalid_regularization_factory() -> MockStatisticInput: "data_var": "incidH", "remove_na": True, "add_one": True, - "likelihood": {"dist": "pois"}, + "likelihood": {"dist": "rmse"}, "regularize": [{"name": "forecast"}, {"name": "invalid"}], }, ) @@ -80,7 +81,7 @@ def simple_valid_factory() -> MockStatisticInput: "data_var": "incidH", "remove_na": True, "add_one": True, - "likelihood": {"dist": "pois"}, + "likelihood": {"dist": "norm", "params": {"scale": 2.0}}, }, model_data=model_data, gt_data=gt_data, @@ -117,7 +118,7 @@ def simple_valid_resample_factory() -> MockStatisticInput: "data_var": "incidH", "remove_na": True, "add_one": True, - "likelihood": {"dist": "pois"}, + "likelihood": {"dist": "rmse"}, "resample": {"freq": "MS", "aggregator": "sum"}, }, model_data=model_data, @@ -155,7 +156,7 @@ def simple_valid_scale_factory() -> MockStatisticInput: "data_var": "incidH", "remove_na": True, "add_one": True, - "likelihood": {"dist": "pois"}, + "likelihood": {"dist": "rmse"}, "scale": "exp", }, model_data=model_data, @@ -193,7 +194,7 @@ def simple_valid_resample_and_scale_factory() -> MockStatisticInput: "data_var": "incidH", "remove_na": True, "add_one": True, - "likelihood": {"dist": "pois"}, + "likelihood": {"dist": "rmse"}, "resample": {"freq": "W", "aggregator": "max"}, "scale": "sin", }, @@ -405,3 +406,53 @@ def test_apply_transforms(self, factory: Callable[[], MockStatisticInput]) -> No expected_transformed_data ) assert transformed_data.identical(expected_transformed_data) + + @pytest.mark.parametrize("factory", all_valid_factories) + def test_llik(self, factory: Callable[[], MockStatisticInput]) -> None: + # Setup + mock_inputs = factory() + statistic = mock_inputs.create_statistic_instance() + + # Tests + log_likelihood = statistic.llik(mock_inputs.model_data, mock_inputs.gt_data) + + assert isinstance(log_likelihood, xr.DataArray) + assert log_likelihood.dims == mock_inputs.gt_data.dims + assert log_likelihood.coords.equals(mock_inputs.gt_data.coords) + dist_name = mock_inputs.config["likelihood"]["dist"] + if dist_name in {"absolute_error", "rmse"}: + # MAE produces a single repeated number + assert np.allclose( + log_likelihood.values, + -np.log( + np.nansum(np.abs(mock_inputs.model_data - mock_inputs.gt_data)) + ), + ) + elif dist_name == "pois": + assert np.allclose( + log_likelihood.values, + scipy.stats.poisson.logpmf( + mock_inputs.gt_data.values, mock_inputs.model_data.values + ), + ) + elif dist_name == {"norm", "norm_cov"}: + scale = mock_inputs.config["likelihood"]["params"]["scale"] + if dist_name == "norm_cov": + scale *= 
mock_inputs.model_data.where(mock_inputs.model_data > 5, 5) + assert np.allclose( + log_likelihood.values, + scipy.stats.norm.logpdf( + mock_inputs.gt_data.values, + mock_inputs.model_data.values, + scale=scale, + ), + ) + elif dist_name == "nbinom": + assert np.allclose( + log_likelihood.values, + scipy.stats.nbinom.logpmf( + mock_inputs.gt_data.values, + n=mock_inputs.config["likelihood"]["params"]["n"], + p=mock_inputs.model_data.values, + ), + ) From 52bad21617d9af4d51322adef12e8816ecf02067 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Fri, 23 Aug 2024 08:43:14 -0400 Subject: [PATCH 20/58] Change model/gt data to use xarray Dataset Was previously using `xarray.DataArray` for `model_data` and `gt_data` in unit testing the `Statistic` class since that is what many methods expect. It seems though the main entry to the class, `compute_logloss` takes an `xarray.DataSet` that the class splices into `xarray.DataArray`s. The unit tests now more accurately reflect this. --- .../tests/statistics/test_statistic_class.py | 214 +++++++++++------- 1 file changed, 135 insertions(+), 79 deletions(-) diff --git a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py index 489dde515..6b15014eb 100644 --- a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py +++ b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py @@ -1,4 +1,5 @@ from datetime import date +from itertools import product from typing import Any, Callable import confuse @@ -55,21 +56,24 @@ def invalid_regularization_factory() -> MockStatisticInput: def simple_valid_factory() -> MockStatisticInput: - model_data = xr.DataArray( - data=np.random.randn(10, 3), - dims=("date", "subpop"), - coords={ - "date": pd.date_range(date(2024, 1, 1), date(2024, 1, 10)), - "subpop": ["01", "02", "03"], + data_coords = { + "date": pd.date_range(date(2024, 1, 1), date(2024, 1, 10)), + "subpop": ["01", "02", "03"], + } + data_dim = [len(v) for v in data_coords.values()] + model_data = xr.Dataset( + data_vars={ + "incidH": (list(data_coords.keys()), np.random.randn(*data_dim)), + "incidD": (list(data_coords.keys()), np.random.randn(*data_dim)), }, + coords=data_coords, ) - gt_data = xr.DataArray( - data=np.random.randn(10, 3), - dims=("date", "subpop"), - coords={ - "date": pd.date_range(date(2024, 1, 1), date(2024, 1, 10)), - "subpop": ["01", "02", "03"], + gt_data = xr.Dataset( + data_vars={ + "incidH": (list(data_coords.keys()), np.random.randn(*data_dim)), + "incidD": (list(data_coords.keys()), np.random.randn(*data_dim)), }, + coords=data_coords, ) return MockStatisticInput( "total_hospitalizations", @@ -89,24 +93,24 @@ def simple_valid_factory() -> MockStatisticInput: def simple_valid_resample_factory() -> MockStatisticInput: - date_coords = pd.date_range(date(2024, 1, 1), date(2024, 12, 31)) - subpop_coords = ["01", "02", "03", "04"] - dim = (len(date_coords), len(subpop_coords)) - model_data = xr.DataArray( - data=np.random.randn(*dim), - dims=("date", "subpop"), - coords={ - "date": date_coords, - "subpop": subpop_coords, + data_coords = { + "date": pd.date_range(date(2024, 1, 1), date(2024, 12, 31)), + "subpop": ["01", "02", "03", "04"], + } + data_dim = [len(v) for v in data_coords.values()] + model_data = xr.Dataset( + data_vars={ + "incidH": (list(data_coords.keys()), np.random.randn(*data_dim)), + "incidD": (list(data_coords.keys()), np.random.randn(*data_dim)), }, + coords=data_coords, ) - gt_data = 
xr.DataArray( - data=np.random.randn(*dim), - dims=("date", "subpop"), - coords={ - "date": date_coords, - "subpop": subpop_coords, + gt_data = xr.Dataset( + data_vars={ + "incidH": (list(data_coords.keys()), np.random.randn(*data_dim)), + "incidD": (list(data_coords.keys()), np.random.randn(*data_dim)), }, + coords=data_coords, ) return MockStatisticInput( "total_hospitalizations", @@ -127,24 +131,24 @@ def simple_valid_resample_factory() -> MockStatisticInput: def simple_valid_scale_factory() -> MockStatisticInput: - date_coords = pd.date_range(date(2024, 1, 1), date(2024, 12, 31)) - subpop_coords = ["01", "02", "03", "04"] - dim = (len(date_coords), len(subpop_coords)) - model_data = xr.DataArray( - data=np.random.randn(*dim), - dims=("date", "subpop"), - coords={ - "date": date_coords, - "subpop": subpop_coords, + data_coords = { + "date": pd.date_range(date(2024, 1, 1), date(2024, 12, 31)), + "subpop": ["01", "02", "03", "04"], + } + data_dim = [len(v) for v in data_coords.values()] + model_data = xr.Dataset( + data_vars={ + "incidH": (list(data_coords.keys()), np.random.randn(*data_dim)), + "incidD": (list(data_coords.keys()), np.random.randn(*data_dim)), }, + coords=data_coords, ) - gt_data = xr.DataArray( - data=np.random.randn(*dim), - dims=("date", "subpop"), - coords={ - "date": date_coords, - "subpop": subpop_coords, + gt_data = xr.Dataset( + data_vars={ + "incidH": (list(data_coords.keys()), np.random.randn(*data_dim)), + "incidD": (list(data_coords.keys()), np.random.randn(*data_dim)), }, + coords=data_coords, ) return MockStatisticInput( "total_hospitalizations", @@ -165,24 +169,24 @@ def simple_valid_scale_factory() -> MockStatisticInput: def simple_valid_resample_and_scale_factory() -> MockStatisticInput: - date_coords = pd.date_range(date(2024, 1, 1), date(2024, 12, 31)) - subpop_coords = ["01", "02", "03", "04"] - dim = (len(date_coords), len(subpop_coords)) - model_data = xr.DataArray( - data=np.random.randn(*dim), - dims=("date", "subpop"), - coords={ - "date": date_coords, - "subpop": subpop_coords, + data_coords = { + "date": pd.date_range(date(2024, 1, 1), date(2024, 12, 31)), + "subpop": ["01", "02", "03", "04"], + } + data_dim = [len(v) for v in data_coords.values()] + model_data = xr.Dataset( + data_vars={ + "incidH": (list(data_coords.keys()), np.random.randn(*data_dim)), + "incidD": (list(data_coords.keys()), np.random.randn(*data_dim)), }, + coords=data_coords, ) - gt_data = xr.DataArray( - data=np.random.randn(*dim), - dims=("date", "subpop"), - coords={ - "date": date_coords, - "subpop": subpop_coords, + gt_data = xr.Dataset( + data_vars={ + "incidH": (list(data_coords.keys()), np.random.randn(*data_dim)), + "incidD": (list(data_coords.keys()), np.random.randn(*data_dim)), }, + coords=data_coords, ) return MockStatisticInput( "total_hospitalizations", @@ -314,7 +318,10 @@ def test_forecast_regularize( # Tests forecast_regularization = statistic._forecast_regularize( - mock_inputs.model_data, mock_inputs.gt_data, last_n=last_n, mult=mult + mock_inputs.model_data[mock_inputs.config["sim_var"]], + mock_inputs.gt_data[mock_inputs.config["data_var"]], + last_n=last_n, + mult=mult, ) assert isinstance(forecast_regularization, float) @@ -328,7 +335,9 @@ def test_allsubpop_regularize( # Tests forecast_regularization = statistic._allsubpop_regularize( - mock_inputs.model_data, mock_inputs.gt_data, mult=mult + mock_inputs.model_data[mock_inputs.config["sim_var"]], + mock_inputs.gt_data[mock_inputs.config["data_var"]], + mult=mult, ) assert 
isinstance(forecast_regularization, float) @@ -339,12 +348,14 @@ def test_apply_resample(self, factory: Callable[[], MockStatisticInput]) -> None statistic = mock_inputs.create_statistic_instance() # Tests - resampled_data = statistic.apply_resample(mock_inputs.model_data) + resampled_data = statistic.apply_resample( + mock_inputs.model_data[mock_inputs.config["sim_var"]] + ) if resample_config := mock_inputs.config.get("resample", {}): # Resample config - expected_resampled_data = mock_inputs.model_data.resample( - date=resample_config.get("freq", "") - ) + expected_resampled_data = mock_inputs.model_data[ + mock_inputs.config["sim_var"] + ].resample(date=resample_config.get("freq", "")) aggregation_func = getattr( expected_resampled_data, resample_config.get("aggregator", "") ) @@ -358,7 +369,9 @@ def test_apply_resample(self, factory: Callable[[], MockStatisticInput]) -> None assert resampled_data.identical(expected_resampled_data) else: # No resample config, `apply_resample` returns our input - assert resampled_data.identical(mock_inputs.model_data) + assert resampled_data.identical( + mock_inputs.model_data[mock_inputs.config["sim_var"]] + ) @pytest.mark.parametrize("factory", all_valid_factories) def test_apply_scale(self, factory: Callable[[], MockStatisticInput]) -> None: @@ -367,14 +380,20 @@ def test_apply_scale(self, factory: Callable[[], MockStatisticInput]) -> None: statistic = mock_inputs.create_statistic_instance() # Tests - scaled_data = statistic.apply_scale(mock_inputs.model_data) + scaled_data = statistic.apply_scale( + mock_inputs.model_data[mock_inputs.config["sim_var"]] + ) if (scale_func := mock_inputs.config.get("scale")) is not None: # Scale config - expected_scaled_data = getattr(np, scale_func)(mock_inputs.model_data) + expected_scaled_data = getattr(np, scale_func)( + mock_inputs.model_data[mock_inputs.config["sim_var"]] + ) assert scaled_data.identical(expected_scaled_data) else: # No scale config, `apply_scale` is a no-op - assert scaled_data.identical(mock_inputs.model_data) + assert scaled_data.identical( + mock_inputs.model_data[mock_inputs.config["sim_var"]] + ) @pytest.mark.parametrize("factory", all_valid_factories) def test_apply_transforms(self, factory: Callable[[], MockStatisticInput]) -> None: @@ -383,8 +402,12 @@ def test_apply_transforms(self, factory: Callable[[], MockStatisticInput]) -> No statistic = mock_inputs.create_statistic_instance() # Tests - transformed_data = statistic.apply_transforms(mock_inputs.model_data) - expected_transformed_data = mock_inputs.model_data.copy() + transformed_data = statistic.apply_transforms( + mock_inputs.model_data[mock_inputs.config["sim_var"]] + ) + expected_transformed_data = mock_inputs.model_data[ + mock_inputs.config["sim_var"] + ].copy() if resample_config := mock_inputs.config.get("resample", {}): # Resample config expected_transformed_data = expected_transformed_data.resample( @@ -414,36 +437,52 @@ def test_llik(self, factory: Callable[[], MockStatisticInput]) -> None: statistic = mock_inputs.create_statistic_instance() # Tests - log_likelihood = statistic.llik(mock_inputs.model_data, mock_inputs.gt_data) + log_likelihood = statistic.llik( + mock_inputs.model_data[mock_inputs.config["sim_var"]], + mock_inputs.gt_data[mock_inputs.config["data_var"]], + ) assert isinstance(log_likelihood, xr.DataArray) - assert log_likelihood.dims == mock_inputs.gt_data.dims - assert log_likelihood.coords.equals(mock_inputs.gt_data.coords) + assert ( + log_likelihood.dims + == 
mock_inputs.gt_data[mock_inputs.config["data_var"]].dims + ) + assert log_likelihood.coords.equals( + mock_inputs.gt_data[mock_inputs.config["data_var"]].coords + ) dist_name = mock_inputs.config["likelihood"]["dist"] if dist_name in {"absolute_error", "rmse"}: # MAE produces a single repeated number assert np.allclose( log_likelihood.values, -np.log( - np.nansum(np.abs(mock_inputs.model_data - mock_inputs.gt_data)) + np.nansum( + np.abs( + mock_inputs.model_data[mock_inputs.config["sim_var"]] + - mock_inputs.gt_data[mock_inputs.config["data_var"]] + ) + ) ), ) elif dist_name == "pois": assert np.allclose( log_likelihood.values, scipy.stats.poisson.logpmf( - mock_inputs.gt_data.values, mock_inputs.model_data.values + mock_inputs.gt_data[mock_inputs.config["data_var"]].values, + mock_inputs.model_data[mock_inputs.config["data_var"]].values, ), ) elif dist_name == {"norm", "norm_cov"}: scale = mock_inputs.config["likelihood"]["params"]["scale"] if dist_name == "norm_cov": - scale *= mock_inputs.model_data.where(mock_inputs.model_data > 5, 5) + scale *= mock_inputs.model_data[mock_inputs.config["sim_var"]].where( + mock_inputs.model_data[mock_inputs.config["sim_var"]] > 5, 5 + ) assert np.allclose( log_likelihood.values, scipy.stats.norm.logpdf( - mock_inputs.gt_data.values, - mock_inputs.model_data.values, + mock_inputs.gt_data[mock_inputs.config["data_var"]].values, + mock_inputs.model_data[mock_inputs.config["sim_var"]].values, scale=scale, ), ) @@ -451,8 +490,25 @@ def test_llik(self, factory: Callable[[], MockStatisticInput]) -> None: assert np.allclose( log_likelihood.values, scipy.stats.nbinom.logpmf( - mock_inputs.gt_data.values, + mock_inputs.gt_data[mock_inputs.config["data_var"]].values, n=mock_inputs.config["likelihood"]["params"]["n"], - p=mock_inputs.model_data.values, + p=mock_inputs.model_data[mock_inputs.config["sim_var"]].values, ), ) + + @pytest.mark.parametrize("factory", all_valid_factories) + def test_compute_logloss(self, factory: Callable[[], MockStatisticInput]) -> None: + # Setup + mock_inputs = factory() + statistic = mock_inputs.create_statistic_instance() + + # Tests + log_likelihood, regularization = statistic.compute_logloss( + mock_inputs.model_data, mock_inputs.gt_data + ) + + assert True + + # print(regularization) + + # assert isinstance(regularization, float) From 02c9dc4a3e71749c4ee8a14617a318dfcde1f771 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Fri, 23 Aug 2024 09:29:43 -0400 Subject: [PATCH 21/58] Initial unit tests on `Statistic.compute_logloss` * Created initial unit tests on the `compute_logloss` method of `Statistic`, checking for structure but not correctness. * Updated documentation for `compute_logloss` to reflect the possible `ValueError` and the correct input types expected. * Changed internal variable of that method to a float to get a consistent float return for the second tuple entry from `compute_logloss`. 
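Given the updated docstring and signature, the calling convention is: `compute_logloss` receives the full model and ground-truth `xarray.Dataset`s, selects `sim_var`/`data_var` itself, and returns a per-subpop log-likelihood `DataArray` together with a float regularization term (0.0 when no `regularize` entries are configured). The usage sketch below assumes the statistic config can be supplied as a confuse view built from the same dictionary shape the mock factories use; that wiring is an assumption of this sketch, not a documented entry point.

    # Usage sketch for `Statistic.compute_logloss`; the confuse wiring mirrors
    # the mock config dict used in these tests and is illustrative only.
    import confuse
    import numpy as np
    import pandas as pd
    import xarray as xr

    from gempyor.statistics import Statistic

    coords = {
        "date": pd.date_range("2024-01-01", "2024-01-10"),
        "subpop": ["01", "02", "03"],
    }
    shape = (len(coords["date"]), len(coords["subpop"]))
    model_data = xr.Dataset(
        {"incidH": (("date", "subpop"), np.random.randn(*shape))}, coords=coords
    )
    gt_data = xr.Dataset(
        {"incidH": (("date", "subpop"), np.random.randn(*shape))}, coords=coords
    )

    statistic_config = confuse.Configuration("statistic_sketch", read=False)
    statistic_config.set(
        {
            "name": "sum_hospitalizations",
            "sim_var": "incidH",
            "data_var": "incidH",
            "remove_na": True,
            "add_one": True,
            "likelihood": {"dist": "norm", "params": {"scale": 2.0}},
        }
    )

    statistic = Statistic("total_hospitalizations", statistic_config)
    log_likelihood, regularization = statistic.compute_logloss(model_data, gt_data)
    # `log_likelihood` is indexed by "subpop"; `regularization` is a plain float.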
--- .../gempyor_pkg/src/gempyor/statistics.py | 7 +++-- .../tests/statistics/test_statistic_class.py | 26 ++++++++++++------- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/statistics.py b/flepimop/gempyor_pkg/src/gempyor/statistics.py index fd1dd1a43..ea10e7aa1 100644 --- a/flepimop/gempyor_pkg/src/gempyor/statistics.py +++ b/flepimop/gempyor_pkg/src/gempyor/statistics.py @@ -271,7 +271,7 @@ def llik(self, model_data: xr.DataArray, gt_data: xr.DataArray) -> xr.DataArray: return likelihood def compute_logloss( - self, model_data: xr.DataArray, gt_data: xr.DataArray + self, model_data: xr.Dataset, gt_data: xr.Dataset ) -> tuple[xr.DataArray, float]: """ Compute the logistic loss of observing the ground truth given model output. @@ -286,6 +286,9 @@ def compute_logloss( The logistic loss of observing `gt_data` from the model `model_data` decomposed into the log-likelihood along the "subpop" dimension and regularizations. + + Raises: + ValueError: If `model_data` and `gt_data` do not have the same shape. """ model_data = self.apply_transforms(model_data[self.sim_var]) gt_data = self.apply_transforms(gt_data[self.data_var]) @@ -299,7 +302,7 @@ def compute_logloss( ) ) - regularization = 0 + regularization = 0.0 for reg_func, reg_config in self.regularizations: regularization += reg_func( model_data=model_data, gt_data=gt_data, **reg_config diff --git a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py index 6b15014eb..fb92afe4b 100644 --- a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py +++ b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py @@ -194,8 +194,8 @@ def simple_valid_resample_and_scale_factory() -> MockStatisticInput: "name": "sum_hospitalizations", "aggregator": "sum", "period": "1 months", - "sim_var": "incidH", - "data_var": "incidH", + "sim_var": "incidD", + "data_var": "incidD", "remove_na": True, "add_one": True, "likelihood": {"dist": "rmse"}, @@ -447,7 +447,7 @@ def test_llik(self, factory: Callable[[], MockStatisticInput]) -> None: log_likelihood.dims == mock_inputs.gt_data[mock_inputs.config["data_var"]].dims ) - assert log_likelihood.coords.equals( + assert log_likelihood.coords.identical( mock_inputs.gt_data[mock_inputs.config["data_var"]].coords ) dist_name = mock_inputs.config["likelihood"]["dist"] @@ -501,14 +501,22 @@ def test_compute_logloss(self, factory: Callable[[], MockStatisticInput]) -> Non # Setup mock_inputs = factory() statistic = mock_inputs.create_statistic_instance() - - # Tests log_likelihood, regularization = statistic.compute_logloss( mock_inputs.model_data, mock_inputs.gt_data ) + regularization_config = mock_inputs.config.get("regularize", []) - assert True - - # print(regularization) + # Assertions on log_likelihood + assert isinstance(log_likelihood, xr.DataArray) + assert log_likelihood.coords.identical( + xr.Coordinates(coords={"subpop": mock_inputs.gt_data.coords.get("subpop")}) + ) - # assert isinstance(regularization, float) + # Assertions on regularization + assert isinstance(regularization, float) + if regularization_config: + # Regularizations on logistic loss + assert regularization != 0.0 + else: + # No regularizations on logistic loss + assert regularization == 0.0 From 4ff66823f705d9d1b3308d46d79eb6cf81e585e1 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Fri, 23 Aug 2024 10:38:06 -0400 Subject: [PATCH 22/58] Test fixture for data 
misshape `ValueError` Added a test fixture that confirms the `ValueError` raised when model data and ground truth data do not have the same shapes in `Statistic.compute_logloss`. --- .../tests/statistics/test_statistic_class.py | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py index fb92afe4b..b7258bad2 100644 --- a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py +++ b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py @@ -55,6 +55,36 @@ def invalid_regularization_factory() -> MockStatisticInput: ) +def invalid_misshaped_data_factory() -> MockStatisticInput: + model_data = xr.Dataset( + data_vars={"incidH": (["date", "subpop"], np.random.randn(10, 3))}, + coords={ + "date": pd.date_range(date(2024, 1, 1), date(2024, 1, 10)), + "subpop": ["01", "02", "03"], + }, + ) + gt_data = xr.Dataset( + data_vars={"incidH": (["date", "subpop"], np.random.randn(11, 2))}, + coords={ + "date": pd.date_range(date(2024, 1, 1), date(2024, 1, 11)), + "subpop": ["02", "03"], + }, + ) + return MockStatisticInput( + "total_hospitalizations", + { + "name": "sum_hospitalizations", + "sim_var": "incidH", + "data_var": "incidH", + "remove_na": True, + "add_one": True, + "likelihood": {"dist": "norm", "params": {"scale": 2.0}}, + }, + model_data=model_data, + gt_data=gt_data, + ) + + def simple_valid_factory() -> MockStatisticInput: data_coords = { "date": pd.date_range(date(2024, 1, 1), date(2024, 1, 10)), @@ -496,6 +526,25 @@ def test_llik(self, factory: Callable[[], MockStatisticInput]) -> None: ), ) + @pytest.mark.parametrize("factory", [(invalid_misshaped_data_factory)]) + def test_compute_logloss_data_misshape_value_error( + self, factory: Callable[[], MockStatisticInput] + ) -> None: + mock_inputs = factory() + statistic = mock_inputs.create_statistic_instance() + + model_rows, model_cols = mock_inputs.model_data[ + mock_inputs.config["sim_var"] + ].shape + gt_rows, gt_cols = mock_inputs.gt_data[mock_inputs.config["data_var"]].shape + expected_match = ( + rf"^{mock_inputs.name} Statistic error\: data and groundtruth do not have " + rf"the same shape\: model\_data\.shape\=\({model_rows}\, {model_cols}\) " + rf"\!\= gt\_data\.shape\=\({gt_rows}\, {gt_cols}\)$" + ) + with pytest.raises(ValueError, match=expected_match): + statistic.compute_logloss(mock_inputs.model_data, mock_inputs.gt_data) + @pytest.mark.parametrize("factory", all_valid_factories) def test_compute_logloss(self, factory: Callable[[], MockStatisticInput]) -> None: # Setup From 15864c760f1738a2625995bc936d11a29540cc19 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Fri, 23 Aug 2024 11:01:47 -0400 Subject: [PATCH 23/58] Remove unnecessary entries from mock configs There were entries in the mock configs, modeled on existing configs, that are not considered by the `Statistic` class at all. Removed for clarity. 
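For reference, after this trimming the mock configs reduce to the keys the class actually consumes, with the transform and regularization blocks left as optional extras exercised by the other factories. A sketch of that minimal shape, with values taken from the factories above and intended only as illustration:

    # Minimal mock statistic config after the cleanup; commented keys are the
    # optional blocks used by the resample/scale/regularization factories.
    statistic_config = {
        "name": "sum_hospitalizations",
        "sim_var": "incidH",    # variable pulled from the model Dataset
        "data_var": "incidH",   # variable pulled from the ground-truth Dataset
        "remove_na": True,
        "add_one": True,
        "likelihood": {"dist": "norm", "params": {"scale": 2.0}},
        # "resample": {"freq": "MS", "aggregator": "sum"},
        # "scale": "exp",
        # "regularize": [{"name": "forecast"}],
    }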
--- .../tests/statistics/test_statistic_class.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py index b7258bad2..e18861e9e 100644 --- a/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py +++ b/flepimop/gempyor_pkg/tests/statistics/test_statistic_class.py @@ -43,8 +43,6 @@ def invalid_regularization_factory() -> MockStatisticInput: "total_hospitalizations", { "name": "sum_hospitalizations", - "aggregator": "sum", - "period": "1 months", "sim_var": "incidH", "data_var": "incidH", "remove_na": True, @@ -109,8 +107,6 @@ def simple_valid_factory() -> MockStatisticInput: "total_hospitalizations", { "name": "sum_hospitalizations", - "aggregator": "sum", - "period": "1 months", "sim_var": "incidH", "data_var": "incidH", "remove_na": True, @@ -146,8 +142,6 @@ def simple_valid_resample_factory() -> MockStatisticInput: "total_hospitalizations", { "name": "sum_hospitalizations", - "aggregator": "sum", - "period": "1 months", "sim_var": "incidH", "data_var": "incidH", "remove_na": True, @@ -184,8 +178,6 @@ def simple_valid_scale_factory() -> MockStatisticInput: "total_hospitalizations", { "name": "sum_hospitalizations", - "aggregator": "sum", - "period": "1 months", "sim_var": "incidH", "data_var": "incidH", "remove_na": True, @@ -222,8 +214,6 @@ def simple_valid_resample_and_scale_factory() -> MockStatisticInput: "total_hospitalizations", { "name": "sum_hospitalizations", - "aggregator": "sum", - "period": "1 months", "sim_var": "incidD", "data_var": "incidD", "remove_na": True, From b56d2d9d039cc67f63dd0c67f215021ce6bb340a Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Mon, 7 Oct 2024 12:07:28 -0400 Subject: [PATCH 24/58] Update gempyor action to test 3.11 and 3.10 Minor edits to run the gempyor tests with python 3.10 and 3.11. Remove usage of custom docker container. 
--- .github/workflows/gempyor-ci.yml | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/.github/workflows/gempyor-ci.yml b/.github/workflows/gempyor-ci.yml index a2cb6e313..d3e968b8a 100644 --- a/.github/workflows/gempyor-ci.yml +++ b/.github/workflows/gempyor-ci.yml @@ -20,29 +20,30 @@ on: jobs: unit-tests: runs-on: ubuntu-latest - container: - image: hopkinsidd/flepimop:latest-dev - options: --user root + strategy: + matrix: + python-version: ["3.10", "3.11"] steps: - name: Checkout uses: actions/checkout@v4 with: lfs: true - - name: Install the gempyor package + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install gempyor run: | - source /var/python/3.10/virtualenv/bin/activate python -m pip install --upgrade pip - python -m pip install "flepimop/gempyor_pkg[test]" + python -m pip install "flepimop/gempyor_pkg[dev]" shell: bash - name: Run gempyor tests run: | - source /var/python/3.10/virtualenv/bin/activate cd flepimop/gempyor_pkg pytest --exitfirst shell: bash - name: Run gempyor-cli integration tests from examples run: | - source /var/python/3.10/virtualenv/bin/activate cd examples pytest --exitfirst shell: bash From ea3ebc6816babcfa04619112ee2b79843edfce00 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Mon, 7 Oct 2024 12:22:44 -0400 Subject: [PATCH 25/58] Delete line to trigger GH Action Very minor edit to `__init__.py` to trigger the `gempyor` CI GitHub action. --- flepimop/gempyor_pkg/src/gempyor/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/flepimop/gempyor_pkg/src/gempyor/__init__.py b/flepimop/gempyor_pkg/src/gempyor/__init__.py index 432754c6c..bdf56083b 100644 --- a/flepimop/gempyor_pkg/src/gempyor/__init__.py +++ b/flepimop/gempyor_pkg/src/gempyor/__init__.py @@ -1,4 +1,3 @@ ## All functions are in minimal inference. - from .inference import * from .utils import * From 3d087c7a0ec4949daa7c2bb704e0d4cac09c8f34 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Mon, 7 Oct 2024 12:27:04 -0400 Subject: [PATCH 26/58] Correct extra install name Need to install `test` extra installs to get pytest instead of `dev`. --- .github/workflows/gempyor-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gempyor-ci.yml b/.github/workflows/gempyor-ci.yml index d3e968b8a..5f495ebed 100644 --- a/.github/workflows/gempyor-ci.yml +++ b/.github/workflows/gempyor-ci.yml @@ -35,7 +35,7 @@ jobs: - name: Install gempyor run: | python -m pip install --upgrade pip - python -m pip install "flepimop/gempyor_pkg[dev]" + python -m pip install "flepimop/gempyor_pkg[test]" shell: bash - name: Run gempyor tests run: | From 516909aa0b21206cc68aa9cfc21752f2277a7ba7 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Wed, 9 Oct 2024 14:46:05 -0400 Subject: [PATCH 27/58] Add dependency in GitHub workflows `inference-ci` now triggers whenever `flepicommon-ci` or `gempyor-ci` runs. 
--- .github/workflows/flepicommon-ci.yml | 2 +- .github/workflows/gempyor-ci.yml | 2 +- .github/workflows/inference-ci.yml | 7 ++++++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/.github/workflows/flepicommon-ci.yml b/.github/workflows/flepicommon-ci.yml index 5314c1b4f..813a4b35c 100644 --- a/.github/workflows/flepicommon-ci.yml +++ b/.github/workflows/flepicommon-ci.yml @@ -16,7 +16,7 @@ on: - dev jobs: - unit-tests: + ci: runs-on: ubuntu-latest container: image: hopkinsidd/flepimop:latest-dev diff --git a/.github/workflows/gempyor-ci.yml b/.github/workflows/gempyor-ci.yml index 5f495ebed..86b4645a3 100644 --- a/.github/workflows/gempyor-ci.yml +++ b/.github/workflows/gempyor-ci.yml @@ -18,7 +18,7 @@ on: - dev jobs: - unit-tests: + ci: runs-on: ubuntu-latest strategy: matrix: diff --git a/.github/workflows/inference-ci.yml b/.github/workflows/inference-ci.yml index 2ca3d4897..4bb2a5b8b 100644 --- a/.github/workflows/inference-ci.yml +++ b/.github/workflows/inference-ci.yml @@ -2,6 +2,10 @@ name: inference-ci on: workflow_dispatch: + workflow_run: + workflows: ['flepicommon-ci', 'gempyor-ci'] + types: + - completed push: paths: - flepimop/R_packages/inference/**/* @@ -16,8 +20,9 @@ on: - dev jobs: - unit-tests: + ci: runs-on: ubuntu-latest + if: ${{ github.event.workflow_run.conclusion == 'success' }} container: image: hopkinsidd/flepimop:latest-dev options: --user root From 4b9d019fb87f2e3539c75d126a8d2a798319fe4b Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Wed, 16 Oct 2024 07:54:47 -0400 Subject: [PATCH 28/58] Remove custom docker from flepicommon CI Also move to using standard GitHub actions to simplify the workflow. --- .github/workflows/flepicommon-ci.yml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/flepicommon-ci.yml b/.github/workflows/flepicommon-ci.yml index 813a4b35c..04c76ddce 100644 --- a/.github/workflows/flepicommon-ci.yml +++ b/.github/workflows/flepicommon-ci.yml @@ -18,24 +18,24 @@ on: jobs: ci: runs-on: ubuntu-latest - container: - image: hopkinsidd/flepimop:latest-dev - options: --user root + strategy: + matrix: + R-version: ["4.3.3"] steps: - name: Checkout uses: actions/checkout@v4 with: lfs: true - - name: Set up Rprofile - run: | - cp build/docker/Docker.Rprofile $HOME/.Rprofile - cp /home/app/.bashrc $HOME/.bashrc - shell: bash - - name: Install local R packages - run: Rscript build/local_install.R - shell: bash - - name: Run flepicommon tests + - name: Setup R ${{ matrix.R-version }} + uses: r-lib/actions/setup-r@v2 + - name: Install Dependencies + uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::devtools + working-directory: 'flepimop/R_packages/flepicommon' + - name: Run Unit Tests run: | + library(devtools) setwd("flepimop/R_packages/flepicommon") devtools::test(stop_on_failure=TRUE) shell: Rscript {0} From 07d93cdf8b3a540a04c01e225c95235f827813d6 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Wed, 16 Oct 2024 07:56:51 -0400 Subject: [PATCH 29/58] Add empty space to invoke flepicommon CI --- flepimop/R_packages/flepicommon/R/config.R | 1 + 1 file changed, 1 insertion(+) diff --git a/flepimop/R_packages/flepicommon/R/config.R b/flepimop/R_packages/flepicommon/R/config.R index 6f65ff562..e4e2dd91d 100644 --- a/flepimop/R_packages/flepicommon/R/config.R +++ b/flepimop/R_packages/flepicommon/R/config.R @@ -159,6 +159,7 @@ check_within_bounds <- 
function(value, obj) { } } + #' @name prettyprint_optlist #' @description Print a list of options such that it does not take the whole screen #' Display `name : value` \n for all elements. From bd91d7bd79963b0a60851cbd5ac850b9810725bf Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Wed, 16 Oct 2024 09:18:14 -0400 Subject: [PATCH 30/58] Attempt to speed up flepicommon CI Test run at speeding up the flepicommon CI compared to using the default r-lib/actions by doing only the required work for tests manually. --- .github/workflows/flepicommon-ci.yml | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/.github/workflows/flepicommon-ci.yml b/.github/workflows/flepicommon-ci.yml index 04c76ddce..5dc00117e 100644 --- a/.github/workflows/flepicommon-ci.yml +++ b/.github/workflows/flepicommon-ci.yml @@ -28,14 +28,27 @@ jobs: lfs: true - name: Setup R ${{ matrix.R-version }} uses: r-lib/actions/setup-r@v2 - - name: Install Dependencies - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: any::devtools - working-directory: 'flepimop/R_packages/flepicommon' + r-version: ${{ matrix.R-version }} + - name: Build flepicommon + run: | + R CMD build flepimop/R_packages/flepicommon/ + shell: bash + - name: Install Dependencies + run: | + files <- list.files() + pkg <- files[startsWith(files, "flepicommon_")] + install.packages(pkg, dependencies=TRUE) + install.packages("testthat") + shell: Rscript {0} + - name: Install flepicommon + run: | + TAR_GZ=$( find . -maxdepth 1 -regex ".*flepicommon.*" -printf "%P\n" ) + DEST=$( R -s -e "cat(.libPaths()[1L])" | xargs ) + R CMD install $TAR_GZ $DEST - name: Run Unit Tests run: | - library(devtools) - setwd("flepimop/R_packages/flepicommon") - devtools::test(stop_on_failure=TRUE) + library(flepicommon) + library(testthat) + test_local("flepimop/R_packages/flepicommon") shell: Rscript {0} From 132ba536e7b7897ac6f2be2b65558175e4e9917e Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Wed, 16 Oct 2024 14:17:08 -0400 Subject: [PATCH 31/58] Install `flepicommon` with `install.packages` Having lots of trouble getting `R CMD install` to work in a GitHub action. I suspect because `r-lib/actions/setup-r` does something clever to install just R without associated tools that cause bloat. But in this cause cause problems because the version of `R CMD install` does not match `R`. --- .github/workflows/flepicommon-ci.yml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/workflows/flepicommon-ci.yml b/.github/workflows/flepicommon-ci.yml index 5dc00117e..95b31bd44 100644 --- a/.github/workflows/flepicommon-ci.yml +++ b/.github/workflows/flepicommon-ci.yml @@ -34,18 +34,14 @@ jobs: run: | R CMD build flepimop/R_packages/flepicommon/ shell: bash - - name: Install Dependencies + - name: Install Dependencies And flepicommon run: | files <- list.files() pkg <- files[startsWith(files, "flepicommon_")] install.packages(pkg, dependencies=TRUE) install.packages("testthat") + install.packages(pkg, repos = NULL, type = "source") shell: Rscript {0} - - name: Install flepicommon - run: | - TAR_GZ=$( find . 
-maxdepth 1 -regex ".*flepicommon.*" -printf "%P\n" ) - DEST=$( R -s -e "cat(.libPaths()[1L])" | xargs ) - R CMD install $TAR_GZ $DEST - name: Run Unit Tests run: | library(flepicommon) From dbd692ed98fe4222b27f4745c7c76e7ead109cc4 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Wed, 16 Oct 2024 14:24:49 -0400 Subject: [PATCH 32/58] Avoid installing `flepicommon` Now having issues with `install.packages`, an error along the lines of "ERROR: dependencies 'dplyr', 'yaml', ..., 'tidyselect' are not available for package 'flepicommon'". However, these should be installed by the `install.packages` line. --- .github/workflows/flepicommon-ci.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/flepicommon-ci.yml b/.github/workflows/flepicommon-ci.yml index 95b31bd44..7645a0980 100644 --- a/.github/workflows/flepicommon-ci.yml +++ b/.github/workflows/flepicommon-ci.yml @@ -38,13 +38,13 @@ jobs: run: | files <- list.files() pkg <- files[startsWith(files, "flepicommon_")] - install.packages(pkg, dependencies=TRUE) - install.packages("testthat") - install.packages(pkg, repos = NULL, type = "source") + install.packages(pkg, dependencies = TRUE) + install.packages("devtools") + packageVersion("flepicommon") shell: Rscript {0} - name: Run Unit Tests run: | - library(flepicommon) - library(testthat) - test_local("flepimop/R_packages/flepicommon") + setwd("flepimop/R_packages/flepicommon") + library(devtools) + devtools::test(stop_on_failure = TRUE) shell: Rscript {0} From 58ab3b42ef9eacd11bde60dfd8942327bb9aff16 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Wed, 16 Oct 2024 15:15:23 -0400 Subject: [PATCH 33/58] Install specific R from posit deb --- .github/workflows/flepicommon-ci.yml | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/.github/workflows/flepicommon-ci.yml b/.github/workflows/flepicommon-ci.yml index 7645a0980..c79795724 100644 --- a/.github/workflows/flepicommon-ci.yml +++ b/.github/workflows/flepicommon-ci.yml @@ -26,10 +26,18 @@ jobs: uses: actions/checkout@v4 with: lfs: true - - name: Setup R ${{ matrix.R-version }} - uses: r-lib/actions/setup-r@v2 - with: - r-version: ${{ matrix.R-version }} + - name: Install R + run: | + UBUNTU_VERSION=$( echo "$( lsb_release -r )" | awk '{print $2}' | sed 's/\.//g' ) + curl -O https://cdn.rstudio.com/r/ubuntu-${UBUNTU_VERSION}/pkgs/r-${R_VERSION}_1_amd64.deb + sudo apt-get update + sudo apt-get install ./r-${R_VERSION}_1_amd64.deb + sudo ln -s /opt/R/${R_VERSION}/bin/R /usr/local/bin/R + sudo ln -s /opt/R/${R_VERSION}/bin/Rscript /usr/local/bin/Rscript + R --version + shell: bash + env: + R_VERSION: ${{ matrix.R-version }} - name: Build flepicommon run: | R CMD build flepimop/R_packages/flepicommon/ From 9bdbbc8b69c2b7678f99003b9057d4cd251db239 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Wed, 16 Oct 2024 15:34:41 -0400 Subject: [PATCH 34/58] Remove sudo usage --- .github/workflows/flepicommon-ci.yml | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/.github/workflows/flepicommon-ci.yml b/.github/workflows/flepicommon-ci.yml index c79795724..127603eab 100644 --- a/.github/workflows/flepicommon-ci.yml +++ b/.github/workflows/flepicommon-ci.yml @@ -31,10 +31,9 @@ jobs: UBUNTU_VERSION=$( echo "$( lsb_release -r )" | awk '{print $2}' | sed 's/\.//g' ) curl -O 
https://cdn.rstudio.com/r/ubuntu-${UBUNTU_VERSION}/pkgs/r-${R_VERSION}_1_amd64.deb sudo apt-get update - sudo apt-get install ./r-${R_VERSION}_1_amd64.deb - sudo ln -s /opt/R/${R_VERSION}/bin/R /usr/local/bin/R - sudo ln -s /opt/R/${R_VERSION}/bin/Rscript /usr/local/bin/Rscript - R --version + apt-get install ./r-${R_VERSION}_1_amd64.deb + ln -s /opt/R/${R_VERSION}/bin/R /usr/local/bin/R + ln -s /opt/R/${R_VERSION}/bin/Rscript /usr/local/bin/Rscript shell: bash env: R_VERSION: ${{ matrix.R-version }} @@ -42,14 +41,19 @@ jobs: run: | R CMD build flepimop/R_packages/flepicommon/ shell: bash - - name: Install Dependencies And flepicommon + - name: Install Dependencies run: | files <- list.files() pkg <- files[startsWith(files, "flepicommon_")] install.packages(pkg, dependencies = TRUE) install.packages("devtools") - packageVersion("flepicommon") shell: Rscript {0} + - name: Install flepicommon + run: | + R CMD install --help + echo "-" + TAR_GZ=$( find . -maxdepth 1 -regex ".*flepicommon.*" -printf "%P\n" ) + R CMD install $TAR_GZ - name: Run Unit Tests run: | setwd("flepimop/R_packages/flepicommon") From 7587dfd47ea2ab84e5ad5dc3781a2b320d7b0791 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Wed, 16 Oct 2024 15:41:00 -0400 Subject: [PATCH 35/58] Debug dir permission issue. --- .github/workflows/flepicommon-ci.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/flepicommon-ci.yml b/.github/workflows/flepicommon-ci.yml index 127603eab..e2ae2e73f 100644 --- a/.github/workflows/flepicommon-ci.yml +++ b/.github/workflows/flepicommon-ci.yml @@ -31,9 +31,12 @@ jobs: UBUNTU_VERSION=$( echo "$( lsb_release -r )" | awk '{print $2}' | sed 's/\.//g' ) curl -O https://cdn.rstudio.com/r/ubuntu-${UBUNTU_VERSION}/pkgs/r-${R_VERSION}_1_amd64.deb sudo apt-get update - apt-get install ./r-${R_VERSION}_1_amd64.deb + sudo apt-get install ./r-${R_VERSION}_1_amd64.deb ln -s /opt/R/${R_VERSION}/bin/R /usr/local/bin/R ln -s /opt/R/${R_VERSION}/bin/Rscript /usr/local/bin/Rscript + echo "-" + sudo ls -lah /opt/R/4.3.3/lib/R/library + echo "-" shell: bash env: R_VERSION: ${{ matrix.R-version }} From 651965ec6e5674f1a3bdaed5ac9213059cbc8bc4 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Wed, 16 Oct 2024 15:42:50 -0400 Subject: [PATCH 36/58] Restore prior version of `__init__.py` Add back the white space to avoid triggering `gempyor` CI. --- flepimop/gempyor_pkg/src/gempyor/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/flepimop/gempyor_pkg/src/gempyor/__init__.py b/flepimop/gempyor_pkg/src/gempyor/__init__.py index bdf56083b..432754c6c 100644 --- a/flepimop/gempyor_pkg/src/gempyor/__init__.py +++ b/flepimop/gempyor_pkg/src/gempyor/__init__.py @@ -1,3 +1,4 @@ ## All functions are in minimal inference. 
+ from .inference import * from .utils import * From e62b0548ba39d56866806b10f807cd5438cc649c Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Wed, 16 Oct 2024 15:45:29 -0400 Subject: [PATCH 37/58] Change R library perms --- .github/workflows/flepicommon-ci.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/flepicommon-ci.yml b/.github/workflows/flepicommon-ci.yml index e2ae2e73f..248250fd5 100644 --- a/.github/workflows/flepicommon-ci.yml +++ b/.github/workflows/flepicommon-ci.yml @@ -34,9 +34,7 @@ jobs: sudo apt-get install ./r-${R_VERSION}_1_amd64.deb ln -s /opt/R/${R_VERSION}/bin/R /usr/local/bin/R ln -s /opt/R/${R_VERSION}/bin/Rscript /usr/local/bin/Rscript - echo "-" - sudo ls -lah /opt/R/4.3.3/lib/R/library - echo "-" + sudo chmod -R 777 /opt/R/4.3.3/lib/R/library shell: bash env: R_VERSION: ${{ matrix.R-version }} From 5b02c221dd982ee989a0d3bdb268162db3895c85 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Wed, 16 Oct 2024 15:49:44 -0400 Subject: [PATCH 38/58] Add missing `repos` arg --- .github/workflows/flepicommon-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/flepicommon-ci.yml b/.github/workflows/flepicommon-ci.yml index 248250fd5..ae4ecc0e8 100644 --- a/.github/workflows/flepicommon-ci.yml +++ b/.github/workflows/flepicommon-ci.yml @@ -46,7 +46,7 @@ jobs: run: | files <- list.files() pkg <- files[startsWith(files, "flepicommon_")] - install.packages(pkg, dependencies = TRUE) + install.packages(pkg, repos = "https://cloud.r-project.org", dependencies = TRUE) install.packages("devtools") shell: Rscript {0} - name: Install flepicommon From 782d677608c6d8dbe1f50d3c0c48205f93afda76 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Thu, 17 Oct 2024 12:35:42 -0400 Subject: [PATCH 39/58] Install flepicommon with devtools, caching * Now using devtools to install the flepicommon package in the GitHub action container. * Use caching of R library to minimize repeat runs of flepicommon ci action. 
--- .github/workflows/flepicommon-ci.yml | 73 +++++++++++++++++----------- 1 file changed, 44 insertions(+), 29 deletions(-) diff --git a/.github/workflows/flepicommon-ci.yml b/.github/workflows/flepicommon-ci.yml index ae4ecc0e8..e80825ec0 100644 --- a/.github/workflows/flepicommon-ci.yml +++ b/.github/workflows/flepicommon-ci.yml @@ -26,38 +26,53 @@ jobs: uses: actions/checkout@v4 with: lfs: true - - name: Install R - run: | - UBUNTU_VERSION=$( echo "$( lsb_release -r )" | awk '{print $2}' | sed 's/\.//g' ) - curl -O https://cdn.rstudio.com/r/ubuntu-${UBUNTU_VERSION}/pkgs/r-${R_VERSION}_1_amd64.deb - sudo apt-get update - sudo apt-get install ./r-${R_VERSION}_1_amd64.deb - ln -s /opt/R/${R_VERSION}/bin/R /usr/local/bin/R - ln -s /opt/R/${R_VERSION}/bin/Rscript /usr/local/bin/Rscript - sudo chmod -R 777 /opt/R/4.3.3/lib/R/library - shell: bash - env: - R_VERSION: ${{ matrix.R-version }} - - name: Build flepicommon + - name: Setup R + uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.R-version }} + update-rtools: true + - name: Install System Dependencies + run: sudo apt install libcurl4-openssl-dev libharfbuzz-dev libfribidi-dev libtiff5-dev + - name: Determine R Library Location run: | - R CMD build flepimop/R_packages/flepicommon/ - shell: bash - - name: Install Dependencies + R_LIBPATH=$( R -s -e "cat(.libPaths()[1L])" | xargs ) + echo "R_LIBPATH=$R_LIBPATH" >> $GITHUB_ENV + R_LIBPATH_CKSUM=$( echo "$R_LIBPATH" | cksum | cut -d ' ' -f 1 ) + echo "R_LIBPATH_CKSUM=$R_LIBPATH_CKSUM" >> $GITHUB_ENV + CACHE_DATE=$( date -d "last Sunday" +%Y%m%d ) + echo "CACHE_DATE=$CACHE_DATE" >> $GITHUB_ENV + - name: R Library Cache + uses: actions/cache@v4 + with: + key: flepicommon-rlibs-${{ runner.os }}-${{ hashFiles('flepimop/R_packages/flepicommon/DESCRIPTION', 'flepimop/R_packages/flepicommon/NAMESPACE') }}-${{ env.R_LIBPATH_CKSUM }}-${{ env.CACHE_DATE }} + path: ${{ env.R_LIBPATH }} + - name: Install R Dependencies + if: steps.r-library-cache.outputs.cache-hit != 'true' run: | - files <- list.files() - pkg <- files[startsWith(files, "flepicommon_")] - install.packages(pkg, repos = "https://cloud.r-project.org", dependencies = TRUE) - install.packages("devtools") + install.packages( + "devtools", + repos = "https://cloud.r-project.org", + ) + library(devtools) + devtools::install_deps( + pkg = "flepimop/R_packages/flepicommon" + dependencies = TRUE + ) shell: Rscript {0} - - name: Install flepicommon + - name: Install The flepicommon Package run: | - R CMD install --help - echo "-" - TAR_GZ=$( find . 
-maxdepth 1 -regex ".*flepicommon.*" -printf "%P\n" ) - R CMD install $TAR_GZ - - name: Run Unit Tests + if ("flepicommon" %in% installed.packages()[,"Package"]) { + devtools::uninstall(pkg = "flepicommon") + } + devtools::install( + pkg = "flepimop/R_packages/flepicommon" + args = c(getOption("devtools.install.args"), "--install-tests"), + quick = TRUE, + dependencies = TRUE, + ) + shell: Rscript {0} + - name: Run Tests run: | - setwd("flepimop/R_packages/flepicommon") - library(devtools) - devtools::test(stop_on_failure = TRUE) + library(testthat) + test_package("flepicommon") shell: Rscript {0} From 457e94b500b9fe4f4f546d98f0cb1b0da9b9bc53 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Thu, 17 Oct 2024 12:53:35 -0400 Subject: [PATCH 40/58] Add missing commas --- .github/workflows/flepicommon-ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/flepicommon-ci.yml b/.github/workflows/flepicommon-ci.yml index e80825ec0..06d7fd8d5 100644 --- a/.github/workflows/flepicommon-ci.yml +++ b/.github/workflows/flepicommon-ci.yml @@ -55,7 +55,7 @@ jobs: ) library(devtools) devtools::install_deps( - pkg = "flepimop/R_packages/flepicommon" + pkg = "flepimop/R_packages/flepicommon", dependencies = TRUE ) shell: Rscript {0} @@ -65,7 +65,7 @@ jobs: devtools::uninstall(pkg = "flepicommon") } devtools::install( - pkg = "flepimop/R_packages/flepicommon" + pkg = "flepimop/R_packages/flepicommon", args = c(getOption("devtools.install.args"), "--install-tests"), quick = TRUE, dependencies = TRUE, From a16e5de11db04c7bed358c7ed77b74c8de59c94b Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Thu, 17 Oct 2024 13:24:11 -0400 Subject: [PATCH 41/58] Manual install of deprecated covidcast --- .github/workflows/flepicommon-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/flepicommon-ci.yml b/.github/workflows/flepicommon-ci.yml index 06d7fd8d5..d6b23897c 100644 --- a/.github/workflows/flepicommon-ci.yml +++ b/.github/workflows/flepicommon-ci.yml @@ -58,6 +58,7 @@ jobs: pkg = "flepimop/R_packages/flepicommon", dependencies = TRUE ) + install.packages("covidcast", repos = "https://cloud.r-project.org") shell: Rscript {0} - name: Install The flepicommon Package run: | From 8ccdae5518dce4f38dd91759cb85e07c6c39157a Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Thu, 17 Oct 2024 13:53:17 -0400 Subject: [PATCH 42/58] Add missing `libudunits2-dev` for `units` pkg --- .github/workflows/flepicommon-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/flepicommon-ci.yml b/.github/workflows/flepicommon-ci.yml index d6b23897c..055d1f48d 100644 --- a/.github/workflows/flepicommon-ci.yml +++ b/.github/workflows/flepicommon-ci.yml @@ -32,7 +32,7 @@ jobs: r-version: ${{ matrix.R-version }} update-rtools: true - name: Install System Dependencies - run: sudo apt install libcurl4-openssl-dev libharfbuzz-dev libfribidi-dev libtiff5-dev + run: sudo apt install libcurl4-openssl-dev libharfbuzz-dev libfribidi-dev libtiff5-dev libudunits2-dev - name: Determine R Library Location run: | R_LIBPATH=$( R -s -e "cat(.libPaths()[1L])" | xargs ) From 3ce2b4895447329f7756d89bd807e16e0ac6642a Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Thu, 17 Oct 2024 14:24:15 -0400 Subject: [PATCH 43/58] Missing gdal deps for `sf` package 
See installation instructions for the `sf` R package here: https://github.com/r-spatial/sf?tab=readme-ov-file#linux. --- .github/workflows/flepicommon-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/flepicommon-ci.yml b/.github/workflows/flepicommon-ci.yml index 055d1f48d..78c77c016 100644 --- a/.github/workflows/flepicommon-ci.yml +++ b/.github/workflows/flepicommon-ci.yml @@ -32,7 +32,7 @@ jobs: r-version: ${{ matrix.R-version }} update-rtools: true - name: Install System Dependencies - run: sudo apt install libcurl4-openssl-dev libharfbuzz-dev libfribidi-dev libtiff5-dev libudunits2-dev + run: sudo apt install libcurl4-openssl-dev libharfbuzz-dev libfribidi-dev libtiff5-dev libudunits2-dev libgdal-dev libgeos-dev libproj-dev - name: Determine R Library Location run: | R_LIBPATH=$( R -s -e "cat(.libPaths()[1L])" | xargs ) From aeb96be5d96778d3b51fe13a61c3d0aeaf2c6d24 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Thu, 17 Oct 2024 15:09:22 -0400 Subject: [PATCH 44/58] Make `inference` ci similar to `flepicommon` ci --- .github/workflows/inference-ci.yml | 99 ++++++++++++++++++++++++++---- 1 file changed, 86 insertions(+), 13 deletions(-) diff --git a/.github/workflows/inference-ci.yml b/.github/workflows/inference-ci.yml index 4bb2a5b8b..5958cbcff 100644 --- a/.github/workflows/inference-ci.yml +++ b/.github/workflows/inference-ci.yml @@ -22,25 +22,98 @@ on: jobs: ci: runs-on: ubuntu-latest - if: ${{ github.event.workflow_run.conclusion == 'success' }} - container: - image: hopkinsidd/flepimop:latest-dev - options: --user root + if: ${{ github.event_name != 'workflow_run' or github.event.workflow_run.conclusion == 'success' }} + strategy: + matrix: + R-version: ["4.3.3"] + python-version: ["3.10", "3.11"] steps: - name: Checkout uses: actions/checkout@v4 with: lfs: true - - name: Set up Rprofile + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install gempyor run: | - cp build/docker/Docker.Rprofile $HOME/.Rprofile - cp /home/app/.bashrc $HOME/.bashrc - shell: bash - - name: Install local R packages - run: Rscript build/local_install.R + python -m pip install --upgrade pip + python -m pip install "flepimop/gempyor_pkg[test]" shell: bash - - name: Run inference tests + - name: Setup R + uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.R-version }} + update-rtools: true + - name: Install System Dependencies + run: sudo apt install libcurl4-openssl-dev libharfbuzz-dev libfribidi-dev libtiff5-dev libudunits2-dev libgdal-dev libgeos-dev libproj-dev + - name: Determine R Library Location + run: | + R_LIBPATH=$( R -s -e "cat(.libPaths()[1L])" | xargs ) + echo "R_LIBPATH=$R_LIBPATH" >> $GITHUB_ENV + R_LIBPATH_CKSUM=$( echo "$R_LIBPATH" | cksum | cut -d ' ' -f 1 ) + echo "R_LIBPATH_CKSUM=$R_LIBPATH_CKSUM" >> $GITHUB_ENV + CACHE_DATE=$( date -d "last Sunday" +%Y%m%d ) + echo "CACHE_DATE=$CACHE_DATE" >> $GITHUB_ENV + - name: R Library Cache + uses: actions/cache@v4 + with: + key: inference-rlibs-${{ runner.os }}-${{ hashFiles('flepimop/R_packages/flepicommon/DESCRIPTION', 'flepimop/R_packages/flepicommon/NAMESPACE', 'flepimop/R_packages/inference/DESCRIPTION', 'flepimop/R_packages/inference/NAMESPACE') }}-${{ env.R_LIBPATH_CKSUM }}-${{ env.CACHE_DATE }} + path: ${{ env.R_LIBPATH }} + - name: Install R Dependencies For flepicommon + if: steps.r-library-cache.outputs.cache-hit != 'true' + 
run: | + install.packages( + "devtools", + repos = "https://cloud.r-project.org", + ) + library(devtools) + devtools::install_deps( + pkg = "flepimop/R_packages/flepicommon", + dependencies = TRUE + ) + install.packages("covidcast", repos = "https://cloud.r-project.org") + shell: Rscript {0} + - name: Install The flepicommon Package + run: | + if ("flepicommon" %in% installed.packages()[,"Package"]) { + devtools::uninstall(pkg = "flepicommon") + } + devtools::install( + pkg = "flepimop/R_packages/flepicommon", + quick = TRUE, + dependencies = TRUE, + ) + shell: Rscript {0} + - name: Install R Dependencies For inference + if: steps.r-library-cache.outputs.cache-hit != 'true' + run: | + install.packages( + "devtools", + repos = "https://cloud.r-project.org", + ) + library(devtools) + devtools::install_deps( + pkg = "flepimop/R_packages/inference", + dependencies = TRUE + ) + install.packages("covidcast", repos = "https://cloud.r-project.org") + - name: Install The flepicommon Package + - name: Install The inference Package + run: | + if ("inference" %in% installed.packages()[,"Package"]) { + devtools::uninstall(pkg = "inference") + } + devtools::install( + pkg = "flepimop/R_packages/inference", + args = c(getOption("devtools.install.args"), "--install-tests"), + quick = TRUE, + dependencies = TRUE, + ) + shell: Rscript {0} + - name: Run Tests run: | - setwd("flepimop/R_packages/inference") - devtools::test(stop_on_failure=TRUE) + library(testthat) + test_package("inference") shell: Rscript {0} From 551fdaa38d92113a36dd1b90d03c17745b4f73f9 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Thu, 17 Oct 2024 15:10:59 -0400 Subject: [PATCH 45/58] Trigger `inference-ci` Removed whitespace in `flepicommon` package to stop that ci and added whitespace to the `inference` package to trigger that ci. --- flepimop/R_packages/flepicommon/R/config.R | 1 - flepimop/R_packages/inference/R/inference_to_forecast.R | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/flepimop/R_packages/flepicommon/R/config.R b/flepimop/R_packages/flepicommon/R/config.R index e4e2dd91d..6f65ff562 100644 --- a/flepimop/R_packages/flepicommon/R/config.R +++ b/flepimop/R_packages/flepicommon/R/config.R @@ -159,7 +159,6 @@ check_within_bounds <- function(value, obj) { } } - #' @name prettyprint_optlist #' @description Print a list of options such that it does not take the whole screen #' Display `name : value` \n for all elements. diff --git a/flepimop/R_packages/inference/R/inference_to_forecast.R b/flepimop/R_packages/inference/R/inference_to_forecast.R index 13431f056..112afeac8 100644 --- a/flepimop/R_packages/inference/R/inference_to_forecast.R +++ b/flepimop/R_packages/inference/R/inference_to_forecast.R @@ -28,6 +28,7 @@ cum_death_forecast <- function (sim_data, } + ##' ##' Creates a merged forecast from a data object and a set of sims. 
##' From 7948050862368df500f0dd78932ff4191523881d Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Thu, 17 Oct 2024 15:11:56 -0400 Subject: [PATCH 46/58] Limit to python 3.10 for the moment --- .github/workflows/inference-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/inference-ci.yml b/.github/workflows/inference-ci.yml index 5958cbcff..0816205aa 100644 --- a/.github/workflows/inference-ci.yml +++ b/.github/workflows/inference-ci.yml @@ -26,7 +26,7 @@ jobs: strategy: matrix: R-version: ["4.3.3"] - python-version: ["3.10", "3.11"] + python-version: ["3.10"] steps: - name: Checkout uses: actions/checkout@v4 From 82fe326da3f21152ab7321240a70fdf56c7461a4 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Thu, 17 Oct 2024 15:14:31 -0400 Subject: [PATCH 47/58] Fix invalid or syntax --- .github/workflows/inference-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/inference-ci.yml b/.github/workflows/inference-ci.yml index 0816205aa..8f0e546fe 100644 --- a/.github/workflows/inference-ci.yml +++ b/.github/workflows/inference-ci.yml @@ -22,7 +22,7 @@ on: jobs: ci: runs-on: ubuntu-latest - if: ${{ github.event_name != 'workflow_run' or github.event.workflow_run.conclusion == 'success' }} + if: ${{ github.event_name != 'workflow_run' || github.event.workflow_run.conclusion == 'success' }} strategy: matrix: R-version: ["4.3.3"] From db79f30cced20512a9499947a6f4b1dc0b0e552b Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Thu, 17 Oct 2024 15:45:43 -0400 Subject: [PATCH 48/58] Remove dup `devtools` install, erroneous line --- .github/workflows/inference-ci.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.github/workflows/inference-ci.yml b/.github/workflows/inference-ci.yml index 8f0e546fe..c841a6dfa 100644 --- a/.github/workflows/inference-ci.yml +++ b/.github/workflows/inference-ci.yml @@ -89,17 +89,11 @@ jobs: - name: Install R Dependencies For inference if: steps.r-library-cache.outputs.cache-hit != 'true' run: | - install.packages( - "devtools", - repos = "https://cloud.r-project.org", - ) - library(devtools) devtools::install_deps( pkg = "flepimop/R_packages/inference", dependencies = TRUE ) install.packages("covidcast", repos = "https://cloud.r-project.org") - - name: Install The flepicommon Package - name: Install The inference Package run: | if ("inference" %in% installed.packages()[,"Package"]) { From a05500fedc2efbe7d461ef0458b498727379c057 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Thu, 17 Oct 2024 15:49:42 -0400 Subject: [PATCH 49/58] Avoid checking out documentation folder --- .github/workflows/flepicommon-ci.yml | 4 ++++ .github/workflows/gempyor-ci.yml | 4 ++++ .github/workflows/inference-ci.yml | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/.github/workflows/flepicommon-ci.yml b/.github/workflows/flepicommon-ci.yml index 78c77c016..3b8b26ffd 100644 --- a/.github/workflows/flepicommon-ci.yml +++ b/.github/workflows/flepicommon-ci.yml @@ -26,6 +26,10 @@ jobs: uses: actions/checkout@v4 with: lfs: true + sparse-checkout: | + * + !documentation/ + sparse-checkout-cone-mode: false - name: Setup R uses: r-lib/actions/setup-r@v2 with: diff --git a/.github/workflows/gempyor-ci.yml b/.github/workflows/gempyor-ci.yml index 86b4645a3..c6d9f398d 100644 --- 
a/.github/workflows/gempyor-ci.yml +++ b/.github/workflows/gempyor-ci.yml @@ -28,6 +28,10 @@ jobs: uses: actions/checkout@v4 with: lfs: true + sparse-checkout: | + * + !documentation/ + sparse-checkout-cone-mode: false - name: Setup Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: diff --git a/.github/workflows/inference-ci.yml b/.github/workflows/inference-ci.yml index c841a6dfa..0b9e24e75 100644 --- a/.github/workflows/inference-ci.yml +++ b/.github/workflows/inference-ci.yml @@ -32,6 +32,10 @@ jobs: uses: actions/checkout@v4 with: lfs: true + sparse-checkout: | + * + !documentation/ + sparse-checkout-cone-mode: false - name: Setup Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: From 9daa90e51e0e70f527d89549c013fbf8cddc48df Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Thu, 17 Oct 2024 16:31:37 -0400 Subject: [PATCH 50/58] Add missing shell specification --- .github/workflows/inference-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/inference-ci.yml b/.github/workflows/inference-ci.yml index 0b9e24e75..9999f7090 100644 --- a/.github/workflows/inference-ci.yml +++ b/.github/workflows/inference-ci.yml @@ -98,6 +98,7 @@ jobs: dependencies = TRUE ) install.packages("covidcast", repos = "https://cloud.r-project.org") + shell: Rscript {0} - name: Install The inference Package run: | if ("inference" %in% installed.packages()[,"Package"]) { From 1ea7afee86e3235c47109743664e2b7c6f0ad2c7 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Thu, 17 Oct 2024 17:16:47 -0400 Subject: [PATCH 51/58] Add R version to cache --- .github/workflows/flepicommon-ci.yml | 2 +- .github/workflows/inference-ci.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/flepicommon-ci.yml b/.github/workflows/flepicommon-ci.yml index 3b8b26ffd..8a3fff12e 100644 --- a/.github/workflows/flepicommon-ci.yml +++ b/.github/workflows/flepicommon-ci.yml @@ -48,7 +48,7 @@ jobs: - name: R Library Cache uses: actions/cache@v4 with: - key: flepicommon-rlibs-${{ runner.os }}-${{ hashFiles('flepimop/R_packages/flepicommon/DESCRIPTION', 'flepimop/R_packages/flepicommon/NAMESPACE') }}-${{ env.R_LIBPATH_CKSUM }}-${{ env.CACHE_DATE }} + key: flepicommon-rlibs-${{ runner.os }}-${{ matrix.R-version }}-${{ hashFiles('flepimop/R_packages/flepicommon/DESCRIPTION', 'flepimop/R_packages/flepicommon/NAMESPACE') }}-${{ env.R_LIBPATH_CKSUM }}-${{ env.CACHE_DATE }} path: ${{ env.R_LIBPATH }} - name: Install R Dependencies if: steps.r-library-cache.outputs.cache-hit != 'true' diff --git a/.github/workflows/inference-ci.yml b/.github/workflows/inference-ci.yml index 9999f7090..77f9bd019 100644 --- a/.github/workflows/inference-ci.yml +++ b/.github/workflows/inference-ci.yml @@ -63,7 +63,7 @@ jobs: - name: R Library Cache uses: actions/cache@v4 with: - key: inference-rlibs-${{ runner.os }}-${{ hashFiles('flepimop/R_packages/flepicommon/DESCRIPTION', 'flepimop/R_packages/flepicommon/NAMESPACE', 'flepimop/R_packages/inference/DESCRIPTION', 'flepimop/R_packages/inference/NAMESPACE') }}-${{ env.R_LIBPATH_CKSUM }}-${{ env.CACHE_DATE }} + key: inference-rlibs-${{ runner.os }}-${{ matrix.R-version }}-${{ hashFiles('flepimop/R_packages/flepicommon/DESCRIPTION', 'flepimop/R_packages/flepicommon/NAMESPACE', 'flepimop/R_packages/inference/DESCRIPTION', 'flepimop/R_packages/inference/NAMESPACE') }}-${{ env.R_LIBPATH_CKSUM }}-${{ env.CACHE_DATE }} 
path: ${{ env.R_LIBPATH }} - name: Install R Dependencies For flepicommon if: steps.r-library-cache.outputs.cache-hit != 'true' From 10b4a28e767785d231c32371f2b01e032e418c63 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Thu, 17 Oct 2024 17:17:12 -0400 Subject: [PATCH 52/58] Add back python 3.11 --- .github/workflows/inference-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/inference-ci.yml b/.github/workflows/inference-ci.yml index 77f9bd019..38c8228e8 100644 --- a/.github/workflows/inference-ci.yml +++ b/.github/workflows/inference-ci.yml @@ -26,7 +26,7 @@ jobs: strategy: matrix: R-version: ["4.3.3"] - python-version: ["3.10"] + python-version: ["3.10", "3.11"] steps: - name: Checkout uses: actions/checkout@v4 From 6c9aff7fa41bc5ca9c6435f4c8c195fedd272079 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Thu, 17 Oct 2024 17:18:20 -0400 Subject: [PATCH 53/58] Remove whitespace to trigger full CI --- flepimop/R_packages/flepicommon/R/DataUtils.R | 1 - flepimop/gempyor_pkg/src/gempyor/__init__.py | 1 - 2 files changed, 2 deletions(-) diff --git a/flepimop/R_packages/flepicommon/R/DataUtils.R b/flepimop/R_packages/flepicommon/R/DataUtils.R index ec94e0c04..7d882c8f0 100755 --- a/flepimop/R_packages/flepicommon/R/DataUtils.R +++ b/flepimop/R_packages/flepicommon/R/DataUtils.R @@ -1019,4 +1019,3 @@ get_CSSE_US_matchGlobal_data <- function(){ } - diff --git a/flepimop/gempyor_pkg/src/gempyor/__init__.py b/flepimop/gempyor_pkg/src/gempyor/__init__.py index 432754c6c..bdf56083b 100644 --- a/flepimop/gempyor_pkg/src/gempyor/__init__.py +++ b/flepimop/gempyor_pkg/src/gempyor/__init__.py @@ -1,4 +1,3 @@ ## All functions are in minimal inference. 
- from .inference import * from .utils import * From 161915ed8365fed22ca150a80988ae7f1cb33075 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Fri, 18 Oct 2024 08:11:11 -0400 Subject: [PATCH 54/58] Rename 'ci' step to 'tests' --- .github/workflows/flepicommon-ci.yml | 2 +- .github/workflows/gempyor-ci.yml | 2 +- .github/workflows/inference-ci.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/flepicommon-ci.yml b/.github/workflows/flepicommon-ci.yml index 8a3fff12e..add58aab0 100644 --- a/.github/workflows/flepicommon-ci.yml +++ b/.github/workflows/flepicommon-ci.yml @@ -16,7 +16,7 @@ on: - dev jobs: - ci: + tests: runs-on: ubuntu-latest strategy: matrix: diff --git a/.github/workflows/gempyor-ci.yml b/.github/workflows/gempyor-ci.yml index c6d9f398d..bb21b6938 100644 --- a/.github/workflows/gempyor-ci.yml +++ b/.github/workflows/gempyor-ci.yml @@ -18,7 +18,7 @@ on: - dev jobs: - ci: + tests: runs-on: ubuntu-latest strategy: matrix: diff --git a/.github/workflows/inference-ci.yml b/.github/workflows/inference-ci.yml index 38c8228e8..d94707896 100644 --- a/.github/workflows/inference-ci.yml +++ b/.github/workflows/inference-ci.yml @@ -20,7 +20,7 @@ on: - dev jobs: - ci: + tests: runs-on: ubuntu-latest if: ${{ github.event_name != 'workflow_run' || github.event.workflow_run.conclusion == 'success' }} strategy: From 0bebbb355bcc65dc44f36436f2477708939b3650 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Fri, 18 Oct 2024 08:24:15 -0400 Subject: [PATCH 55/58] Remove `devtools::uninstall` calls --- .github/workflows/flepicommon-ci.yml | 4 +--- .github/workflows/inference-ci.yml | 8 ++------ 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/.github/workflows/flepicommon-ci.yml b/.github/workflows/flepicommon-ci.yml index add58aab0..ca2a62415 100644 --- a/.github/workflows/flepicommon-ci.yml +++ b/.github/workflows/flepicommon-ci.yml @@ -66,14 +66,12 @@ jobs: shell: Rscript {0} - name: Install The flepicommon Package run: | - if ("flepicommon" %in% installed.packages()[,"Package"]) { - devtools::uninstall(pkg = "flepicommon") - } devtools::install( pkg = "flepimop/R_packages/flepicommon", args = c(getOption("devtools.install.args"), "--install-tests"), quick = TRUE, dependencies = TRUE, + force = TRUE ) shell: Rscript {0} - name: Run Tests diff --git a/.github/workflows/inference-ci.yml b/.github/workflows/inference-ci.yml index d94707896..54390e130 100644 --- a/.github/workflows/inference-ci.yml +++ b/.github/workflows/inference-ci.yml @@ -81,13 +81,11 @@ jobs: shell: Rscript {0} - name: Install The flepicommon Package run: | - if ("flepicommon" %in% installed.packages()[,"Package"]) { - devtools::uninstall(pkg = "flepicommon") - } devtools::install( pkg = "flepimop/R_packages/flepicommon", quick = TRUE, dependencies = TRUE, + force = TRUE ) shell: Rscript {0} - name: Install R Dependencies For inference @@ -101,14 +99,12 @@ jobs: shell: Rscript {0} - name: Install The inference Package run: | - if ("inference" %in% installed.packages()[,"Package"]) { - devtools::uninstall(pkg = "inference") - } devtools::install( pkg = "flepimop/R_packages/inference", args = c(getOption("devtools.install.args"), "--install-tests"), quick = TRUE, dependencies = TRUE, + force = TRUE ) shell: Rscript {0} - name: Run Tests From cd81c5ef3a15ea7cee7877c4ef84f292387dd877 Mon Sep 17 00:00:00 2001 From: Timothy Willard 
<9395586+TimothyWillard@users.noreply.github.com> Date: Fri, 18 Oct 2024 11:13:03 -0400 Subject: [PATCH 56/58] Remove whitespace changes Remove the changes used to trigger the workflows as an example. Not needed for the PR. --- flepimop/R_packages/flepicommon/R/DataUtils.R | 1 + flepimop/R_packages/inference/R/inference_to_forecast.R | 1 - flepimop/gempyor_pkg/src/gempyor/__init__.py | 1 + 3 files changed, 2 insertions(+), 1 deletion(-) diff --git a/flepimop/R_packages/flepicommon/R/DataUtils.R b/flepimop/R_packages/flepicommon/R/DataUtils.R index 7d882c8f0..ec94e0c04 100755 --- a/flepimop/R_packages/flepicommon/R/DataUtils.R +++ b/flepimop/R_packages/flepicommon/R/DataUtils.R @@ -1019,3 +1019,4 @@ get_CSSE_US_matchGlobal_data <- function(){ } + diff --git a/flepimop/R_packages/inference/R/inference_to_forecast.R b/flepimop/R_packages/inference/R/inference_to_forecast.R index 112afeac8..13431f056 100644 --- a/flepimop/R_packages/inference/R/inference_to_forecast.R +++ b/flepimop/R_packages/inference/R/inference_to_forecast.R @@ -28,7 +28,6 @@ cum_death_forecast <- function (sim_data, } - ##' ##' Creates a merged forecast from a data object and a set of sims. ##' diff --git a/flepimop/gempyor_pkg/src/gempyor/__init__.py b/flepimop/gempyor_pkg/src/gempyor/__init__.py index bdf56083b..432754c6c 100644 --- a/flepimop/gempyor_pkg/src/gempyor/__init__.py +++ b/flepimop/gempyor_pkg/src/gempyor/__init__.py @@ -1,3 +1,4 @@ ## All functions are in minimal inference. + from .inference import * from .utils import * From f67ffeb25c0618665bbb3e0884e81eb83b622614 Mon Sep 17 00:00:00 2001 From: Timothy Willard <9395586+TimothyWillard@users.noreply.github.com> Date: Fri, 18 Oct 2024 11:22:58 -0400 Subject: [PATCH 57/58] Remove `dev` branch from triggers --- .github/workflows/flepicommon-ci.yml | 2 -- .github/workflows/gempyor-ci.yml | 2 -- .github/workflows/inference-ci.yml | 2 -- 3 files changed, 6 deletions(-) diff --git a/.github/workflows/flepicommon-ci.yml b/.github/workflows/flepicommon-ci.yml index ca2a62415..9f8abc089 100644 --- a/.github/workflows/flepicommon-ci.yml +++ b/.github/workflows/flepicommon-ci.yml @@ -7,13 +7,11 @@ on: - flepimop/R_packages/flepicommon/**/* branches: - main - - dev pull_request: paths: - flepimop/R_packages/flepicommon/**/* branches: - main - - dev jobs: tests: diff --git a/.github/workflows/gempyor-ci.yml b/.github/workflows/gempyor-ci.yml index bb21b6938..e2637f1af 100644 --- a/.github/workflows/gempyor-ci.yml +++ b/.github/workflows/gempyor-ci.yml @@ -8,14 +8,12 @@ on: - flepimop/gempyor_pkg/**/* branches: - main - - dev pull_request: paths: - examples/**/* - flepimop/gempyor_pkg/**/* branches: - main - - dev jobs: tests: diff --git a/.github/workflows/inference-ci.yml b/.github/workflows/inference-ci.yml index 54390e130..d80a2e735 100644 --- a/.github/workflows/inference-ci.yml +++ b/.github/workflows/inference-ci.yml @@ -11,13 +11,11 @@ on: - flepimop/R_packages/inference/**/* branches: - main - - dev pull_request: paths: - flepimop/R_packages/inference/**/* branches: - main - - dev jobs: tests: From e39c39a0e27d7503e9e3a7dc3a9ac94b5528c044 Mon Sep 17 00:00:00 2001 From: Emily Przykucki <100221052+emprzy@users.noreply.github.com> Date: Fri, 18 Oct 2024 11:45:55 -0400 Subject: [PATCH 58/58] Delete .github/workflows/labeler.yml --- .github/workflows/labeler.yml | 18 ------------------ 1 file changed, 18 deletions(-) delete mode 100644 .github/workflows/labeler.yml diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml deleted file mode 100644 
index 2b022fe7a..000000000 --- a/.github/workflows/labeler.yml +++ /dev/null @@ -1,18 +0,0 @@ -name: "Issue Labeler" -on: - issues: - types: [opened, edited] - -permissions: - issues: write - contents: read - -jobs: - triage: - runs-on: ubuntu-latest - steps: - - uses: github/issue-labeler@v3.3 - with: - configuration-path: .github/labeler.yml - enable-versioned-regex: 0 - repo-token: ${{ github.token }}
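
For reference, the R-focused workflows in these patches converge on the same caching recipe: derive the active library path from `.libPaths()`, reduce it to a short checksum, add a "last Sunday" date stamp, and combine those with a `hashFiles()` of the package's DESCRIPTION and NAMESPACE to build the `actions/cache@v4` key. A minimal standalone sketch of that pattern follows; it is not part of the patch series, and the workflow name (example-r-ci), the cache prefix (mypackage-rlibs), and the assumption that the R package sits at the repository root are placeholders.

name: example-r-ci

on:
  workflow_dispatch:

jobs:
  tests:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: r-lib/actions/setup-r@v2
        with:
          r-version: "4.3.3"
      # Export the library path, a checksum of it, and a weekly date stamp
      # so later steps can reference them through the env context.
      - name: Determine R Library Location
        run: |
          R_LIBPATH=$( R -s -e "cat(.libPaths()[1L])" | xargs )
          echo "R_LIBPATH=$R_LIBPATH" >> $GITHUB_ENV
          echo "R_LIBPATH_CKSUM=$( echo "$R_LIBPATH" | cksum | cut -d ' ' -f 1 )" >> $GITHUB_ENV
          echo "CACHE_DATE=$( date -d 'last Sunday' +%Y%m%d )" >> $GITHUB_ENV
      # Cache the whole user library; the key rotates when the dependency
      # metadata changes, when the library path moves, or at most weekly.
      # The `id` is what lets steps.r-library-cache.outputs.cache-hit resolve.
      - name: R Library Cache
        id: r-library-cache
        uses: actions/cache@v4
        with:
          key: mypackage-rlibs-${{ runner.os }}-${{ hashFiles('DESCRIPTION', 'NAMESPACE') }}-${{ env.R_LIBPATH_CKSUM }}-${{ env.CACHE_DATE }}
          path: ${{ env.R_LIBPATH }}
      # Only pay the dependency-install cost on a cache miss.
      - name: Install R Dependencies
        if: steps.r-library-cache.outputs.cache-hit != 'true'
        run: |
          install.packages("devtools", repos = "https://cloud.r-project.org")
          devtools::install_deps(dependencies = TRUE)
        shell: Rscript {0}

Hashing DESCRIPTION and NAMESPACE rebuilds the cache whenever the declared dependencies change, while the last-Sunday date stamp forces a refresh at least weekly even when those files are untouched.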