Merge branch 'breaking-improvments' of https://github.com/HopkinsIDD/flepiMoP into breaking-improvments
HopkinsIDD · Nov 10, 2023 · f32d946 · f32d946
2 parents 5d019ca + a80a672
commit f32d946
Show file tree

Hide file tree

Showing 73 changed files with 3,843 additions and 954 deletions.
diff --git a/.gitignore b/.gitignore
@@ -65,3 +65,9 @@ Outcomes.egg-info/
 
 # R package manuals
 man/
+flepimop/gempyor_pkg/.coverage
+flepimop/gempyor_pkg/.coverage.kojis-mbp-8.sph.ad.jhsph.edu.6137.959542
+flepimop/gempyor_pkg/get_value.prof
+flepimop/gempyor_pkg/tests/seir/.coverage
+flepimop/gempyor_pkg/tests/seir/.coverage.kojis-mbp-8.sph.ad.jhsph.edu.90615.974746
+flepimop/gempyor_pkg/.coverage
diff --git a/batch/SLURM_inference_job.run b/batch/SLURM_inference_job.run
@@ -177,7 +177,7 @@ if [[ $S3_UPLOAD == "true" ]]; then
             export FILENAME=$(python -c "from gempyor import file_paths; print(file_paths.create_file_name(run_id='$FLEPI_RUN_INDEX',
                                                                                                         prefix='$FLEPI_PREFIX/$FLEPI_RUN_INDEX',
                                                                                                         inference_filepath_suffix='chimeric/intermediate',
-                                                                                                        iference_filename_prefix='%09d.'% $FLEPI_SLOT_INDEX,
+                                                                                                        inference_filename_prefix='%09d.'% $FLEPI_SLOT_INDEX,
                                                                                                         index=$FLEPI_BLOCK_INDEX,
                                                                                                         ftype='$type',
                                                                                                         extension='csv'))")

diff --git a/flepimop/R_packages/config.writer/R/yaml_utils.R b/flepimop/R_packages/config.writer/R/yaml_utils.R
@@ -301,7 +301,7 @@ print_value1 <- function(value_type, value_dist, value_mean,
     space3 <- rep(" ", indent_space + 4) %>% paste0(collapse = "")
 
     print_val <- ""
-    if (value_type == "timeseriess" && !is.null(value_type)){
+    if (value_type == "timeseries" && !is.null(value_type)){
         print_val <- paste0(print_val,
                             space, "timeseries: ", value_mean$timeseries, "\n")
 
@@ -1974,15 +1974,19 @@ seir_chunk <- function(resume_modifier = NULL,
                    "      proportion_exponent: [\n",
                    "        [",rate_propexp_parts, ",\"1\",\"1\",\"1\"],\n",
                    "        [",rate_alpha_parts, ",\"1\",\"1\",\"1\"]]\n",
-                   "      rate: [\n",
-                   paste0(sapply(X = na.omit(c(rate_seir_parts, rate_vacc_parts, rate_var_parts, rate_age_parts)),
-                              function(x = X){ paste0("        ",x,",\n")}) ),
-                   "      ]\n"),
+                   "      rate: [",
+                   ifelse(nchar(rate_seir_parts)<100,
+                          paste(na.omit(c(rate_seir_parts, rate_vacc_parts, rate_var_parts, rate_age_parts)), collapse = ", "),
+                          paste0("\n        ", paste(na.omit(c(rate_seir_parts, rate_vacc_parts, rate_var_parts, rate_age_parts)), collapse = ",\n        "))),
+                   "]\n"),
                paste0(
                    "      proportional_to: [\"source\"]\n",
                    "      proportion_exponent: [[\"1\",\"1\",\"1\",\"1\"]]\n",
-                   "      rate: [", paste(na.omit(c(rate_seir_parts, rate_vacc_parts, rate_var_parts, rate_age_parts)), collapse = ", "), "]\n")),
-                   # "      rate: [", glue::glue_collapse(na.omit(c(rate_seir_parts, rate_vacc_parts, rate_var_parts, rate_age_parts)), collapse = ", "), "]\n")),
+                   "      rate: [",
+                   ifelse(nchar(rate_seir_parts)<100,
+                          paste(na.omit(c(rate_seir_parts, rate_vacc_parts, rate_var_parts, rate_age_parts)), collapse = ", "),
+                          paste0("\n        ", paste(na.omit(c(rate_seir_parts, rate_vacc_parts, rate_var_parts, rate_age_parts)), collapse = ",\n        "))),
+                   "]\n")),
         "\n")
 
     return(tmp)

diff --git a/flepimop/R_packages/inference/R/inference_slot_runner_funcs.R b/flepimop/R_packages/inference/R/inference_slot_runner_funcs.R
@@ -731,7 +731,7 @@ initialize_mcmc_first_block <- function(
     # These functions save variables to files of the form variable/name/seir_modifiers_scenario/outcome_modifiers_scenario/run_id/global/intermediate/slot.(block-1),runID.variable.ext
     if (any(checked_par_files %in% global_file_names)) {
         if (!all(checked_par_files %in% global_file_names)) {
-            stop("Provided some InferenceSimulator input, but not all")
+            stop("Provided some GempyorSimulator input, but not all")
         }
         if (any(checked_sim_files %in% global_file_names)) {
             if (!all(checked_sim_files %in% global_file_names)) {
@@ -747,7 +747,7 @@ initialize_mcmc_first_block <- function(
             })
             #gempyor_inference_runner$one_simulation(sim_id2write = block - 1)
         } else {
-            stop("Provided some InferenceSimulator output(seir, hosp), but not InferenceSimulator input")
+            stop("Provided some GempyorSimulator output(seir, hosp), but not GempyorSimulator input")
         }
     } else {
         if (any(checked_sim_files %in% global_file_names)) {

diff --git a/flepimop/gempyor_pkg/src/gempyor/interface.py b/flepimop/gempyor_pkg/src/gempyor/interface.py
@@ -50,7 +50,6 @@ def __init__(
         stoch_traj_flag=False,
         rng_seed=None,
         nslots=1,
-        initialize=True,
         inference_filename_prefix="",  # usually for {global or chimeric}/{intermediate or final}
         inference_filepath_suffix="",  # usually for the slot_id
         out_run_id=None,  # if out_run_id is different from in_run_id, fill this
@@ -100,7 +99,7 @@ def __init__(
             f"""  gempyor >> prefix: {self.modinf.in_prefix};"""  # ti: {s.ti}; tf: {s.tf};
         )
 
-        self.already_built = False  # whether we have already build the costly objects that need just one build
+        self.already_built = False  # whether we have already built the costly objects that need just one build
 
     def update_prefix(self, new_prefix, new_out_prefix=None):
         self.modinf.in_prefix = new_prefix

diff --git a/flepimop/gempyor_pkg/src/gempyor/model_info.py b/flepimop/gempyor_pkg/src/gempyor/model_info.py
@@ -258,13 +258,13 @@ def get_setup_name(self):
         return self.setup_name
 
     def read_simID(self, ftype: str, sim_id: int, input: bool = True, extension_override: str = ""):
-        fname=self.get_filename(
-                ftype=ftype,
-                sim_id=sim_id,
-                input=input,
-                extension_override=extension_override,
-            )
-        #print(f"Readings {fname}")
+        fname = self.get_filename(
+            ftype=ftype,
+            sim_id=sim_id,
+            input=input,
+            extension_override=extension_override,
+        )
+        # print(f"Readings {fname}")
         return read_df(fname=fname)
 
     def write_simID(
@@ -281,7 +281,7 @@ def write_simID(
             input=input,
             extension_override=extension_override,
         )
-        #print(f"Writing {fname}")
+        # print(f"Writing {fname}")
         write_df(
             fname=fname,
             df=df,

diff --git a/flepimop/gempyor_pkg/src/gempyor/parameters.py b/flepimop/gempyor_pkg/src/gempyor/parameters.py
@@ -71,17 +71,20 @@ def __init__(
                     print("loaded dates:", df.index)
                     raise ValueError(
                         f"""ERROR loading file {fn_name} for parameter {pn}: 
-                    the 'date' index of the provided file does not cover the whole config time span from
-                    {ti}->{tf}, where we have dates from {str(df.index[0])} to {str(df.index[-1])}"""
+                    the 'date' entries of the provided file do not include all the days specified to be modeled by 
+                    the config. the provided file includes {len(df.index)} days between {str(df.index[0])} to {str(df.index[-1])}, 
+                    while there are {len(pd.date_range(ti, tf))} days in the config time span of {ti}->{tf}. The file must contain entries for the
+                    the exact start and end dates from the config. """
                     )
-                # check the date range, need the lenght to be equal
                 if not (pd.date_range(ti, tf) == df.index).all():
                     print("config dates:", pd.date_range(ti, tf))
                     print("loaded dates:", df.index)
                     raise ValueError(
                         f"""ERROR loading file {fn_name} for parameter {pn}: 
-                    the 'date' index of the provided file does not cover the whole config time span from
-                    {ti}->{tf}"""
+                    the 'date' entries of the provided file do not include all the days specified to be modeled by 
+                    the config. the provided file includes {len(df.index)} days between {str(df.index[0])} to {str(df.index[-1])}, 
+                    while there are {len(pd.date_range(ti, tf))} days in the config time span of {ti}->{tf}. The file must contain entries for the
+                    the exact start and end dates from the config. """
                     )
 
                 self.pdata[pn]["ts"] = df

diff --git a/flepimop/gempyor_pkg/src/gempyor/seeding_ic.py b/flepimop/gempyor_pkg/src/gempyor/seeding_ic.py
@@ -34,6 +34,8 @@ def _DataFrame2NumbaDict(df, amounts, setup) -> nb.typed.Dict:
 
     n_seeding_ignored_before = 0
     n_seeding_ignored_after = 0
+
+    #id_seed = 0
     for idx, (row_index, row) in enumerate(df.iterrows()):
         if row["subpop"] not in setup.subpop_struct.subpop_names:
             raise ValueError(
@@ -42,6 +44,7 @@ def _DataFrame2NumbaDict(df, amounts, setup) -> nb.typed.Dict:
 
         if (row["date"].date() - setup.ti).days >= 0:
             if (row["date"].date() - setup.ti).days < len(nb_seed_perday):
+
                 nb_seed_perday[(row["date"].date() - setup.ti).days] = (
                     nb_seed_perday[(row["date"].date() - setup.ti).days] + 1
                 )
@@ -51,6 +54,7 @@ def _DataFrame2NumbaDict(df, amounts, setup) -> nb.typed.Dict:
                 seeding_dict["seeding_destinations"][idx] = setup.compartments.get_comp_idx(destination_dict)
                 seeding_dict["seeding_subpops"][idx] = setup.subpop_struct.subpop_names.index(row["subpop"])
                 seeding_amounts[idx] = amounts[idx]
+                #id_seed+=1
             else:
                 n_seeding_ignored_after += 1
         else:
@@ -160,7 +164,7 @@ def draw_ic(self, sim_id: int, setup) -> np.ndarray:
                     )
         elif method == "InitialConditionsFolderDraw" or method == "FromFile":
             if method == "InitialConditionsFolderDraw":
-                ic_df = setup.read_simID(ftype=self.initial_conditions_config["initial_file_type"], sim_id=sim_id)
+                ic_df = setup.read_simID(ftype=self.initial_conditions_config["initial_file_type"].get(), sim_id=sim_id)
             elif method == "FromFile":
                 ic_df = read_df(
                     self.initial_conditions_config["initial_conditions_file"].get(),
@@ -250,9 +254,13 @@ def draw_ic(self, sim_id: int, setup) -> np.ndarray:
             if self.initial_conditions_config["ignore_population_checks"].get():
                 ignore_population_checks = True
         if error and not ignore_population_checks:
-            raise ValueError(f""" geodata and initial condition do not agree on population size (see messages above). Use ignore_population_checks: True to ignore""")
+            raise ValueError(
+                f""" geodata and initial condition do not agree on population size (see messages above). Use ignore_population_checks: True to ignore"""
+            )
         elif error and ignore_population_checks:
-            print(""" Ignoring the previous population mismatch errors because you added flag 'ignore_population_checks'. This is dangerous""")
+            print(
+                """ Ignoring the previous population mismatch errors because you added flag 'ignore_population_checks'. This is dangerous"""
+            )
         return y0
 
     def draw_seeding(self, sim_id: int, setup) -> nb.typed.Dict:
@@ -295,7 +303,15 @@ def draw_seeding(self, sim_id: int, setup) -> nb.typed.Dict:
             raise NotImplementedError(f"unknown seeding method [got: {method}]")
 
         # Sorting by date is essential here: the seeding format requires it.
+        print(seeding.shape)
         seeding = seeding.sort_values(by="date", axis="index").reset_index()
+        print(seeding)
+        mask = (seeding['date'].dt.date > setup.ti) & (seeding['date'].dt.date <= setup.tf)
+        seeding = seeding.loc[mask].reset_index()
+        print(seeding.shape)
+        print(seeding)
+
+        # TODO: print.
 
         amounts = np.zeros(len(seeding))
         if method == "PoissonDistributed":
@@ -306,6 +322,7 @@ def draw_seeding(self, sim_id: int, setup) -> nb.typed.Dict:
         elif method == "FolderDraw" or method == "FromFile":
             amounts = seeding["amount"]
 
+
         return _DataFrame2NumbaDict(df=seeding, amounts=amounts, setup=setup)
 
     def load_seeding(self, sim_id: int, setup) -> nb.typed.Dict:

diff --git a/flepimop/gempyor_pkg/src/gempyor/seir.py b/flepimop/gempyor_pkg/src/gempyor/seir.py
@@ -43,7 +43,7 @@ def build_step_source_arg(
         dt = 2.0
         logging.info(f"Integration method not provided, assuming type {integration_method} with dt=2")
 
-    assert type(modinf.mobility) == scipy.sparse.csr.csr_matrix
+    assert type(modinf.mobility) == scipy.sparse.csr_matrix
     mobility_data = modinf.mobility.data
     mobility_data = mobility_data.astype("float64")
     assert type(modinf.compartments.compartments.shape[0]) == int

diff --git a/flepimop/gempyor_pkg/src/gempyor/steps_rk4.py b/flepimop/gempyor_pkg/src/gempyor/steps_rk4.py
@@ -145,6 +145,7 @@ def rhs(t, x, today):
                     number_move = source_number * compound_adjusted_rate  ## to initialize the type; each entry is overwritten in the loop below
                     for spatial_node in range(nspatial_nodes):
                         number_move[spatial_node] = np.random.binomial(
+                            # number_move[spatial_node] = random.binomial(
                             source_number[spatial_node],
                             compound_adjusted_rate[spatial_node],
                         )
@@ -316,6 +317,6 @@ def rk4_integrate(t, x, today):
         print(
             "load the name space with: \nwith open('integration_dump.pkl','rb') as fn_dump:\n    states, states_daily_incid, ncompartments, nspatial_nodes, ndays, parameters, dt, transitions, proportion_info,  transition_sum_compartments, initial_conditions, seeding_data, seeding_amounts, mobility_data, mobility_row_indices, mobility_data_indices, population,  stochastic_p,  method = pickle.load(fn_dump)"
         )
-        print("/!\ Invalid integration, will cause problems for downstream users /!\ ")
+        print("/!\\ Invalid integration, will cause problems for downstream users /!\\ ")
         # raise ValueError("Invalid Integration...")
     return states, states_daily_incid
diff --git a/flepimop/gempyor_pkg/src/gempyor/utils.py b/flepimop/gempyor_pkg/src/gempyor/utils.py
@@ -190,12 +190,16 @@ def as_random_distribution(self):
         elif dist == "poisson":
             return functools.partial(np.random.poisson, self["lam"].as_evaled_expression())
         elif dist == "binomial":
-            if (self["p"] < 0) or (self["p"] > 1):
-                raise ValueError(f"""p value { self["p"] } is out of range [0,1]""")
+            p = self["p"].as_evaled_expression()
+            if (p < 0) or (p > 1):
+                raise ValueError(f"""p value { p } is out of range [0,1]""")
+                # if (self["p"] < 0) or (self["p"] > 1):
+                #    raise ValueError(f"""p value { self["p"] } is out of range [0,1]""")
             return functools.partial(
                 np.random.binomial,
                 self["n"].as_evaled_expression(),
-                self["p"].as_evaled_expression(),
+                # self["p"].as_evaled_expression(),
+                p,
             )
         elif dist == "truncnorm":
             return get_truncated_normal(

diff --git a/flepimop/gempyor_pkg/tests/interface/data/config.yml b/flepimop/gempyor_pkg/tests/interface/data/config.yml
@@ -0,0 +1,123 @@
+name: minimal for interface
+setup_name: minimal4interface
+start_date: 2020-01-31
+end_date: 2020-05-31
+data_path: data
+nslots: 1
+
+
+spatial_setup:
+  geodata: geodata.csv
+  mobility: mobility.csv
+  popnodes_key: population
+  subpop_names_key: subpop
+
+seeding:
+  method: FolderDraw
+  seeding_file_type: seed
+
+initial_conditions:
+  method: Default
+
+compartments:
+  infection_stage: ["S", "E", "I1", "I2", "I3", "R"]
+  vaccination_stage: ["unvaccinated"]
+
+seir:
+  integration:
+    method: legacy
+    dt: 1/6
+  parameters:
+    alpha:
+      value:
+        distribution: fixed
+        value: .9
+    sigma:
+      value:
+        distribution: fixed
+        value: 1 / 5.2
+    gamma:
+      value:
+        distribution: uniform
+        low: 1 / 6
+        high: 1 / 2.6
+    R0s:
+      value:
+        distribution: uniform
+        low: 2
+        high: 3
+  transitions:
+    - source: ["S", "unvaccinated"]
+      destination: ["E", "unvaccinated"]
+      rate: ["R0s * gamma", 1]
+      proportional_to: [
+          ["S", "unvaccinated"],
+          [[["I1", "I2", "I3"]], "unvaccinated"],
+      ]
+      proportion_exponent: [["1", "1"], ["alpha", "1"]] 
+    - source: [["E"], ["unvaccinated"]]
+      destination: [["I1"], ["unvaccinated"]]
+      rate: ["sigma", 1]
+      proportional_to: [[["E"], ["unvaccinated"]]]
+      proportion_exponent: [["1", "1"]]
+    - source: [["I1"], ["unvaccinated"]]
+      destination: [["I2"], ["unvaccinated"]]
+      rate: ["3 * gamma", 1]
+      proportional_to: [[["I1"], ["unvaccinated"]]]
+      proportion_exponent: [["1", "1"]]
+    - source: [["I2"], ["unvaccinated"]]
+      destination: [["I3"], ["unvaccinated"]]
+      rate: ["3 * gamma", 1]
+      proportional_to: [[["I2"], ["unvaccinated"]]]
+      proportion_exponent: [["1", "1"]]
+    - source: [["I3"], ["unvaccinated"]]
+      destination: [["R"], ["unvaccinated"]]
+      rate: ["3 * gamma", 1]
+      proportional_to: [[["I3"], ["unvaccinated"]]]
+      proportion_exponent: [["1", "1"]]
+
+interventions:
+  scenarios:
+    - None
+    - Scenario1
+    - Scenario2
+  settings:
+    None:
+      template: SinglePeriodModifier
+      parameter: r0
+      period_start_date: 2020-04-01
+      period_end_date: 2020-05-15
+      value:
+        distribution: fixed
+        value: 0
+    Wuhan:
+      template: SinglePeriodModifier
+      parameter: r0
+      period_start_date: 2020-04-01
+      period_end_date: 2020-05-15
+      value:
+        distribution: uniform
+        low: .14
+        high: .33
+    KansasCity:
+      template: MultiPeriodModifier
+      parameter: r0
+      groups:
+        - periods:
+            - start_date: 2020-04-01
+              end_date: 2020-05-15
+          subpop: "all"
+      value:
+        distribution: uniform
+        low: .04
+        high: .23
+    Scenario1:
+      template: StackedModifier
+      scenarios:
+        - KansasCity
+        - Wuhan
+        - None
+    Scenario2:
+      template: StackedModifier
+      scenarios:
+        - Wuhan