NorskRegnesentral · martinju · Oct 6, 2024 · May 6, 2024 · May 30, 2024 · May 30, 2024
diff --git a/.Rprofile b/.Rprofile
@@ -1,3 +1,5 @@
+testthat::set_max_fails(Inf)
+
 #' Helper function for package development
 #'
 #' This is a manual extension of [testthat::snapshot_review()] which works for the \code{.rds} files used in
@@ -7,7 +9,7 @@
 #' @param ... Additional arguments passed to [waldo::compare()]
 #' Gives the relative path to the test files to review
 #'
-snapshot_review_man <- function(path, tolerance = NULL, ...) {
+snapshot_review_man <- function(path, tolerance = 10^(-5), max_diffs = 200, ...) {
   changed <- testthat:::snapshot_meta(path)
   these_rds <- (tools::file_ext(changed$name) == "rds")
   if (any(these_rds)) {
@@ -16,7 +18,7 @@ snapshot_review_man <- function(path, tolerance = NULL, ...) {
       new <- readRDS(changed[i, "new"])
 
       cat(paste0("Difference for check ", changed[i, "name"], " in test ", changed[i, "test"], "\n"))
-      print(waldo::compare(old, new, max_diffs = 50, tolerance = tolerance, ...))
+      print(waldo::compare(old, new, max_diffs = max_diffs, tolerance = tolerance, ...))
       browser()
     }
   }

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -27,7 +27,7 @@ Encoding: UTF-8
 LazyData: true
 ByteCompile: true
 Language: en-US
-RoxygenNote: 7.3.1
+RoxygenNote: 7.3.2
 Depends: R (>= 3.5.0)
 Imports: 
     stats,
@@ -40,7 +40,7 @@ Suggests:
     ranger,
     xgboost,
     mgcv,
-    testthat (>= 3.0.0),
+    testthat,
     knitr,
     rmarkdown,
     roxygen2,

diff --git a/NAMESPACE b/NAMESPACE
@@ -52,15 +52,22 @@ S3method(setup_approach,regression_separate)
 S3method(setup_approach,regression_surrogate)
 S3method(setup_approach,timeseries)
 S3method(setup_approach,vaeac)
+export(additional_regression_setup)
 export(aicc_full_single_cpp)
+export(check_convergence)
+export(coalition_matrix_cpp)
+export(compute_estimates)
 export(compute_shapley_new)
+export(compute_time)
 export(compute_vS)
+export(compute_vS_forecast)
 export(correction_matrix_cpp)
+export(create_coalition_table)
 export(explain)
 export(explain_forecast)
-export(feature_combinations)
-export(feature_matrix_cpp)
 export(finalize_explanation)
+export(finalize_explanation_forecast)
+export(get_adaptive_arguments_default)
 export(get_cov_mat)
 export(get_data_specs)
 export(get_model_specs)
@@ -75,17 +82,23 @@ export(predict_model)
 export(prepare_data)
 export(prepare_data_copula_cpp)
 export(prepare_data_gaussian_cpp)
+export(prepare_next_iteration)
+export(print_iter)
 export(regression.train_model)
 export(rss_cpp)
+export(save_results)
 export(setup)
 export(setup_approach)
 export(setup_computation)
+export(shapley_setup)
+export(testing_cleanup)
 export(vaeac_get_evaluation_criteria)
 export(vaeac_get_extra_para_default)
 export(vaeac_plot_eval_crit)
 export(vaeac_plot_imputed_ggpairs)
 export(vaeac_train_model)
 export(vaeac_train_model_continue)
+export(weight_matrix)
 export(weight_matrix_cpp)
 importFrom(Rcpp,sourceCpp)
 importFrom(data.table,":=")
@@ -110,6 +123,7 @@ importFrom(stats,as.formula)
 importFrom(stats,contrasts)
 importFrom(stats,embed)
 importFrom(stats,formula)
+importFrom(stats,median)
 importFrom(stats,model.frame)
 importFrom(stats,model.matrix)
 importFrom(stats,predict)

diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -110,15 +110,15 @@ inv_gaussian_transform_cpp <- function(z, x) {
 
 #' Generate (Gaussian) Copula MC samples
 #'
-#' @param MC_samples_mat arma::mat. Matrix of dimension (`n_samples`, `n_features`) containing samples from the
+#' @param MC_samples_mat arma::mat. Matrix of dimension (`n_MC_samples`, `n_features`) containing samples from the
 #' univariate standard normal.
 #' @param x_explain_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the observations
 #' to explain on the original scale.
 #' @param x_explain_gaussian_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the
 #' observations to explain after being transformed using the Gaussian transform, i.e., the samples have been
 #' transformed to a standardized normal distribution.
 #' @param x_train_mat arma::mat. Matrix of dimension (`n_train`, `n_features`) containing the training observations.
-#' @param S arma::mat. Matrix of dimension (`n_combinations`, `n_features`) containing binary representations of
+#' @param S arma::mat. Matrix of dimension (`n_coalitions`, `n_features`) containing binary representations of
 #' the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones.
 #' This is not a problem internally in shapr as the empty and grand coalitions treated differently.
 #' @param mu arma::vec. Vector of length `n_features` containing the mean of each feature after being transformed
@@ -127,8 +127,8 @@ inv_gaussian_transform_cpp <- function(z, x) {
 #' between all pairs of features after being transformed using the Gaussian transform, i.e., the samples have been
 #' transformed to a standardized normal distribution.
 #'
-#' @return An arma::cube/3D array of dimension (`n_samples`, `n_explain` * `n_coalitions`, `n_features`), where
-#' the columns (_,j,_) are matrices of dimension (`n_samples`, `n_features`) containing the conditional Gaussian
+#' @return An arma::cube/3D array of dimension (`n_MC_samples`, `n_explain` * `n_coalitions`, `n_features`), where
+#' the columns (_,j,_) are matrices of dimension (`n_MC_samples`, `n_features`) containing the conditional Gaussian
 #' copula MC samples for each explicand and coalition on the original scale.
 #'
 #' @export
@@ -140,19 +140,19 @@ prepare_data_copula_cpp <- function(MC_samples_mat, x_explain_mat, x_explain_gau
 
 #' Generate Gaussian MC samples
 #'
-#' @param MC_samples_mat arma::mat. Matrix of dimension (`n_samples`, `n_features`) containing samples from the
+#' @param MC_samples_mat arma::mat. Matrix of dimension (`n_MC_samples`, `n_features`) containing samples from the
 #' univariate standard normal.
 #' @param x_explain_mat arma::mat. Matrix of dimension (`n_explain`, `n_features`) containing the observations
 #' to explain.
-#' @param S arma::mat. Matrix of dimension (`n_combinations`, `n_features`) containing binary representations of
+#' @param S arma::mat. Matrix of dimension (`n_coalitions`, `n_features`) containing binary representations of
 #' the used coalitions. S cannot contain the empty or grand coalition, i.e., a row containing only zeros or ones.
 #' This is not a problem internally in shapr as the empty and grand coalitions treated differently.
 #' @param mu arma::vec. Vector of length `n_features` containing the mean of each feature.
 #' @param cov_mat arma::mat. Matrix of dimension (`n_features`, `n_features`) containing the pairwise covariance
 #' between all pairs of features.
 #'
-#' @return An arma::cube/3D array of dimension (`n_samples`, `n_explain` * `n_coalitions`, `n_features`), where
-#' the columns (_,j,_) are matrices of dimension (`n_samples`, `n_features`) containing the conditional Gaussian
+#' @return An arma::cube/3D array of dimension (`n_MC_samples`, `n_explain` * `n_coalitions`, `n_features`), where
+#' the columns (_,j,_) are matrices of dimension (`n_MC_samples`, `n_features`) containing the conditional Gaussian
 #' MC samples for each explicand and coalition.
 #'
 #' @export
@@ -199,7 +199,7 @@ sample_features_cpp <- function(m, n_features) {
 #'
 #' @param xtest Numeric matrix. Represents a single test observation.
 #'
-#' @param S Integer matrix of dimension \code{n_combinations x m}, where \code{n_combinations} equals
+#' @param S Integer matrix of dimension \code{n_coalitions x m}, where \code{n_coalitions} equals
 #' the total number of sampled/non-sampled feature combinations and \code{m} equals
 #' the total number of unique features. Note that \code{m = ncol(xtrain)}. See details
 #' for more information.
@@ -228,34 +228,34 @@ observation_impute_cpp <- function(index_xtrain, index_s, xtrain, xtest, S) {
 
 #' Calculate weight matrix
 #'
-#' @param subsets List. Each of the elements equals an integer
+#' @param coalitions List. Each of the elements equals an integer
 #' vector representing a valid combination of features/feature groups.
 #' @param m Integer. Number of features/feature groups
 #' @param n Integer. Number of combinations
 #' @param w Numeric vector of length \code{n}, i.e. \code{w[i]} equals
 #' the Shapley weight of feature/feature group combination \code{i}, represented by
-#' \code{subsets[[i]]}.
+#' \code{coalitions[[i]]}.
 #'
 #' @export
 #' @keywords internal
 #'
 #' @return Matrix of dimension n x m + 1
-#' @author Nikolai Sellereite
-weight_matrix_cpp <- function(subsets, m, n, w) {
-    .Call(`_shapr_weight_matrix_cpp`, subsets, m, n, w)
+#' @author Nikolai Sellereite, Martin Jullum
+weight_matrix_cpp <- function(coalitions, m, n, w) {
+    .Call(`_shapr_weight_matrix_cpp`, coalitions, m, n, w)
 }
 
-#' Get feature matrix
+#' Get coalition matrix
 #'
-#' @param features List
-#' @param m Positive integer. Total number of features
+#' @param coalitions List
+#' @param m Positive integer. Total number of features
 #'
 #' @export
 #' @keywords internal
 #'
 #' @return Matrix
-#' @author Nikolai Sellereite
-feature_matrix_cpp <- function(features, m) {
-    .Call(`_shapr_feature_matrix_cpp`, features, m)
+#' @author Nikolai Sellereite, Martin Jullum
+coalition_matrix_cpp <- function(coalitions, m) {
+    .Call(`_shapr_coalition_matrix_cpp`, coalitions, m)
 }
 
diff --git a/R/approach.R b/R/approach.R
@@ -11,15 +11,29 @@
 setup_approach <- function(internal, ...) {
   approach <- internal$parameters$approach
 
-  this_class <- ""
+  iter <- length(internal$iter_list)
+  X <- internal$iter_list[[iter]]$X
 
-  if (length(approach) > 1) {
-    class(this_class) <- "combined"
+  needs_X <- c("regression_surrogate", "vaeac")
+
+  run_now <- (isFALSE(any(needs_X %in% approach)) && isTRUE(is.null(X))) ||
+    (isTRUE(any(needs_X %in% approach)) && isFALSE(is.null(X)))
+
+  if (isFALSE(run_now)) { # Do nothing
+    return(internal)
   } else {
-    class(this_class) <- approach
-  }
+    this_class <- ""
 
-  UseMethod("setup_approach", this_class)
+    if (length(approach) > 1) {
+      class(this_class) <- "combined"
+    } else {
+      class(this_class) <- approach
+    }
+
+    # NOTE: UseMethod() never returns control to this generic, so the method's
+    # value is the result; timing must be stamped by the method or the caller.
+    UseMethod("setup_approach", this_class)
+  }
 }
 
 #' @inheritParams default_doc
@@ -49,6 +63,10 @@ setup_approach.combined <- function(internal, ...) {
 #' @export
 #' @keywords internal
 prepare_data <- function(internal, index_features = NULL, ...) {
+  iter <- length(internal$iter_list)
+
+  X <- internal$iter_list[[iter]]$X
+
   # Extract the used approach(es)
   approach <- internal$parameters$approach
 
@@ -57,9 +75,9 @@ prepare_data <- function(internal, index_features = NULL, ...) {
 
   # Check if the user provided one or several approaches.
   if (length(approach) > 1) {
-    # Picks the relevant approach from the internal$objects$X table which list the unique approach of the batch
+    # Picks the relevant approach from the X table, which lists the unique approach of the batch
     # matches by index_features
-    class(this_class) <- internal$objects$X[id_combination == index_features[1], approach]
+    class(this_class) <- X[id_coalition == index_features[1], approach]
   } else {
     # Only one approach for all coalitions sizes
     class(this_class) <- approach

diff --git a/R/approach_categorical.R b/R/approach_categorical.R
@@ -96,8 +96,11 @@ prepare_data.categorical <- function(internal, index_features = NULL, ...) {
 
   joint_probability_dt <- internal$parameters$categorical.joint_prob_dt
 
-  X <- internal$objects$X
-  S <- internal$objects$S
+  iter <- length(internal$iter_list)
+
+  X <- internal$iter_list[[iter]]$X
+  S <- internal$iter_list[[iter]]$S
+
 
   if (is.null(index_features)) { # 2,3
     features <- X$features # list of [1], [2], [2, 3]
@@ -106,9 +109,9 @@ prepare_data.categorical <- function(internal, index_features = NULL, ...) {
   }
   feature_names <- internal$parameters$feature_names
 
-  # 3 id columns: id, id_combination, and id_all
+  # 3 id columns: id, id_coalition, and id_all
   # id: for each x_explain observation
-  # id_combination: the rows of the S matrix
+  # id_coalition: the rows of the S matrix
   # id_all: identifies the unique combinations of feature values from
   # the training data (not necessarily the ones in the explain data)
 
@@ -118,9 +121,9 @@ prepare_data.categorical <- function(internal, index_features = NULL, ...) {
 
   S_dt <- data.table::data.table(S)
   S_dt[S_dt == 0] <- NA
-  S_dt[, id_combination := seq_len(nrow(S_dt))]
+  S_dt[, id_coalition := seq_len(nrow(S_dt))]
 
-  data.table::setnames(S_dt, c(feature_conditioned, "id_combination"))
+  data.table::setnames(S_dt, c(feature_conditioned, "id_coalition"))
 
   # (1) Compute marginal probabilities
 
@@ -153,21 +156,21 @@ prepare_data.categorical <- function(internal, index_features = NULL, ...) {
 
   cond_dt <- j_S_all_feat[marg_dt, on = feature_conditioned]
   cond_dt[, cond_prob := joint_prob / marg_prob]
-  cond_dt[id_combination == 1, marg_prob := 0]
-  cond_dt[id_combination == 1, cond_prob := 1]
+  cond_dt[id_coalition == 1, marg_prob := 0]
+  cond_dt[id_coalition == 1, cond_prob := 1]
 
   # check marginal probabilities
   cond_dt_unique <- unique(cond_dt, by = feature_conditioned)
-  check <- cond_dt_unique[id_combination != 1][, .(sum_prob = sum(marg_prob)),
-    by = "id_combination"
+  check <- cond_dt_unique[id_coalition != 1][, .(sum_prob = sum(marg_prob)),
+    by = "id_coalition"
   ][["sum_prob"]]
   if (!all(round(check) == 1)) {
     print("Warning - not all marginal probabilities sum to 1. There could be a problem
           with the joint probabilities. Consider checking.")
   }
 
   # make x_explain
-  data.table::setkeyv(cond_dt, c("id_combination", "id_all"))
+  data.table::setkeyv(cond_dt, c("id_coalition", "id_all"))
   x_explain_with_id <- data.table::copy(x_explain)[, id := .I]
   dt_just_explain <- cond_dt[x_explain_with_id, on = feature_names]
 
@@ -178,22 +181,22 @@ prepare_data.categorical <- function(internal, index_features = NULL, ...) {
   dt <- cond_dt[dt_explain_just_conditioned, on = feature_conditioned, allow.cartesian = TRUE]
 
   # check conditional probabilities
-  check <- dt[id_combination != 1][, .(sum_prob = sum(cond_prob)),
-    by = c("id_combination", "id")
+  check <- dt[id_coalition != 1][, .(sum_prob = sum(cond_prob)),
+    by = c("id_coalition", "id")
   ][["sum_prob"]]
   if (!all(round(check) == 1)) {
     print("Warning - not all conditional probabilities sum to 1. There could be a problem
           with the joint probabilities. Consider checking.")
   }
 
   setnames(dt, "cond_prob", "w")
-  data.table::setkeyv(dt, c("id_combination", "id"))
+  data.table::setkeyv(dt, c("id_coalition", "id"))
 
   # here we merge so that we only return the combintations found in our actual explain data
   # this merge does not change the number of rows in dt
-  # dt <- merge(dt, x$X[, .(id_combination, n_features)], by = "id_combination")
+  # dt <- merge(dt, x$X[, .(id_coalition, n_features)], by = "id_coalition")
   # dt[n_features %in% c(0, ncol(x_explain)), w := 1.0]
-  dt[id_combination %in% c(1, 2^ncol(x_explain)), w := 1.0]
-  ret_col <- c("id_combination", "id", feature_names, "w")
-  return(dt[id_combination %in% index_features, mget(ret_col)])
+  dt[id_coalition %in% c(1, 2^ncol(x_explain)), w := 1.0]
+  ret_col <- c("id_coalition", "id", feature_names, "w")
+  return(dt[id_coalition %in% index_features, mget(ret_col)])
 }