diff --git a/NAMESPACE b/NAMESPACE index 78a6a505e..21007d8b6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -60,13 +60,11 @@ export(compute_estimates) export(compute_shapley_new) export(compute_time) export(compute_vS) -export(compute_vS_forecast) export(correction_matrix_cpp) export(create_coalition_table) export(explain) export(explain_forecast) export(finalize_explanation) -export(finalize_explanation_forecast) export(get_adaptive_arguments_default) export(get_cov_mat) export(get_data_specs) diff --git a/R/check_convergence.R b/R/check_convergence.R index 2f2ccc122..4b0710666 100644 --- a/R/check_convergence.R +++ b/R/check_convergence.R @@ -20,7 +20,7 @@ check_convergence <- function(internal) { n_sampled_coalitions <- internal$iter_list[[iter]]$n_coalitions - 2 # Subtract the zero and full predictions - max_sd <- dt_shapley_sd[, max(.SD), .SDcols = -1, by = .I]$V1 # Max per prediction + max_sd <- dt_shapley_sd[, max(.SD, na.rm = TRUE), .SDcols = -1, by = .I]$V1 # Max per prediction max_sd0 <- max_sd * sqrt(n_sampled_coalitions) # Scales UP the sd as it scales at this rate dt_shapley_est0 <- copy(dt_shapley_est) @@ -33,8 +33,8 @@ check_convergence <- function(internal) { } else { converged_exact <- FALSE if (!is.null(convergence_tolerance)) { - dt_shapley_est0[, maxval := max(.SD), .SDcols = -c(1, 2), by = .I] - dt_shapley_est0[, minval := min(.SD), .SDcols = -c(1, 2), by = .I] + dt_shapley_est0[, maxval := max(.SD, na.rm = TRUE), .SDcols = -c(1, 2), by = .I] + dt_shapley_est0[, minval := min(.SD, na.rm = TRUE), .SDcols = -c(1, 2), by = .I] dt_shapley_est0[, max_sd0 := max_sd0] dt_shapley_est0[, req_samples := (max_sd0 / ((maxval - minval) * convergence_tolerance))^2] dt_shapley_est0[, conv_measure := max_sd0 / ((maxval - minval) * sqrt(n_sampled_coalitions))] diff --git a/R/compute_estimates.R b/R/compute_estimates.R index 1e548579e..46bbb5833 100644 --- a/R/compute_estimates.R +++ b/R/compute_estimates.R @@ -8,7 +8,7 @@ #' @keywords internal compute_estimates <- function(internal, vS_list) { verbose <- internal$parameters$verbose - cli_id <- internal$parameter$cli_id + type <- internal$parameters$type internal$timing_list$compute_vS <- Sys.time() @@ -40,7 +40,7 @@ compute_estimates <- function(internal, vS_list) { cli::cli_progress_step("Boostrapping Shapley value sds") } - dt_shapley_sd <- bootstrap_shapley_new(internal, n_boot_samps = n_boot_samps, processed_vS_list$dt_vS) + dt_shapley_sd <- bootstrap_shapley(internal, n_boot_samps = n_boot_samps, processed_vS_list$dt_vS) internal$timing_list$compute_bootstrap <- Sys.time() } else { @@ -50,10 +50,12 @@ compute_estimates <- function(internal, vS_list) { # Adding explain_id to the output dt - dt_shapley_est[, explain_id := .I] - setcolorder(dt_shapley_est, "explain_id") - dt_shapley_sd[, explain_id := .I] - setcolorder(dt_shapley_sd, "explain_id") + if (type != "forecast") { + dt_shapley_est[, explain_id := .I] + setcolorder(dt_shapley_est, "explain_id") + dt_shapley_sd[, explain_id := .I] + setcolorder(dt_shapley_sd, "explain_id") + } internal$iter_list[[iter]]$dt_shapley_est <- dt_shapley_est @@ -137,9 +139,10 @@ compute_shapley_new <- function(internal, dt_vS) { # If multiple horizons with explain_forecast are used, we only distribute value to those used at each horizon if (type == "forecast") { - id_coalition_mapper_dt <- internal$objects$id_coalition_mapper_dt + id_coalition_mapper_dt <- internal$iter_list[[iter]]$id_coalition_mapper_dt horizon <- internal$parameters$horizon cols_per_horizon <- internal$objects$cols_per_horizon + 
shap_names <- internal$parameters$shap_names W_list <- internal$objects$W_list kshap_list <- list() @@ -260,21 +263,47 @@ bootstrap_shapley <- function(internal, dt_vS, n_boot_samps = 100, seed = 123) { return(dt_kshap_boot_sd) } -bootstrap_shapley_new <- function(internal, dt_vS, n_boot_samps = 100, seed = 123) { +bootstrap_shapley <- function(internal, dt_vS, n_boot_samps = 100, seed = 123) { iter <- length(internal$iter_list) + type <- internal$parameters$type + is_groupwise <- internal$parameters$is_groupwise + X_list <- internal$iter_list[[iter]]$X_list - X <- internal$iter_list[[iter]]$X + result <- list() + if (type == "forecast") { + n_explain <- internal$parameters$n_explain + for (i in seq_along(X_list)) { + X <- X_list[[i]] + if (is_groupwise) { + n_shapley_values <- length(internal$data$shap_names) + shap_names <- internal$data$shap_names + } else { + n_shapley_values <- length(internal$parameters$horizon_features[[i]]) + shap_names <- internal$parameters$horizon_features[[i]] + } + dt_cols <- c(1, seq_len(n_explain) + (i - 1) * n_explain + 1) + dt_vS_this <- dt_vS[, ..dt_cols] + result[[i]] <- bootstrap_shapley_inner(X, n_shapley_values, shap_names, internal, dt_vS_this, n_boot_samps, seed) + } + result <- rbindlist(result, fill = TRUE) + } else { + X <- internal$iter_list[[iter]]$X + n_shapley_values <- internal$parameters$n_shapley_values + shap_names <- internal$parameters$shap_names + result <- bootstrap_shapley_inner(X, n_shapley_values, shap_names, internal, dt_vS, n_boot_samps, seed) + } + return(result) +} - set.seed(seed) +bootstrap_shapley_inner <- function(X, n_shapley_values, shap_names, internal, dt_vS, n_boot_samps = 100, seed = 123) { + type <- internal$parameters$type + iter <- length(internal$iter_list) - is_groupwise <- internal$parameters$is_groupwise + set.seed(seed) n_explain <- internal$parameters$n_explain paired_shap_sampling <- internal$parameters$paired_shap_sampling shapley_reweight <- internal$parameters$shapley_reweighting - shap_names <- internal$parameters$shap_names - n_shapley_values <- internal$parameters$n_shapley_values - X_org <- copy(X) @@ -300,7 +329,6 @@ bootstrap_shapley_new <- function(internal, dt_vS, n_boot_samps = 100, seed = 12 X_boot00[, boot_id := rep(seq(n_boot_samps), times = n_coalitions_boot / 2)] - X_boot00_paired <- copy(X_boot00[, .(coalitions, boot_id)]) X_boot00_paired[, coalitions := lapply(coalitions, function(x) seq(n_shapley_values)[-x])] X_boot00_paired[, coalitions_tmp := sapply(coalitions, paste, collapse = " ")] @@ -338,7 +366,13 @@ bootstrap_shapley_new <- function(internal, dt_vS, n_boot_samps = 100, seed = 12 X_boot[, sample_freq := .N / n_coalitions_boot, by = .(id_coalition, boot_id)] X_boot <- unique(X_boot, by = c("id_coalition", "boot_id")) X_boot[, shapley_weight := sample_freq] - X_boot[coalition_size %in% c(0, n_shapley_values), shapley_weight := X_org[1, shapley_weight]] + if (type == "forecast") { + id_coalition_mapper_dt <- internal$iter_list[[iter]]$id_coalition_mapper_dt + full_ids <- id_coalition_mapper_dt$id_coalition[id_coalition_mapper_dt$full] + X_boot[coalition_size == 0 | id_coalition %in% full_ids, shapley_weight := X_org[1, shapley_weight]] + } else { + X_boot[coalition_size %in% c(0, n_shapley_values), shapley_weight := X_org[1, shapley_weight]] + } } for (i in seq_len(n_boot_samps)) { diff --git a/R/compute_vS.R b/R/compute_vS.R index 231424b0a..b2e89fce4 100644 --- a/R/compute_vS.R +++ b/R/compute_vS.R @@ -236,8 +236,8 @@ compute_preds <- function( if (type == "forecast") { dt[, 
(pred_cols) := predict_model( x = model, - newdata = .SD[, 1:n_endo], - newreg = .SD[, -(1:n_endo)], + newdata = .SD[, .SD, .SDcols = seq_len(n_endo)], + newreg = .SD[, .SD, .SDcols = seq_len(length(feature_names) - n_endo) + n_endo], horizon = horizon, explain_idx = explain_idx[id], explain_lags = explain_lags, @@ -263,38 +263,3 @@ compute_MCint <- function(dt, pred_cols = "p_hat") { return(dt_mat) } - - -#' Computes `v(S)` for all features subsets `S`. -#' -#' @inheritParams default_doc -#' @inheritParams explain -#' -#' @param method Character -#' Indicates whether the lappy method (default) or loop method should be used. -#' -#' @export -compute_vS_forecast <- function(internal, model, predict_model, method = "future") { - # old function used only for forecast temporary - S_batch <- internal$objects$S_batch - - - - if (method == "future") { - ret <- future_compute_vS_batch(S_batch = S_batch, internal = internal, model = model, predict_model = predict_model) - } else { - # Doing the same as above without future without progressbar or paralellization - ret <- list() - for (i in seq_along(S_batch)) { - S <- S_batch[[i]] - ret[[i]] <- batch_compute_vS( - S = S, - internal = internal, - model = model, - predict_model = predict_model - ) - } - } - - return(ret) -} diff --git a/R/explain_forecast.R b/R/explain_forecast.R index 2621b816d..55ab1d45c 100644 --- a/R/explain_forecast.R +++ b/R/explain_forecast.R @@ -95,6 +95,9 @@ explain_forecast <- function(model, approach, prediction_zero, max_n_coalitions = NULL, + adaptive = NULL, + adaptive_arguments = list(), + shapley_reweighting = "on_all_cond", group_lags = TRUE, group = NULL, n_MC_samples = 1e3, @@ -104,11 +107,11 @@ explain_forecast <- function(model, get_model_specs = NULL, verbose = "basic", ...) { # ... is further arguments passed to specific approaches - timing_list <- list( - init_time = Sys.time() - ) + init_time <- Sys.time() - set.seed(seed) + if (!is.null(seed)) { + set.seed(seed) + } # Gets and check feature specs from the model feature_specs <- get_feature_specs(get_model_specs, model) @@ -118,7 +121,6 @@ explain_forecast <- function(model, train_idx <- seq.int(from = max(c(explain_y_lags, explain_xreg_lags)), to = nrow(y))[-explain_idx] } - # Sets up and organizes input parameters # Checks the input parameters and their compatability # Checks data/model compatability @@ -128,13 +130,15 @@ explain_forecast <- function(model, output_size = horizon, max_n_coalitions = max_n_coalitions, n_MC_samples = n_MC_samples, - # n_batches = n_batches, # TODO: This is not used anymore, but the code does not use the adaptive version of it - # either I think... I have now just set it to always 10 in the create_S_batches_forecast function. seed = seed, keep_samp_for_vS = keep_samp_for_vS, feature_specs = feature_specs, type = "forecast", horizon = horizon, + adaptive = adaptive, + adaptive_arguments = adaptive_arguments, + shapley_reweighting = shapley_reweighting, + init_time = init_time, y = y, xreg = xreg, train_idx = train_idx, @@ -147,7 +151,6 @@ explain_forecast <- function(model, ... 
) - timing_list$setup <- Sys.time() # Gets predict_model (if not passed to explain) predict_model <- get_predict_model( @@ -155,7 +158,6 @@ explain_forecast <- function(model, model = model ) - # Checks that predict_model gives correct format test_predict_model( x_test = head(internal$data$x_train, 2), @@ -164,39 +166,71 @@ explain_forecast <- function(model, internal = internal ) - timing_list$test_prediction <- Sys.time() + internal$timing_list$test_prediction <- Sys.time() + # Setup for approach + internal <- setup_approach(internal, model = model, predict_model = predict_model) - # Sets up the Shapley (sampling) framework and prepares the - # conditional expectation computation for the chosen approach - # Note: model and predict_model are ONLY used by the AICc-methods of approach empirical to find optimal parameters - internal <- setup_computation(internal, model, predict_model) + internal$main_timing_list <- internal$timing_list - timing_list$setup_computation <- Sys.time() + converged <- FALSE + iter <- length(internal$iter_list) + if (!is.null(seed)) { + set.seed(seed) + } - ### Temporary solution for forecast - internal$iter_list[[1]]$X <- internal$objects$X - internal$iter_list[[1]]$S <- internal$objects$S + cli_startup(internal, model, verbose) + while (converged == FALSE) { + cli_iter(verbose, internal, iter) - # Compute the v(S): - # Get the samples for the conditional distributions with the specified approach - # Predict with these samples - # Perform MC integration on these to estimate the conditional expectation (v(S)) - vS_list <- compute_vS_forecast(internal, model, predict_model, method = "regular") + internal$timing_list <- list(init = Sys.time()) - timing_list$compute_vS <- Sys.time() + # setup the Shapley framework + internal <- shapley_setup_forecast(internal) - # Compute Shapley values based on conditional expectations (v(S)) - # Organize function output - output <- finalize_explanation_forecast( - vS_list = vS_list, - internal = internal - ) + # May not need to be called here? 
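# [Editor note] The question above refers to the setup_approach() call that follows.
# As context for this new convergence loop: a minimal sketch, with invented toy
# numbers, of the rule check_convergence() (R/check_convergence.R above) applies on
# each pass. Illustration only, not code from the package.
n_sampled <- 30 # sampled coalitions so far (excluding the empty and full ones)
max_sd <- 0.20 # largest bootstrap sd among one row's Shapley values
max_sd0 <- max_sd * sqrt(n_sampled) # scaled up, as in check_convergence()
est_range <- 3.1 - (-2.4) # maxval - minval of that row's Shapley estimates
conv_measure <- max_sd0 / (est_range * sqrt(n_sampled)) # compared to the tolerance
req_samples <- (max_sd0 / (est_range * 0.02))^2 # coalitions needed at tolerance 0.02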
+ internal <- setup_approach(internal, model = model, predict_model = predict_model) + + # Compute the vS + vS_list <- compute_vS(internal, model, predict_model, method = "regular") + + # Compute Shapley values based on conditional expectations (v(S)) + internal <- compute_estimates( + vS_list = vS_list, + internal = internal + ) + + # Check convergence based on estimates and standard deviations (and thresholds) + internal <- check_convergence(internal) - output$timing <- compute_time(timing_list) + # Save intermediate results + save_results(internal) + # Preparing parameters for next iteration (does not do anything if already converged) + internal <- prepare_next_iteration(internal) + + # Printing iteration information + print_iter(internal) + + ### Setting globals to simplify the loop + converged <- internal$iter_list[[iter]]$converged + + internal$timing_list$postprocess_res <- Sys.time() + + internal$iter_timing_list[[iter]] <- internal$timing_list + + iter <- iter + 1 + } + + internal$main_timing_list$main_computation <- Sys.time() + + output <- finalize_explanation(internal = internal) + + internal$main_timing_list$finalize_explanation <- Sys.time() + + output$timing <- compute_time(internal) # Some cleanup when doing testing testing <- internal$parameters$testing @@ -314,6 +348,8 @@ get_data_forecast <- function(y, xreg, train_idx, explain_idx, explain_y_lags, e y = y, xreg = xreg, group = reg_fcast$group, + horizon_group = reg_fcast$horizon_group, + shap_names = names(data_lag$group), n_endo = ncol(data_lag$lagged), x_train = cbind( data.table::as.data.table(data_lag$lagged[train_idx, , drop = FALSE]), @@ -366,6 +402,7 @@ lag_data <- function(x, lags) { reg_forecast_setup <- function(x, horizon, group) { fcast <- matrix(NA, nrow(x) - horizon + 1, 0) names <- character() + horizon_group <- lapply(seq_len(horizon), function(i) names(group)[!(names(group) %in% colnames(x))]) for (i in seq_len(ncol(x))) { names_i <- paste0(colnames(x)[i], ".F", seq_len(horizon)) names <- c(names, names_i) + fcast <- cbind(fcast, fcast_i) # Append group names if the exogenous regressor also has lagged values.
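# [Editor note] A worked illustration, with hypothetical inputs, of the per-horizon
# grouping the rewritten loop below constructs: one exogenous regressor "Wind" with
# a single lag column "Wind.1" and horizon = 2 yields one group per horizon, each
# holding the lagged column plus the forecasts available up to that horizon.
group <- list(Wind = "Wind.1") # lagged values, grouped per variable
names_i <- c("Wind.F1", "Wind.F2") # forecast columns created in the loop
for (h in 1:2) {
  group[[paste0("Wind.", h)]] <- c(group[["Wind"]], names_i[seq_len(h)])
}
group[["Wind"]] <- NULL
# group$Wind.1 is c("Wind.1", "Wind.F1"); group$Wind.2 also includes "Wind.F2",
# i.e. the horizon-h group contains exactly the columns usable at horizon h.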
- group[[colnames(x)[i]]] <- c(group[[colnames(x)[i]]], names_i) + for (h in seq_len(horizon)) { + group[[paste0(colnames(x)[i], ".", h)]] <- c(group[[colnames(x)[i]]], names_i[seq_len(h)]) + horizon_group[[h]] <- c(horizon_group[[h]], paste0(colnames(x)[i], ".", h)) + } + group[[colnames(x)[i]]] <- NULL } colnames(fcast) <- names - return(list(fcast = fcast, group = group)) + return(list(fcast = fcast, group = group, horizon_group = horizon_group)) } diff --git a/R/finalize_explanation.R b/R/finalize_explanation.R index 8da2e73fb..579a39504 100644 --- a/R/finalize_explanation.R +++ b/R/finalize_explanation.R @@ -220,54 +220,3 @@ compute_MSEv_eval_crit <- function(internal, MSEv_coalition = MSEv_coalition )) } - - -#' Computes the Shapley values given `v(S)` -#' -#' @inherit explain -#' @inheritParams default_doc -#' @param vS_list List -#' Output from [compute_vS()] -#' -#' @export -finalize_explanation_forecast <- function(vS_list, internal) { # Temporary used for forecast only (the old function) - MSEv_uniform_comb_weights <- internal$parameters$MSEv_uniform_comb_weights - - processed_vS_list <- postprocess_vS_list( - vS_list = vS_list, - internal = internal - ) - - # Extract the predictions we are explaining - p <- get_p(processed_vS_list$dt_vS, internal) - - # Compute the Shapley values - dt_shapley <- compute_shapley_new(internal, processed_vS_list$dt_vS) - - # Clearing out the timing lists as they are added to the output separately - internal$main_timing_list <- internal$iter_timing_list <- internal$timing_list <- NULL - - # Clearing out the tmp list with model and predict_model (only added for AICc-types of empirical approach) - internal$tmp <- NULL - - internal$output <- processed_vS_list - - output <- list( - shapley_values = dt_shapley, - internal = internal, - pred_explain = p - ) - attr(output, "class") <- c("shapr", "list") - - # Compute the MSEv evaluation criterion if the output of the predictive model is a scalar. - # TODO: check if it makes sense for output_size > 1. 
- if (internal$parameters$output_size == 1) { - output$MSEv <- compute_MSEv_eval_crit( - internal = internal, - dt_vS = processed_vS_list$dt_vS, - MSEv_uniform_comb_weights = MSEv_uniform_comb_weights - ) - } - - return(output) -} diff --git a/R/get_predict_model.R b/R/get_predict_model.R index 93577e8b9..4ffec6a45 100644 --- a/R/get_predict_model.R +++ b/R/get_predict_model.R @@ -43,8 +43,11 @@ test_predict_model <- function(x_test, predict_model, model, internal) { if (!is.null(internal$parameters$type) && internal$parameters$type == "forecast") { tmp <- tryCatch(predict_model( x = model, - newdata = x_test[, 1:internal$data$n_endo, drop = FALSE], - newreg = x_test[, -(1:internal$data$n_endo), drop = FALSE], + newdata = x_test[, .SD, .SDcols = seq_len(internal$data$n_endo), drop = FALSE], + newreg = x_test[, .SD, + .SDcols = seq_len(ncol(x_test) - internal$data$n_endo) + internal$data$n_endo, + drop = FALSE + ], horizon = internal$parameters$horizon, explain_idx = rep(internal$parameters$explain_idx[1], 2), y = internal$data$y, diff --git a/R/model_arima.R b/R/model_arima.R index 7f53cd6cc..2b2a70d46 100644 --- a/R/model_arima.R +++ b/R/model_arima.R @@ -5,29 +5,32 @@ predict_model.Arima <- function(x, newdata, newreg, horizon, explain_idx, explai stop("The stats package is required for predicting stats models") } - prediction <- matrix(NA, nrow(newdata), horizon) - newdata <- as.matrix(newdata) + prediction <- matrix(NA, length(explain_idx), horizon) + newdata <- as.matrix(newdata, nrow = length(explain_idx)) newreg <- as.matrix(newreg) newdata_y_cols <- seq_len(explain_lags$y) newdata_xreg_cols_list <- lapply(paste0("xreg", seq_along(explain_lags$xreg)), function(x) grep(x, colnames(newdata))) exp_idx <- -1 - for (i in seq_len(nrow(newdata))) { + for (i in seq_len(length(explain_idx))) { if (explain_idx[i] != exp_idx) { exp_idx <- explain_idx[i] y_hist <- y[seq_len(exp_idx)] xreg_hist <- xreg[seq_len(exp_idx), , drop = FALSE] } - y_new <- as.numeric(newdata[i, newdata_y_cols]) - y_hist[seq.int(length.out = length(y_new), to = length(y_hist))] <- rev(y_new) + if (ncol(newdata) > 0) { + y_new <- as.numeric(newdata[i, newdata_y_cols]) + y_hist[seq.int(length.out = length(y_new), to = length(y_hist))] <- rev(y_new) + } if (ncol(xreg) == 0) { x <- forecast::Arima(y = y_hist, model = x) prediction[i, ] <- predict(x, h = horizon)$pred } else { for (j in seq_along(explain_lags$xreg)) { + if (length(newdata_xreg_cols_list[[j]]) == 0) next xreg_new <- as.numeric(newdata[i, newdata_xreg_cols_list[[j]]]) xreg_hist[seq.int(length.out = length(xreg_new), to = nrow(xreg_hist)), j] <- rev(xreg_new) } diff --git a/R/print.R b/R/print.R index 4977a9974..5937e1b42 100644 --- a/R/print.R +++ b/R/print.R @@ -1,4 +1,8 @@ #' @export print.shapr <- function(x, digits = 4, ...) 
{ - print(x$shapley_values, digits = digits) + shap <- copy(x$shapley_values) + shap_names <- x$internal$parameters$shap_names + cols <- c("none", shap_names) + shap[, (cols) := lapply(.SD, round, digits = digits + 2), .SDcols = cols] + print(shap, digits = digits) } diff --git a/R/setup.R b/R/setup.R index 2545f5017..19d32d563 100644 --- a/R/setup.R +++ b/R/setup.R @@ -111,9 +111,9 @@ setup <- function(x_train, colnames(internal$parameters$output_labels) <- c("explain_idx", "horizon") internal$parameters$explain_idx <- explain_idx internal$parameters$explain_lags <- list(y = explain_y_lags, xreg = explain_xreg_lags) + internal$parameters$group_lags <- group_lags # TODO: Consider handling this parameter update somewhere else (like in get_extra_parameters?) - if (group_lags) internal$parameters$group <- internal$data$group } else { internal$data <- get_data(x_train, x_explain) } @@ -122,9 +122,9 @@ setup <- function(x_train, check_data(internal) - internal <- get_extra_parameters(internal) # This includes both extra parameters and other objects + internal <- get_extra_parameters(internal, type) # This includes both extra parameters and other objects - internal <- check_and_set_parameters(internal) + internal <- check_and_set_parameters(internal, type) internal <- set_adaptive_parameters(internal, prev_iter_list) @@ -481,7 +481,17 @@ compare_feature_specs <- function(spec1, spec2, name1 = "model", name2 = "x_trai #' This includes both extra parameters and other objects #' @keywords internal -get_extra_parameters <- function(internal) { +get_extra_parameters <- function(internal, type) { + if (type == "forecast") { + if (internal$parameters$group_lags) { + internal$parameters$group <- internal$data$group + } + internal$parameters$horizon_features <- lapply( + internal$data$horizon_group, + function(x) as.character(unlist(internal$data$group[x])) + ) + } + # get number of features and observations to explain internal$parameters$n_features <- ncol(internal$data$x_explain) internal$parameters$n_explain <- nrow(internal$data$x_explain) @@ -513,14 +523,22 @@ get_extra_parameters <- function(internal) { match(x, internal$parameters$feature_names) }) - - - internal$parameters$n_groups <- length(group) internal$parameters$group_names <- names(group) internal$parameters$group <- group - internal$parameters$shap_names <- internal$parameters$group_names internal$parameters$n_shapley_values <- internal$parameters$n_groups + + if (type == "forecast") { + if (internal$parameters$group_lags) { + internal$parameters$horizon_group <- internal$data$horizon_group + internal$parameters$shap_names <- internal$data$shap_names + } else { + internal$parameters$shap_names <- internal$parameters$group_names + } + } else { + # For normal explain + internal$parameters$shap_names <- internal$parameters$group_names + } } else { internal$objects$coal_feature_list <- as.list(seq_len(internal$parameters$n_features)) @@ -583,10 +601,14 @@ get_data_specs <- function(x) { #' @keywords internal -check_and_set_parameters <- function(internal) { +check_and_set_parameters <- function(internal, type) { # Check groups feature_names <- internal$parameters$feature_names - group <- internal$parameters$group + if (type == "forecast") { + group <- internal$parameters$group[internal$parameters$horizon_group[internal$parameters$horizon][[1]]] + } else { + group <- internal$parameters$group + } # Check group if (!is.null(group)) check_groups(feature_names, group) diff --git a/R/shapley_setup.R b/R/shapley_setup.R index 01fb7e8ca..ad8d42f30 
100644 --- a/R/shapley_setup.R +++ b/R/shapley_setup.R @@ -515,93 +515,11 @@ weight_matrix <- function(X, normalize_W_weights = TRUE) { return(W) } -#' @keywords internal -create_S_batch_forecast <- function(internal, seed = NULL) { # This is temporary used for forecast only. to be removed - n_shapley_values <- internal$parameters$n_shapley_values - approach0 <- internal$parameters$approach - n_batches <- 10 # TODO: fix this! similartly to how it is done in non-forecast - - iter <- length(internal$iter_list) - - n_coalitions <- internal$iter_list[[iter]]$n_coalitions - - X <- internal$objects$X - - if (!is.null(seed)) set.seed(seed) - - if (length(approach0) > 1) { - X[!(n_coalitions %in% c(0, n_shapley_values)), approach := approach0[n_coalitions]] - - # Finding the number of batches per approach - batch_count_dt <- X[!is.na(approach), list( - n_batches_per_approach = - pmax(1, round(.N / (n_coalitions - 2) * n_batches)), - n_S_per_approach = .N - ), by = approach] - - # Ensures that the number of batches corresponds to `n_batches` - if (sum(batch_count_dt$n_batches_per_approach) != n_batches) { - # Ensure that the number of batches is not larger than `n_batches`. - # Remove one batch from the approach with the most batches. - while (sum(batch_count_dt$n_batches_per_approach) > n_batches) { - batch_count_dt[ - which.max(n_batches_per_approach), - n_batches_per_approach := n_batches_per_approach - 1 - ] - } - - # Ensure that the number of batches is not lower than `n_batches`. - # Add one batch to the approach with most coalitions per batch - while (sum(batch_count_dt$n_batches_per_approach) < n_batches) { - batch_count_dt[ - which.max(n_S_per_approach / n_batches_per_approach), - n_batches_per_approach := n_batches_per_approach + 1 - ] - } - } - - batch_count_dt[, n_leftover_first_batch := n_S_per_approach %% n_batches_per_approach] - data.table::setorder(batch_count_dt, -n_leftover_first_batch) - - approach_vec <- batch_count_dt[, approach] - n_batch_vec <- batch_count_dt[, n_batches_per_approach] - - # Randomize order before ordering spreading the batches on the different approaches as evenly as possible - # with respect to shapley_weight - X[, randomorder := sample(.N)] - data.table::setorder(X, randomorder) # To avoid smaller id_coalitions always proceeding large ones - data.table::setorder(X, shapley_weight) - - batch_counter <- 0 - for (i in seq_along(approach_vec)) { - X[approach == approach_vec[i], batch := ceiling(.I / .N * n_batch_vec[i]) + batch_counter] - batch_counter <- X[approach == approach_vec[i], max(batch)] - } - } else { - X[!(n_coalitions %in% c(0, n_shapley_values)), approach := approach0] - - # Spreading the batches - X[, randomorder := sample(.N)] - data.table::setorder(X, randomorder) - data.table::setorder(X, shapley_weight) - X[!(coalition_size %in% c(0, n_shapley_values)), batch := ceiling(.I / .N * n_batches)] - } - - # Assigning batch 1 (which always is the smallest) to the full prediction. 
- X[, randomorder := NULL] - X[id_coalition == max(id_coalition), batch := 1] - setkey(X, id_coalition) - - # Create a list of the batch splits - S_groups <- split(X[id_coalition != 1, id_coalition], X[id_coalition != 1, batch]) - - return(S_groups) -} - #' @keywords internal create_S_batch <- function(internal, seed = NULL) { n_shapley_values <- internal$parameters$n_shapley_values approach0 <- internal$parameters$approach + type <- internal$parameters$type iter <- length(internal$iter_list) @@ -613,6 +531,10 @@ create_S_batch <- function(internal, seed = NULL) { coalition_map <- internal$iter_list[[iter]]$coalition_map + if (type == "forecast") { + id_coalition_mapper_dt <- internal$iter_list[[iter]]$id_coalition_mapper_dt + full_ids <- id_coalition_mapper_dt$id_coalition[id_coalition_mapper_dt$full] + } X0 <- copy(internal$iter_list[[iter]]$X) @@ -632,7 +554,11 @@ create_S_batch <- function(internal, seed = NULL) { if (!is.null(seed)) set.seed(seed) if (length(approach0) > 1) { - X0[!(coalition_size %in% c(0, n_shapley_values)), approach := approach0[coalition_size]] + if (type == "forecast") { + X0[!(coalition_size == 0 | id_coalition %in% full_ids), approach := approach0[coalition_size]] + } else { + X0[!(coalition_size %in% c(0, n_shapley_values)), approach := approach0[coalition_size]] + } # Finding the number of batches per approach batch_count_dt <- X0[!is.na(approach), list( @@ -680,18 +606,30 @@ create_S_batch <- function(internal, seed = NULL) { batch_counter <- X0[approach == approach_vec[i], max(batch)] } } else { - X0[!(coalition_size %in% c(0, n_shapley_values)), approach := approach0] + if (type == "forecast") { + X0[!(coalition_size == 0 | id_coalition %in% full_ids), approach := approach0] + } else { + X0[!(coalition_size %in% c(0, n_shapley_values)), approach := approach0] + } # Spreading the batches X0[, randomorder := sample(.N)] data.table::setorder(X0, randomorder) data.table::setorder(X0, shapley_weight) - X0[!(coalition_size %in% c(0, n_shapley_values)), batch := ceiling(.I / .N * n_batches)] + if (type == "forecast") { + X0[!(coalition_size == 0 | id_coalition %in% full_ids), batch := ceiling(.I / .N * n_batches)] + } else { + X0[!(coalition_size %in% c(0, n_shapley_values)), batch := ceiling(.I / .N * n_batches)] + } } # Assigning batch 1 (which always is the smallest) to the full prediction. X0[, randomorder := NULL] - X0[id_coalition == max(id_coalition), batch := 1] + if (type == "forecast") { + X0[id_coalition %in% full_ids, batch := 1] + } else { + X0[id_coalition == max(id_coalition), batch := 1] + } setkey(X0, id_coalition) # Create a list of the batch splits @@ -707,7 +645,7 @@ create_S_batch <- function(internal, seed = NULL) { #' @inheritParams explain #' @inherit default_doc #' @export -setup_computation <- function(internal, model, predict_model) { +setup_computation <- function(internal, model, predict_model) { # Can this function be removed? 
/Jon # model and predict_model are only needed for type AICc of approach empirical, otherwise ignored type <- internal$parameters$type @@ -731,6 +669,7 @@ shapley_setup_forecast <- function(internal) { coal_feature_list <- internal$objects$coal_feature_list horizon <- internal$parameters$horizon + horizon_group <- internal$parameters$horizon_group feature_names <- internal$parameters$feature_names iter <- length(internal$iter_list) @@ -741,50 +680,36 @@ shapley_setup_forecast <- function(internal) { X_list <- W_list <- list() - # Find columns to be included in each of the different horizons - col_del_list <- list() - col_del_list[[1]] <- numeric() - if (horizon > 1) { - k <- 2 - for (i in rev(seq_len(horizon)[-1])) { - col_del_list[[k]] <- c(unlist(col_del_list[[k - 1]]), grep(paste0(".F", i), feature_names)) - k <- k + 1 - } - } - - cols_per_horizon <- lapply(rev(col_del_list), function(x) if (length(x) > 0) feature_names[-x] else feature_names) - + cols_per_horizon <- internal$parameters$horizon_features horizon_features <- lapply(cols_per_horizon, function(x) which(internal$parameters$feature_names %in% x)) # Apply create_coalition_table, weigth_matrix and coalition_matrix_cpp to each of the different horizons for (i in seq_along(horizon_features)) { - # TODO: Somethis is not correct in these next 20 lines of code. Something was messed up after - # Removing the group stuff and generalizing to coalitions. - - this_featcomb <- horizon_features[[i]] - - this_coal_feature_list <- lapply(coal_feature_list, function(x) x[x %in% this_featcomb]) - this_coal_feature_list <- this_coal_feature_list[sapply(this_coal_feature_list, function(x) length(x) != 0)] + if (is_groupwise && !is.null(horizon_group)) { + this_coal_feature_list <- coal_feature_list[sapply( + names(coal_feature_list), + function(x) x %in% horizon_group[[i]] + )] + } else { + this_coal_feature_list <- lapply(coal_feature_list, function(x) x[x %in% horizon_features[[i]]]) + this_coal_feature_list <- this_coal_feature_list[sapply(this_coal_feature_list, function(x) length(x) != 0)] + } n_this_featcomb <- length(this_coal_feature_list) - - n_coalitions_here <- min(2^n_this_featcomb, n_coalitions) - exact_here <- ifelse(n_coalitions_here == 2^n_this_featcomb, TRUE, exact) - X_list[[i]] <- create_coalition_table( m = n_this_featcomb, - exact = exact_here, + exact = exact, n_coalitions = n_coalitions_here, weight_zero_m = 10^6, paired_shap_sampling = paired_shap_sampling, prev_coal_samples = prev_coal_samples, coal_feature_list = this_coal_feature_list, - approach0 = approach + approach0 = approach, + shapley_reweighting = shapley_reweighting ) - W_list[[i]] <- weight_matrix( X = X_list[[i]], normalize_W_weights = TRUE @@ -794,17 +719,16 @@ shapley_setup_forecast <- function(internal) { # Merge the coalition data.table to single one to use for computing conditional expectations later on X <- rbindlist(X_list, idcol = "horizon") X[, N := NA] - X[, shapley_weight := NA] data.table::setorderv(X, c("coalition_size", "horizon"), order = c(1, -1)) X[, horizon_id_coalition := id_coalition] X[, id_coalition := 0] - X[!duplicated(coalitions), id_coalition := .I] - X[, tmp_coalitions := as.character(coalitions)] + X[!duplicated(features), id_coalition := .I] + X[, tmp_coalitions := as.character(features)] X[, id_coalition := max(id_coalition), by = tmp_coalitions] X[, tmp_coalitions := NULL] # Extracts a data.table allowing mapping from X to X_list/W_list to be used in the compute_shapley function - id_coalition_mapper_dt <- X[, .(horizon, 
horizon_id_coalition, id_coalition)] + id_coalition_mapper_dt <- X[, .(horizon, horizon_id_coalition, id_coalition, full = features %in% horizon_features)] X[, horizon := NULL] X[, horizon_id_coalition := NULL] @@ -813,6 +737,10 @@ shapley_setup_forecast <- function(internal) { W <- NULL # Included for consistency. Necessary weights are in W_list instead + coalition_map <- X[, .(id_coalition, + coalitions_str = sapply(features, paste, collapse = " ") + )] + ## Get feature matrix --------- S <- coalition_matrix_cpp( coalitions = X[["features"]], @@ -824,6 +752,7 @@ shapley_setup_forecast <- function(internal) { # Updating parameters$exact as done in create_coalition_table if (!exact && n_coalitions >= 2^n_shapley_values) { + internal$iter_list[[iter]]$exact <- TRUE internal$parameters$exact <- TRUE # Note that this is exact only if all horizons use the exact method. } @@ -833,16 +762,16 @@ shapley_setup_forecast <- function(internal) { internal$parameters$group_num <- NULL # TODO: Checking whether I could just do this processing where needed # instead of storing it - internal$objects$X <- X - internal$objects$W <- W - internal$objects$S <- S - internal$objects$S_batch <- create_S_batch_forecast(internal) + internal$iter_list[[iter]]$X <- X + internal$iter_list[[iter]]$W <- W + internal$iter_list[[iter]]$S <- S + internal$iter_list[[iter]]$id_coalition_mapper_dt <- id_coalition_mapper_dt + internal$iter_list[[iter]]$X_list <- X_list + internal$iter_list[[iter]]$coalition_map <- coalition_map + internal$iter_list[[iter]]$S_batch <- create_S_batch(internal) - internal$objects$id_coalition_mapper_dt <- id_coalition_mapper_dt internal$objects$cols_per_horizon <- cols_per_horizon internal$objects$W_list <- W_list - internal$objects$X_list <- X_list - return(internal) } diff --git a/man/compute_vS_forecast.Rd b/man/compute_vS_forecast.Rd deleted file mode 100644 index 0b6d25633..000000000 --- a/man/compute_vS_forecast.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/compute_vS.R -\name{compute_vS_forecast} -\alias{compute_vS_forecast} -\title{Computes \code{v(S)} for all features subsets \code{S}.} -\usage{ -compute_vS_forecast(internal, model, predict_model, method = "future") -} -\arguments{ -\item{internal}{List. -Holds all parameters, data, functions and computed objects used within \code{\link[=explain]{explain()}} -The list contains one or more of the elements \code{parameters}, \code{data}, \code{objects}, \code{iter_list}, \code{timing_list}, -\code{main_timing_list}, \code{output}, \code{iter_timing_list} and \code{iter_results}.} - -\item{model}{Objects. -The model object that ought to be explained. -See the documentation of \code{\link[=explain]{explain()}} for details.} - -\item{predict_model}{Function. -The prediction function used when \code{model} is not natively supported. -See the documentation of \code{\link[=explain]{explain()}} for details.} - -\item{method}{Character -Indicates whether the lappy method (default) or loop method should be used.} -} -\description{ -Computes \code{v(S)} for all features subsets \code{S}. 
-} diff --git a/man/explain_forecast.Rd b/man/explain_forecast.Rd index e7adac7f4..c3a07622f 100644 --- a/man/explain_forecast.Rd +++ b/man/explain_forecast.Rd @@ -16,6 +16,9 @@ explain_forecast( approach, prediction_zero, max_n_coalitions = NULL, + adaptive = NULL, + adaptive_arguments = list(), + shapley_reweighting = "on_all_cond", group_lags = TRUE, group = NULL, n_MC_samples = 1000, @@ -82,6 +85,35 @@ The quantity refers to the number of unique feature coalitions if \code{group = NULL}, and group coalitions if \code{group != NULL}. \code{max_n_coalitions = NULL} corresponds to \code{max_n_coalitions=2^n_features}.} +\item{adaptive}{Logical or NULL. +If \code{NULL} (default), the argument is set to \code{TRUE} if there are more than 5 features/groups, and \code{FALSE} otherwise. +If eventually \code{TRUE}, the Shapley values are estimated adaptively in an iterative manner. +This provides sufficiently accurate Shapley value estimates faster. +First an initial number of coalitions is sampled, then bootstrapping is used to estimate the variance of the Shapley +values. +A convergence criterion is used to determine if the variances of the Shapley values are sufficiently small. +If the variances are too high, we estimate the number of required samples to reach convergence, and thereby add more +coalitions. +The process is repeated until the variances are below the threshold. +Specifics related to the adaptive process and convergence criterion are set through \code{adaptive_arguments}.} + +\item{adaptive_arguments}{Named list. +Specifies the arguments for the adaptive procedure. +See \code{\link[=get_adaptive_arguments_default]{get_adaptive_arguments_default()}} for description of the arguments and their default values.} + +\item{shapley_reweighting}{String. +How to reweight the sampling frequency weights in the kernelSHAP solution after sampling, with the aim of reducing +the randomness and thereby the variance of the Shapley value estimates. +One of \code{'none'}, \code{'on_coal_size'}, \code{'on_N'}, \code{'on_all'}, \code{'on_all_cond'} (default). +\code{'none'} means no reweighting, i.e. the sampling frequency weights are used as is. +\code{'on_coal_size'} means the sampling frequencies are averaged over all coalitions of the same size. +\code{'on_N'} means the sampling frequencies are averaged over all coalitions with the same original sampling +probabilities. +\code{'on_all'} means the original sampling probabilities are used for all coalitions. +\code{'on_all_cond'} means the original sampling probabilities are used for all coalitions, while adjusting for the +probability that they are sampled at least once. +This method is preferred as it has performed the best in simulation studies.} + \item{group_lags}{Logical. If \code{TRUE} all lags of each variable are grouped together and explained as a group. If \code{FALSE} all lags of each variable are explained individually.} diff --git a/man/finalize_explanation_forecast.Rd b/man/finalize_explanation_forecast.Rd deleted file mode 100644 index c42c9b783..000000000 --- a/man/finalize_explanation_forecast.Rd +++ /dev/null @@ -1,232 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/finalize_explanation.R -\name{finalize_explanation_forecast} -\alias{finalize_explanation_forecast} -\title{Computes the Shapley values given \code{v(S)}} -\usage{ -finalize_explanation_forecast(vS_list, internal) -} -\arguments{ -\item{vS_list}{List -Output from \code{\link[=compute_vS]{compute_vS()}}} - -\item{internal}{List.
-Holds all parameters, data, functions and computed objects used within \code{\link[=explain]{explain()}} -The list contains one or more of the elements \code{parameters}, \code{data}, \code{objects}, \code{iter_list}, \code{timing_list}, -\code{main_timing_list}, \code{output}, \code{iter_timing_list} and \code{iter_results}.} -} -\value{ -Object of class \code{c("shapr", "list")}. Contains the following items: -\describe{ -\item{shapley_values}{data.table with the estimated Shapley values with explained observation in the rows and -features along the columns. -The column \code{none} is the prediction not devoted to any of the features (given by the argument \code{prediction_zero})} -\item{shapley_values_sd}{data.table with the standard deviation of the Shapley values reflecting the uncertainty. -Note that this only reflects the coalition sampling part of the kernelSHAP procedure, and is therefore by -definition 0 when all coalitions is used. -Only present when \code{adaptive = TRUE} and \code{adaptive_arguments$compute_sd=TRUE}.} -\item{internal}{List with the different parameters, data, functions and other output used internally.} -\item{pred_explain}{Numeric vector with the predictions for the explained observations} -\item{MSEv}{List with the values of the MSEv evaluation criterion for the approach. See the -\href{https://norskregnesentral.github.io/shapr/articles/understanding_shapr.html#msev-evaluation-criterion -}{MSEv evaluation section in the vignette for details}.} -\item{timing}{List containing timing information for the different parts of the computation. -\code{init_time} and \code{end_time} gives the time stamps for the start and end of the computation. -\code{total_time_secs} gives the total time in seconds for the complete execution of \code{explain()}. -\code{main_timing_secs} gives the time in seconds for the main computations. -\code{iter_timing_secs} gives for each iteration of the adaptive estimation, the time spent on the different parts -adaptive estimation routine.} -} -} -\description{ -Computes dependence-aware Shapley values for observations in \code{x_explain} from the specified -\code{model} by using the method specified in \code{approach} to estimate the conditional expectation. -} -\details{ -The \code{shapr} package implements kernelSHAP estimation of dependence-aware Shapley values with -eight different Monte Carlo-based approaches for estimating the conditional distributions of the data, namely -\code{"empirical"}, \code{"gaussian"}, \code{"copula"}, \code{"ctree"}, \code{"vaeac"}, \code{"categorical"}, \code{"timeseries"}, and \code{"independence"}. -\code{shapr} has also implemented two regression-based approaches \code{"regression_separate"} and \code{"regression_surrogate"}. -It is also possible to combine the different approaches, see the vignettes for more information. - -The package also supports the computation of causal and asymmetric Shapley values as introduced by -Heskes et al. (2020) and Frye et al. (2020). Asymmetric Shapley values were proposed by Heskes et al. (2020) -as a way to incorporate causal knowledge in the real world by restricting the possible feature -combinations/coalitions when computing the Shapley values to those consistent with a (partial) causal ordering. -Causal Shapley values were proposed by Frye et al. (2020) as a way to explain the total effect of features -on the prediction, taking into account their causal relationships, by adapting the sampling procedure in \code{shapr}. 
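[Editor note] With finalize_explanation_forecast() removed, the adaptive machinery is now reached directly through explain_forecast(). A hedged usage sketch of the new arguments; the model/data/baseline names (model_arima_temp, data_arima, p0_ar) follow the test snapshots further down, and the exact values are illustrative only:

library(shapr)
explain_forecast(
  model = model_arima_temp,
  y = data_arima[1:150, "Temp"],
  xreg = data_arima[, "Wind"],
  train_idx = 2:148,
  explain_idx = 149:150,
  explain_y_lags = 2,
  explain_xreg_lags = 2,
  horizon = 3,
  approach = "empirical",
  prediction_zero = p0_ar,
  group_lags = FALSE,
  adaptive = TRUE,
  adaptive_arguments = list(initial_n_coalitions = 10),
  shapley_reweighting = "on_all_cond"
)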
- -The package allows for parallelized computation with progress updates through the tightly connected -\link[future:future]{future::future} and \link[progressr:progressr]{progressr::progressr} packages. See the examples below. -For adaptive estimation (\code{adaptive=TRUE}), intermediate results may also be printed to the console -(according to the \code{verbose} argument). -Moreover, the intermediate results are written to disk. -This combined with adaptive estimation with (optional) intermediate results printed to the console (and temporary -written to disk, and batch computing of the v(S) values, enables fast and accurate estimation of the Shapley values -in a memory friendly manner. -} -\examples{ - -# Load example data -data("airquality") -airquality <- airquality[complete.cases(airquality), ] -x_var <- c("Solar.R", "Wind", "Temp", "Month") -y_var <- "Ozone" - -# Split data into test- and training data -data_train <- head(airquality, -3) -data_explain <- tail(airquality, 3) - -x_train <- data_train[, x_var] -x_explain <- data_explain[, x_var] - -# Fit a linear model -lm_formula <- as.formula(paste0(y_var, " ~ ", paste0(x_var, collapse = " + "))) -model <- lm(lm_formula, data = data_train) - -# Explain predictions -p <- mean(data_train[, y_var]) - -\dontrun{ -# (Optionally) enable parallelization via the future package -if (requireNamespace("future", quietly = TRUE)) { - future::plan("multisession", workers = 2) -} -} - -# (Optionally) enable progress updates within every iteration via the progressr package -if (requireNamespace("progressr", quietly = TRUE)) { - progressr::handlers(global = TRUE) -} - -# Empirical approach -explain1 <- explain( - model = model, - x_explain = x_explain, - x_train = x_train, - approach = "empirical", - prediction_zero = p, - n_MC_samples = 1e2 -) - -# Gaussian approach -explain2 <- explain( - model = model, - x_explain = x_explain, - x_train = x_train, - approach = "gaussian", - prediction_zero = p, - n_MC_samples = 1e2 -) - -# Gaussian copula approach -explain3 <- explain( - model = model, - x_explain = x_explain, - x_train = x_train, - approach = "copula", - prediction_zero = p, - n_MC_samples = 1e2 -) - -# ctree approach -explain4 <- explain( - model = model, - x_explain = x_explain, - x_train = x_train, - approach = "ctree", - prediction_zero = p, - n_MC_samples = 1e2 -) - -# Combined approach -approach <- c("gaussian", "gaussian", "empirical") -explain5 <- explain( - model = model, - x_explain = x_explain, - x_train = x_train, - approach = approach, - prediction_zero = p, - n_MC_samples = 1e2 -) - -# Print the Shapley values -print(explain1$shapley_values) - -# Plot the results -if (requireNamespace("ggplot2", quietly = TRUE)) { - plot(explain1) - plot(explain1, plot_type = "waterfall") -} - -# Group-wise explanations -group_list <- list(A = c("Temp", "Month"), B = c("Wind", "Solar.R")) - -explain_groups <- explain( - model = model, - x_explain = x_explain, - x_train = x_train, - group = group_list, - approach = "empirical", - prediction_zero = p, - n_MC_samples = 1e2 -) -print(explain_groups$shapley_values) - -# Separate and surrogate regression approaches with linear regression models. -# More complex regression models can be used, and we can use CV to -# tune the hyperparameters of the regression models and preprocess -# the data before sending it to the model. See the regression vignette -# (Shapley value explanations using the regression paradigm) for more -# details about the `regression_separate` and `regression_surrogate` approaches. 
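[Editor note] Aside from the deleted examples here: the new print.shapr() in R/print.R above rounds the Shapley columns before printing. A self-contained sketch of that idiom, with an invented one-row table:

library(data.table)
shap <- data.table(explain_id = 1L, none = 42.4417812, Temp = -1.3912061)
digits <- 4
cols <- c("none", "Temp") # "none" plus shap_names, as in print.shapr()
shap[, (cols) := lapply(.SD, round, digits = digits + 2), .SDcols = cols]
print(shap, digits = digits)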
-explain_separate_lm <- explain( - model = model, - x_explain = x_explain, - x_train = x_train, - prediction_zero = p, - approach = "regression_separate", - regression.model = parsnip::linear_reg() -) - -explain_surrogate_lm <- explain( - model = model, - x_explain = x_explain, - x_train = x_train, - prediction_zero = p, - approach = "regression_surrogate", - regression.model = parsnip::linear_reg() -) - -## Adaptive estimation -# For illustration purposes only. By default not used for such small dimensions as here - -# Gaussian approach -explain_adaptive <- explain( - model = model, - x_explain = x_explain, - x_train = x_train, - approach = "gaussian", - prediction_zero = p, - n_MC_samples = 1e2, - adaptive = TRUE, - adaptive_arguments = list(initial_n_coalitions = 10) -) - -} -\references{ -\itemize{ -\item Aas, K., Jullum, M., & Lland, A. (2021). Explaining individual predictions when features are dependent: -More accurate approximations to Shapley values. Artificial Intelligence, 298, 103502. -\item Frye, C., Rowat, C., & Feige, I. (2020). Asymmetric Shapley values: -incorporating causal knowledge into model-agnostic explainability. -Advances in neural information processing systems, 33, 1229-1239. -\item Heskes, T., Sijben, E., Bucur, I. G., & Claassen, T. (2020). Causal shapley values: -Exploiting causal knowledge to explain individual predictions of complex models. -Advances in neural information processing systems, 33, 4778-4789. -\item Olsen, L. H. B., Glad, I. K., Jullum, M., & Aas, K. (2024). A comparative study of methods for estimating -model-agnostic Shapley value explanations. Data Mining and Knowledge Discovery, 1-48. -} -} -\author{ -Martin Jullum, Lars Henry Berge Olsen -} diff --git a/man/get_extra_parameters.Rd b/man/get_extra_parameters.Rd index de1acfa35..7168e74bd 100644 --- a/man/get_extra_parameters.Rd +++ b/man/get_extra_parameters.Rd @@ -4,7 +4,7 @@ \alias{get_extra_parameters} \title{This includes both extra parameters and other objects} \usage{ -get_extra_parameters(internal) +get_extra_parameters(internal, type) } \description{ This includes both extra parameters and other objects diff --git a/tests/testthat/_snaps/adaptive-output/output_lm_numeric_gaussian_group_converges_tol.rds b/tests/testthat/_snaps/adaptive-output/output_lm_numeric_gaussian_group_converges_tol.rds index 9c5af80e0..7ce9cd8c5 100644 Binary files a/tests/testthat/_snaps/adaptive-output/output_lm_numeric_gaussian_group_converges_tol.rds and b/tests/testthat/_snaps/adaptive-output/output_lm_numeric_gaussian_group_converges_tol.rds differ diff --git a/tests/testthat/_snaps/asymmetric-causal-output.md b/tests/testthat/_snaps/asymmetric-causal-output.md index 69d88372c..614566a6a 100644 --- a/tests/testthat/_snaps/asymmetric-causal-output.md +++ b/tests/testthat/_snaps/asymmetric-causal-output.md @@ -556,7 +556,7 @@ explain_id none Solar.R Wind Temp Day Month_factor 1: 1 42.44 -2.13189 8.867 9.390 -1.137 -4.404 - 2: 2 42.44 0.07793 -7.916 -3.340 -1.378 -2.828 + 2: 2 42.44 0.07794 -7.916 -3.340 -1.378 -2.828 3: 3 42.44 -2.32289 -13.512 4.116 -1.343 2.462 # output_mixed_asym_caus_conf_mixed diff --git a/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_TRUE_group.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_TRUE_group.rds index 2a123885e..e93b12e6e 100644 Binary files a/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_TRUE_group.rds and b/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_TRUE_group.rds differ diff 
--git a/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_mix_group.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_mix_group.rds index d23513ef9..b45047bb8 100644 Binary files a/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_mix_group.rds and b/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_mix_group.rds differ diff --git a/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_mix_group_adaptive.rds b/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_mix_group_adaptive.rds index ac9f600bf..7ff387bb2 100644 Binary files a/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_mix_group_adaptive.rds and b/tests/testthat/_snaps/asymmetric-causal-output/output_sym_caus_conf_mix_group_adaptive.rds differ diff --git a/tests/testthat/_snaps/forecast-output.md b/tests/testthat/_snaps/forecast-output.md new file mode 100644 index 000000000..000bc2510 --- /dev/null +++ b/tests/testthat/_snaps/forecast-output.md @@ -0,0 +1,239 @@ +# forecast_output_ar_numeric + + Code + (out <- code) + Message + Note: Feature names extracted from the model contains NA. + Consistency checks between model and data is therefore disabled. + + Success with message: + max_n_coalitions is NULL or larger than or 2^n_features = 4, + and is therefore set to 2^n_features = 4. + + * Model class: + * Approach: empirical + * Adaptive estimation: FALSE + * Number of feature-wise Shapley values: 2 + * Number of observations to explain: 2 + + -- Main computation started -- + + i Using 4 of 4 coalitions. + Output + explain_idx horizon none Temp.1 Temp.2 + + 1: 152 1 77.88 -0.3972 -1.3912 + 2: 153 1 77.88 -6.6177 -0.1835 + 3: 152 2 77.88 -0.3285 -1.2034 + 4: 153 2 77.88 -6.0208 -0.3371 + 5: 152 3 77.88 -0.2915 -1.0552 + 6: 153 3 77.88 -5.2122 -0.2553 + +# forecast_output_arima_numeric + + Code + (out <- code) + Message + Note: Feature names extracted from the model contains NA. + Consistency checks between model and data is therefore disabled. + + Success with message: + max_n_coalitions is NULL or larger than or 2^n_features = 128, + and is therefore set to 2^n_features = 128. + + * Model class: + * Approach: empirical + * Adaptive estimation: FALSE + * Number of feature-wise Shapley values: 7 + * Number of observations to explain: 2 + + -- Main computation started -- + + i Using 128 of 128 coalitions. + Output + explain_idx horizon none Temp.1 Temp.2 Wind.1 Wind.2 Wind.F1 Wind.F2 + + 1: 149 1 77.88 -0.9588 -5.044 1.0543 -2.8958 -2.6627 NA + 2: 150 1 77.88 1.1553 -3.137 -2.8802 0.7196 -1.4930 NA + 3: 149 2 77.88 0.1327 -5.048 0.3337 -2.8249 -2.3014 -1.1764 + 4: 150 2 77.88 1.6007 -2.399 -2.8146 0.4646 -0.7938 0.4662 + 5: 149 3 77.88 -1.3878 -5.014 0.7964 -1.3881 -1.9652 -0.3295 + 6: 150 3 77.88 1.6690 -2.556 -2.3821 0.3835 -0.8644 -0.1648 + Wind.F3 + + 1: NA + 2: NA + 3: NA + 4: NA + 5: 0.5630 + 6: -0.7615 + +# forecast_output_arima_numeric_adaptive + + Code + (out <- code) + Message + Note: Feature names extracted from the model contains NA. + Consistency checks between model and data is therefore disabled. + + * Model class: + * Approach: empirical + * Adaptive estimation: TRUE + * Number of feature-wise Shapley values: 9 + * Number of observations to explain: 2 + + -- Adaptive computation started -- + + -- Iteration 1 ----------------------------------------------------------------- + i Using 10 of 512 coalitions, 10 new. 
+ + -- Iteration 2 ----------------------------------------------------------------- + i Using 30 of 512 coalitions, 4 new. + + -- Iteration 3 ----------------------------------------------------------------- + i Using 78 of 512 coalitions, 6 new. + Output + explain_idx horizon none Temp.1 Temp.2 Temp.3 Wind.1 Wind.2 Wind.3 + + 1: 149 1 77.88 -2.795 -4.5597 -1.114 1.564 -1.8995 0.2087 + 2: 150 1 77.88 4.024 -0.5774 -4.589 -2.234 0.1985 -2.2827 + 3: 149 2 77.88 -3.701 -4.2427 -1.326 1.465 -1.9227 0.7060 + 4: 150 2 77.88 3.460 -0.9158 -5.264 -2.452 0.7709 -1.7864 + 5: 149 3 77.88 -4.721 -3.4208 -1.503 1.172 -0.4564 -0.6058 + 6: 150 3 77.88 2.811 0.4206 -5.361 -1.388 0.0752 -0.2130 + Wind.F1 Wind.F2 Wind.F3 + + 1: -1.9118 NA NA + 2: -0.1747 NA NA + 3: -1.1883 -0.6744 NA + 4: 0.7128 1.9982 NA + 5: -1.5436 -0.5418 2.8952 + 6: -0.6202 -0.8545 0.4549 + +# forecast_output_arima_numeric_adaptive_groups + + Code + (out <- code) + Message + Note: Feature names extracted from the model contains NA. + Consistency checks between model and data is therefore disabled. + + * Model class: + * Approach: empirical + * Adaptive estimation: TRUE + * Number of group-wise Shapley values: 10 + * Number of observations to explain: 2 + + -- Adaptive computation started -- + + -- Iteration 1 ----------------------------------------------------------------- + i Using 10 of 1024 coalitions, 10 new. + + -- Iteration 2 ----------------------------------------------------------------- + i Using 28 of 1024 coalitions, 2 new. + + -- Iteration 3 ----------------------------------------------------------------- + i Using 56 of 1024 coalitions, 12 new. + Output + explain_idx horizon none Temp Wind Solar.R Ozone + + 1: 149 1 77.88 -4.680 -3.6712 0.3230 -1.253 + 2: 150 1 77.88 -2.487 -3.6317 1.8415 -0.891 + 3: 149 2 77.88 -6.032 -4.1973 2.5973 -2.402 + 4: 150 2 77.88 -3.124 0.1986 0.8258 -2.245 + 5: 149 3 77.88 -7.777 1.1382 0.6962 -3.267 + 6: 150 3 77.88 -3.142 -1.6674 2.9047 -2.024 + +# forecast_output_arima_numeric_no_xreg + + Code + (out <- code) + Message + Note: Feature names extracted from the model contains NA. + Consistency checks between model and data is therefore disabled. + + Success with message: + max_n_coalitions is NULL or larger than or 2^n_features = 4, + and is therefore set to 2^n_features = 4. + + * Model class: + * Approach: empirical + * Adaptive estimation: FALSE + * Number of feature-wise Shapley values: 2 + * Number of observations to explain: 2 + + -- Main computation started -- + + i Using 4 of 4 coalitions. + Output + explain_idx horizon none Temp.1 Temp.2 + + 1: 149 1 77.88 -1.7273 -7.033 + 2: 150 1 77.88 -0.2229 -4.492 + 3: 149 2 77.88 -1.7273 -7.033 + 4: 150 2 77.88 -0.2229 -4.492 + 5: 149 3 77.88 -1.7273 -7.033 + 6: 150 3 77.88 -0.2229 -4.492 + +# forecast_output_forecast_ARIMA_group_numeric + + Code + (out <- code) + Message + Note: Feature names extracted from the model contains NA. + Consistency checks between model and data is therefore disabled. + + Success with message: + max_n_coalitions is NULL or larger than or 2^n_groups = 16, + and is therefore set to 2^n_groups = 16. + + * Model class: + * Approach: empirical + * Adaptive estimation: FALSE + * Number of group-wise Shapley values: 4 + * Number of observations to explain: 2 + + -- Main computation started -- + + i Using 16 of 16 coalitions. 
+ Output + explain_idx horizon none Temp Wind + + 1: 149 1 77.88 -5.3063 -5.201 + 2: 150 1 77.88 -1.4435 -4.192 + 3: 149 2 77.88 -3.6824 -7.202 + 4: 150 2 77.88 -0.2568 -3.220 + 5: 149 3 77.88 -6.5216 -2.204 + 6: 150 3 77.88 -1.2125 -3.463 + +# forecast_output_arima_numeric_no_lags + + Code + (out <- code) + Message + Note: Feature names extracted from the model contains NA. + Consistency checks between model and data is therefore disabled. + + Success with message: + max_n_coalitions is NULL or larger than or 2^n_features = 8, + and is therefore set to 2^n_features = 8. + + * Model class: + * Approach: independence + * Adaptive estimation: FALSE + * Number of feature-wise Shapley values: 3 + * Number of observations to explain: 2 + + -- Main computation started -- + + i Using 8 of 8 coalitions. + Output + explain_idx horizon none Wind.F1 Wind.F2 Wind.F3 + + 1: 149 1 77.88 -10.507 NA NA + 2: 150 1 77.88 -5.635 NA NA + 3: 149 2 77.88 -4.696 -6.189 NA + 4: 150 2 77.88 -2.071 -1.405 NA + 5: 149 3 77.88 -3.133 -3.133 -2.46 + 6: 150 3 77.88 -1.383 -1.383 -1.91 + diff --git a/tests/testthat/_snaps/forecast-output/forecast_output_ar_numeric.rds b/tests/testthat/_snaps/forecast-output/forecast_output_ar_numeric.rds new file mode 100644 index 000000000..acf02e047 Binary files /dev/null and b/tests/testthat/_snaps/forecast-output/forecast_output_ar_numeric.rds differ diff --git a/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric.rds b/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric.rds new file mode 100644 index 000000000..a54430ddf Binary files /dev/null and b/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric.rds differ diff --git a/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_adaptive.rds b/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_adaptive.rds new file mode 100644 index 000000000..bfb36dcf3 Binary files /dev/null and b/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_adaptive.rds differ diff --git a/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_adaptive_groups.rds b/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_adaptive_groups.rds new file mode 100644 index 000000000..08c6e8c35 Binary files /dev/null and b/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_adaptive_groups.rds differ diff --git a/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_no_lags.rds b/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_no_lags.rds new file mode 100644 index 000000000..dcf472133 Binary files /dev/null and b/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_no_lags.rds differ diff --git a/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_no_xreg.rds b/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_no_xreg.rds new file mode 100644 index 000000000..24546311b Binary files /dev/null and b/tests/testthat/_snaps/forecast-output/forecast_output_arima_numeric_no_xreg.rds differ diff --git a/tests/testthat/_snaps/forecast-output/forecast_output_forecast_ARIMA_group_numeric.rds b/tests/testthat/_snaps/forecast-output/forecast_output_forecast_ARIMA_group_numeric.rds new file mode 100644 index 000000000..8ed51d8c1 Binary files /dev/null and b/tests/testthat/_snaps/forecast-output/forecast_output_forecast_ARIMA_group_numeric.rds differ diff --git a/tests/testthat/_snaps/forecast-setup.md b/tests/testthat/_snaps/forecast-setup.md new file mode 100644 
index 000000000..f78fa6afa --- /dev/null +++ b/tests/testthat/_snaps/forecast-setup.md @@ -0,0 +1,345 @@ +# error with custom model without providing predict_model + + Code + model_custom_arima_temp <- model_arima_temp + class(model_custom_arima_temp) <- "whatever" + explain_forecast(testing = TRUE, model = model_custom_arima_temp, y = data_arima[ + 1:150, "Temp"], xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149: + 150, explain_y_lags = 2, explain_xreg_lags = 2, horizon = 3, approach = "independence", + prediction_zero = p0_ar) + Message + Note: You passed a model to explain() which is not natively supported, and did not supply a 'get_model_specs' function to explain(). + Consistency checks between model and data are therefore disabled. + + Success with message: + max_n_coalitions is NULL or larger than or equal to 2^n_groups = 16, + and is therefore set to 2^n_groups = 16. + + Condition + Error in `get_predict_model()`: + ! You passed a model to explain() which is not natively supported, and did not supply the 'predict_model' function to explain(). + See ?shapr::explain or the vignette for more information on how to run shapr with custom models. + +# erroneous input: `x_train/x_explain` + + Code + y_wrong_format <- data_arima[, c("Temp", "Wind")] + explain_forecast(testing = TRUE, model = model_arima_temp, y = y_wrong_format, + xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, + explain_y_lags = 2, explain_xreg_lags = 2, horizon = 3, approach = "independence", + prediction_zero = p0_ar) + Condition + Error in `get_data_forecast()`: + ! `y` has 2 columns (Temp,Wind). + `explain_y_lags` has length 1. + These two should match. + +--- + + Code + xreg_wrong_format <- data_arima[, c("Temp", "Wind")] + explain_forecast(testing = TRUE, model = model_arima_temp, y = data_arima[1:150, + "Temp"], xreg = xreg_wrong_format, train_idx = 2:148, explain_idx = 149:150, + explain_y_lags = 2, explain_xreg_lags = 2, horizon = 3, approach = "independence", + prediction_zero = p0_ar) + Condition + Error in `get_data_forecast()`: + ! `xreg` has 2 columns (Temp,Wind). + `explain_xreg_lags` has length 1. + These two should match. + +--- + + Code + xreg_no_column_names <- data_arima[, "Wind"] + names(xreg_no_column_names) <- NULL + explain_forecast(testing = TRUE, model = model_arima_temp, y = data_arima[1:150, + "Temp"], xreg = xreg_no_column_names, train_idx = 2:148, explain_idx = 149:150, + explain_y_lags = 2, explain_xreg_lags = 2, horizon = 3, approach = "independence", + prediction_zero = p0_ar) + Condition + Error in `get_data_forecast()`: + ! `xreg` is missing column names. + +# erroneous input: `model` + + Code + explain_forecast(testing = TRUE, y = data_arima[1:150, "Temp"], xreg = data_arima[ + , "Wind"], train_idx = 2:148, explain_idx = 149:150, explain_y_lags = 2, + explain_xreg_lags = 2, horizon = 3, approach = "independence", prediction_zero = p0_ar) + Condition + Error in `explain_forecast()`: + ! argument "model" is missing, with no default + +# erroneous input: `prediction_zero` + + Code + p0_wrong_length <- p0_ar[1:2] + explain_forecast(testing = TRUE, model = model_arima_temp, y = data_arima[1:150, + "Temp"], xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, + explain_y_lags = 2, explain_xreg_lags = 2, horizon = 3, approach = "independence", + prediction_zero = p0_wrong_length) + Condition + Error in `get_parameters()`: + ! `prediction_zero` (77.8823529411765, 77.8823529411765) must be numeric and match the output size of the model (3). 
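The `prediction_zero` check above requires one baseline value per forecast horizon. A minimal sketch of a valid construction, assuming the airquality-based fixtures defined in helper-ar-arima.R:

p0_ar <- rep(mean(data_arima$Temp), 3)  # one baseline value per step of horizon = 3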
+ +# erroneous input: `max_n_coalitions` + + Code + horizon <- 3 + explain_y_lags <- 2 + explain_xreg_lags <- 2 + n_coalitions <- horizon + explain_y_lags + explain_xreg_lags - 1 + explain_forecast(testing = TRUE, model = model_arima_temp, y = data_arima[1:150, + "Temp"], xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, + explain_y_lags = explain_y_lags, explain_xreg_lags = explain_xreg_lags, + horizon = horizon, approach = "independence", prediction_zero = p0_ar, + max_n_coalitions = n_coalitions, group_lags = FALSE) + Message + Note: Feature names extracted from the model contain NA. + Consistency checks between model and data are therefore disabled. + + Success with message: + max_n_coalitions is smaller than max(10, n_features + 1 = 8), which will result in unreliable results. + It is therefore set to 10. + + * Model class: + * Approach: independence + * Adaptive estimation: TRUE + * Number of feature-wise Shapley values: 7 + * Number of observations to explain: 2 + + -- Adaptive computation started -- + + -- Iteration 1 ----------------------------------------------------------------- + i Using 8 of 128 coalitions, 8 new. + Output + explain_idx horizon none Temp.1 Temp.2 Wind.1 Wind.2 Wind.F1 Wind.F2 + + 1: 149 1 77.88 -6.1669 -3.2230 0.5692 0.5692 -2.2557 NA + 2: 150 1 77.88 -1.8556 -2.2854 0.5822 0.5822 -2.6587 NA + 3: 149 2 77.88 -3.7482 -0.8043 -0.6402 -0.6402 -1.1390 -3.9124 + 4: 150 2 77.88 -0.3161 -0.7458 -0.1876 -0.1876 -1.1651 -0.8742 + 5: 149 3 77.88 -1.7783 -1.7783 -1.7783 -1.1423 -1.7783 0.0000 + 6: 150 3 77.88 -0.4531 -0.4531 -0.4531 -1.1683 -0.4531 0.0000 + Wind.F3 + + 1: NA + 2: NA + 3: NA + 4: NA + 5: -0.4697 + 6: -1.6952 + +--- + + Code + horizon <- 3 + explain_y_lags <- 2 + explain_xreg_lags <- 2 + n_coalitions <- 1 + 1 + explain_forecast(testing = TRUE, model = model_arima_temp, y = data_arima[1:150, + "Temp"], xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, + explain_y_lags = explain_y_lags, explain_xreg_lags = explain_xreg_lags, + horizon = horizon, approach = "independence", prediction_zero = p0_ar, + max_n_coalitions = n_coalitions, group_lags = TRUE) + Message + Note: Feature names extracted from the model contain NA. + Consistency checks between model and data are therefore disabled. + + Success with message: + max_n_coalitions is smaller than max(10, n_groups + 1 = 5), which will result in unreliable results. + It is therefore set to 10. + + * Model class: + * Approach: independence + * Adaptive estimation: FALSE + * Number of group-wise Shapley values: 4 + * Number of observations to explain: 2 + + -- Main computation started -- + + i Using 5 of 16 coalitions. + Output + explain_idx horizon none Temp Wind + + 1: 149 1 77.88 -8.252 -2.2557 + 2: 150 1 77.88 -2.977 -2.6587 + 3: 149 2 77.88 -8.252 -2.6320 + 4: 150 2 77.88 -2.977 -0.4990 + 5: 149 3 77.88 -8.256 -0.4697 + 6: 150 3 77.88 -2.981 -1.6952 + +# erroneous input: `train_idx` + + Code + train_idx_too_short <- 2 + explain_forecast(testing = TRUE, model = model_arima_temp, y = data_arima[1:150, + "Temp"], xreg = data_arima[, "Wind"], train_idx = train_idx_too_short, + explain_idx = 149:150, explain_y_lags = 2, explain_xreg_lags = 2, horizon = 3, + approach = "independence", prediction_zero = p0_ar) + Condition + Error in `get_parameters()`: + ! `train_idx` must be a vector of positive finite integers and have length > 1. 
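The index checks above reduce to: integer vectors, length > 1 for `train_idx`, and all values inside the lagged sample. A sketch with the values these tests use (lags of 2 on 150 observations, so the lagged data covers indices 2..150):

train_idx <- 2:148      # integer, length > 1, inside the lagged range
explain_idx <- 149:150  # the two time points to explain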
+ +--- + + Code + train_idx_not_integer <- c(3:5) + 0.1 + explain_forecast(testing = TRUE, model = model_arima_temp, y = data_arima[1:150, + "Temp"], xreg = data_arima[, "Wind"], train_idx = train_idx_not_integer, + explain_idx = 149:150, explain_y_lags = 2, explain_xreg_lags = 2, horizon = 3, + approach = "independence", prediction_zero = p0_ar) + Condition + Error in `get_parameters()`: + ! `train_idx` must be a vector of positive finite integers and have length > 1. + +--- + + Code + train_idx_out_of_range <- 1:5 + explain_forecast(testing = TRUE, model = model_arima_temp, y = data_arima[1:150, + "Temp"], xreg = data_arima[, "Wind"], train_idx = train_idx_out_of_range, + explain_idx = 149:150, explain_y_lags = 2, explain_xreg_lags = 2, horizon = 3, + approach = "independence", prediction_zero = p0_ar) + Condition + Error in `get_data_forecast()`: + ! The train (`train_idx`) and explain (`explain_idx`) indices must fit in the lagged data. + The lagged data begins at index 2 and ends at index 150. + +# erroneous input: `explain_idx` + + Code + explain_idx_not_integer <- c(3:5) + 0.1 + explain_forecast(testing = TRUE, model = model_arima_temp, y = data_arima[1:150, + "Temp"], xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = explain_idx_not_integer, + explain_y_lags = 2, explain_xreg_lags = 2, horizon = 3, approach = "independence", + prediction_zero = p0_ar) + Condition + Error in `get_parameters()`: + ! `explain_idx` must be a vector of positive finite integers. + +--- + + Code + explain_idx_out_of_range <- 1:5 + explain_forecast(testing = TRUE, model = model_arima_temp, y = data_arima[1:150, + "Temp"], xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = explain_idx_out_of_range, + explain_y_lags = 2, explain_xreg_lags = 2, horizon = 3, approach = "independence", + prediction_zero = p0_ar) + Condition + Error in `get_data_forecast()`: + ! The train (`train_idx`) and explain (`explain_idx`) indices must fit in the lagged data. + The lagged data begins at index 2 and ends at index 150. + +# erroneous input: `explain_y_lags` + + Code + explain_y_lags_negative <- -1 + explain_forecast(testing = TRUE, model = model_arima_temp, y = data_arima[1:150, + "Temp"], xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, + explain_y_lags = explain_y_lags_negative, explain_xreg_lags = 2, horizon = 3, + approach = "independence", prediction_zero = p0_ar) + Condition + Error in `get_parameters()`: + ! `explain_y_lags` must be a vector of positive finite integers. + +--- + + Code + explain_y_lags_not_integer <- 2.1 + explain_forecast(testing = TRUE, model = model_arima_temp, y = data_arima[1:150, + "Temp"], xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, + explain_y_lags = explain_y_lags_not_integer, explain_xreg_lags = 2, horizon = 3, + approach = "independence", prediction_zero = p0_ar) + Condition + Error in `get_parameters()`: + ! `explain_y_lags` must be a vector of positive finite integers. + +--- + + Code + explain_y_lags_more_than_one <- c(1, 2) + explain_forecast(testing = TRUE, model = model_arima_temp, y = data_arima[1:150, + "Temp"], xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, + explain_y_lags = explain_y_lags_more_than_one, explain_xreg_lags = 2, horizon = 3, + approach = "independence", prediction_zero = p0_ar) + Condition + Error in `get_data_forecast()`: + ! `y` has 1 column (Temp). + `explain_y_lags` has length 2. + These two should match. 
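A sketch of the matching rule enforced here, with hypothetical inputs: a multi-column `y` needs one lag count per column, e.g.

y_multi <- data_arima[, c("Temp", "Wind")]  # two columns (hypothetical multivariate case)
explain_y_lags_multi <- c(2, 2)             # one lag count per column of y_multi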
+ +--- + + Code + explain_y_lags_zero <- 0 + explain_forecast(testing = TRUE, model = model_arima_temp_noxreg, y = data_arima[ + 1:150, "Temp"], train_idx = 2:148, explain_idx = 149:150, explain_y_lags = 0, + horizon = 3, approach = "independence", prediction_zero = p0_ar) + Condition + Error in `get_data_forecast()`: + ! `explain_y_lags=0` is not allowed for models without exogenous variables. + +# erroneous input: `explain_x_lags` + + Code + explain_xreg_lags_negative <- -2 + explain_forecast(testing = TRUE, model = model_arima_temp, y = data_arima[1:150, + "Temp"], xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, + explain_y_lags = 2, explain_xreg_lags = explain_xreg_lags_negative, horizon = 3, + approach = "independence", prediction_zero = p0_ar) + Condition + Error in `get_parameters()`: + ! `explain_xreg_lags` must be a vector of positive finite integers. + +--- + + Code + explain_xreg_lags_not_integer <- 2.1 + explain_forecast(testing = TRUE, model = model_arima_temp, y = data_arima[1:150, + "Temp"], xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, + explain_y_lags = 2, explain_xreg_lags = explain_xreg_lags_not_integer, horizon = 3, + approach = "independence", prediction_zero = p0_ar) + Condition + Error in `get_parameters()`: + ! `explain_xreg_lags` must be a vector of positive finite integers. + +--- + + Code + explain_x_lags_wrong_length <- c(1, 2) + explain_forecast(testing = TRUE, model = model_arima_temp, y = data_arima[1:150, + "Temp"], xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, + explain_y_lags = 2, explain_xreg_lags = explain_x_lags_wrong_length, horizon = 3, + approach = "independence", prediction_zero = p0_ar) + Condition + Error in `get_data_forecast()`: + ! `xreg` has 1 column (Wind). + `explain_xreg_lags` has length 2. + These two should match. + +# erroneous input: `horizon` + + Code + horizon_negative <- -2 + explain_forecast(testing = TRUE, model = model_arima_temp, y = data_arima[1:150, + "Temp"], xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, + explain_y_lags = 2, explain_xreg_lags = 2, horizon = horizon_negative, + approach = "independence", prediction_zero = p0_ar) + Condition + Error in `get_parameters()`: + ! `horizon` must be a vector (or scalar) of positive integers. + +--- + + Code + horizon_not_integer <- 2.1 + explain_forecast(testing = TRUE, model = model_arima_temp, y = data_arima[1:150, + "Temp"], xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, + explain_y_lags = 2, explain_xreg_lags = 2, horizon = horizon_not_integer, + approach = "independence", prediction_zero = p0_ar) + Condition + Error in `get_parameters()`: + ! `horizon` must be a vector (or scalar) of positive integers. 
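For reference, a call that passes all of the validation exercised above, mirroring the fixtures used throughout these snapshots (model_arima_temp and p0_ar as defined in helper-ar-arima.R):

explain_forecast(
  model = model_arima_temp,
  y = data_arima[1:150, "Temp"],     # univariate y: explain_y_lags has length 1
  xreg = data_arima[, "Wind"],       # one xreg column: explain_xreg_lags has length 1
  train_idx = 2:148,                 # integer, length > 1, inside the lagged range
  explain_idx = 149:150,
  explain_y_lags = 2,
  explain_xreg_lags = 2,
  horizon = 3,                       # positive integer
  approach = "independence",
  prediction_zero = p0_ar            # length matches horizon
)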
+ diff --git a/tests/testthat/_snaps/regular-output.md b/tests/testthat/_snaps/regular-output.md index 03aa6549d..151bd379e 100644 --- a/tests/testthat/_snaps/regular-output.md +++ b/tests/testthat/_snaps/regular-output.md @@ -71,7 +71,7 @@ 1: 1 42.44 -13.252 15.541 12.826 -5.77179 3.259 2: 2 42.44 2.758 -3.325 -7.992 -7.12800 1.808 - 3: 3 42.44 6.805 -22.126 3.730 -0.09235 -5.885 + 3: 3 42.44 6.805 -22.126 3.730 -0.09234 -5.885 # output_lm_numeric_empirical_n_coalitions @@ -418,7 +418,7 @@ explain_id none S1 S2 S3 S4 1: 1 4.895 -0.5261 0.7831 -0.21023 -0.3885 - 2: 2 4.895 -0.6310 1.6288 -0.04498 -2.9298 + 2: 2 4.895 -0.6310 1.6288 -0.04498 -2.9297 # output_lm_numeric_comb1 @@ -443,7 +443,7 @@ 1: 1 42.44 -8.987 9.070 15.511 -2.5647 -0.4281 2: 2 42.44 2.916 -4.516 -7.845 -4.1649 -0.2686 - 3: 3 42.44 6.968 -22.988 -1.717 0.6776 -0.5086 + 3: 3 42.44 6.968 -22.988 -1.717 0.6776 -0.5085 # output_lm_numeric_comb2 @@ -492,7 +492,7 @@ explain_id none Solar.R Wind Temp Month Day 1: 1 42.44 -6.887 10.715 12.199 -3.670 0.24393 - 2: 2 42.44 2.603 -2.648 -8.464 -5.405 0.03414 + 2: 2 42.44 2.603 -2.648 -8.464 -5.405 0.03415 3: 3 42.44 5.868 -22.184 3.401 -2.955 -1.69888 # output_lm_mixed_independence @@ -749,7 +749,7 @@ 1: 1 42.44 -13.252 15.541 12.826 -5.77179 3.259 2: 2 42.44 2.758 -3.325 -7.992 -7.12800 1.808 - 3: 3 42.44 6.805 -22.126 3.730 -0.09235 -5.885 + 3: 3 42.44 6.805 -22.126 3.730 -0.09234 -5.885 # output_lm_numeric_independence_keep_samp_for_vS diff --git a/tests/testthat/_snaps/regular-output/output_lm_timeseries_method.rds b/tests/testthat/_snaps/regular-output/output_lm_timeseries_method.rds index 716868975..36dce40a2 100644 Binary files a/tests/testthat/_snaps/regular-output/output_lm_timeseries_method.rds and b/tests/testthat/_snaps/regular-output/output_lm_timeseries_method.rds differ diff --git a/tests/testthat/helper-ar-arima.R b/tests/testthat/helper-ar-arima.R index 47944e87b..9ac21641b 100644 --- a/tests/testthat/helper-ar-arima.R +++ b/tests/testthat/helper-ar-arima.R @@ -1,17 +1,18 @@ options(digits = 5) # To avoid round off errors when printing output on different systems +data_arima <- data.table::as.data.table(airquality) +data_arima[, Solar.R := ifelse(is.na(Solar.R), mean(Solar.R, na.rm = TRUE), Solar.R)] +data_arima[, Ozone := ifelse(is.na(Ozone), mean(Ozone, na.rm = TRUE), Ozone)] - -data <- data.table::as.data.table(airquality) - -model_ar_temp <- ar(data$Temp, order = 2) +model_ar_temp <- ar(data_arima$Temp, order = 2) model_ar_temp$n.ahead <- 3 -p0_ar <- rep(mean(data$Temp), 3) +p0_ar <- rep(mean(data_arima$Temp), 3) -model_arima_temp <- arima(data$Temp[1:150], c(2, 1, 0), xreg = data$Wind[1:150]) +model_arima_temp <- arima(data_arima$Temp[1:150], c(2, 1, 0), xreg = data_arima$Wind[1:150]) +model_arima_temp2 <- arima(data_arima$Temp[1:150], c(2, 1, 0), xreg = data_arima[1:150, c("Wind", "Solar.R", "Ozone")]) -model_arima_temp_noxreg <- arima(data$Temp[1:150], c(2, 1, 0)) +model_arima_temp_noxreg <- arima(data_arima$Temp[1:150], c(2, 1, 0)) # When loading this here we avoid the "Registered S3 method overwritten" when calling forecast -model_forecast_ARIMA_temp <- forecast::Arima(data$Temp[1:150], order = c(2, 1, 0), xreg = data$Wind[1:150]) +model_forecast_ARIMA_temp <- forecast::Arima(data_arima$Temp[1:150], order = c(2, 1, 0), xreg = data_arima$Wind[1:150]) diff --git a/tests/testthat/test-asymmetric-causal-output.R b/tests/testthat/test-asymmetric-causal-output.R index 0ae98f763..8c7148239 100644 --- a/tests/testthat/test-asymmetric-causal-output.R +++ 
b/tests/testthat/test-asymmetric-causal-output.R @@ -453,7 +453,7 @@ test_that("output_categorical_asym_causal_mixed_cat", { explain( testing = TRUE, model = model_lm_categorical, - x_explain = x_explain_categorical[1:2], #Temp [1:2] as [1:3] give different sample on GHA-macOS for unknown reason + x_explain = x_explain_categorical[1:2], # Temp: [1:2] as [1:3] gives a different sample on GHA-macOS (unknown reason) x_train = x_train_categorical, approach = "categorical", prediction_zero = p0, @@ -483,7 +483,7 @@ test_that("output_cat_asym_causal_mixed_cat_ad", { confounding = c(TRUE, FALSE, FALSE), n_MC_samples = 5, # Just for speed adaptive = TRUE - ), + ), "output_cat_asym_causal_mixed_cat_ad" ) }) diff --git a/tests/testthat/0test-forecast-output.R b/tests/testthat/test-forecast-output.R similarity index 66% rename from tests/testthat/0test-forecast-output.R rename to tests/testthat/test-forecast-output.R index 6468ee743..876d09615 100644 --- a/tests/testthat/0test-forecast-output.R +++ b/tests/testthat/test-forecast-output.R @@ -3,7 +3,7 @@ test_that("forecast_output_ar_numeric", { explain_forecast( testing = TRUE, model = model_ar_temp, - y = data[, "Temp"], + y = data_arima[, "Temp"], train_idx = 2:151, explain_idx = 152:153, explain_y_lags = 2, @@ -22,8 +22,8 @@ test_that("forecast_output_arima_numeric", { explain_forecast( testing = TRUE, model = model_arima_temp, - y = data[1:150, "Temp"], - xreg = data[, "Wind"], + y = data_arima[1:150, "Temp"], + xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, explain_y_lags = 2, @@ -32,18 +32,65 @@ test_that("forecast_output_arima_numeric", { approach = "empirical", prediction_zero = p0_ar, group_lags = FALSE, - n_batches = 1 + max_n_coalitions = 150, + adaptive = FALSE ), "forecast_output_arima_numeric" ) }) +test_that("forecast_output_arima_numeric_adaptive", { + expect_snapshot_rds( + explain_forecast( + testing = TRUE, + model = model_arima_temp, + y = data_arima[1:150, "Temp"], + xreg = data_arima[, "Wind"], + train_idx = 3:148, + explain_idx = 149:150, + explain_y_lags = 3, + explain_xreg_lags = 3, + horizon = 3, + approach = "empirical", + prediction_zero = p0_ar, + group_lags = FALSE, + max_n_coalitions = 150, + adaptive = TRUE, + adaptive_arguments = list(initial_n_coalitions = 10) + ), + "forecast_output_arima_numeric_adaptive" + ) +}) + +test_that("forecast_output_arima_numeric_adaptive_groups", { + expect_snapshot_rds( + explain_forecast( + testing = TRUE, + model = model_arima_temp2, + y = data_arima[1:150, "Temp"], + xreg = data_arima[, c("Wind", "Solar.R", "Ozone")], + train_idx = 3:148, + explain_idx = 149:150, + explain_y_lags = 3, + explain_xreg_lags = c(3, 3, 3), + horizon = 3, + approach = "empirical", + prediction_zero = p0_ar, + group_lags = TRUE, + max_n_coalitions = 150, + adaptive = TRUE, + adaptive_arguments = list(initial_n_coalitions = 10, convergence_tolerance = 7e-3) + ), + "forecast_output_arima_numeric_adaptive_groups" + ) +}) + test_that("forecast_output_arima_numeric_no_xreg", { expect_snapshot_rds( explain_forecast( testing = TRUE, model = model_arima_temp_noxreg, - y = data[1:150, "Temp"], + y = data_arima[1:150, "Temp"], train_idx = 2:148, explain_idx = 149:150, explain_y_lags = 2, @@ -57,13 +104,14 @@ test_that("forecast_output_arima_numeric_no_xreg", { ) }) +# The old snapshot does not match the results from the master branch; the reason is unclear. 
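The adaptive runs added above start from a small coalition set and grow it until the convergence criterion is met. A sketch of the knobs these tests exercise, with the values taken from the tests themselves (the elided arguments are the same model/data/index arguments as in the non-adaptive calls):

explain_forecast(
  ...,                              # model, y, xreg, indices, lags, horizon as above
  adaptive = TRUE,                  # estimate over iterations rather than in one pass
  adaptive_arguments = list(
    initial_n_coalitions = 10,      # start small and add coalitions as needed
    convergence_tolerance = 7e-3    # only used in the grouped variant above
  )
)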
test_that("forecast_output_forecast_ARIMA_group_numeric", { expect_snapshot_rds( explain_forecast( testing = TRUE, model = model_forecast_ARIMA_temp, - y = data[1:150, "Temp"], - xreg = data[, "Wind"], + y = data_arima[1:150, "Temp"], + xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, explain_y_lags = 2, @@ -78,13 +126,33 @@ test_that("forecast_output_forecast_ARIMA_group_numeric", { ) }) +test_that("forecast_output_arima_numeric_no_lags", { + expect_snapshot_rds( + explain_forecast( + testing = TRUE, + model = model_arima_temp, + y = data_arima[1:150, "Temp"], + xreg = data_arima[, "Wind"], + train_idx = 2:148, + explain_idx = 149:150, + explain_y_lags = 0, + explain_xreg_lags = 0, + horizon = 3, + approach = "independence", + prediction_zero = p0_ar, + group_lags = FALSE, + n_batches = 1 + ), + "forecast_output_arima_numeric_no_lags" + ) +}) test_that("ARIMA gives the same output with different horizons", { h3 <- explain_forecast( testing = TRUE, model = model_arima_temp, - y = data[1:150, "Temp"], - xreg = data[, "Wind"], + y = data_arima[1:150, "Temp"], + xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, explain_y_lags = 2, @@ -94,15 +162,16 @@ test_that("ARIMA gives the same output with different horizons", { prediction_zero = p0_ar[1:3], group_lags = FALSE, n_batches = 1, - max_n_coalitions = 50 + max_n_coalitions = 200, + adaptive = FALSE ) h2 <- explain_forecast( testing = TRUE, model = model_arima_temp, - y = data[1:150, "Temp"], - xreg = data[, "Wind"], + y = data_arima[1:150, "Temp"], + xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, explain_y_lags = 2, @@ -112,14 +181,15 @@ test_that("ARIMA gives the same output with different horizons", { prediction_zero = p0_ar[1:2], group_lags = FALSE, n_batches = 1, - max_n_coalitions = 50 + max_n_coalitions = 100, + adaptive = FALSE ) h1 <- explain_forecast( testing = TRUE, model = model_arima_temp, - y = data[1:150, "Temp"], - xreg = data[, "Wind"], + y = data_arima[1:150, "Temp"], + xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, explain_y_lags = 2, @@ -129,7 +199,8 @@ test_that("ARIMA gives the same output with different horizons", { prediction_zero = p0_ar[1], group_lags = FALSE, n_batches = 1, - max_n_coalitions = 50 + max_n_coalitions = 50, + adaptive = FALSE ) cols_horizon1 <- h2$internal$objects$cols_per_horizon[[1]] @@ -154,8 +225,8 @@ test_that("ARIMA gives the same output with different horizons with grouping", { h3 <- explain_forecast( testing = TRUE, model = model_arima_temp, - y = data[1:150, "Temp"], - xreg = data[, "Wind"], + y = data_arima[1:150, "Temp"], + xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, explain_y_lags = 2, @@ -165,15 +236,16 @@ test_that("ARIMA gives the same output with different horizons with grouping", { prediction_zero = p0_ar[1:3], group_lags = TRUE, n_batches = 1, - max_n_coalitions = 50 + max_n_coalitions = 50, + adaptive = FALSE ) h2 <- explain_forecast( testing = TRUE, model = model_arima_temp, - y = data[1:150, "Temp"], - xreg = data[, "Wind"], + y = data_arima[1:150, "Temp"], + xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, explain_y_lags = 2, @@ -183,14 +255,15 @@ test_that("ARIMA gives the same output with different horizons with grouping", { prediction_zero = p0_ar[1:2], group_lags = TRUE, n_batches = 1, - max_n_coalitions = 50 + max_n_coalitions = 50, + adaptive = FALSE ) h1 <- explain_forecast( testing = TRUE, model = model_arima_temp, - y = data[1:150, 
"Temp"], - xreg = data[, "Wind"], + y = data_arima[1:150, "Temp"], + xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, explain_y_lags = 2, @@ -200,7 +273,8 @@ test_that("ARIMA gives the same output with different horizons with grouping", { prediction_zero = p0_ar[1], group_lags = TRUE, n_batches = 1, - max_n_coalitions = 50 + max_n_coalitions = 50, + adaptive = FALSE ) expect_equal( @@ -218,25 +292,3 @@ test_that("ARIMA gives the same output with different horizons with grouping", { h2$shapley_values[horizon == 2] ) }) - -test_that("forecast_output_arima_numeric_no_lags", { - # TODO: Need to check out this output. It gives lots of warnings, which indicates something might be wrong. - expect_snapshot_rds( - explain_forecast( - testing = TRUE, - model = model_arima_temp, - y = data[1:150, "Temp"], - xreg = data[, "Wind"], - train_idx = 2:148, - explain_idx = 149:150, - explain_y_lags = 0, - explain_xreg_lags = 0, - horizon = 3, - approach = "independence", - prediction_zero = p0_ar, - group_lags = FALSE, - n_batches = 1 - ), - "forecast_output_arima_numeric_no_lags" - ) -}) diff --git a/tests/testthat/0test-forecast-setup.R b/tests/testthat/test-forecast-setup.R similarity index 80% rename from tests/testthat/0test-forecast-setup.R rename to tests/testthat/test-forecast-setup.R index 552c614a2..a20c55f2c 100644 --- a/tests/testthat/0test-forecast-setup.R +++ b/tests/testthat/test-forecast-setup.R @@ -11,16 +11,15 @@ test_that("error with custom model without providing predict_model", { explain_forecast( testing = TRUE, model = model_custom_arima_temp, - y = data[1:150, "Temp"], - xreg = data[, "Wind"], + y = data_arima[1:150, "Temp"], + xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, explain_y_lags = 2, explain_xreg_lags = 2, horizon = 3, approach = "independence", - prediction_zero = p0_ar, - n_batches = 1 + prediction_zero = p0_ar ) }, error = TRUE @@ -34,21 +33,20 @@ test_that("erroneous input: `x_train/x_explain`", { expect_snapshot( { # not vector or one-column data.table/matrix - y_wrong_format <- data[, c("Temp", "Wind")] + y_wrong_format <- data_arima[, c("Temp", "Wind")] explain_forecast( testing = TRUE, model = model_arima_temp, y = y_wrong_format, - xreg = data[, "Wind"], + xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, explain_y_lags = 2, explain_xreg_lags = 2, horizon = 3, approach = "independence", - prediction_zero = p0_ar, - n_batches = 1 + prediction_zero = p0_ar ) }, error = TRUE @@ -57,12 +55,12 @@ test_that("erroneous input: `x_train/x_explain`", { expect_snapshot( { # not correct dimension - xreg_wrong_format <- data[, c("Temp", "Wind")] + xreg_wrong_format <- data_arima[, c("Temp", "Wind")] explain_forecast( testing = TRUE, model = model_arima_temp, - y = data[1:150, "Temp"], + y = data_arima[1:150, "Temp"], xreg = xreg_wrong_format, train_idx = 2:148, explain_idx = 149:150, @@ -70,8 +68,7 @@ test_that("erroneous input: `x_train/x_explain`", { explain_xreg_lags = 2, horizon = 3, approach = "independence", - prediction_zero = p0_ar, - n_batches = 1 + prediction_zero = p0_ar ) }, error = TRUE @@ -80,13 +77,13 @@ test_that("erroneous input: `x_train/x_explain`", { expect_snapshot( { # missing column names x_train - xreg_no_column_names <- data[, "Wind"] + xreg_no_column_names <- data_arima[, "Wind"] names(xreg_no_column_names) <- NULL explain_forecast( testing = TRUE, model = model_arima_temp, - y = data[1:150, "Temp"], + y = data_arima[1:150, "Temp"], xreg = xreg_no_column_names, train_idx = 2:148, 
explain_idx = 149:150, @@ -94,8 +91,7 @@ test_that("erroneous input: `x_train/x_explain`", { explain_xreg_lags = 2, horizon = 3, approach = "independence", - prediction_zero = p0_ar, - n_batches = 1 + prediction_zero = p0_ar ) }, error = TRUE @@ -110,16 +106,15 @@ test_that("erroneous input: `model`", { # no model passed explain_forecast( testing = TRUE, - y = data[1:150, "Temp"], - xreg = data[, "Wind"], + y = data_arima[1:150, "Temp"], + xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, explain_y_lags = 2, explain_xreg_lags = 2, horizon = 3, approach = "independence", - prediction_zero = p0_ar, - n_batches = 1 + prediction_zero = p0_ar ) }, error = TRUE @@ -138,16 +133,15 @@ test_that("erroneous input: `prediction_zero`", { explain_forecast( testing = TRUE, model = model_arima_temp, - y = data[1:150, "Temp"], - xreg = data[, "Wind"], + y = data_arima[1:150, "Temp"], + xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, explain_y_lags = 2, explain_xreg_lags = 2, horizon = 3, approach = "independence", - prediction_zero = p0_wrong_length, - n_batches = 1 + prediction_zero = p0_wrong_length ) }, error = TRUE @@ -168,8 +162,8 @@ test_that("erroneous input: `max_n_coalitions`", { explain_forecast( testing = TRUE, model = model_arima_temp, - y = data[1:150, "Temp"], - xreg = data[, "Wind"], + y = data_arima[1:150, "Temp"], + xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, explain_y_lags = explain_y_lags, @@ -177,7 +171,6 @@ test_that("erroneous input: `max_n_coalitions`", { horizon = horizon, approach = "independence", prediction_zero = p0_ar, - n_batches = 1, max_n_coalitions = n_coalitions, group_lags = FALSE ) @@ -195,8 +188,8 @@ test_that("erroneous input: `max_n_coalitions`", { explain_forecast( testing = TRUE, model = model_arima_temp, - y = data[1:150, "Temp"], - xreg = data[, "Wind"], + y = data_arima[1:150, "Temp"], + xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, explain_y_lags = explain_y_lags, @@ -204,7 +197,6 @@ test_that("erroneous input: `max_n_coalitions`", { horizon = horizon, approach = "independence", prediction_zero = p0_ar, - n_batches = 1, max_n_coalitions = n_coalitions, group_lags = TRUE ) @@ -223,16 +215,15 @@ test_that("erroneous input: `train_idx`", { explain_forecast( testing = TRUE, model = model_arima_temp, - y = data[1:150, "Temp"], - xreg = data[, "Wind"], + y = data_arima[1:150, "Temp"], + xreg = data_arima[, "Wind"], train_idx = train_idx_too_short, explain_idx = 149:150, explain_y_lags = 2, explain_xreg_lags = 2, horizon = 3, approach = "independence", - prediction_zero = p0_ar, - n_batches = 1 + prediction_zero = p0_ar ) }, error = TRUE @@ -247,16 +238,15 @@ test_that("erroneous input: `train_idx`", { explain_forecast( testing = TRUE, model = model_arima_temp, - y = data[1:150, "Temp"], - xreg = data[, "Wind"], + y = data_arima[1:150, "Temp"], + xreg = data_arima[, "Wind"], train_idx = train_idx_not_integer, explain_idx = 149:150, explain_y_lags = 2, explain_xreg_lags = 2, horizon = 3, approach = "independence", - prediction_zero = p0_ar, - n_batches = 1 + prediction_zero = p0_ar ) }, error = TRUE @@ -270,16 +260,15 @@ test_that("erroneous input: `train_idx`", { explain_forecast( testing = TRUE, model = model_arima_temp, - y = data[1:150, "Temp"], - xreg = data[, "Wind"], + y = data_arima[1:150, "Temp"], + xreg = data_arima[, "Wind"], train_idx = train_idx_out_of_range, explain_idx = 149:150, explain_y_lags = 2, explain_xreg_lags = 2, horizon = 3, approach = 
"independence", - prediction_zero = p0_ar, - n_batches = 1 + prediction_zero = p0_ar ) }, error = TRUE @@ -297,16 +286,15 @@ test_that("erroneous input: `explain_idx`", { explain_forecast( testing = TRUE, model = model_arima_temp, - y = data[1:150, "Temp"], - xreg = data[, "Wind"], + y = data_arima[1:150, "Temp"], + xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = explain_idx_not_integer, explain_y_lags = 2, explain_xreg_lags = 2, horizon = 3, approach = "independence", - prediction_zero = p0_ar, - n_batches = 1 + prediction_zero = p0_ar ) }, error = TRUE @@ -320,16 +308,15 @@ test_that("erroneous input: `explain_idx`", { explain_forecast( testing = TRUE, model = model_arima_temp, - y = data[1:150, "Temp"], - xreg = data[, "Wind"], + y = data_arima[1:150, "Temp"], + xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = explain_idx_out_of_range, explain_y_lags = 2, explain_xreg_lags = 2, horizon = 3, approach = "independence", - prediction_zero = p0_ar, - n_batches = 1 + prediction_zero = p0_ar ) }, error = TRUE @@ -347,16 +334,15 @@ test_that("erroneous input: `explain_y_lags`", { explain_forecast( testing = TRUE, model = model_arima_temp, - y = data[1:150, "Temp"], - xreg = data[, "Wind"], + y = data_arima[1:150, "Temp"], + xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, explain_y_lags = explain_y_lags_negative, explain_xreg_lags = 2, horizon = 3, approach = "independence", - prediction_zero = p0_ar, - n_batches = 1 + prediction_zero = p0_ar ) }, error = TRUE @@ -370,16 +356,15 @@ test_that("erroneous input: `explain_y_lags`", { explain_forecast( testing = TRUE, model = model_arima_temp, - y = data[1:150, "Temp"], - xreg = data[, "Wind"], + y = data_arima[1:150, "Temp"], + xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, explain_y_lags = explain_y_lags_not_integer, explain_xreg_lags = 2, horizon = 3, approach = "independence", - prediction_zero = p0_ar, - n_batches = 1 + prediction_zero = p0_ar ) }, error = TRUE @@ -393,16 +378,15 @@ test_that("erroneous input: `explain_y_lags`", { explain_forecast( testing = TRUE, model = model_arima_temp, - y = data[1:150, "Temp"], - xreg = data[, "Wind"], + y = data_arima[1:150, "Temp"], + xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, explain_y_lags = explain_y_lags_more_than_one, explain_xreg_lags = 2, horizon = 3, approach = "independence", - prediction_zero = p0_ar, - n_batches = 1 + prediction_zero = p0_ar ) }, error = TRUE @@ -417,14 +401,13 @@ test_that("erroneous input: `explain_y_lags`", { explain_forecast( testing = TRUE, model = model_arima_temp_noxreg, - y = data[1:150, "Temp"], + y = data_arima[1:150, "Temp"], train_idx = 2:148, explain_idx = 149:150, explain_y_lags = 0, horizon = 3, approach = "independence", - prediction_zero = p0_ar, - n_batches = 1 + prediction_zero = p0_ar ) }, error = TRUE @@ -443,16 +426,15 @@ test_that("erroneous input: `explain_x_lags`", { explain_forecast( testing = TRUE, model = model_arima_temp, - y = data[1:150, "Temp"], - xreg = data[, "Wind"], + y = data_arima[1:150, "Temp"], + xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, explain_y_lags = 2, explain_xreg_lags = explain_xreg_lags_negative, horizon = 3, approach = "independence", - prediction_zero = p0_ar, - n_batches = 1 + prediction_zero = p0_ar ) }, error = TRUE @@ -466,16 +448,15 @@ test_that("erroneous input: `explain_x_lags`", { explain_forecast( testing = TRUE, model = model_arima_temp, - y = data[1:150, "Temp"], - xreg = data[, "Wind"], + 
y = data_arima[1:150, "Temp"], + xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, explain_y_lags = 2, explain_xreg_lags = explain_xreg_lags_not_integer, horizon = 3, approach = "independence", - prediction_zero = p0_ar, - n_batches = 1 + prediction_zero = p0_ar ) }, error = TRUE @@ -489,16 +470,15 @@ test_that("erroneous input: `explain_x_lags`", { explain_forecast( testing = TRUE, model = model_arima_temp, - y = data[1:150, "Temp"], - xreg = data[, "Wind"], + y = data_arima[1:150, "Temp"], + xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, explain_y_lags = 2, explain_xreg_lags = explain_x_lags_wrong_length, horizon = 3, approach = "independence", - prediction_zero = p0_ar, - n_batches = 1 + prediction_zero = p0_ar ) }, error = TRUE @@ -516,16 +496,15 @@ test_that("erroneous input: `horizon`", { explain_forecast( testing = TRUE, model = model_arima_temp, - y = data[1:150, "Temp"], - xreg = data[, "Wind"], + y = data_arima[1:150, "Temp"], + xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, explain_y_lags = 2, explain_xreg_lags = 2, horizon = horizon_negative, approach = "independence", - prediction_zero = p0_ar, - n_batches = 1 + prediction_zero = p0_ar ) }, error = TRUE @@ -539,16 +518,15 @@ test_that("erroneous input: `horizon`", { explain_forecast( testing = TRUE, model = model_arima_temp, - y = data[1:150, "Temp"], - xreg = data[, "Wind"], + y = data_arima[1:150, "Temp"], + xreg = data_arima[, "Wind"], train_idx = 2:148, explain_idx = 149:150, explain_y_lags = 2, explain_xreg_lags = 2, horizon = horizon_not_integer, approach = "independence", - prediction_zero = p0_ar, - n_batches = 1 + prediction_zero = p0_ar ) }, error = TRUE
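As a closing note on the horizon-consistency tests earlier in this diff: they assert that the Shapley values for a given horizon do not depend on the largest horizon requested. A sketch of the grouped check, using the h2/h3 objects from those tests (the exact subsetting follows the expect_equal calls visible above):

expect_equal(
  h3$shapley_values[horizon == 2],  # horizon-2 rows from a horizon-3 run ...
  h2$shapley_values[horizon == 2]   # ... must match a dedicated horizon-2 run
)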