add tests and fixes part 1

SLINGhub · Jan 14, 2025 · 0661814 · 0661814
1 parent 59082b1
commit 0661814
Show file tree

Hide file tree

Showing 64 changed files with 15,583 additions and 251,303 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -81,7 +81,7 @@ Collate:
     'helper.R'
     'lipidomics.R'
     'metadata-access.R'
-    'metadata-read.R'
+    'metadata-import.R'
     'midar-package.R'
     'plots-calibcurves.R'
     'plots-eda.R'

diff --git a/NAMESPACE b/NAMESPACE
@@ -21,10 +21,18 @@ export(data_load_example)
 export(exclude_analyses)
 export(exclude_features)
 export(filter_features_qc)
+export(get_analyis_end)
+export(get_analyis_start)
+export(get_analysis_breaks)
+export(get_analysis_count)
+export(get_analysis_duration)
 export(get_analyticaldata)
 export(get_batch_boundaries)
 export(get_calibration_results)
+export(get_feature_count)
+export(get_featurelist)
 export(get_response_curve_stats)
+export(get_runtime_median)
 export(import_data_csv)
 export(import_data_masshunter)
 export(import_data_mrmkit)

diff --git a/R/calc-calibrations.R b/R/calc-calibrations.R
@@ -14,7 +14,7 @@
 #' @param fit_method A character string indicating the default regression fit method to use for the calibration curve. Must be one of `"linear"` or `"quadratic"`. This method will be used if no specific fit method is defined for a feature in the metadata.
 #' @param fit_weighting A character string indicating the default weighting method for the regression points in the calibration curve. Must be one of `"none"`, `"1/x"`, or `"1/x^2"`. If no specific weighting method is defined for a feature in the metadata, this method will be used.
 #' @param error_failed_calibration If `TRUE`, an error will be raised if the calibration curve fitting failed for any feature. If `FALSE`, failed calibration curve fitting will be ignored, and resulting feature concentration will be `NA`.
-#' @param error_missing_annotation Raise error if any of the following information is missing: calibration curve data, ISTD mix volume and sample amounts for any feature.
+#' @param fail_missing_annotation Raise error if any of the following information is missing: calibration curve data, ISTD mix volume and sample amounts for any feature.
 #'   If `FALSE`, missing annotations will be ignored, and resulting feature concentration will be `NA`
 #' @return A modified `MidarExperiment` object with updated concentration values.
 #'
@@ -26,7 +26,7 @@ quantify_by_calibration <- function(data = NULL,
                                      fit_method = c("linear", "quadratic"),
                                      fit_weighting = c("none", "1/x", "1/x^2"),
                                      error_failed_calibration = TRUE,
-                                     error_missing_annotation = TRUE
+                                     fail_missing_annotation = TRUE
                                      ) {
 
   check_data(data)
@@ -39,23 +39,23 @@ quantify_by_calibration <- function(data = NULL,
                                    overwrite_metadata = overwrite_metadata,
                                    fit_method = fit_method,
                                    fit_weighting = fit_weighting,
-                                   error_missing_annotation = error_missing_annotation)
+                                   fail_missing_annotation = fail_missing_annotation)
   d_calib <- data@metrics_calibration
 
-  features_no_calib <- setdiff(d_calib$feature_id, get_featurelist(data, isistd = FALSE, isquantifier = TRUE))
+  features_no_calib <- setdiff(d_calib$feature_id, get_featurelist(data, is_istd = FALSE, is_quantifier = TRUE))
   # Check if calibration curve data is missing for any feature
   if (length(features_no_calib) > 0){
-    if (!error_missing_annotation) {
+    if (!fail_missing_annotation) {
       cli::cli_alert_warning(cli::col_yellow("Calibration curve results for {length(feartures_no_calib)} features missing. Calculated concentrations of affected features will be `NA`."))
     } else {
-      cli::cli_abort(cli::col_red("Calibration curve results for {length(feartures_no_calib)} features missing. Please update metadata or set `error_missing_annotation = FALSE`."))
+      cli::cli_abort(cli::col_red("Calibration curve results for {length(feartures_no_calib)} features missing. Please update metadata or set `fail_missing_annotation = FALSE`."))
     }
   }
 
   features_failed_calib <- sum(d_calib$reg_failed)
   # Check if calibration curve data is missing for any feature
   if (features_failed_calib > 0){
-    if(!error_missing_annotation) {
+    if(!fail_missing_annotation) {
       cli::cli_alert_warning(cli::col_yellow("Calibration curve fitting failed for {length(features_failed_calib)} features. Calculated concentrations of affected features will be `NA`."))
     } else {
       cli::cli_abort(cli::col_red("Calibration curve fitting failed for {length(features_failed_calib)} features. Please inspect calibration curve details, e.g. by plotting using `plot_calibrationcurves()`."))
@@ -119,7 +119,7 @@ quantify_by_calibration <- function(data = NULL,
 #' @param overwrite_metadata A logical value (`TRUE` or `FALSE`). If `TRUE`, the function will ignore any fit method and weighting settings defined in the metadata and use the provided `fit_method` and `fit_weighting` values for all analytes.
 #' @param fit_method A character string specifying the default regression fit method to use for the calibration curve. Must be one of `"linear"` or `"quadratic"`. This method will be applied if no specific fit method is defined for a feature in the metadata.
 #' @param fit_weighting A character string specifying the default weighting method for the regression points in the calibration curve. Must be one of `"none"`, `"1/x"`, or `"1/x^2"`. This method will be applied if no specific weighting method is defined for a feature in the metadata.
-#' @param error_missing_annotation If `TRUE`, an error will be raised if any of the following information is missing: calibration curve data, ISTD mix volume, and sample amounts for any feature.
+#' @param fail_missing_annotation If `TRUE`, an error will be raised if any of the following information is missing: calibration curve data, ISTD mix volume, and sample amounts for any feature.
 #' @param include_fit_object If `TRUE`, the function will return the full regression fit objects for each feature in the `metrics_calibration` table.
 #'
 #' @return A modified `MidarExperiment` object with an updated `metrics_calibration` table containing the calibration curve results, including concentrations, LoD, and LoQ values for each feature.
@@ -128,12 +128,12 @@ quantify_by_calibration <- function(data = NULL,
 #'
 #' @export
 
-#TODO: implement error handling  error_missing_annotation
+#TODO: implement error handling  fail_missing_annotation
 calc_calibration_results <- function(data = NULL,
                                     overwrite_metadata = FALSE,
                                     fit_method = c("linear", "quadratic"),
                                     fit_weighting = c("none", "1/x", "1/x^2"),
-                                    error_missing_annotation = TRUE,
+                                    fail_missing_annotation = TRUE,
                                     include_fit_object = FALSE
                                     ) {
 

diff --git a/R/calc-istd-normalization.R b/R/calc-istd-normalization.R
@@ -6,15 +6,15 @@
 #'
 
 #' @param data A `MidarExperiment` object.
-#' @param error_missing_annotation If `TRUE`, the function
+#' @param fail_missing_annotation If `TRUE`, the function
 #'   will raise an error when an ISTD is not defined for one or more features (excluding the ISTDs themselves).
 #'   If `FALSE`, features with missing ISTD annotations will have NA values in the normalized intensities.
 #'
 #' @return A `MidarExperiment` object with normalized feature intensities
 #'
 #' @export
 
-normalize_by_istd <- function(data = NULL, error_missing_annotation = TRUE) {
+normalize_by_istd <- function(data = NULL, fail_missing_annotation = TRUE) {
 
   check_data(data)
 
@@ -32,16 +32,15 @@ normalize_by_istd <- function(data = NULL, error_missing_annotation = TRUE) {
 
   # Check if all ISTDs are defined as distinct feature in the feature metadata
   d_annot <- data@annot_features |> select("feature_id", "istd_feature_id")
-  all_istds <- unique(d_annot$istd_feature_id)
-
+  all_istds <- unique(na.omit(d_annot$istd_feature_id))
 
   if(!all(is.na(all_istds))){
     istd_not_defined <- setdiff(all_istds, d_annot$feature_id)
     if (length(istd_not_defined) > 0) {
-      cli::cli_abort(cli::col_red("All ISTDs must be defined as feature in the feature metadata, {nrow(istd_not_defined)} ISTD(s) were not. Please check metadata."))
+      cli::cli_abort(cli::col_red("{length(istd_not_defined)} ISTD(s) were not defined as individual feature(s). Please check feature metadata."))
     }
   } else {
-    cli::cli_abort(cli::col_red("No ISTDs defined in metadata. Please add feature metadata with ISTDs defined."))
+    cli::cli_abort(cli::col_red("No ISTDs defined in feature metadata. Please define ISTDs for each feature in feature metadata."))
   }
 
   # check if ISTDs are defined for all features (except ISTDs that are not defined for themselves)
@@ -50,10 +49,10 @@ normalize_by_istd <- function(data = NULL, error_missing_annotation = TRUE) {
     dplyr::semi_join(data@dataset, by = c("feature_id"))
 
   if (nrow(features_no_istd) > 0) {
-    if(!error_missing_annotation)
-      cli::cli_alert_warning(cli::col_yellow("No ISTD was defined for {nrow(features_no_istd)} features, normalized intensities will be `NA` for these. "))
+    if(!fail_missing_annotation)
+      cli::cli_alert_warning(cli::col_yellow("For {nrow(features_no_istd)} feature(s) no ISTD was defined, normalized intensities will be `NA` for these features. "))
     else
-      cli::cli_abort(cli::col_red("No ISTD was defined for {nrow(features_no_istd)} features. Please ammend feature metadata or set `error_missing_annotation = FALSE`."))
+      cli::cli_abort(cli::col_red("For {nrow(features_no_istd)} feature(s) no ISTD was defined. Please ammend feature metadata or set `fail_missing_annotation = FALSE`."))
   }
 
   # Add ISTD intensities to temporary dataset
@@ -63,15 +62,21 @@ normalize_by_istd <- function(data = NULL, error_missing_annotation = TRUE) {
   # Normalize intensities
   d_temp <- d_temp |>
     dplyr::group_by(.data$istd_feature_id, .data$analysis_id) |>
-    dplyr::mutate(feature_norm_intensity = .data$feature_intensity / .data$feature_intensity[.data$is_istd]) |>
+    dplyr::mutate(
+      feature_norm_intensity = ifelse(
+        !is.na(.data$istd_feature_id),
+        .data$feature_intensity / .data$feature_intensity[.data$is_istd],
+        NA_real_
+      )
+    ) |>
     dplyr::ungroup()
 
   # Add normalized intensities to dataset table
   data@dataset <- data@dataset |>
     dplyr::inner_join(d_temp |> dplyr::select("analysis_id", "feature_id", "feature_norm_intensity"), by = c("analysis_id", "feature_id"))
 
   # Print summary
-  n_features <- length(unique(d_temp$feature_id))
+  n_features <- length(unique(d_temp$feature_id)) - nrow(features_no_istd)
 
   istds <- data@annot_features |> filter(.data$is_istd) |> pull(.data$feature_id)
   n_used_istds <- length(intersect(istds, data@annot_features |> filter(!.data$is_istd) |> pull(.data$quant_istd_feature_id) |> unique()))
@@ -82,8 +87,8 @@ normalize_by_istd <- function(data = NULL, error_missing_annotation = TRUE) {
 
   # Update status
   data@status_processing <- "ISTD-normalized ata"
-  data <- change_is_normalized(data, TRUE)
-  data <- change_is_quantitated(data, FALSE)
+  data <- update_after_normalization(data, TRUE)
+  data <- update_after_quantitation(data, FALSE)
   data@is_filtered <- FALSE
   data@metrics_qc <- data@metrics_qc[FALSE,]
   data
@@ -101,7 +106,7 @@ normalize_by_istd <- function(data = NULL, error_missing_annotation = TRUE) {
 #' named `feature_conc`.
 #'
 #' @param data A `MidarExperiment` object
-#' @param error_missing_annotation If `TRUE`, an error will be raised if any of the following information is missing: ISTD concentration, ISTD mix volume, and sample amounts for any feature.
+#' @param fail_missing_annotation If `TRUE`, an error will be raised if any of the following information is missing: ISTD concentration, ISTD mix volume, and sample amounts for any feature.
 #'   If `FALSE`, missing annotations will be ignored, and resulting feature concentration will be `NA`
 #'
 #' @return A `MidarExperiment` object with the calculated analyte concentrations added to the
@@ -111,7 +116,7 @@ normalize_by_istd <- function(data = NULL, error_missing_annotation = TRUE) {
 #'
 #' @export
 
-quantify_by_istd <- function(data = NULL, error_missing_annotation = TRUE) {
+quantify_by_istd <- function(data = NULL, fail_missing_annotation = TRUE) {
 
   check_data(data)
 
@@ -121,14 +126,14 @@ quantify_by_istd <- function(data = NULL, error_missing_annotation = TRUE) {
 
   # Check if sample and ISTD amounts are defined for all analyses
   samples_no_amounts <- data@annot_analyses |>
-    filter(.data$valid_analysis, is.na(.data$sample_amount), is.na(.data$istd_volume)) |>
+    filter(.data$valid_analysis, is.na(.data$sample_amount) | is.na(.data$istd_volume)) |>
     dplyr::semi_join(data@dataset, by = c("analysis_id"))
 
   if (nrow(samples_no_amounts) > 0) {
-    if(!error_missing_annotation)
-      cli::cli_alert_warning(cli::col_yellow("Sample and/or ISTD solution amount(s) for {length(samples_no_amounts)} analyses missing, concentrations of all features for these analyses will be `NA`"))
+    if(!fail_missing_annotation)
+      cli::cli_alert_warning(cli::col_yellow("Sample and/or ISTD solution amount(s) for {nrow(samples_no_amounts)} analyses missing, concentrations of all features for these analyses will be `NA`"))
     else
-      cli::cli_abort(cli::col_red("Sample and/or ISTD amount(s) for {nrow(samples_no_amounts)} analyses missing. Please ammend analysis metadata or set `error_missing_annotation = FALSE`."))
+      cli::cli_abort(cli::col_red("Sample and/or ISTD amount(s) for {nrow(samples_no_amounts)} analyses missing. Please ammend analysis metadata or set `fail_missing_annotation = FALSE`."))
   }
 
   # Add ISTD concentrations and sample amouts to temporary dataset
@@ -143,10 +148,10 @@ quantify_by_istd <- function(data = NULL, error_missing_annotation = TRUE) {
 
   # Check if ISTD concentrations in spiked-in mix  are defined for all ISTDs
   if (length(istd_no_conc) > 0){
-    if(!error_missing_annotation) {
+    if(!fail_missing_annotation) {
       cli::cli_alert_warning(cli::col_yellow("Spiked-in concentrations of {length(istd_no_conc)} ISTD(s) missing, calculated concentrations of affected features will be `NA`."))
     } else {
-      cli::cli_abort(cli::col_red("Concentrations of {length(istd_no_conc)} ISTD(s) missing. Please ammend ISTD metadata or set `error_missing_annotation = FALSE`."))
+      cli::cli_abort(cli::col_red("Concentrations of {length(istd_no_conc)} ISTD(s) missing. Please ammend ISTD metadata or set `fail_missing_annotation = FALSE`."))
     }
   }
 
@@ -171,13 +176,13 @@ quantify_by_istd <- function(data = NULL, error_missing_annotation = TRUE) {
 
   conc_unit <- get_conc_unit(data@annot_analyses$sample_amount_unit)
 
-  cli_alert_success(cli::col_green("{n_features_with_conc} feature concentrations calculated based on {n_istd} ISTDs and sample amounts of {get_analysis_count(data)} analyses."))
+  cli_alert_success(cli::col_green("{n_features_with_conc} feature concentrations calculated based on {n_istd} ISTDs and sample amounts of {get_analysis_count(data) - nrow(samples_no_amounts)} analyses."))
   cli::cli_alert_info("Concentrations are given in {conc_unit}.")
 
   data@status_processing <- "ISTD-quantitated data"
 
-  data <- change_is_normalized(data, TRUE)
-  data <- change_is_quantitated(data, TRUE)
+  data <- update_after_normalization(data, TRUE)
+  data <- update_after_quantitation(data, TRUE)
   data@is_filtered <- FALSE
   data@metrics_qc <- data@metrics_qc[FALSE,]
 

diff --git a/R/correct-drift-batch.R b/R/correct-drift-batch.R
@@ -407,7 +407,7 @@ corr_drift_fun <- function(data = NULL,
     last = " and ", width = 160)
 
 
-  nfeat <- get_feature_count(data, isistd = FALSE)
+  nfeat <- get_feature_count(data, is_istd = FALSE)
 
   if(conditional_correction & within_batch)
     count_feature_text <- glue::glue("of at least one batch for {features_corrected}
@@ -454,9 +454,9 @@ corr_drift_fun <- function(data = NULL,
 
   # Invalidate downstream processed data
   if(variable == "feature_intensity"){
-    data <- change_is_normalized(data, FALSE)
+    data <- update_after_normalization(data, FALSE)
   } else if(variable == "feature_norm_intensity") {
-    data <- change_is_quantitated(data, FALSE)
+    data <- update_after_quantitation(data, FALSE)
   }
 
   data@status_processing <- "Drift-corrected concentrations"
@@ -746,7 +746,7 @@ correct_batch_centering <- function(data = NULL,
       cv_diff_text = format(round(.data$cv_diff_median, 1), nsmall = 1)
     )
 
-  nfeat <- get_feature_count(data, isistd = FALSE)
+  nfeat <- get_feature_count(data, is_istd = FALSE)
 
   # Print summary
   if (data@var_drift_corrected[[variable]]) {

diff --git a/R/correct-isotope.R b/R/correct-isotope.R
@@ -106,7 +106,7 @@ correct_interferences <- function(data = NULL, variable = "feature_intensity") {
 
   data@is_isotope_corr <- TRUE
   data@status_processing <- "Isotope-corrected raw data"
-  data <- change_is_normalized(data, FALSE)
+  data <- update_after_normalization(data, FALSE)
   data@var_drift_corrected <- c(feature_intensity = FALSE, feature_norm_intensity = FALSE, feature_conc = FALSE)
   data@var_drift_corrected <- c(feature_intensity = FALSE, feature_norm_intensity = FALSE, feature_conc = FALSE)
   data@is_filtered <- FALSE

diff --git a/R/data-import.R b/R/data-import.R
@@ -151,30 +151,31 @@ import_data_main <- function(data = NULL, path, import_function, file_ext, silen
 
   # VERIFY DATA, i.e. analysis_ids, feature_ids, and values are replicated ===
   ## which can be result of multiple imports of the same/overlapping data or due to parsing error
-
-  if (nrow(d_raw) > nrow(d_raw |> distinct(.data$analysis_id, .data$feature_id, .keep_all = FALSE))) {
+  n_idpairs_distinct <- d_raw |> select("analysis_id", "feature_id") |>  distinct(.keep_all = FALSE) |> nrow()
+  if (nrow(d_raw) > n_idpairs_distinct) {
     has_duplicated_id <- TRUE
-    if (nrow(d_raw) > nrow(d_raw |> distinct(.data$analysis_id, .data$feature_id, .keep_all = TRUE))) {
-      has_duplicated_id_values <- TRUE
+
+    n_idvalpairs_distinct <- d_raw |> select("analysis_id", "feature_id", any_of(c("feature_area", "feature_rt", "feature_intensity", "feature_height", "feature_conc", "feauture_norm_intensity"))) |>  distinct(.keep_all = FALSE) |> nrow()
+
+    if (n_idvalpairs_distinct == n_idpairs_distinct ) {
+      has_duplicated_values <- TRUE
     } else {
-      has_duplicated_id_values <- FALSE
+      has_duplicated_values <- FALSE
     }
   } else {
     has_duplicated_id <- FALSE
   }
 
   if (has_duplicated_id) {
-    if (has_duplicated_id_values) {
-      cli::cli_abort(glue::glue("Imported data contains replicated reportings (analysis and feature pairs) with **identical** intensity values. Please check imported dataset(s)."))
+    if (has_duplicated_values) {
+      cli::cli_abort(glue::glue("Imported data contains duplicated reportings (analysis and feature pairs) with {cli::style_italic('identical')} feature variable values. Please check imported dataset(s)."))
     } else {
-      cli::cli_abort(glue::glue("Imported data contains replicated reportings (analysis and feature pairs) with **different intensity values. Please check imported dataset(s)."))
+      cli::cli_abort(glue::glue("Imported data contains duplicated reportings (analysis and feature pairs) with {cli::style_italic('different')} feature variable values. Please check imported dataset(s)."))
     }
   }
 
   data@dataset_orig <- dplyr::bind_rows(pkg.env$table_templates$dataset_orig_template, d_raw)
 
-
-
   # TODO: excl_unmatched_analyses below
 
   check_integrity(data, excl_unmatched_analyses = FALSE)
@@ -752,16 +753,6 @@ read_data_table <- function(path, value_type = c("area", "height", "intensity",
 }
 
 
-#' @title internal method to read csv files
-#' @param path csv file name
-#' @return tibble table
-#' @noRd
-# TODO remove this test function
-.test_mult <- function(a, b) {
-  nms <- c(1, 2, 3)
-  n_col <- length(nms)
-  rep("numeric", n_col)
-}