external calibration work

SLINGhub · Dec 18, 2024 · 4009f74 · 4009f74
1 parent 908ff83
commit 4009f74
Show file tree

Hide file tree

Showing 19 changed files with 435 additions and 39 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -63,6 +63,7 @@ RoxygenNote: 7.3.2
 LazyData: true
 LazyDataCompression: xz
 Collate: 
+    'calc-calibrations.R'
     'calc-istd-normalization.R'
     'midar-global-definitions.R'
     'classes.R'
@@ -82,12 +83,12 @@ Collate:
     'metadata-access.R'
     'metadata-read.R'
     'midar-package.R'
+    'plots-calibcurves.R'
     'plots-eda.R'
     'plots-qc-cv.R'
     'plots-qc-filtering.R'
     'plots-qc-pca.R'
     'plots-qc-responsecurves.R'
-    'plots-calibcurves.R'
     'plots-qc-runorder.R'
     'qc-filtering.R'
     'tibble-classes.R'

diff --git a/NAMESPACE b/NAMESPACE
@@ -9,6 +9,7 @@ export("metadata_responsecurves<-")
 export(MidarExperiment)
 export(add_metadata)
 export(analysis_type)
+export(calc_calibration_results)
 export(combine_experiments)
 export(corr_drift_fun)
 export(correct_batch_centering)
@@ -30,12 +31,14 @@ export(import_metadata_analyses)
 export(import_metadata_features)
 export(import_metadata_istds)
 export(import_metadata_midarxlm)
+export(import_metadata_qcconcentrations)
 export(lipidomics_get_lipid_class_names)
 export(metadata_responsecurves)
 export(normalize_by_istd)
 export(parse_masshunter_csv)
 export(parse_mrmkit_result)
 export(parse_plain_csv)
+export(plot_calibrationcurves)
 export(plot_cv_normalization)
 export(plot_pca)
 export(plot_qc_summary_byclass)
@@ -46,6 +49,7 @@ export(plot_runscatter)
 export(plot_runsequence)
 export(plot_x_vs_y)
 export(qc_calc_metrics)
+export(quantify_by_calibration)
 export(quantify_by_istd)
 export(report_write_qc_metrics)
 export(save_dataset_csv)
@@ -98,6 +102,7 @@ importFrom(dplyr,inner_join)
 importFrom(dplyr,join_by)
 importFrom(dplyr,left_join)
 importFrom(dplyr,mutate)
+importFrom(dplyr,n)
 importFrom(dplyr,pick)
 importFrom(dplyr,pull)
 importFrom(dplyr,relocate)

diff --git a/R/calc-calibrations.R b/R/calc-calibrations.R
@@ -1,7 +1,7 @@
 #' Calculate concentrations based on external calibration
 #'
-#' This function calculates the concentrations of analytes in samples based on a calibration curve derived from external calibration samples.
-#' Concentrations are determined using ISTD-normalized intensities, and the determined concentration unit is based on the `sample_amount_unit` provided in the analysis metadata.
+
+#' Concentrations are determined using ISTD-normalized intensities and external calibration curves. The determined concentration unit is based on values defined in the column `concentration_unit` of the qc_concentration metadata.
 #'
 #' The fit model (e.g., linear or quadratic) and the weighting method (e.g., none, 1/x, 1/x^2) used for the calibration curve can either be defined for each analyte in the metadata, or, if missing, the default values provided as arguments will be used.
 #'
@@ -16,6 +16,82 @@
 #'
 #' @export
 quantify_by_calibration <- function(data = NULL,
+                                     overwrite_metadata_fit = FALSE,
+                                     fit_method = c("linear", "quadratic"),
+                                     fit_weighting = c(NA, "none", "1/x", "1/x^2"),
+                                     missing_error = TRUE,
+                                     ignore_istd = FALSE) {
+
+  check_data(data)
+
+  rlang::arg_match(fit_method, c(NA, "linear", "quadratic"))
+  rlang::arg_match(fit_weighting, c(NA, "none", "1/x", "1/x^2"))
+
+  data <- calc_calibration_results(data = data,
+                                   overwrite_metadata_fit = overwrite_metadata_fit,
+                                   fit_method = fit_method,
+                                   fit_weighting = fit_weighting,
+                                   missing_error = missing_error,
+                                   ignore_istd = ignore_istd)
+
+  d_stats_calc <- data@metrics_calibration |>
+    dplyr::select("feature_id", "fit_model", "coef_a_cal_1", "coef_b_cal_1", "coef_c_cal_1")
+
+  data@dataset <- data@dataset |>
+    left_join(d_stats_calc, by = c("feature_id" = "feature_id")) |>
+    mutate(feature_conc = case_when(
+      fit_model != "quadratic" ~ (.data$feature_norm_intensity - .data$coef_b_cal_1) / .data$coef_a_cal_1,
+      TRUE ~ purrr::pmap_dbl(
+        list(coef_c_cal_1, coef_b_cal_1, coef_a_cal_1, feature_norm_intensity),
+        function(c, b, a, x) {
+          # Check for invalid coefficients that would lead to a degenerate polynomial
+          if (is.na(c) || is.na(b) || is.na(a) || is.na(x) || a == 0) {
+            return(NA_real_)  # Return NA if coefficients are invalid
+          }
+
+          # Apply polyroot and extract the real root
+          root <- tryCatch({
+            polyroot(c(c - x, b, a))
+          }, error = function(e) {
+            # Catch any errors in polyroot and return NA in case of error
+            return(NA_complex_)
+          })
+
+          # If the root is a complex number, check if it is valid (not NA)
+          if (length(root) > 0 && !is.na(Re(root[1]))) {
+            return(Re(root[1]))  # Return the real part of the first root
+          } else {
+            return(NA_real_)  # Return NA if no valid root
+          }
+        }
+      )
+    )) |> select(-c("coef_a_cal_1", "coef_b_cal_1", "coef_c_cal_1"))
+
+  data
+}
+
+
+
+
+
+#' Calculate external calibration curve results
+#'
+
+#' Concentrations are determined using ISTD-normalized intensities and external calibration curves. The determined concentration unit is based on values defined in the column `concentration_unit` of the qc_concentration metadata.
+#'
+#' The fit model (e.g., linear or quadratic) and the weighting method (e.g., none, 1/x, 1/x^2) used for the calibration curve can either be defined for each analyte in the metadata, or, if missing, the default values provided as arguments will be used.
+#'
+#' @param data A `MidarExperiment` object
+#' @param overwrite_metadata_fit A logical value (`TRUE` or `FALSE`). If `TRUE`, the function will ignore any fit method and weighting settings defined in the metadata and use the `fit_method` and `fit_weighting` values for all analytes.
+#' @param fit_method A character string indicating the default regression fit method to use for the calibration curve. Must be one of `"linear"` or `"quadratic"`. This method will be used if no specific fit method is defined for a feature in the metadata.
+#' @param fit_weighting A character string indicating the default weighting method for the regression points in the calibration curve. Must be one of `"none"`, `"1/x"`, or `"1/x^2"`. If no specific weighting method is defined for a feature in the metadata, this method will be used.
+#' @param missing_error A logical value (`TRUE` or `FALSE`). If `TRUE`, an error will be raised if one or more ISTD concentrations and sample/ISTD amounts are missing. Default is `TRUE`.
+#' @param ignore_istd A logical value (`TRUE` or `FALSE`). If `TRUE`, ISTD values with missing concentrations that are not used in any feature quantification will be ignored. Default is `FALSE`.
+#'
+#' @return A modified `MidarExperiment` object with updated concentration values for the analytes, based on the calibration curve calculations.
+#'
+#' @export
+calc_calibration_results <- function(data = NULL,
                                     overwrite_metadata_fit = FALSE,
                                     fit_method = c("linear", "quadratic"),
                                     fit_weighting = c(NA, "none", "1/x", "1/x^2"),
@@ -32,56 +108,86 @@ quantify_by_calibration <- function(data = NULL,
   calc_lm <- function(dt){
     tryCatch(
       {
-        res <- lm(formula = dt$formula[1], weights = weight, data = dt, na.action = na.exclude)
+        dt <- dt |>
+          mutate(weight = case_when(
+            fit_weighting[1] == "none" ~ 1,
+            fit_weighting[1] == "1/x" ~ 1 / concentration,
+            fit_weighting[1] == "1/x^2" ~ 1 / concentration^2,
+            fit_weighting[1] == "1/sqrt(x)" ~ 1 / sqrt(concentration),
+            TRUE ~ NA_real_  # Default case to handle any unexpected values
+          ))
+
+        formula <- ifelse(dt$fit_method[1] == "linear",
+                                  "feature_norm_intensity ~ concentration",
+                                  "feature_norm_intensity ~ poly(concentration, 2, raw = TRUE)")
+
+        res <- lm(formula = formula, weights = weight, data = dt, na.action = na.exclude)
 
         r.squared <- summary(res)$r.squared
-        slope <- res$coefficients[[2]]
-        intercept <- res$coefficients[1]
         sigma <- summary(res)$sigma
-        return(list(feature_id = dt$feature_id[1], curve_id = dt$curve_id[1], r.squared = r.squared , slope = slope, intercept = intercept, sigma = sigma))
-
+        if(dt$fit_method[1] == "quadratic"){
+          return(list(feature_id = dt$feature_id[1], curve_id = dt$curve_id[1], fit_model =  dt$fit_method[1], weighting = dt$fit_weighting[1], lowest_cal = sort(dt$concentration[dt$concentration != 0])[1], r.squared = r.squared , coef_a = res$coefficients[[3]], coef_b = res$coefficients[[2]], coef_c = res$coefficients[[1]], sigma = sigma))
+        } else {
+          return(list(feature_id = dt$feature_id[1], curve_id = dt$curve_id[1], fit_model =  dt$fit_method[1], weighting = dt$fit_weighting[1], lowest_cal = sort(dt$concentration[dt$concentration != 0])[1], r.squared = r.squared , coef_a = res$coefficients[[2]], coef_b = res$coefficients[1], coef_c = NA_real_, sigma = sigma))
+        }
       },
       error = function(e) {
-        return(list(feature_id = dt$feature_id[1], curve_id = dt$curve_id[1], r.squared = NA_real_ , slope = NA_real_, intercept = NA_real_))
+        if(dt$fit_method[1] == "quadratic"){
+          return(list(feature_id = dt$feature_id[1], curve_id = dt$curve_id[1], fit_model =  dt$fit_method[1], weighting = dt$fit_weighting[1], lowest_cal = sort(dt$concentration[dt$concentration != 0])[1], r.squared = NA_real_ , coef_a = NA_real_, coef_b = NA_real_, coef_c = res$coefficients[[1]], sigma = NA_real_))
+        } else {
+          return(list(feature_id = dt$feature_id[1], curve_id = dt$curve_id[1], fit_model =  dt$fit_method[1], weighting = dt$fit_weighting[1], lowest_cal = sort(dt$concentration[dt$concentration != 0])[1], r.squared = NA_real_ , coef_a = NA_real_, coef_b = NA_real_, coef_c = NA_real_, sigma = NA_real_))
+        }
       }
     )
   }
 
+  # mult_lowest_calib refert to multiplication factor of the lowest calibration
+  # point used when calculate LoD and LoQ with a quadratic model
+  # TODO: add reference
+  add_quantlimits <- function(data, mult_lowest_calib = 2) {
+
+
+    data <- data |>
+        mutate(slope_at_conc = if_else(.data$fit_model == "quadratic",
+                                       .data$coef_a + 2 * .data$coef_b * mult_lowest_calib,
+                                       .data$coef_a))
+
+    data <- data |>
+      mutate(
+        lod = 3 * .data$sigma / slope_at_conc,
+        loq = 10 * .data$sigma / slope_at_conc
+      )
+
+    data
+  }
+
+
   d_calib <- data@dataset |>
     dplyr::ungroup() |>
     dplyr::select(tidyselect::any_of(
       c("analysis_id", "sample_id", "qc_type", "feature_id", "is_istd", "is_quantifier", "feature_norm_intensity")
     )) |>
     filter(str_detect(qc_type, "^CAL$"), !is_istd, is_quantifier) |>
     dplyr::left_join(data@annot_qcconcentrations, by = c("sample_id" = "sample_id", "feature_id" = "feature_id")) |>
-    mutate(curve_id = "1")
-
-  d_calib <- d_calib |>
-    mutate(weight = case_when(
-      fit_weighting == "none" ~ 1,
-      fit_weighting == "1/x" ~ 1 / concentration,
-      fit_weighting == "1/x^2" ~ 1 / concentration^2,
-      fit_weighting == "1/sqrt(x)" ~ 1 / sqrt(concentration),
-      TRUE ~ NA_real_  # Default case to handle any unexpected values
-    ))
+    mutate(curve_id = "1") |>
+    mutate(fit_method = fit_method,
+           fit_weighting = fit_weighting)
 
-  d_calib$formula <- ifelse(fit_method == "linear", "feature_norm_intensity ~ concentration", "feature_norm_intensity ~ concentration + I(concentration^2)")
 
 
     d_calib <- d_calib |>
       dplyr::group_split(.data$feature_id, .data$curve_id)
 
+  d_stats <- map(d_calib, function(x) calc_lm(x)) |> bind_rows()
 
-  d_stats <- map(d_calib, function(x) calc_lm(x))
 
-  d_stats <- d_stats |> bind_rows() |>
-    dplyr::mutate(slopenorm = .data$slope,
-                  y0 = .data$intercept,
-                  lod =  3 * .data$sigma / .data$slope,
-                  loq =  10 * .data$sigma / .data$slope) |>
-    dplyr::select("feature_id", "curve_id", r2 = "r.squared", "slope", "y0", "sigma", "lod", "loq") |>
-    tidyr::pivot_wider(names_from = "curve_id", values_from = c("r2", "slope", "y0", "sigma", "lod", "loq"), names_prefix = "cal_")
+  d_stats <- add_quantlimits(d_stats, mult_lowest_calib = 3)
 
+  d_stats <- d_stats |>
+    dplyr::select("feature_id", "fit_model", "weighting", "lowest_cal", "curve_id", r2 = "r.squared", "coef_a", "coef_b", "coef_c", "sigma", "lowest_cal", "lod", "loq") |>
+    tidyr::pivot_wider(names_from = "curve_id", values_from = c("r2", "coef_a", "coef_b", "coef_c", "sigma", "lowest_cal", "lod", "loq"), names_prefix = "cal_")
+  data@metrics_calibration <- d_stats
 
+  data
 
   }
diff --git a/R/classes.R b/R/classes.R
@@ -22,6 +22,7 @@
 #' @slot annot_studysamples Annotation of study samples. Required fields:
 #' @slot annot_batches Annotation of batches. Required fields:
 #' @slot metrics_qc QC information for each measured feature
+#' @slot metrics_calibration Calibration metrics calculated from external calibration curves for each measured feature
 #' @slot parameters_processing Values of parameters used for the different processing steps
 #' @slot status_processing Status within the data processing workflow
 #' @slot is_istd_normalized Flag if data has been ISTD normalized
@@ -52,6 +53,7 @@ setClass("MidarExperiment",
     annot_studysamples = "tbl_df",
     annot_batches = "tbl_df",
     metrics_qc = "tbl_df",
+    metrics_calibration = "tbl_df",
     parameters_processing = "tbl_df",
     status_processing = "character",
     is_istd_normalized = "logical",
@@ -78,6 +80,7 @@ setClass("MidarExperiment",
     annot_studysamples = tibble::tibble(),
     annot_batches = tibble::tibble(),
     metrics_qc = tibble::tibble(),
+    metrics_calibration = tibble::tibble(),
     parameters_processing = pkg.env$table_templates$parameters_processing_template,
     status_processing = "No Data",
     is_isotope_corr = FALSE,

diff --git a/R/data-export.R b/R/data-export.R
@@ -116,6 +116,41 @@ save_report_xlsx <- function(data = NULL, path) {
 }
 
 
+#'  @export
+get_calibration_report <- function(data, qc_types, with_lod = TRUE, with_loq = TRUE, with_bias = TRUE,  with_coefficients = TRUE, with_sigma = TRUE){
+
+  d_qc_summary <- data@dataset |>
+    filter(.data$qc_type %in% qc_types, !.data$is_istd, .data$is_quantifier) |>
+    select("analysis_id", "qc_type", "sample_id", "feature_id", "feature_conc") |>
+    left_join(data@annot_qcconcentrations |> select("sample_id", "feature_id", target_concentration = "concentration"), by = c("sample_id", "feature_id")) |>
+    relocate(.data$target_concentration, .after = .data$feature_id) |>
+    mutate(
+      bias_perc = (.data$feature_conc - .data$target_concentration) / .data$target_concentration * 100
+    ) |>
+    summarise(
+      n = dplyr::n(),
+      conc_target = mean(.data$target_concentration, na.rm = FALSE),
+      conc_mean = mean(.data$feature_conc, na.rm = TRUE),
+      conc_sd = sd(.data$feature_conc, na.rm = TRUE),
+      bias_perc_mean = mean(.data$bias_perc, na.rm = TRUE),
+      bias_perc_sd = sd(.data$bias_perc, na.rm = TRUE),
+      cv_intra = .data$conc_sd / .data$conc_mean * 100,
+      .by = c("sample_id", "qc_type", "feature_id")) |>
+    select("feature_id", "qc_type", "bias_perc_mean", "bias_perc_sd", "cv_intra") |>
+    pivot_wider(names_from = "qc_type", values_from = c("bias_perc_mean", "bias_perc_sd", "cv_intra"),
+                names_sort = TRUE, names_glue = "{qc_type}_{.value}")
+
+
+  d_qc_summary <- d_qc_summary |> select(order(colnames(d_qc_summary))) |>
+    relocate(.data$feature_id, .before  = 1) |>
+    left_join(data@metrics_calibration |> select("feature_id", model = "fit_model", weighting = "weighting", r2 = "r2_cal_1", Cal_A_nM = "lowest_cal_cal_1", LoD_nM = "lod_cal_1", LoQ_nM = "loq_cal_1"), by = c("feature_id"))
+
+  d_qc_summary |>
+    arrange(.data$feature_id)
+}
+
+
+
 
 #' Export any parameter to a wide-format table
 #'

diff --git a/R/midar-package.R b/R/midar-package.R
@@ -6,7 +6,7 @@
 #' @importFrom glue glue
 #' @importFrom ggplot2 ggplot aes Stat unit geom_point geom_line geom_abline margin coord_flip geom_text geom_bar geom_segment scale_y_discrete scale_x_discrete scale_y_log10 xlab ylab vars theme facet_wrap position_jitter scale_color_manual labs scale_fill_manual theme_light geom_vline geom_rect geom_vline aes_string scale_fill_manual scale_shape_manual expand_limits geom_smooth geom_hline scale_y_continuous element_blank element_text theme_bw stat_ellipse element_rect element_line expansion scale_x_continuous geom_boxplot ggtitle position_dodge
 #' @importFrom tidyselect vars_select_helpers everything starts_with
-#' @importFrom dplyr select filter group_by desc ungroup mutate summarise slice if_else rowwise arrange left_join right_join inner_join full_join anti_join semi_join join_by row_number cur_group_id case_when distinct rename relocate pull across all_of any_of if_any ends_with if_all case_match case_when bind_rows group_split pick
+#' @importFrom dplyr n select filter group_by desc ungroup mutate summarise slice if_else rowwise arrange left_join right_join inner_join full_join anti_join semi_join join_by row_number cur_group_id case_when distinct rename relocate pull across all_of any_of if_any ends_with if_all case_match case_when bind_rows group_split pick
 #' @importFrom stringr str_remove str_replace str_replace_all str_detect str_trim str_extract str_squish fixed
 #' @importFrom tibble column_to_rownames as_tibble tibble
 #' @importFrom tidyr unite drop_na pivot_wider nest unnest replace_na

diff --git a/R/qc-filtering.R b/R/qc-filtering.R
@@ -8,9 +8,10 @@
 #' @param with_norm_intensity Include normalized intensity statistics of features. Default is TRUE.
 #' @param with_conc Include concentration statistics of features. Default is TRUE.
 #' @param with_linearity Include linearity statistics of response curves. This will increase the calculation time. Default is TRUE.
+#' @param with_calibration Include external calibration results if available.
 #' @return MidarExperiment object
 #' @export
-qc_calc_metrics <- function(data = NULL, batch_medians, with_norm_intensity = TRUE, with_conc = TRUE, with_linearity = TRUE ) {
+qc_calc_metrics <- function(data = NULL, batch_medians, with_norm_intensity = TRUE, with_conc = TRUE, with_linearity = TRUE, with_calibration = TRUE) {
   check_data(data)
    # TODO: remove later when fixed
   if (tolower(data@analysis_type) == "lipidomics") data <- lipidomics_get_lipid_class_names(data)
@@ -150,11 +151,19 @@ qc_calc_metrics <- function(data = NULL, batch_medians, with_norm_intensity = TR
       left_join(d_stats_var_final, by = "feature_id") |>
       relocate("feature_id", "feature_class", "in_data", "valid_feature", "is_quantifier", "precursor_mz", "product_mz", "collision_energy")
 
-  if (with_linearity & "RQC" %in% data@dataset$qc_type) {
-    d_rqc_stats <- get_response_curve_stats(data,  with_staturation_stats = FALSE, limit_to_rqc = TRUE)
-    data@metrics_qc <- data@metrics_qc |>
-      dplyr::left_join(d_rqc_stats, by = "feature_id")
-  }
+    if (with_calibration & nrow(data@metrics_calibration) > 0) {
+      data@metrics_qc <- data@metrics_qc |>
+        dplyr::left_join(data@metrics_calibration, by = "feature_id")
+    }
+
+    if (with_linearity & "RQC" %in% data@dataset$qc_type) {
+      d_rqc_stats <- get_response_curve_stats(data,  with_staturation_stats = FALSE, limit_to_rqc = TRUE)
+      data@metrics_qc <- data@metrics_qc |>
+        dplyr::left_join(d_rqc_stats, by = "feature_id")
+    }
+
+
+
 
   data
 }

diff --git a/data/dataset_plasma_lipidomics.rda b/data/dataset_plasma_lipidomics.rda
diff --git a/man/MidarExperiment-class.Rd b/man/MidarExperiment-class.Rd