Skip to content

Commit

Permalink
external calibration work
Browse files Browse the repository at this point in the history
  • Loading branch information
burlab committed Dec 18, 2024
1 parent 908ff83 commit 4009f74
Show file tree
Hide file tree
Showing 19 changed files with 435 additions and 39 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ RoxygenNote: 7.3.2
LazyData: true
LazyDataCompression: xz
Collate:
'calc-calibrations.R'
'calc-istd-normalization.R'
'midar-global-definitions.R'
'classes.R'
Expand All @@ -82,12 +83,12 @@ Collate:
'metadata-access.R'
'metadata-read.R'
'midar-package.R'
'plots-calibcurves.R'
'plots-eda.R'
'plots-qc-cv.R'
'plots-qc-filtering.R'
'plots-qc-pca.R'
'plots-qc-responsecurves.R'
'plots-calibcurves.R'
'plots-qc-runorder.R'
'qc-filtering.R'
'tibble-classes.R'
Expand Down
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ export("metadata_responsecurves<-")
export(MidarExperiment)
export(add_metadata)
export(analysis_type)
export(calc_calibration_results)
export(combine_experiments)
export(corr_drift_fun)
export(correct_batch_centering)
Expand All @@ -30,12 +31,14 @@ export(import_metadata_analyses)
export(import_metadata_features)
export(import_metadata_istds)
export(import_metadata_midarxlm)
export(import_metadata_qcconcentrations)
export(lipidomics_get_lipid_class_names)
export(metadata_responsecurves)
export(normalize_by_istd)
export(parse_masshunter_csv)
export(parse_mrmkit_result)
export(parse_plain_csv)
export(plot_calibrationcurves)
export(plot_cv_normalization)
export(plot_pca)
export(plot_qc_summary_byclass)
Expand All @@ -46,6 +49,7 @@ export(plot_runscatter)
export(plot_runsequence)
export(plot_x_vs_y)
export(qc_calc_metrics)
export(quantify_by_calibration)
export(quantify_by_istd)
export(report_write_qc_metrics)
export(save_dataset_csv)
Expand Down Expand Up @@ -98,6 +102,7 @@ importFrom(dplyr,inner_join)
importFrom(dplyr,join_by)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,n)
importFrom(dplyr,pick)
importFrom(dplyr,pull)
importFrom(dplyr,relocate)
Expand Down
160 changes: 133 additions & 27 deletions R/calc-calibrations.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#' Calculate concentrations based on external calibration
#'
#' This function calculates the concentrations of analytes in samples based on a calibration curve derived from external calibration samples.
#' Concentrations are determined using ISTD-normalized intensities, and the determined concentration unit is based on the `sample_amount_unit` provided in the analysis metadata.

#' Concentrations are determined using ISTD-normalized intensities and external calibration curves. The determined concentration unit is based on values defined in the column `concentration_unit` of the qc_concentration metadata.
#'
#' The fit model (e.g., linear or quadratic) and the weighting method (e.g., none, 1/x, 1/x^2) used for the calibration curve can either be defined for each analyte in the metadata, or, if missing, the default values provided as arguments will be used.
#'
Expand All @@ -16,6 +16,82 @@
#'
#' @export
quantify_by_calibration <- function(data = NULL,
overwrite_metadata_fit = FALSE,
fit_method = c("linear", "quadratic"),
fit_weighting = c(NA, "none", "1/x", "1/x^2"),
missing_error = TRUE,
ignore_istd = FALSE) {

check_data(data)

rlang::arg_match(fit_method, c(NA, "linear", "quadratic"))
rlang::arg_match(fit_weighting, c(NA, "none", "1/x", "1/x^2"))

data <- calc_calibration_results(data = data,
overwrite_metadata_fit = overwrite_metadata_fit,
fit_method = fit_method,
fit_weighting = fit_weighting,
missing_error = missing_error,
ignore_istd = ignore_istd)

d_stats_calc <- data@metrics_calibration |>
dplyr::select("feature_id", "fit_model", "coef_a_cal_1", "coef_b_cal_1", "coef_c_cal_1")

data@dataset <- data@dataset |>
left_join(d_stats_calc, by = c("feature_id" = "feature_id")) |>
mutate(feature_conc = case_when(
fit_model != "quadratic" ~ (.data$feature_norm_intensity - .data$coef_b_cal_1) / .data$coef_a_cal_1,
TRUE ~ purrr::pmap_dbl(
list(coef_c_cal_1, coef_b_cal_1, coef_a_cal_1, feature_norm_intensity),
function(c, b, a, x) {
# Check for invalid coefficients that would lead to a degenerate polynomial
if (is.na(c) || is.na(b) || is.na(a) || is.na(x) || a == 0) {
return(NA_real_) # Return NA if coefficients are invalid
}

# Apply polyroot and extract the real root
root <- tryCatch({
polyroot(c(c - x, b, a))
}, error = function(e) {
# Catch any errors in polyroot and return NA in case of error
return(NA_complex_)
})

# If the root is a complex number, check if it is valid (not NA)
if (length(root) > 0 && !is.na(Re(root[1]))) {
return(Re(root[1])) # Return the real part of the first root
} else {
return(NA_real_) # Return NA if no valid root
}
}
)
)) |> select(-c("coef_a_cal_1", "coef_b_cal_1", "coef_c_cal_1"))

data
}





#' Calculate external calibration curve results
#'

#' Concentrations are determined using ISTD-normalized intensities and external calibration curves. The determined concentration unit is based on values defined in the column `concentration_unit` of the qc_concentration metadata.
#'
#' The fit model (e.g., linear or quadratic) and the weighting method (e.g., none, 1/x, 1/x^2) used for the calibration curve can either be defined for each analyte in the metadata, or, if missing, the default values provided as arguments will be used.
#'
#' @param data A `MidarExperiment` object
#' @param overwrite_metadata_fit A logical value (`TRUE` or `FALSE`). If `TRUE`, the function will ignore any fit method and weighting settings defined in the metadata and use the `fit_method` and `fit_weighting` values for all analytes.
#' @param fit_method A character string indicating the default regression fit method to use for the calibration curve. Must be one of `"linear"` or `"quadratic"`. This method will be used if no specific fit method is defined for a feature in the metadata.
#' @param fit_weighting A character string indicating the default weighting method for the regression points in the calibration curve. Must be one of `"none"`, `"1/x"`, or `"1/x^2"`. If no specific weighting method is defined for a feature in the metadata, this method will be used.
#' @param missing_error A logical value (`TRUE` or `FALSE`). If `TRUE`, an error will be raised if one or more ISTD concentrations and sample/ISTD amounts are missing. Default is `TRUE`.
#' @param ignore_istd A logical value (`TRUE` or `FALSE`). If `TRUE`, ISTD values with missing concentrations that are not used in any feature quantification will be ignored. Default is `FALSE`.
#'
#' @return A modified `MidarExperiment` object with updated concentration values for the analytes, based on the calibration curve calculations.
#'
#' @export
calc_calibration_results <- function(data = NULL,
overwrite_metadata_fit = FALSE,
fit_method = c("linear", "quadratic"),
fit_weighting = c(NA, "none", "1/x", "1/x^2"),
Expand All @@ -32,56 +108,86 @@ quantify_by_calibration <- function(data = NULL,
calc_lm <- function(dt){
tryCatch(
{
res <- lm(formula = dt$formula[1], weights = weight, data = dt, na.action = na.exclude)
dt <- dt |>
mutate(weight = case_when(
fit_weighting[1] == "none" ~ 1,
fit_weighting[1] == "1/x" ~ 1 / concentration,
fit_weighting[1] == "1/x^2" ~ 1 / concentration^2,
fit_weighting[1] == "1/sqrt(x)" ~ 1 / sqrt(concentration),
TRUE ~ NA_real_ # Default case to handle any unexpected values
))

formula <- ifelse(dt$fit_method[1] == "linear",
"feature_norm_intensity ~ concentration",
"feature_norm_intensity ~ poly(concentration, 2, raw = TRUE)")

res <- lm(formula = formula, weights = weight, data = dt, na.action = na.exclude)

r.squared <- summary(res)$r.squared
slope <- res$coefficients[[2]]
intercept <- res$coefficients[1]
sigma <- summary(res)$sigma
return(list(feature_id = dt$feature_id[1], curve_id = dt$curve_id[1], r.squared = r.squared , slope = slope, intercept = intercept, sigma = sigma))

if(dt$fit_method[1] == "quadratic"){
return(list(feature_id = dt$feature_id[1], curve_id = dt$curve_id[1], fit_model = dt$fit_method[1], weighting = dt$fit_weighting[1], lowest_cal = sort(dt$concentration[dt$concentration != 0])[1], r.squared = r.squared , coef_a = res$coefficients[[3]], coef_b = res$coefficients[[2]], coef_c = res$coefficients[[1]], sigma = sigma))
} else {
return(list(feature_id = dt$feature_id[1], curve_id = dt$curve_id[1], fit_model = dt$fit_method[1], weighting = dt$fit_weighting[1], lowest_cal = sort(dt$concentration[dt$concentration != 0])[1], r.squared = r.squared , coef_a = res$coefficients[[2]], coef_b = res$coefficients[1], coef_c = NA_real_, sigma = sigma))
}
},
error = function(e) {
return(list(feature_id = dt$feature_id[1], curve_id = dt$curve_id[1], r.squared = NA_real_ , slope = NA_real_, intercept = NA_real_))
if(dt$fit_method[1] == "quadratic"){
return(list(feature_id = dt$feature_id[1], curve_id = dt$curve_id[1], fit_model = dt$fit_method[1], weighting = dt$fit_weighting[1], lowest_cal = sort(dt$concentration[dt$concentration != 0])[1], r.squared = NA_real_ , coef_a = NA_real_, coef_b = NA_real_, coef_c = res$coefficients[[1]], sigma = NA_real_))
} else {
return(list(feature_id = dt$feature_id[1], curve_id = dt$curve_id[1], fit_model = dt$fit_method[1], weighting = dt$fit_weighting[1], lowest_cal = sort(dt$concentration[dt$concentration != 0])[1], r.squared = NA_real_ , coef_a = NA_real_, coef_b = NA_real_, coef_c = NA_real_, sigma = NA_real_))
}
}
)
}

# mult_lowest_calib refert to multiplication factor of the lowest calibration
# point used when calculate LoD and LoQ with a quadratic model
# TODO: add reference
add_quantlimits <- function(data, mult_lowest_calib = 2) {


data <- data |>
mutate(slope_at_conc = if_else(.data$fit_model == "quadratic",
.data$coef_a + 2 * .data$coef_b * mult_lowest_calib,
.data$coef_a))

data <- data |>
mutate(
lod = 3 * .data$sigma / slope_at_conc,
loq = 10 * .data$sigma / slope_at_conc
)

data
}


d_calib <- data@dataset |>
dplyr::ungroup() |>
dplyr::select(tidyselect::any_of(
c("analysis_id", "sample_id", "qc_type", "feature_id", "is_istd", "is_quantifier", "feature_norm_intensity")
)) |>
filter(str_detect(qc_type, "^CAL$"), !is_istd, is_quantifier) |>
dplyr::left_join(data@annot_qcconcentrations, by = c("sample_id" = "sample_id", "feature_id" = "feature_id")) |>
mutate(curve_id = "1")

d_calib <- d_calib |>
mutate(weight = case_when(
fit_weighting == "none" ~ 1,
fit_weighting == "1/x" ~ 1 / concentration,
fit_weighting == "1/x^2" ~ 1 / concentration^2,
fit_weighting == "1/sqrt(x)" ~ 1 / sqrt(concentration),
TRUE ~ NA_real_ # Default case to handle any unexpected values
))
mutate(curve_id = "1") |>
mutate(fit_method = fit_method,
fit_weighting = fit_weighting)

d_calib$formula <- ifelse(fit_method == "linear", "feature_norm_intensity ~ concentration", "feature_norm_intensity ~ concentration + I(concentration^2)")


d_calib <- d_calib |>
dplyr::group_split(.data$feature_id, .data$curve_id)

d_stats <- map(d_calib, function(x) calc_lm(x)) |> bind_rows()

d_stats <- map(d_calib, function(x) calc_lm(x))

d_stats <- d_stats |> bind_rows() |>
dplyr::mutate(slopenorm = .data$slope,
y0 = .data$intercept,
lod = 3 * .data$sigma / .data$slope,
loq = 10 * .data$sigma / .data$slope) |>
dplyr::select("feature_id", "curve_id", r2 = "r.squared", "slope", "y0", "sigma", "lod", "loq") |>
tidyr::pivot_wider(names_from = "curve_id", values_from = c("r2", "slope", "y0", "sigma", "lod", "loq"), names_prefix = "cal_")
d_stats <- add_quantlimits(d_stats, mult_lowest_calib = 3)

d_stats <- d_stats |>
dplyr::select("feature_id", "fit_model", "weighting", "lowest_cal", "curve_id", r2 = "r.squared", "coef_a", "coef_b", "coef_c", "sigma", "lowest_cal", "lod", "loq") |>
tidyr::pivot_wider(names_from = "curve_id", values_from = c("r2", "coef_a", "coef_b", "coef_c", "sigma", "lowest_cal", "lod", "loq"), names_prefix = "cal_")
data@metrics_calibration <- d_stats

data

}
3 changes: 3 additions & 0 deletions R/classes.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#' @slot annot_studysamples Annotation of study samples. Required fields:
#' @slot annot_batches Annotation of batches. Required fields:
#' @slot metrics_qc QC information for each measured feature
#' @slot metrics_calibration Calibration metrics calculated from external calibration curves for each measured feature
#' @slot parameters_processing Values of parameters used for the different processing steps
#' @slot status_processing Status within the data processing workflow
#' @slot is_istd_normalized Flag if data has been ISTD normalized
Expand Down Expand Up @@ -52,6 +53,7 @@ setClass("MidarExperiment",
annot_studysamples = "tbl_df",
annot_batches = "tbl_df",
metrics_qc = "tbl_df",
metrics_calibration = "tbl_df",
parameters_processing = "tbl_df",
status_processing = "character",
is_istd_normalized = "logical",
Expand All @@ -78,6 +80,7 @@ setClass("MidarExperiment",
annot_studysamples = tibble::tibble(),
annot_batches = tibble::tibble(),
metrics_qc = tibble::tibble(),
metrics_calibration = tibble::tibble(),
parameters_processing = pkg.env$table_templates$parameters_processing_template,
status_processing = "No Data",
is_isotope_corr = FALSE,
Expand Down
35 changes: 35 additions & 0 deletions R/data-export.R
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,41 @@ save_report_xlsx <- function(data = NULL, path) {
}


#' @export
get_calibration_report <- function(data, qc_types, with_lod = TRUE, with_loq = TRUE, with_bias = TRUE, with_coefficients = TRUE, with_sigma = TRUE){

d_qc_summary <- data@dataset |>
filter(.data$qc_type %in% qc_types, !.data$is_istd, .data$is_quantifier) |>
select("analysis_id", "qc_type", "sample_id", "feature_id", "feature_conc") |>
left_join(data@annot_qcconcentrations |> select("sample_id", "feature_id", target_concentration = "concentration"), by = c("sample_id", "feature_id")) |>
relocate(.data$target_concentration, .after = .data$feature_id) |>
mutate(
bias_perc = (.data$feature_conc - .data$target_concentration) / .data$target_concentration * 100
) |>
summarise(
n = dplyr::n(),
conc_target = mean(.data$target_concentration, na.rm = FALSE),
conc_mean = mean(.data$feature_conc, na.rm = TRUE),
conc_sd = sd(.data$feature_conc, na.rm = TRUE),
bias_perc_mean = mean(.data$bias_perc, na.rm = TRUE),
bias_perc_sd = sd(.data$bias_perc, na.rm = TRUE),
cv_intra = .data$conc_sd / .data$conc_mean * 100,
.by = c("sample_id", "qc_type", "feature_id")) |>
select("feature_id", "qc_type", "bias_perc_mean", "bias_perc_sd", "cv_intra") |>
pivot_wider(names_from = "qc_type", values_from = c("bias_perc_mean", "bias_perc_sd", "cv_intra"),
names_sort = TRUE, names_glue = "{qc_type}_{.value}")


d_qc_summary <- d_qc_summary |> select(order(colnames(d_qc_summary))) |>
relocate(.data$feature_id, .before = 1) |>
left_join(data@metrics_calibration |> select("feature_id", model = "fit_model", weighting = "weighting", r2 = "r2_cal_1", Cal_A_nM = "lowest_cal_cal_1", LoD_nM = "lod_cal_1", LoQ_nM = "loq_cal_1"), by = c("feature_id"))

d_qc_summary |>
arrange(.data$feature_id)
}




#' Export any parameter to a wide-format table
#'
Expand Down
2 changes: 1 addition & 1 deletion R/midar-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#' @importFrom glue glue
#' @importFrom ggplot2 ggplot aes Stat unit geom_point geom_line geom_abline margin coord_flip geom_text geom_bar geom_segment scale_y_discrete scale_x_discrete scale_y_log10 xlab ylab vars theme facet_wrap position_jitter scale_color_manual labs scale_fill_manual theme_light geom_vline geom_rect geom_vline aes_string scale_fill_manual scale_shape_manual expand_limits geom_smooth geom_hline scale_y_continuous element_blank element_text theme_bw stat_ellipse element_rect element_line expansion scale_x_continuous geom_boxplot ggtitle position_dodge
#' @importFrom tidyselect vars_select_helpers everything starts_with
#' @importFrom dplyr select filter group_by desc ungroup mutate summarise slice if_else rowwise arrange left_join right_join inner_join full_join anti_join semi_join join_by row_number cur_group_id case_when distinct rename relocate pull across all_of any_of if_any ends_with if_all case_match case_when bind_rows group_split pick
#' @importFrom dplyr n select filter group_by desc ungroup mutate summarise slice if_else rowwise arrange left_join right_join inner_join full_join anti_join semi_join join_by row_number cur_group_id case_when distinct rename relocate pull across all_of any_of if_any ends_with if_all case_match case_when bind_rows group_split pick
#' @importFrom stringr str_remove str_replace str_replace_all str_detect str_trim str_extract str_squish fixed
#' @importFrom tibble column_to_rownames as_tibble tibble
#' @importFrom tidyr unite drop_na pivot_wider nest unnest replace_na
Expand Down
21 changes: 15 additions & 6 deletions R/qc-filtering.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@
#' @param with_norm_intensity Include normalized intensity statistics of features. Default is TRUE.
#' @param with_conc Include concentration statistics of features. Default is TRUE.
#' @param with_linearity Include linearity statistics of response curves. This will increase the calculation time. Default is TRUE.
#' @param with_calibration Include external calibration results if available.
#' @return MidarExperiment object
#' @export
qc_calc_metrics <- function(data = NULL, batch_medians, with_norm_intensity = TRUE, with_conc = TRUE, with_linearity = TRUE ) {
qc_calc_metrics <- function(data = NULL, batch_medians, with_norm_intensity = TRUE, with_conc = TRUE, with_linearity = TRUE, with_calibration = TRUE) {
check_data(data)
# TODO: remove later when fixed
if (tolower(data@analysis_type) == "lipidomics") data <- lipidomics_get_lipid_class_names(data)
Expand Down Expand Up @@ -150,11 +151,19 @@ qc_calc_metrics <- function(data = NULL, batch_medians, with_norm_intensity = TR
left_join(d_stats_var_final, by = "feature_id") |>
relocate("feature_id", "feature_class", "in_data", "valid_feature", "is_quantifier", "precursor_mz", "product_mz", "collision_energy")

if (with_linearity & "RQC" %in% data@dataset$qc_type) {
d_rqc_stats <- get_response_curve_stats(data, with_staturation_stats = FALSE, limit_to_rqc = TRUE)
data@metrics_qc <- data@metrics_qc |>
dplyr::left_join(d_rqc_stats, by = "feature_id")
}
if (with_calibration & nrow(data@metrics_calibration) > 0) {
data@metrics_qc <- data@metrics_qc |>
dplyr::left_join(data@metrics_calibration, by = "feature_id")
}

if (with_linearity & "RQC" %in% data@dataset$qc_type) {
d_rqc_stats <- get_response_curve_stats(data, with_staturation_stats = FALSE, limit_to_rqc = TRUE)
data@metrics_qc <- data@metrics_qc |>
dplyr::left_join(d_rqc_stats, by = "feature_id")
}




data
}
Expand Down
Binary file modified data/dataset_plasma_lipidomics.rda
Binary file not shown.
2 changes: 2 additions & 0 deletions man/MidarExperiment-class.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 4009f74

Please sign in to comment.