Skip to content

Commit

Permalink
add tests and fixes part 1
Browse files Browse the repository at this point in the history
  • Loading branch information
burlab committed Jan 14, 2025
1 parent 59082b1 commit 0661814
Show file tree
Hide file tree
Showing 64 changed files with 15,583 additions and 251,303 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ Collate:
'helper.R'
'lipidomics.R'
'metadata-access.R'
'metadata-read.R'
'metadata-import.R'
'midar-package.R'
'plots-calibcurves.R'
'plots-eda.R'
Expand Down
8 changes: 8 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,18 @@ export(data_load_example)
export(exclude_analyses)
export(exclude_features)
export(filter_features_qc)
export(get_analyis_end)
export(get_analyis_start)
export(get_analysis_breaks)
export(get_analysis_count)
export(get_analysis_duration)
export(get_analyticaldata)
export(get_batch_boundaries)
export(get_calibration_results)
export(get_feature_count)
export(get_featurelist)
export(get_response_curve_stats)
export(get_runtime_median)
export(import_data_csv)
export(import_data_masshunter)
export(import_data_mrmkit)
Expand Down
20 changes: 10 additions & 10 deletions R/calc-calibrations.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#' @param fit_method A character string indicating the default regression fit method to use for the calibration curve. Must be one of `"linear"` or `"quadratic"`. This method will be used if no specific fit method is defined for a feature in the metadata.
#' @param fit_weighting A character string indicating the default weighting method for the regression points in the calibration curve. Must be one of `"none"`, `"1/x"`, or `"1/x^2"`. If no specific weighting method is defined for a feature in the metadata, this method will be used.
#' @param error_failed_calibration If `TRUE`, an error will be raised if the calibration curve fitting failed for any feature. If `FALSE`, failed calibration curve fitting will be ignored, and resulting feature concentration will be `NA`.
#' @param error_missing_annotation Raise error if any of the following information is missing: calibration curve data, ISTD mix volume and sample amounts for any feature.
#' @param fail_missing_annotation Raise error if any of the following information is missing: calibration curve data, ISTD mix volume and sample amounts for any feature.
#' If `FALSE`, missing annotations will be ignored, and resulting feature concentration will be `NA`
#' @return A modified `MidarExperiment` object with updated concentration values.
#'
Expand All @@ -26,7 +26,7 @@ quantify_by_calibration <- function(data = NULL,
fit_method = c("linear", "quadratic"),
fit_weighting = c("none", "1/x", "1/x^2"),
error_failed_calibration = TRUE,
error_missing_annotation = TRUE
fail_missing_annotation = TRUE
) {

check_data(data)
Expand All @@ -39,23 +39,23 @@ quantify_by_calibration <- function(data = NULL,
overwrite_metadata = overwrite_metadata,
fit_method = fit_method,
fit_weighting = fit_weighting,
error_missing_annotation = error_missing_annotation)
fail_missing_annotation = fail_missing_annotation)
d_calib <- data@metrics_calibration

features_no_calib <- setdiff(d_calib$feature_id, get_featurelist(data, isistd = FALSE, isquantifier = TRUE))
features_no_calib <- setdiff(d_calib$feature_id, get_featurelist(data, is_istd = FALSE, is_quantifier = TRUE))
# Check if calibration curve data is missing for any feature
if (length(features_no_calib) > 0){
if (!error_missing_annotation) {
if (!fail_missing_annotation) {
cli::cli_alert_warning(cli::col_yellow("Calibration curve results for {length(feartures_no_calib)} features missing. Calculated concentrations of affected features will be `NA`."))
} else {
cli::cli_abort(cli::col_red("Calibration curve results for {length(feartures_no_calib)} features missing. Please update metadata or set `error_missing_annotation = FALSE`."))
cli::cli_abort(cli::col_red("Calibration curve results for {length(feartures_no_calib)} features missing. Please update metadata or set `fail_missing_annotation = FALSE`."))
}
}

features_failed_calib <- sum(d_calib$reg_failed)
# Check if calibration curve data is missing for any feature
if (features_failed_calib > 0){
if(!error_missing_annotation) {
if(!fail_missing_annotation) {
cli::cli_alert_warning(cli::col_yellow("Calibration curve fitting failed for {length(features_failed_calib)} features. Calculated concentrations of affected features will be `NA`."))
} else {
cli::cli_abort(cli::col_red("Calibration curve fitting failed for {length(features_failed_calib)} features. Please inspect calibration curve details, e.g. by plotting using `plot_calibrationcurves()`."))
Expand Down Expand Up @@ -119,7 +119,7 @@ quantify_by_calibration <- function(data = NULL,
#' @param overwrite_metadata A logical value (`TRUE` or `FALSE`). If `TRUE`, the function will ignore any fit method and weighting settings defined in the metadata and use the provided `fit_method` and `fit_weighting` values for all analytes.
#' @param fit_method A character string specifying the default regression fit method to use for the calibration curve. Must be one of `"linear"` or `"quadratic"`. This method will be applied if no specific fit method is defined for a feature in the metadata.
#' @param fit_weighting A character string specifying the default weighting method for the regression points in the calibration curve. Must be one of `"none"`, `"1/x"`, or `"1/x^2"`. This method will be applied if no specific weighting method is defined for a feature in the metadata.
#' @param error_missing_annotation If `TRUE`, an error will be raised if any of the following information is missing: calibration curve data, ISTD mix volume, and sample amounts for any feature.
#' @param fail_missing_annotation If `TRUE`, an error will be raised if any of the following information is missing: calibration curve data, ISTD mix volume, and sample amounts for any feature.
#' @param include_fit_object If `TRUE`, the function will return the full regression fit objects for each feature in the `metrics_calibration` table.
#'
#' @return A modified `MidarExperiment` object with an updated `metrics_calibration` table containing the calibration curve results, including concentrations, LoD, and LoQ values for each feature.
Expand All @@ -128,12 +128,12 @@ quantify_by_calibration <- function(data = NULL,
#'
#' @export

#TODO: implement error handling error_missing_annotation
#TODO: implement error handling fail_missing_annotation
calc_calibration_results <- function(data = NULL,
overwrite_metadata = FALSE,
fit_method = c("linear", "quadratic"),
fit_weighting = c("none", "1/x", "1/x^2"),
error_missing_annotation = TRUE,
fail_missing_annotation = TRUE,
include_fit_object = FALSE
) {

Expand Down
53 changes: 29 additions & 24 deletions R/calc-istd-normalization.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@
#'

#' @param data A `MidarExperiment` object.
#' @param error_missing_annotation If `TRUE`, the function
#' @param fail_missing_annotation If `TRUE`, the function
#' will raise an error when an ISTD is not defined for one or more features (excluding the ISTDs themselves).
#' If `FALSE`, features with missing ISTD annotations will have NA values in the normalized intensities.
#'
#' @return A `MidarExperiment` object with normalized feature intensities
#'
#' @export

normalize_by_istd <- function(data = NULL, error_missing_annotation = TRUE) {
normalize_by_istd <- function(data = NULL, fail_missing_annotation = TRUE) {

check_data(data)

Expand All @@ -32,16 +32,15 @@ normalize_by_istd <- function(data = NULL, error_missing_annotation = TRUE) {

# Check if all ISTDs are defined as distinct feature in the feature metadata
d_annot <- data@annot_features |> select("feature_id", "istd_feature_id")
all_istds <- unique(d_annot$istd_feature_id)

all_istds <- unique(na.omit(d_annot$istd_feature_id))

if(!all(is.na(all_istds))){
istd_not_defined <- setdiff(all_istds, d_annot$feature_id)
if (length(istd_not_defined) > 0) {
cli::cli_abort(cli::col_red("All ISTDs must be defined as feature in the feature metadata, {nrow(istd_not_defined)} ISTD(s) were not. Please check metadata."))
cli::cli_abort(cli::col_red("{length(istd_not_defined)} ISTD(s) were not defined as individual feature(s). Please check feature metadata."))
}
} else {
cli::cli_abort(cli::col_red("No ISTDs defined in metadata. Please add feature metadata with ISTDs defined."))
cli::cli_abort(cli::col_red("No ISTDs defined in feature metadata. Please define ISTDs for each feature in feature metadata."))
}

# check if ISTDs are defined for all features (except ISTDs that are not defined for themselves)
Expand All @@ -50,10 +49,10 @@ normalize_by_istd <- function(data = NULL, error_missing_annotation = TRUE) {
dplyr::semi_join(data@dataset, by = c("feature_id"))

if (nrow(features_no_istd) > 0) {
if(!error_missing_annotation)
cli::cli_alert_warning(cli::col_yellow("No ISTD was defined for {nrow(features_no_istd)} features, normalized intensities will be `NA` for these. "))
if(!fail_missing_annotation)
cli::cli_alert_warning(cli::col_yellow("For {nrow(features_no_istd)} feature(s) no ISTD was defined, normalized intensities will be `NA` for these features. "))
else
cli::cli_abort(cli::col_red("No ISTD was defined for {nrow(features_no_istd)} features. Please ammend feature metadata or set `error_missing_annotation = FALSE`."))
cli::cli_abort(cli::col_red("For {nrow(features_no_istd)} feature(s) no ISTD was defined. Please ammend feature metadata or set `fail_missing_annotation = FALSE`."))
}

# Add ISTD intensities to temporary dataset
Expand All @@ -63,15 +62,21 @@ normalize_by_istd <- function(data = NULL, error_missing_annotation = TRUE) {
# Normalize intensities
d_temp <- d_temp |>
dplyr::group_by(.data$istd_feature_id, .data$analysis_id) |>
dplyr::mutate(feature_norm_intensity = .data$feature_intensity / .data$feature_intensity[.data$is_istd]) |>
dplyr::mutate(
feature_norm_intensity = ifelse(
!is.na(.data$istd_feature_id),
.data$feature_intensity / .data$feature_intensity[.data$is_istd],
NA_real_
)
) |>
dplyr::ungroup()

# Add normalized intensities to dataset table
data@dataset <- data@dataset |>
dplyr::inner_join(d_temp |> dplyr::select("analysis_id", "feature_id", "feature_norm_intensity"), by = c("analysis_id", "feature_id"))

# Print summary
n_features <- length(unique(d_temp$feature_id))
n_features <- length(unique(d_temp$feature_id)) - nrow(features_no_istd)

istds <- data@annot_features |> filter(.data$is_istd) |> pull(.data$feature_id)
n_used_istds <- length(intersect(istds, data@annot_features |> filter(!.data$is_istd) |> pull(.data$quant_istd_feature_id) |> unique()))
Expand All @@ -82,8 +87,8 @@ normalize_by_istd <- function(data = NULL, error_missing_annotation = TRUE) {

# Update status
data@status_processing <- "ISTD-normalized ata"
data <- change_is_normalized(data, TRUE)
data <- change_is_quantitated(data, FALSE)
data <- update_after_normalization(data, TRUE)
data <- update_after_quantitation(data, FALSE)
data@is_filtered <- FALSE
data@metrics_qc <- data@metrics_qc[FALSE,]
data
Expand All @@ -101,7 +106,7 @@ normalize_by_istd <- function(data = NULL, error_missing_annotation = TRUE) {
#' named `feature_conc`.
#'
#' @param data A `MidarExperiment` object
#' @param error_missing_annotation If `TRUE`, an error will be raised if any of the following information is missing: ISTD concentration, ISTD mix volume, and sample amounts for any feature.
#' @param fail_missing_annotation If `TRUE`, an error will be raised if any of the following information is missing: ISTD concentration, ISTD mix volume, and sample amounts for any feature.
#' If `FALSE`, missing annotations will be ignored, and resulting feature concentration will be `NA`
#'
#' @return A `MidarExperiment` object with the calculated analyte concentrations added to the
Expand All @@ -111,7 +116,7 @@ normalize_by_istd <- function(data = NULL, error_missing_annotation = TRUE) {
#'
#' @export

quantify_by_istd <- function(data = NULL, error_missing_annotation = TRUE) {
quantify_by_istd <- function(data = NULL, fail_missing_annotation = TRUE) {

check_data(data)

Expand All @@ -121,14 +126,14 @@ quantify_by_istd <- function(data = NULL, error_missing_annotation = TRUE) {

# Check if sample and ISTD amounts are defined for all analyses
samples_no_amounts <- data@annot_analyses |>
filter(.data$valid_analysis, is.na(.data$sample_amount), is.na(.data$istd_volume)) |>
filter(.data$valid_analysis, is.na(.data$sample_amount) | is.na(.data$istd_volume)) |>
dplyr::semi_join(data@dataset, by = c("analysis_id"))

if (nrow(samples_no_amounts) > 0) {
if(!error_missing_annotation)
cli::cli_alert_warning(cli::col_yellow("Sample and/or ISTD solution amount(s) for {length(samples_no_amounts)} analyses missing, concentrations of all features for these analyses will be `NA`"))
if(!fail_missing_annotation)
cli::cli_alert_warning(cli::col_yellow("Sample and/or ISTD solution amount(s) for {nrow(samples_no_amounts)} analyses missing, concentrations of all features for these analyses will be `NA`"))
else
cli::cli_abort(cli::col_red("Sample and/or ISTD amount(s) for {nrow(samples_no_amounts)} analyses missing. Please ammend analysis metadata or set `error_missing_annotation = FALSE`."))
cli::cli_abort(cli::col_red("Sample and/or ISTD amount(s) for {nrow(samples_no_amounts)} analyses missing. Please ammend analysis metadata or set `fail_missing_annotation = FALSE`."))
}

# Add ISTD concentrations and sample amouts to temporary dataset
Expand All @@ -143,10 +148,10 @@ quantify_by_istd <- function(data = NULL, error_missing_annotation = TRUE) {

# Check if ISTD concentrations in spiked-in mix are defined for all ISTDs
if (length(istd_no_conc) > 0){
if(!error_missing_annotation) {
if(!fail_missing_annotation) {
cli::cli_alert_warning(cli::col_yellow("Spiked-in concentrations of {length(istd_no_conc)} ISTD(s) missing, calculated concentrations of affected features will be `NA`."))
} else {
cli::cli_abort(cli::col_red("Concentrations of {length(istd_no_conc)} ISTD(s) missing. Please ammend ISTD metadata or set `error_missing_annotation = FALSE`."))
cli::cli_abort(cli::col_red("Concentrations of {length(istd_no_conc)} ISTD(s) missing. Please ammend ISTD metadata or set `fail_missing_annotation = FALSE`."))
}
}

Expand All @@ -171,13 +176,13 @@ quantify_by_istd <- function(data = NULL, error_missing_annotation = TRUE) {

conc_unit <- get_conc_unit(data@annot_analyses$sample_amount_unit)

cli_alert_success(cli::col_green("{n_features_with_conc} feature concentrations calculated based on {n_istd} ISTDs and sample amounts of {get_analysis_count(data)} analyses."))
cli_alert_success(cli::col_green("{n_features_with_conc} feature concentrations calculated based on {n_istd} ISTDs and sample amounts of {get_analysis_count(data) - nrow(samples_no_amounts)} analyses."))
cli::cli_alert_info("Concentrations are given in {conc_unit}.")

data@status_processing <- "ISTD-quantitated data"

data <- change_is_normalized(data, TRUE)
data <- change_is_quantitated(data, TRUE)
data <- update_after_normalization(data, TRUE)
data <- update_after_quantitation(data, TRUE)
data@is_filtered <- FALSE
data@metrics_qc <- data@metrics_qc[FALSE,]

Expand Down
8 changes: 4 additions & 4 deletions R/correct-drift-batch.R
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ corr_drift_fun <- function(data = NULL,
last = " and ", width = 160)


nfeat <- get_feature_count(data, isistd = FALSE)
nfeat <- get_feature_count(data, is_istd = FALSE)

if(conditional_correction & within_batch)
count_feature_text <- glue::glue("of at least one batch for {features_corrected}
Expand Down Expand Up @@ -454,9 +454,9 @@ corr_drift_fun <- function(data = NULL,

# Invalidate downstream processed data
if(variable == "feature_intensity"){
data <- change_is_normalized(data, FALSE)
data <- update_after_normalization(data, FALSE)
} else if(variable == "feature_norm_intensity") {
data <- change_is_quantitated(data, FALSE)
data <- update_after_quantitation(data, FALSE)
}

data@status_processing <- "Drift-corrected concentrations"
Expand Down Expand Up @@ -746,7 +746,7 @@ correct_batch_centering <- function(data = NULL,
cv_diff_text = format(round(.data$cv_diff_median, 1), nsmall = 1)
)

nfeat <- get_feature_count(data, isistd = FALSE)
nfeat <- get_feature_count(data, is_istd = FALSE)

# Print summary
if (data@var_drift_corrected[[variable]]) {
Expand Down
2 changes: 1 addition & 1 deletion R/correct-isotope.R
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ correct_interferences <- function(data = NULL, variable = "feature_intensity") {

data@is_isotope_corr <- TRUE
data@status_processing <- "Isotope-corrected raw data"
data <- change_is_normalized(data, FALSE)
data <- update_after_normalization(data, FALSE)
data@var_drift_corrected <- c(feature_intensity = FALSE, feature_norm_intensity = FALSE, feature_conc = FALSE)
data@var_drift_corrected <- c(feature_intensity = FALSE, feature_norm_intensity = FALSE, feature_conc = FALSE)
data@is_filtered <- FALSE
Expand Down
31 changes: 11 additions & 20 deletions R/data-import.R
Original file line number Diff line number Diff line change
Expand Up @@ -151,30 +151,31 @@ import_data_main <- function(data = NULL, path, import_function, file_ext, silen

# VERIFY DATA, i.e. analysis_ids, feature_ids, and values are replicated ===
## which can be result of multiple imports of the same/overlapping data or due to parsing error

if (nrow(d_raw) > nrow(d_raw |> distinct(.data$analysis_id, .data$feature_id, .keep_all = FALSE))) {
n_idpairs_distinct <- d_raw |> select("analysis_id", "feature_id") |> distinct(.keep_all = FALSE) |> nrow()
if (nrow(d_raw) > n_idpairs_distinct) {
has_duplicated_id <- TRUE
if (nrow(d_raw) > nrow(d_raw |> distinct(.data$analysis_id, .data$feature_id, .keep_all = TRUE))) {
has_duplicated_id_values <- TRUE

n_idvalpairs_distinct <- d_raw |> select("analysis_id", "feature_id", any_of(c("feature_area", "feature_rt", "feature_intensity", "feature_height", "feature_conc", "feauture_norm_intensity"))) |> distinct(.keep_all = FALSE) |> nrow()

if (n_idvalpairs_distinct == n_idpairs_distinct ) {
has_duplicated_values <- TRUE
} else {
has_duplicated_id_values <- FALSE
has_duplicated_values <- FALSE
}
} else {
has_duplicated_id <- FALSE
}

if (has_duplicated_id) {
if (has_duplicated_id_values) {
cli::cli_abort(glue::glue("Imported data contains replicated reportings (analysis and feature pairs) with **identical** intensity values. Please check imported dataset(s)."))
if (has_duplicated_values) {
cli::cli_abort(glue::glue("Imported data contains duplicated reportings (analysis and feature pairs) with {cli::style_italic('identical')} feature variable values. Please check imported dataset(s)."))
} else {
cli::cli_abort(glue::glue("Imported data contains replicated reportings (analysis and feature pairs) with **different intensity values. Please check imported dataset(s)."))
cli::cli_abort(glue::glue("Imported data contains duplicated reportings (analysis and feature pairs) with {cli::style_italic('different')} feature variable values. Please check imported dataset(s)."))
}
}

data@dataset_orig <- dplyr::bind_rows(pkg.env$table_templates$dataset_orig_template, d_raw)



# TODO: excl_unmatched_analyses below

check_integrity(data, excl_unmatched_analyses = FALSE)
Expand Down Expand Up @@ -752,16 +753,6 @@ read_data_table <- function(path, value_type = c("area", "height", "intensity",
}


#' @title internal method to read csv files
#' @param path csv file name
#' @return tibble table
#' @noRd
# TODO remove this test function
.test_mult <- function(a, b) {
nms <- c(1, 2, 3)
n_col <- length(nms)
rep("numeric", n_col)
}



Expand Down
Loading

0 comments on commit 0661814

Please sign in to comment.