Skip to content

Commit

Permalink
Merge pull request #277 from workflow4metabolomics/CAMERA_SUITE
Browse files Browse the repository at this point in the history
Add camera new tools
  • Loading branch information
yguitton authored Nov 24, 2024
2 parents f858399 + 3924d96 commit 24d44ee
Show file tree
Hide file tree
Showing 23 changed files with 3,598 additions and 20 deletions.
49 changes: 45 additions & 4 deletions tools/camera/.shed.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ definitions:

name: camera
owner: workflow4metabolomics
remote_repository_url:
homepage_url:
remote_repository_url:
homepage_url:
categories:
- Metabolomics

Expand Down Expand Up @@ -41,15 +41,56 @@ repositories:
owner: workflow4metabolomics
include:
[*default-includes, abims_CAMERA_combinexsAnnos.xml, CAMERA_combinexsAnnos.r]
camera_groupfwhm:
description: '[Metabolomics][W4M][LC-MS] CAMERA R Package - Peak Grouping by Full Width at Half Maximum (FWHM)'
long_description: |
Group peaks within a defined retention time window using Full Width at Half Maximum (FWHM).
Facilitates the detection of chromatographic features across different samples for LC-MS data.
Part of the W4M project: http://workflow4metabolomics.org
CAMERA: http://bioconductor.org/packages/release/bioc/html/CAMERA.html
owner: workflow4metabolomics
include:
[*default-includes, groupFWHM.xml, CAMERA_groupFWHM.R]
camera_groupcorr:
description: '[Metabolomics][W4M][LC-MS] CAMERA R Package - Peak Grouping by Correlation'
long_description: |
Group peaks based on their retention time and intensity correlations across samples,
identifying peaks likely to originate from the same compound. This helps enhance feature annotation
and identification in LC-MS data.
Part of the W4M project: http://workflow4metabolomics.org
CAMERA: http://bioconductor.org/packages/release/bioc/html/CAMERA.html
owner: workflow4metabolomics
include:
[*default-includes, groupCorr.xml, CAMERA_groupCorr.R]
camera_findisotopes:
description: '[Metabolomics][W4M][LC-MS] CAMERA R Package - Isotope Detection'
long_description: |
Detect isotope patterns in LC-MS peak lists based on mass differences and expected isotope ratios,
annotating isotopes for use in downstream metabolomic analysis.
Part of the W4M project: http://workflow4metabolomics.org
CAMERA: http://bioconductor.org/packages/release/bioc/html/CAMERA.html
owner: workflow4metabolomics
include:
[*default-includes, findIsotopes.xml, CAMERA_findIsotopes.R]
camera_findadducts:
description: '[Metabolomics][W4M][LC-MS] CAMERA R Package - Adduct Detection'
long_description: |
Identify possible adducts in mass spectrometry data by detecting characteristic mass shifts between peaks,
facilitating the determination of molecular ions and their adducts for more accurate metabolite identification.
Part of the W4M project: http://workflow4metabolomics.org
CAMERA: http://bioconductor.org/packages/release/bioc/html/CAMERA.html
owner: workflow4metabolomics
include:
[*default-includes, findAdducts.xml, CAMERA_findAdducts.R]

suite:
name: suite_camera
owner: lecorguille
owner: workflow4metabolomics
homepage_url: http://workflow4metabolomics.org
description: '[Metabolomics][W4M][LC-MS] CAMERA R Package - Annotation - Collection of annotation related methods for mass spectrometry data'
long_description: |
CAMERA: Annotation of peaklists generated by xcms, rule based annotation of
isotopes and adducts, EIC correlation based tagging of unknown adducts and
fragments
Part of the W4M project: http://workflow4metabolomics.org
CAMERA: http://bioconductor.org/packages/release/bioc/html/CAMERA.html
CAMERA: http://bioconductor.org/packages/release/bioc/html/CAMERA.html
173 changes: 173 additions & 0 deletions tools/camera/CAMERA_findAdducts.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
#!/usr/bin/env Rscript

# ----- PACKAGE -----
cat("\tSESSION INFO\n")

# Import the different functions
source_local <- function(fname) {
argv <- commandArgs(trailingOnly = FALSE)
base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
source(paste(base_dir, fname, sep = "/"))
}
source_local("lib.r")

pkgs <- c("CAMERA", "xcms", "multtest", "batch")
loadAndDisplayPackages(pkgs)
cat("\n\n")
# ----- ARGUMENTS -----
cat("\tARGUMENTS INFO\n")

args <- parseCommandArgs(evaluate = FALSE) # interpretation of arguments given in command line as an R list of objects
write.table(as.matrix(args), col.names = FALSE, quote = FALSE, sep = "\t")

cat("\n\n")

print("Arguments retrieved from the command line:")
print(args)

# Function to convert "NULL" strings to actual NULL values
convertNullString <- function(x) {
if (x == "NULL") {
return(NULL)
}
return(x)
}

# Function to convert string to numeric lists
convert_psg_list <- function(x) {
# Check if x is NULL or has zero length before further processing
if (is.null(x) || length(x) == 0) {
return(NULL)
}

# Force conversion to character
x <- as.character(x)

if (grepl("^[0-9]+$", x)) {
# If the string represents a single numeric value
return(as.numeric(x))
} else {
# Convert string representation of a list to a numeric vector
# Use a regular expression to split by common separators
return(as.numeric(unlist(strsplit(x, "[,;\\s]+"))))
}
}

for (arg in names(args)) {
args[[arg]] <- convertNullString(args[[arg]])
}

# Convert only the 'psg_list' element in args
args$psg_list <- convert_psg_list(args$psg_list)

print("Argument types:")
print(sapply(args, class))

# Check if the image file exists
if (!file.exists(args$image)) {
stop("The RData file does not exist: ", args$image)
}

# ----- PROCESSING INFILE -----

# Load the RData file
load(args$image)
args$image <- NULL

# Save arguments for report generation
if (!exists("listOFlistArguments")) listOFlistArguments <- list()
listOFlistArguments[[format(Sys.time(), "%y%m%d-%H:%M:%S_findAdducts")]] <- args

# We unzip automatically the chromatograms from the zip files.
if (!exists("zipfile")) zipfile <- NULL
if (!exists("singlefile")) singlefile <- NULL
rawFilePath <- getRawfilePathFromArguments(singlefile, zipfile, args)
zipfile <- rawFilePath$zipfile
singlefile <- rawFilePath$singlefile
args <- rawFilePath$args

print(paste("singlefile :", singlefile))
if (!is.null(singlefile)) {
directory <- retrieveRawfileInTheWorkingDir(singlefile, zipfile)
}

# Check if the 'rules' argument in 'args' is NULL
if (is.null(args$rules)) {
# If 'args$rules' is NULL, set 'rulset' to NULL
args$rulset <- NULL
} else {
# Try to read the rules file with different delimiters
delimiters <- c(";", "\t", ",") # List of possible delimiters
success <- FALSE # Flag to check if reading was successful

for (sep in delimiters) {
# Attempt to read the rules file with the current separator
args$rulset <- read.table(args$rules, header = TRUE, sep = sep)

# Check if the number of columns is at least 4
if (ncol(args$rulset) >= 4) {
success <- TRUE # Mark success if the format is correct
break # Exit the loop if the file was read successfully
}
}

# If reading the rules file failed for all delimiters
if (!success) {
# Display an error message if the file is not well formatted
error_message <- "The rules file appears to be improperly formatted. Accepted column separators are ;, tab, and ,."
print(error_message)
stop(error_message) # Stop execution with an error
}
}

# Verify that the object xa is loaded
if (!exists("xa")) {
stop("The object xa was not found in the RData file.")
}

print("Loaded xa object:")
print(xa)

# Apply the findAdducts function on the xsAnnotate object
print("Calling findAdducts function:")
xa <- findAdducts(xa, ppm = args$ppm, mzabs = args$mzabs, multiplier = args$multiplier, polarity = args$polarity, rules = args$rulset, max_peaks = args$max_peaks, psg_list = args$psg_list, intval = args$intval)

print("Result of findAdducts function:")
print(xa)

# Extract the list of annotated peaks
peakList <- getPeaklist(xa, intval = args$intval)

if (length(phenoData@data$sample_name) == 1) {
peakList$name <- make.unique(paste0("M", round(peakList[, "mz"], 0), "T", round(peakList[, "rt"], 0)), "_")
variableMetadata <- peakList[, c("name", setdiff(names(peakList), "name"))]
variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT = args$numDigitsRT, numDigitsMZ = args$numDigitsMZ)
} else {
names_default <- groupnames(xa@xcmsSet, mzdec = 0, rtdec = 0) # Names without decimals
names_custom <- groupnames(xa@xcmsSet, mzdec = args$numDigitsMZ, rtdec = args$numDigitsRT) # Names with "x" decimals

variableMetadata <- data.frame(
name = names_default,
name_custom = names_custom,
stringsAsFactors = FALSE
)
variableMetadata <- cbind(variableMetadata, peakList[, !(make.names(colnames(peakList)) %in% c(make.names(sampnames(xa@xcmsSet))))])
}

if (!exists("RTinMinute")) RTinMinute <- FALSE

if (args$convertRTMinute && RTinMinute == FALSE) {
RTinMinute <- TRUE
variableMetadata <- RTSecondToMinute(variableMetadata = variableMetadata, convertRTMinute = args$convertRTMinute)
}

# Save the extracted peak list as a TSV file named 'variableMetadata.tsv'
output_file_tsv <- "variableMetadata.tsv"
write.table(variableMetadata, file = output_file_tsv, sep = "\t", row.names = FALSE, quote = FALSE)

# Save the updated xsAnnotate object
output_file_RData <- "camera_findAdducts.RData"
objects2save <- c("xa", "variableMetadata", "listOFlistArguments", "zipfile", "singlefile", "RTinMinute", "phenoData")
save(list = objects2save[objects2save %in% ls()], file = output_file_RData)

cat("Output files generated:", output_file_tsv, "and", output_file_RData, "\n")
138 changes: 138 additions & 0 deletions tools/camera/CAMERA_findIsotopes.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
#!/usr/bin/env Rscript

# ----- PACKAGE -----
cat("\tSESSION INFO\n")

# Import the different functions
source_local <- function(fname) {
argv <- commandArgs(trailingOnly = FALSE)
base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
source(paste(base_dir, fname, sep = "/"))
}
source_local("lib.r")

pkgs <- c("CAMERA", "xcms", "multtest", "batch")
loadAndDisplayPackages(pkgs)
cat("\n\n")
# ----- ARGUMENTS -----
cat("\tARGUMENTS INFO\n")

args <- parseCommandArgs(evaluate = FALSE) # interpretation of arguments given in command line as an R list of objects
write.table(as.matrix(args), col.names = FALSE, quote = FALSE, sep = "\t")

cat("\n\n")

print("Arguments retrieved from the command line:")
print(args)

print("Argument types:")
print(sapply(args, class))

# Check if the image file exists
if (!file.exists(args$image)) {
stop("The RData file does not exist: ", args$image)
}

# ----- PROCESSING INFILE -----

# Load the RData file
load(args$image)
args$image <- NULL

# Save arguments to generate a report
if (!exists("listOFlistArguments")) listOFlistArguments <- list()
listOFlistArguments[[format(Sys.time(), "%y%m%d-%H:%M:%S_findIsotopes")]] <- args

# We unzip automatically the chromatograms from the zip files.
if (!exists("zipfile")) zipfile <- NULL
if (!exists("singlefile")) singlefile <- NULL
rawFilePath <- getRawfilePathFromArguments(singlefile, zipfile, args)
zipfile <- rawFilePath$zipfile
singlefile <- rawFilePath$singlefile
args <- rawFilePath$args

print(paste("singlefile :", singlefile))
if (!is.null(singlefile)) {
directory <- retrieveRawfileInTheWorkingDir(singlefile, zipfile)
}

# Verify if the xa object is loaded
if (!exists("xa")) {
stop("The xa object was not found in the RData file.")
}

print("xa object loaded:")
print(xa)

print("calcIsotopeMatrix")
calcIsotopeMatrix <- function(maxiso = 4) {
if (!is.numeric(maxiso)) {
stop("Parameter maxiso is not numeric!\n")
} else if (maxiso < 1 || maxiso > 8) {
stop(paste(
"Parameter maxiso must be between 1 and 8. ",
"Otherwise, use your own IsotopeMatrix.\n"
), sep = "")
}

isotopeMatrix <- matrix(NA, 8, 4)
colnames(isotopeMatrix) <- c("mzmin", "mzmax", "intmin", "intmax")

isotopeMatrix[1, ] <- c(1.000, 1.0040, 1.0, 150)
isotopeMatrix[2, ] <- c(0.997, 1.0040, 0.01, 200)
isotopeMatrix[3, ] <- c(1.000, 1.0040, 0.001, 200)
isotopeMatrix[4, ] <- c(1.000, 1.0040, 0.0001, 200)
isotopeMatrix[5, ] <- c(1.000, 1.0040, 0.00001, 200)
isotopeMatrix[6, ] <- c(1.000, 1.0040, 0.000001, 200)
isotopeMatrix[7, ] <- c(1.000, 1.0040, 0.0000001, 200)
isotopeMatrix[8, ] <- c(1.000, 1.0040, 0.00000001, 200)

return(isotopeMatrix[1:maxiso, , drop = FALSE])
}

isotopeMatrix <- calcIsotopeMatrix(args$maxiso)

# Apply the findIsotopes function on the xsAnnotate object
print("Calling findIsotopes function:")
xa <- findIsotopes(xa, maxcharge = args$maxcharge, maxiso = args$maxiso, ppm = args$ppm, mzabs = args$mzabs, intval = args$intval, minfrac = args$minfrac, isotopeMatrix = isotopeMatrix, filter = args$filter)

print("Result of the findIsotopes function:")
print(xa)

# Extract the list of annotated peaks
peakList <- getPeaklist(xa, intval = args$intval)

if (length(phenoData@data$sample_name) == 1) {
peakList$name <- make.unique(paste0("M", round(peakList[, "mz"], 0), "T", round(peakList[, "rt"], 0)), "_")
variableMetadata <- peakList[, c("name", setdiff(names(peakList), "name"))]
variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT = args$numDigitsRT, numDigitsMZ = args$numDigitsMZ)
} else {
names_default <- groupnames(xa@xcmsSet, mzdec = 0, rtdec = 0) # Names without decimals
names_custom <- groupnames(xa@xcmsSet, mzdec = args$numDigitsMZ, rtdec = args$numDigitsRT) # Names with "x" decimals

variableMetadata <- data.frame(
name = names_default,
name_custom = names_custom,
stringsAsFactors = FALSE
)
variableMetadata <- cbind(variableMetadata, peakList[, !(make.names(colnames(peakList)) %in% c(make.names(sampnames(xa@xcmsSet))))])
}

if (!exists("RTinMinute")) RTinMinute <- FALSE

if (args$convertRTMinute && RTinMinute == FALSE) {
RTinMinute <- TRUE
variableMetadata <- RTSecondToMinute(variableMetadata = variableMetadata, convertRTMinute = args$convertRTMinute)
}

# Saves the extracted peak list as a TSV file named 'variableMetadata.tsv'
output_file_tsv <- "variableMetadata.tsv"
write.table(variableMetadata, file = output_file_tsv, sep = "\t", row.names = FALSE, quote = FALSE)

# Save the updated xsAnnotate object
output_file_RData <- "camera_findIsotopes.RData"

objects2save <- c("xa", "variableMetadata", "listOFlistArguments", "zipfile", "singlefile", "RTinMinute", "phenoData")
save(list = objects2save[objects2save %in% ls()], file = output_file_RData)

cat("Output files generated:", output_file_tsv, "and", output_file_RData, "\n")
Loading

0 comments on commit 24d44ee

Please sign in to comment.