-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Imported QFeatures Reading from Norman
- Loading branch information
Showing
9 changed files
with
449 additions
and
21 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,11 @@ | ||
Package: MetFamily | ||
Type: Package | ||
Title: MetFamily: Discovering Regulated Metabolite Families in Untargeted Metabolomics Studies | ||
Version: 0.99.3 | ||
Date: 2024-03-02 | ||
Version: 0.99.4 | ||
Date: 2024-08-22 | ||
Author: c( person("Hendrik", "Treutler, role = c("aut"), email = "[email protected]"), | ||
person("Khabat", "Vahabi, role = c("aut"), email = "[email protected]"), | ||
person("Norman", "Storz", role = c("aut"), email = "[email protected]"), | ||
person(given = "Steffen", family = "Neumann", email = "[email protected]", | ||
role = c("aut", "cre"), comment = c(ORCID = "0000-0002-7899-7192")) ) | ||
Depends: | ||
|
@@ -37,6 +38,7 @@ Imports: | |
graphics, | ||
grDevices, | ||
methods, | ||
QFeatures, | ||
stats, | ||
utils | ||
Remotes: decisionpatterns/searchable | ||
|
@@ -63,5 +65,8 @@ Collate: | |
'TreeAlgorithms.R' | ||
'Plots.R' | ||
'R_packages.R' | ||
parsePeakAbundanceMatrixQF.R | ||
readMSDial.R | ||
readMetaboScape.R | ||
RoxygenNote: 7.2.3 | ||
Encoding: UTF-8 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,175 @@ | ||
#' Title | ||
#' | ||
#' @param qfeatures Object of type QFeatures | ||
#' @param doPrecursorDeisotoping | ||
#' @param mzDeviationInPPM_precursorDeisotoping | ||
#' @param mzDeviationAbsolute_precursorDeisotoping | ||
#' @param maximumRtDifference | ||
#' @param progress boolean whether or not to show a progress bar | ||
#' | ||
#' @return | ||
#' @export | ||
#' | ||
#' @examples | ||
parsePeakAbundanceMatrixQF <- function(qfeatures, | ||
doPrecursorDeisotoping, | ||
mzDeviationInPPM_precursorDeisotoping, | ||
mzDeviationAbsolute_precursorDeisotoping, | ||
maximumRtDifference, | ||
progress=FALSE) | ||
{ | ||
## read file | ||
|
||
if(!is.na(progress)) { | ||
if(progress) { | ||
incProgress(amount = 0.1, detail = paste("Parsing MS1 file content...", sep = "")) | ||
} else { | ||
print(paste("Parsing MS1 file content...", sep = "")) | ||
} | ||
} | ||
|
||
cols_to_exclude <- c("Reference RT","Reference m/z","Comment", | ||
"Manually modified for quantification", | ||
"Total score","RT similarity","Average","Stdev") | ||
|
||
cols_to_keep <- which(!colnames(rowData(qfeatures))[[1]] %in% cols_to_exclude) | ||
|
||
dataFrame <- cbind(rowData(qfeatures)[[1]][,cols_to_keep] ,assay(qfeatures)) | ||
|
||
ncol(rowData(qfeatures)[[1]]) | ||
numberOfPrecursors <- nrow(dataFrame) | ||
numberOfPrecursorsPrior <- numberOfPrecursors | ||
|
||
|
||
|
||
if(nrow(colData(qfeatures))>0){ | ||
|
||
dataColumnStartEndIndeces <- 1 | ||
numberOfDataColumns <- nrow(colData(qfeatures)) | ||
sampleClass <- colData(qfeatures)$Class | ||
sampleType <- colData(qfeatures)$Type | ||
sampleInjectionOrder <- colData(qfeatures)$"Injection order" | ||
batchID <- NULL | ||
if(! is.null(colData(qfeatures)$BatchID)) | ||
batchID <- colData(qfeatures)$BatchID | ||
|
||
} else { | ||
dataColumnStartEndIndeces <- NULL | ||
numberOfDataColumns <- 0 | ||
sampleClass <- NULL | ||
sampleType <- NULL | ||
sampleInjectionOrder <- NULL | ||
batchID <- NULL | ||
} | ||
|
||
commaNumbers <- sum(grepl(x = dataFrame$"Average Mz", pattern = "^(\\d+,\\d+$)|(^\\d+$)")) | ||
decimalSeparatorIsComma <- commaNumbers == nrow(dataFrame) | ||
if(decimalSeparatorIsComma){ | ||
if(!is.null(dataFrame$"Average Rt(min)")) dataFrame$"Average Rt(min)" <- gsub(x = gsub(x = dataFrame$"Average Rt(min)", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") | ||
if(!is.null(dataFrame$"Average Mz")) dataFrame$"Average Mz" <- gsub(x = gsub(x = dataFrame$"Average Mz", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") | ||
if(!is.null(dataFrame$"Fill %")) dataFrame$"Fill %" <- gsub(x = gsub(x = dataFrame$"Fill %", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") | ||
if(!is.null(dataFrame$"Dot product")) dataFrame$"Dot product" <- gsub(x = gsub(x = dataFrame$"Dot product", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") | ||
if(!is.null(dataFrame$"Reverse dot product")) dataFrame$"Reverse dot product" <- gsub(x = gsub(x = dataFrame$"Reverse dot product", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") | ||
if(!is.null(dataFrame$"Fragment presence %")) dataFrame$"Fragment presence %" <- gsub(x = gsub(x = dataFrame$"Fragment presence %", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") | ||
|
||
## replace -1 by 0 | ||
if(numberOfDataColumns > 0) { | ||
for(colIdx in dataColumnStartEndIndeces[[1]]:dataColumnStartEndIndeces[[2]]){ | ||
dataFrame[ , colIdx] <- gsub(x = gsub(x = dataFrame[ , colIdx], pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") | ||
} | ||
} | ||
} | ||
|
||
################### | ||
## column formats | ||
if(!is.null(dataFrame$"Average Rt(min)")) dataFrame$"Average Rt(min)" <- as.numeric(dataFrame$"Average Rt(min)") | ||
if(!is.null(dataFrame$"Average Mz")) dataFrame$"Average Mz" <- as.numeric(dataFrame$"Average Mz") | ||
if(!is.null(dataFrame$"Fill %")) dataFrame$"Fill %" <- as.numeric(dataFrame$"Fill %") | ||
if(!is.null(dataFrame$"MS/MS included")) dataFrame$"MS/MS included" <- as.logical(dataFrame$"MS/MS included") | ||
if(!is.null(dataFrame$"Dot product")) dataFrame$"Dot product" <- as.numeric(dataFrame$"Dot product") | ||
if(!is.null(dataFrame$"Reverse dot product")) dataFrame$"Reverse dot product" <- as.numeric(dataFrame$"Reverse dot product") | ||
if(!is.null(dataFrame$"Fragment presence %")) dataFrame$"Fragment presence %" <- as.numeric(dataFrame$"Fragment presence %") | ||
|
||
##################### | ||
## sorted by m/z (needed for deisotoping) | ||
if(!is.null(dataFrame$"Average Mz")) | ||
dataFrame <- dataFrame[order(dataFrame$"Average Mz"), ] | ||
|
||
## replace -1 by 0 | ||
if(numberOfDataColumns > 0){ | ||
for(colIdx in dataColumnStartEndIndeces[[1]]:dataColumnStartEndIndeces[[2]]){ | ||
dataFrame[ , colIdx] <- as.numeric(dataFrame[ , colIdx]) | ||
if(!is.na(sum(dataFrame[,colIdx] == -1))) | ||
dataFrame[(dataFrame[,colIdx] == -1),colIdx] <- 0 | ||
} | ||
} | ||
|
||
## deisotoping | ||
numberOfRemovedIsotopePeaks <- 0 | ||
if(doPrecursorDeisotoping & !is.null(dataFrame$"Average Mz")){ | ||
if(!is.na(progress)) if(progress) incProgress(amount = 0, detail = paste("Precursor deisotoping...", sep = "")) else print(paste("Precursor deisotoping...", sep = "")) | ||
distance13Cminus12C <- 1.0033548378 | ||
|
||
## mark isotope precursors | ||
precursorsToRemove <- vector(mode = "logical", length = numberOfPrecursors) | ||
|
||
if(numberOfDataColumns > 0){ | ||
intensities <- dataFrame[ , dataColumnStartEndIndeces[[1]]:dataColumnStartEndIndeces[[2]]] | ||
medians <- apply(X = as.matrix(intensities), MARGIN = 1, FUN = median) | ||
} | ||
|
||
for(precursorIdx in seq_len(numberOfPrecursors)){ | ||
if((precursorIdx %% (as.integer(numberOfPrecursors/10))) == 0) | ||
if(!is.na(progress)) if(progress) incProgress(amount = 0.0, detail = paste("Precursor deisotoping ", precursorIdx, " / ", numberOfPrecursors, sep = "")) else print(paste("Precursor deisotoping ", precursorIdx, " / ", numberOfPrecursors, sep = "")) | ||
|
||
mzError <- dataFrame$"Average Mz"[[precursorIdx]] * mzDeviationInPPM_precursorDeisotoping / 1000000 | ||
mzError <- max(mzError, mzDeviationAbsolute_precursorDeisotoping) | ||
|
||
## RT difference <= maximumRtDifference | ||
validPrecursorsInRt <- abs(dataFrame$"Average Rt(min)"[[precursorIdx]] - dataFrame$"Average Rt(min)"[-precursorIdx]) <= maximumRtDifference | ||
|
||
## MZ difference around 1.0033548378 (first isotope) or 1.0033548378 * 2 (second isotope) | ||
validPrecursorsInMz1 <- abs((dataFrame$"Average Mz"[[precursorIdx]] - distance13Cminus12C * 1) - dataFrame$"Average Mz"[-precursorIdx]) <= mzError | ||
validPrecursorsInMz2 <- abs((dataFrame$"Average Mz"[[precursorIdx]] - distance13Cminus12C * 2) - dataFrame$"Average Mz"[-precursorIdx]) <= mzError | ||
validPrecursorsInMz <- validPrecursorsInMz1 | validPrecursorsInMz2 | ||
|
||
## intensity gets smaller in the isotope spectrum | ||
if(numberOfDataColumns > 0){ | ||
validPrecursorsInIntensity <- (medians[-precursorIdx] - medians[[precursorIdx]]) > 0 | ||
} else { | ||
validPrecursorsInIntensity <- TRUE | ||
} | ||
|
||
if(any(validPrecursorsInRt & validPrecursorsInMz & validPrecursorsInIntensity)) | ||
precursorsToRemove[[precursorIdx]] <- TRUE | ||
} | ||
|
||
## remove isotopes | ||
dataFrame <- dataFrame[!precursorsToRemove, ] | ||
|
||
numberOfRemovedIsotopePeaks <- sum(precursorsToRemove) | ||
numberOfPrecursors <- nrow(dataFrame) | ||
} | ||
|
||
if(!is.na(progress)) if(progress) incProgress(amount = 0, detail = paste("Boxing...", sep = "")) else print(paste("Boxing...", sep = "")) | ||
returnObj <- list() | ||
returnObj$dataFrame <- dataFrame | ||
|
||
## meta | ||
returnObj$oldFormat <- oldFormat | ||
returnObj$numberOfPrecursors <- numberOfPrecursors | ||
returnObj$dataColumnStartEndIndeces <- dataColumnStartEndIndeces | ||
returnObj$numberOfDataColumns <- numberOfDataColumns | ||
|
||
## group anno | ||
returnObj$sampleClass <- sampleClass | ||
returnObj$sampleType <- sampleType | ||
returnObj$sampleInjectionOrder <- sampleInjectionOrder | ||
returnObj$batchID <- batchID | ||
|
||
## misc | ||
returnObj$numberOfPrecursorsPrior <- numberOfPrecursorsPrior | ||
returnObj$numberOfRemovedIsotopePeaks <- numberOfRemovedIsotopePeaks | ||
|
||
return (returnObj) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
#' Read MS-DIAL Output File into a QFeatures Object | ||
#' | ||
#' This function reads the output file from MS-DIAL and | ||
#' converts it into a QFeatures object. | ||
#' | ||
#' @param file A string with the path to the MS-DIAL output file. | ||
#' @param version A character string specifying the version of MS-DIAL used to generate the file. | ||
#' This parameter is currently not used. | ||
#' | ||
#' @return A QFeatures object containing: | ||
#' \itemize{ | ||
#' \item An assay named "exampleAssay" with the metabolite counts. | ||
#' \item Row data (feature metadata) extracted from the input file. | ||
#' \item Column data (sample metadata) extracted from the input file. | ||
#' } | ||
#' | ||
#' @export | ||
#' | ||
#' @examples | ||
#' \dontrun{ | ||
#' # Assuming you have an MS-DIAL output file named "Metabolite_profile_showcase.txt" in a "data" directory: | ||
#' qf <- readMSDial("data/Metabolite_profile_showcase.txt") | ||
#' | ||
#' # Examine the structure of the resulting QFeatures object | ||
#' qf | ||
#' | ||
#' # Access the assay data | ||
#' assay(qf[["exampleAssay"]]) | ||
#' | ||
#' # Access the row data (feature metadata) | ||
#' rowData(qf[["exampleAssay"]]) | ||
#' | ||
#' # Access the column data (sample metadata) | ||
#' colData(qf) | ||
#' } | ||
#' | ||
#' @importFrom QFeatures QFeatures | ||
#' @importFrom SummarizedExperiment SummarizedExperiment | ||
#' @importFrom S4Vectors DataFrame | ||
#' | ||
#' @seealso | ||
#' \code{\link[QFeatures]{QFeatures}} for more information on the QFeatures class. | ||
#' \code{\link[SummarizedExperiment]{SummarizedExperiment}} for details on the underlying data structure. | ||
#' | ||
#' @note | ||
#' | ||
#' | ||
#' @references | ||
#' | ||
readMSDial <- function(file, version){ | ||
table <- read.table(file, fill = TRUE, sep = "\t", | ||
quote = "", header = FALSE) | ||
|
||
# Identify the starting row and column of the data | ||
startRow <- which(table[, 1] != "")[1] | ||
startCol <- which(table[1, ] != "")[1] | ||
##TODO: version dependent error message if startRow or startCol are not as expected. | ||
|
||
# Split the table in parts | ||
colDataRaw <- table[1:startRow, startCol:ncol(table)] | ||
rowDataRaw <- table[startRow:nrow(table), 1:(startCol)] | ||
countsRaw <- table[startRow:nrow(table), startCol:ncol(table)] | ||
|
||
# Extract ids and counts data | ||
ids <- rowDataRaw[-1, 1] | ||
counts <- as.matrix(countsRaw[-1, -1]) | ||
counts <- matrix(as.numeric(counts), nrow = nrow(counts), ncol = ncol(counts)) | ||
colnames(counts) <- as.character(countsRaw[1, -1]) | ||
rownames(counts) <- ids | ||
|
||
# Ensure row names of colData match counts column names | ||
colData <- DataFrame(t(colDataRaw[-nrow(colDataRaw), -1])) | ||
rownames(colData) <- as.character(colDataRaw[nrow(colDataRaw), -1]) | ||
colnames(colData) <- as.character(colDataRaw[-nrow(colDataRaw), 1]) | ||
|
||
# Ensure row names of rowData match counts row names | ||
rowData <- DataFrame(rowDataRaw[-1, ], row.names = ids) | ||
colnames(rowData) <- as.character(rowDataRaw[1,]) | ||
|
||
# Create SummarizedExperiment object | ||
|
||
sumExp <- SummarizedExperiment(assays = list(counts = counts), | ||
rowData = rowData, | ||
colData = colData) | ||
##TODO: Metadata with data source and version | ||
|
||
# Create QFeatures object | ||
qf <- QFeatures(list(exampleAssay = sumExp), colData = colData(sumExp)) | ||
qf | ||
##TODO: name | ||
} |
Oops, something went wrong.