Skip to content

Commit

Permalink
Imported QFeatures Reading from Norman
Browse files Browse the repository at this point in the history
  • Loading branch information
sneumann committed Aug 22, 2024
1 parent 4617bdc commit b47dad7
Show file tree
Hide file tree
Showing 9 changed files with 449 additions and 21 deletions.
9 changes: 7 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
Package: MetFamily
Type: Package
Title: MetFamily: Discovering Regulated Metabolite Families in Untargeted Metabolomics Studies
Version: 0.99.3
Date: 2024-03-02
Version: 0.99.4
Date: 2024-08-22
Author: c( person("Hendrik", "Treutler, role = c("aut"), email = "[email protected]"),
person("Khabat", "Vahabi, role = c("aut"), email = "[email protected]"),
person("Norman", "Storz", role = c("aut"), email = "[email protected]"),
person(given = "Steffen", family = "Neumann", email = "[email protected]",
role = c("aut", "cre"), comment = c(ORCID = "0000-0002-7899-7192")) )
Depends:
Expand Down Expand Up @@ -37,6 +38,7 @@ Imports:
graphics,
grDevices,
methods,
QFeatures,
stats,
utils
Remotes: decisionpatterns/searchable
Expand All @@ -63,5 +65,8 @@ Collate:
'TreeAlgorithms.R'
'Plots.R'
'R_packages.R'
parsePeakAbundanceMatrixQF.R
readMSDial.R
readMetaboScape.R
RoxygenNote: 7.2.3
Encoding: UTF-8
39 changes: 26 additions & 13 deletions NAMESPACE
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,4 +1,26 @@
# Generated by roxyXXXgen2: do not edit by hand
# Generated by roxygen2: do not edit by hand

export(calcPlotDendrogram_plotly)
export(calcPlotHeatmapLegend)
export(castListEntries)
export(data.numericmatrix)
export(metaboliteFamilyVersusClass)
export(mzClustGeneric)
export(parsePeakAbundanceMatrixQF)
export(processMS1data)
export(readClusterDataFromProjectFile)
export(readMSDial)
export(readMetaboscape)
export(readProjectData)
export(runMetFamily)
importFrom(QFeatures,QFeatures)
importFrom(S4Vectors,DataFrame)
importFrom(SummarizedExperiment,SummarizedExperiment)
importFrom(grDevices,colorRampPalette)
importFrom(grDevices,rainbow)
importFrom(grDevices,rgb)
importFrom(openxlsx2,read_xlsx)


## Hardcoded:
exportPattern("^[^\\.]")
Expand All @@ -14,15 +36,6 @@ importFrom("stats", "as.dendrogram", "cor", "dendrapply", "dist",
importFrom("utils", "flush.console", "read.table")


## From roxygen:
export(calcPlotDendrogram_plotly)
export(calcPlotHeatmapLegend)
export(castListEntries)
export(data.numericmatrix)
export(metaboliteFamilyVersusClass)
export(mzClustGeneric)
export(processMS1data)
export(readClusterDataFromProjectFile)
export(readProjectData)
export(runMetFamily)
importFrom(grDevices,colorRampPalette)
importFrom(SummarizedExperiment,colData)
importFrom(SummarizedExperiment,rowData)
importFrom(SummarizedExperiment,assay)
15 changes: 9 additions & 6 deletions R/FragmentMatrixFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -1396,7 +1396,7 @@ mzClustGeneric <- function(p,
return(list(mat=groupmat,idx=groupindex))
}

convertToProjectFile <- function(filePeakMatrix,
convertToProjectFile <- function(filePeakMatrixPath,
fileSpectra,
parameterSet,
progress = FALSE){
Expand Down Expand Up @@ -1434,8 +1434,11 @@ convertToProjectFile <- function(filePeakMatrix,

rm(returnObj)


filePeakMatrixQF <- readMSDial(filePeakMatrixPath)

returnObj <- convertToProjectFile2(
filePeakMatrix = filePeakMatrix,
filePeakMatrixQF = filePeakMatrixQF,
spectraList = spectraList, precursorMz = precursorMz, precursorRt = precursorRt,
metaboliteFamilies = rep(x = "", times = numberOfSpectra), uniqueMetaboliteFamilies = NULL, metaboliteFamilyColors = NULL,
parameterSet = parameterSet,
Expand All @@ -1454,7 +1457,7 @@ convertToProjectFile <- function(filePeakMatrix,
return(returnObj)
}

convertToProjectFile2 <- function(filePeakMatrix,
convertToProjectFile2 <- function(filePeakMatrixQF,
spectraList,
precursorMz,
precursorRt,
Expand All @@ -1471,9 +1474,9 @@ convertToProjectFile2 <- function(filePeakMatrix,
## metabolite profile
if(!is.na(progress)) if(progress) incProgress(amount = 0.1, detail = paste("Parsing MS1 file...", sep = "")) else print(paste("Parsing MS1 file...", sep = ""))

if(!is.null(filePeakMatrix)){
returnObj <- parsePeakAbundanceMatrix(
filePeakMatrix = filePeakMatrix,
if(!is.null(filePeakMatrixQF)){
returnObj <- parsePeakAbundanceMatrixQF(
qfeatures = filePeakMatrixQF,
doPrecursorDeisotoping = parameterSet$doPrecursorDeisotoping,
mzDeviationInPPM_precursorDeisotoping = parameterSet$mzDeviationInPPM_precursorDeisotoping,
mzDeviationAbsolute_precursorDeisotoping = parameterSet$mzDeviationAbsolute_precursorDeisotoping,
Expand Down
175 changes: 175 additions & 0 deletions R/parsePeakAbundanceMatrixQF.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
#' Title
#'
#' @param qfeatures Object of type QFeatures
#' @param doPrecursorDeisotoping
#' @param mzDeviationInPPM_precursorDeisotoping
#' @param mzDeviationAbsolute_precursorDeisotoping
#' @param maximumRtDifference
#' @param progress boolean whether or not to show a progress bar
#'
#' @return
#' @export
#'
#' @examples
parsePeakAbundanceMatrixQF <- function(qfeatures,
doPrecursorDeisotoping,
mzDeviationInPPM_precursorDeisotoping,
mzDeviationAbsolute_precursorDeisotoping,
maximumRtDifference,
progress=FALSE)
{
## read file

if(!is.na(progress)) {
if(progress) {
incProgress(amount = 0.1, detail = paste("Parsing MS1 file content...", sep = ""))
} else {
print(paste("Parsing MS1 file content...", sep = ""))
}
}

cols_to_exclude <- c("Reference RT","Reference m/z","Comment",
"Manually modified for quantification",
"Total score","RT similarity","Average","Stdev")

cols_to_keep <- which(!colnames(rowData(qfeatures))[[1]] %in% cols_to_exclude)

dataFrame <- cbind(rowData(qfeatures)[[1]][,cols_to_keep] ,assay(qfeatures))

ncol(rowData(qfeatures)[[1]])
numberOfPrecursors <- nrow(dataFrame)
numberOfPrecursorsPrior <- numberOfPrecursors



if(nrow(colData(qfeatures))>0){

dataColumnStartEndIndeces <- 1
numberOfDataColumns <- nrow(colData(qfeatures))
sampleClass <- colData(qfeatures)$Class
sampleType <- colData(qfeatures)$Type
sampleInjectionOrder <- colData(qfeatures)$"Injection order"
batchID <- NULL
if(! is.null(colData(qfeatures)$BatchID))
batchID <- colData(qfeatures)$BatchID

} else {
dataColumnStartEndIndeces <- NULL
numberOfDataColumns <- 0
sampleClass <- NULL
sampleType <- NULL
sampleInjectionOrder <- NULL
batchID <- NULL
}

commaNumbers <- sum(grepl(x = dataFrame$"Average Mz", pattern = "^(\\d+,\\d+$)|(^\\d+$)"))
decimalSeparatorIsComma <- commaNumbers == nrow(dataFrame)
if(decimalSeparatorIsComma){
if(!is.null(dataFrame$"Average Rt(min)")) dataFrame$"Average Rt(min)" <- gsub(x = gsub(x = dataFrame$"Average Rt(min)", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".")
if(!is.null(dataFrame$"Average Mz")) dataFrame$"Average Mz" <- gsub(x = gsub(x = dataFrame$"Average Mz", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".")
if(!is.null(dataFrame$"Fill %")) dataFrame$"Fill %" <- gsub(x = gsub(x = dataFrame$"Fill %", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".")
if(!is.null(dataFrame$"Dot product")) dataFrame$"Dot product" <- gsub(x = gsub(x = dataFrame$"Dot product", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".")
if(!is.null(dataFrame$"Reverse dot product")) dataFrame$"Reverse dot product" <- gsub(x = gsub(x = dataFrame$"Reverse dot product", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".")
if(!is.null(dataFrame$"Fragment presence %")) dataFrame$"Fragment presence %" <- gsub(x = gsub(x = dataFrame$"Fragment presence %", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".")

## replace -1 by 0
if(numberOfDataColumns > 0) {
for(colIdx in dataColumnStartEndIndeces[[1]]:dataColumnStartEndIndeces[[2]]){
dataFrame[ , colIdx] <- gsub(x = gsub(x = dataFrame[ , colIdx], pattern = "\\.", replacement = ""), pattern = ",", replacement = ".")
}
}
}

###################
## column formats
if(!is.null(dataFrame$"Average Rt(min)")) dataFrame$"Average Rt(min)" <- as.numeric(dataFrame$"Average Rt(min)")
if(!is.null(dataFrame$"Average Mz")) dataFrame$"Average Mz" <- as.numeric(dataFrame$"Average Mz")
if(!is.null(dataFrame$"Fill %")) dataFrame$"Fill %" <- as.numeric(dataFrame$"Fill %")
if(!is.null(dataFrame$"MS/MS included")) dataFrame$"MS/MS included" <- as.logical(dataFrame$"MS/MS included")
if(!is.null(dataFrame$"Dot product")) dataFrame$"Dot product" <- as.numeric(dataFrame$"Dot product")
if(!is.null(dataFrame$"Reverse dot product")) dataFrame$"Reverse dot product" <- as.numeric(dataFrame$"Reverse dot product")
if(!is.null(dataFrame$"Fragment presence %")) dataFrame$"Fragment presence %" <- as.numeric(dataFrame$"Fragment presence %")

#####################
## sorted by m/z (needed for deisotoping)
if(!is.null(dataFrame$"Average Mz"))
dataFrame <- dataFrame[order(dataFrame$"Average Mz"), ]

## replace -1 by 0
if(numberOfDataColumns > 0){
for(colIdx in dataColumnStartEndIndeces[[1]]:dataColumnStartEndIndeces[[2]]){
dataFrame[ , colIdx] <- as.numeric(dataFrame[ , colIdx])
if(!is.na(sum(dataFrame[,colIdx] == -1)))
dataFrame[(dataFrame[,colIdx] == -1),colIdx] <- 0
}
}

## deisotoping
numberOfRemovedIsotopePeaks <- 0
if(doPrecursorDeisotoping & !is.null(dataFrame$"Average Mz")){
if(!is.na(progress)) if(progress) incProgress(amount = 0, detail = paste("Precursor deisotoping...", sep = "")) else print(paste("Precursor deisotoping...", sep = ""))
distance13Cminus12C <- 1.0033548378

## mark isotope precursors
precursorsToRemove <- vector(mode = "logical", length = numberOfPrecursors)

if(numberOfDataColumns > 0){
intensities <- dataFrame[ , dataColumnStartEndIndeces[[1]]:dataColumnStartEndIndeces[[2]]]
medians <- apply(X = as.matrix(intensities), MARGIN = 1, FUN = median)
}

for(precursorIdx in seq_len(numberOfPrecursors)){
if((precursorIdx %% (as.integer(numberOfPrecursors/10))) == 0)
if(!is.na(progress)) if(progress) incProgress(amount = 0.0, detail = paste("Precursor deisotoping ", precursorIdx, " / ", numberOfPrecursors, sep = "")) else print(paste("Precursor deisotoping ", precursorIdx, " / ", numberOfPrecursors, sep = ""))

mzError <- dataFrame$"Average Mz"[[precursorIdx]] * mzDeviationInPPM_precursorDeisotoping / 1000000
mzError <- max(mzError, mzDeviationAbsolute_precursorDeisotoping)

## RT difference <= maximumRtDifference
validPrecursorsInRt <- abs(dataFrame$"Average Rt(min)"[[precursorIdx]] - dataFrame$"Average Rt(min)"[-precursorIdx]) <= maximumRtDifference

## MZ difference around 1.0033548378 (first isotope) or 1.0033548378 * 2 (second isotope)
validPrecursorsInMz1 <- abs((dataFrame$"Average Mz"[[precursorIdx]] - distance13Cminus12C * 1) - dataFrame$"Average Mz"[-precursorIdx]) <= mzError
validPrecursorsInMz2 <- abs((dataFrame$"Average Mz"[[precursorIdx]] - distance13Cminus12C * 2) - dataFrame$"Average Mz"[-precursorIdx]) <= mzError
validPrecursorsInMz <- validPrecursorsInMz1 | validPrecursorsInMz2

## intensity gets smaller in the isotope spectrum
if(numberOfDataColumns > 0){
validPrecursorsInIntensity <- (medians[-precursorIdx] - medians[[precursorIdx]]) > 0
} else {
validPrecursorsInIntensity <- TRUE
}

if(any(validPrecursorsInRt & validPrecursorsInMz & validPrecursorsInIntensity))
precursorsToRemove[[precursorIdx]] <- TRUE
}

## remove isotopes
dataFrame <- dataFrame[!precursorsToRemove, ]

numberOfRemovedIsotopePeaks <- sum(precursorsToRemove)
numberOfPrecursors <- nrow(dataFrame)
}

if(!is.na(progress)) if(progress) incProgress(amount = 0, detail = paste("Boxing...", sep = "")) else print(paste("Boxing...", sep = ""))
returnObj <- list()
returnObj$dataFrame <- dataFrame

## meta
returnObj$oldFormat <- oldFormat
returnObj$numberOfPrecursors <- numberOfPrecursors
returnObj$dataColumnStartEndIndeces <- dataColumnStartEndIndeces
returnObj$numberOfDataColumns <- numberOfDataColumns

## group anno
returnObj$sampleClass <- sampleClass
returnObj$sampleType <- sampleType
returnObj$sampleInjectionOrder <- sampleInjectionOrder
returnObj$batchID <- batchID

## misc
returnObj$numberOfPrecursorsPrior <- numberOfPrecursorsPrior
returnObj$numberOfRemovedIsotopePeaks <- numberOfRemovedIsotopePeaks

return (returnObj)
}
91 changes: 91 additions & 0 deletions R/readMSDial.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#' Read MS-DIAL Output File into a QFeatures Object
#'
#' This function reads the output file from MS-DIAL and
#' converts it into a QFeatures object.
#'
#' @param file A string with the path to the MS-DIAL output file.
#' @param version A character string specifying the version of MS-DIAL used to generate the file.
#' This parameter is currently not used.
#'
#' @return A QFeatures object containing:
#' \itemize{
#' \item An assay named "exampleAssay" with the metabolite counts.
#' \item Row data (feature metadata) extracted from the input file.
#' \item Column data (sample metadata) extracted from the input file.
#' }
#'
#' @export
#'
#' @examples
#' \dontrun{
#' # Assuming you have an MS-DIAL output file named "Metabolite_profile_showcase.txt" in a "data" directory:
#' qf <- readMSDial("data/Metabolite_profile_showcase.txt")
#'
#' # Examine the structure of the resulting QFeatures object
#' qf
#'
#' # Access the assay data
#' assay(qf[["exampleAssay"]])
#'
#' # Access the row data (feature metadata)
#' rowData(qf[["exampleAssay"]])
#'
#' # Access the column data (sample metadata)
#' colData(qf)
#' }
#'
#' @importFrom QFeatures QFeatures
#' @importFrom SummarizedExperiment SummarizedExperiment
#' @importFrom S4Vectors DataFrame
#'
#' @seealso
#' \code{\link[QFeatures]{QFeatures}} for more information on the QFeatures class.
#' \code{\link[SummarizedExperiment]{SummarizedExperiment}} for details on the underlying data structure.
#'
#' @note
#'
#'
#' @references
#'
readMSDial <- function(file, version){
table <- read.table(file, fill = TRUE, sep = "\t",
quote = "", header = FALSE)

# Identify the starting row and column of the data
startRow <- which(table[, 1] != "")[1]
startCol <- which(table[1, ] != "")[1]
##TODO: version dependent error message if startRow or startCol are not as expected.

# Split the table in parts
colDataRaw <- table[1:startRow, startCol:ncol(table)]
rowDataRaw <- table[startRow:nrow(table), 1:(startCol)]
countsRaw <- table[startRow:nrow(table), startCol:ncol(table)]

# Extract ids and counts data
ids <- rowDataRaw[-1, 1]
counts <- as.matrix(countsRaw[-1, -1])
counts <- matrix(as.numeric(counts), nrow = nrow(counts), ncol = ncol(counts))
colnames(counts) <- as.character(countsRaw[1, -1])
rownames(counts) <- ids

# Ensure row names of colData match counts column names
colData <- DataFrame(t(colDataRaw[-nrow(colDataRaw), -1]))
rownames(colData) <- as.character(colDataRaw[nrow(colDataRaw), -1])
colnames(colData) <- as.character(colDataRaw[-nrow(colDataRaw), 1])

# Ensure row names of rowData match counts row names
rowData <- DataFrame(rowDataRaw[-1, ], row.names = ids)
colnames(rowData) <- as.character(rowDataRaw[1,])

# Create SummarizedExperiment object

sumExp <- SummarizedExperiment(assays = list(counts = counts),
rowData = rowData,
colData = colData)
##TODO: Metadata with data source and version

# Create QFeatures object
qf <- QFeatures(list(exampleAssay = sumExp), colData = colData(sumExp))
qf
##TODO: name
}
Loading

0 comments on commit b47dad7

Please sign in to comment.