diff --git a/DESCRIPTION b/DESCRIPTION index 8e3d005..c75c4f2 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: MetFamily Type: Package Title: MetFamily: Discovering Regulated Metabolite Families in Untargeted Metabolomics Studies -Version: 0.99.2 +Version: 0.99.3 Date: 2024-03-02 Author: c( person("Hendrik", "Treutler, role = c("aut"), email = "hendrik.treutler@ipb-halle.de"), person("Khabat", "Vahabi, role = c("aut"), email = "khabat.vahabi@ipb-halle.de"), @@ -27,31 +27,40 @@ Depends: searchable, tools, stringr, - xcms, mzR, matrixStats, plotrix, slam, - plotly, - egg + plotly +Imports: + egg, + graphics, + grDevices, + methods, + stats, + utils Remotes: decisionpatterns/searchable Suggests: knitr, testthat VignetteBuilder: knitr Maintainer: Hendrik Treutler -Description: We present a novel approach for the untargeted discovery of metabolite families offering a bird's eye view of metabolic regulation in comparative metabolomics. We implemented the presented methodology in the easy-to-use web application MetFamily to enable the analysis of comprehensive metabolomics studies for all researchers worldwide. -License: GPL (>= 2) +Description: We present a novel approach for the untargeted discovery + of metabolite families offering a bird's eye view of metabolic regulation + in comparative metabolomics. We implemented the presented methodology + in the easy-to-use web application MetFamily to enable the analysis + of comprehensive metabolomics studies for all researchers worldwide. +License: GPL (>= 2) + file LICENSE biocViews: Software, Visualization Collate: 'Annotation.R' 'Classifiers.R' 'runMetFamily.R' - 'R_packages.R' 'FragmentMatrixFunctions.R' 'DataProcessing.R' 'Analysis.R' 'TreeAlgorithms.R' 'Plots.R' + 'R_packages.R' RoxygenNote: 7.2.3 Encoding: UTF-8 diff --git a/Dockerfile b/Dockerfile index 651a3be..5effef6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ #FROM sneumann/metfamily-base:latest -#FROM sneumann/metfamily-base:4.3.2 +FROM sneumann/metfamily-base:4.3.2 #FROM sneumann/metfamily-base:4.0.5 -FROM sneumann/metfamily-base:3.6.3 +#FROM sneumann/metfamily-base:3.6.3 MAINTAINER Steffen Neumann diff --git a/NAMESPACE b/NAMESPACE index 2d60eee..0156efc 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,3 +1,28 @@ -#export(startMetFamily) +# Generated by roxyXXXgen2: do not edit by hand + +## Hardcoded: exportPattern("^[^\\.]") import(egg) +importFrom("grDevices", "as.raster", "rainbow", "rgb") +importFrom("methods", "as") +importFrom("graphics", "axis", "mtext", "par", "plot.new", + "plot.window", "points", "rasterImage", "rect", "segments", + "title") +importFrom("methods", "as", "is") +importFrom("stats", "as.dendrogram", "cor", "dendrapply", "dist", + "hclust", "is.leaf", "median", "na.omit", "predict", "sd") +importFrom("utils", "flush.console", "read.table") + + +## From roxygen: +export(calcPlotDendrogram_plotly) +export(calcPlotHeatmapLegend) +export(castListEntries) +export(data.numericmatrix) +export(metaboliteFamilyVersusClass) +export(mzClustGeneric) +export(processMS1data) +export(readClusterDataFromProjectFile) +export(readProjectData) +export(runMetFamily) +importFrom(grDevices,colorRampPalette) diff --git a/NEWS b/NEWS index b694569..430ca02 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,13 @@ CHANGES IN VERSION 0.99.2 ------------------------- +BUG FIXES + + o Now producing correct PCA on R-4.0.0+ by using a custom data.matrix() function with old R-3.6.3 behaviour + +CHANGES IN VERSION 0.99.2 +------------------------- + NEW FEATURES o More and better colors available for plotting (thanks @khabatv) diff --git a/R/Analysis.R b/R/Analysis.R index 2c43ed9..ea7461a 100644 --- a/R/Analysis.R +++ b/R/Analysis.R @@ -1,7 +1,7 @@ ######################################################################################### ## constants -filterData <- function(dataList, groups, sampleSet, filterBySamples, filter_average, filter_lfc, filterList_ms2_masses, filter_ms2_ppm, filter_ms1_masses, filter_ms1_ppm, includeIgnoredPrecursors, progress = FALSE){ +filterData <- function(dataList, grouXXXps, sampleSet, filterBySamples, filter_average, filter_lfc, filterList_ms2_masses, filter_ms2_ppm, filter_ms1_masses, filter_ms1_ppm, includeIgnoredPrecursors, progress = FALSE){ ########################################## ## filter filter <- rep(x = TRUE, times = dataList$numberOfPrecursors) @@ -10,9 +10,9 @@ filterData <- function(dataList, groups, sampleSet, filterBySamples, filter_aver if(!is.null(filter_average)){ #print("this is entering the line 11") #if(filterBySamples){ - # filter <- filter & apply(X = as.data.frame(dataList$dataFrameMeasurements[, sapply(X = as.vector(groups), FUN = dataList$dataMeanColumnNameFunctionFromName)]), MARGIN = 1, FUN = mean) >= filter_average + # filter <- filter & apply(X = as.data.frame(dataList$dataFrameMeasurements[, sapply(X = as.vector(grouXXXps), FUN = dataList$dataMeanColumnNameFunctionFromName)]), MARGIN = 1, FUN = mean) >= filter_average #} else { - filter <- filter & apply(X = as.data.frame(dataList$dataFrameMeasurements[, sapply(X = as.vector(groups), FUN = dataList$dataMeanColumnNameFunctionFromName)]), MARGIN = 1, FUN = mean) >= filter_average + filter <- filter & apply(X = as.data.frame(dataList$dataFrameMeasurements[, sapply(X = as.vector(grouXXXps), FUN = dataList$dataMeanColumnNameFunctionFromName)]), MARGIN = 1, FUN = mean) >= filter_average #print(filter) #print(names(filter)[unname(filter)]) #} @@ -21,11 +21,11 @@ filterData <- function(dataList, groups, sampleSet, filterBySamples, filter_aver ## filter_lfc if(!is.null(filter_lfc)){ if(filter_lfc != 0){ - if(length(groups) != 2){ stop("The number of groups for LFC is not equal to two!") } + if(length(grouXXXps) != 2){ stop("The number of grouXXXps for LFC is not equal to two!") } if(filter_lfc > 0) - filter <- filter & dataList$dataFrameMeasurements[, dataList$lfcColumnNameFunctionFromName(groups[[1]], groups[[2]])] >= filter_lfc + filter <- filter & dataList$dataFrameMeasurements[, dataList$lfcColumnNameFunctionFromName(grouXXXps[[1]], grouXXXps[[2]])] >= filter_lfc else - filter <- filter & dataList$dataFrameMeasurements[, dataList$lfcColumnNameFunctionFromName(groups[[1]], groups[[2]])] <= filter_lfc + filter <- filter & dataList$dataFrameMeasurements[, dataList$lfcColumnNameFunctionFromName(grouXXXps[[1]], grouXXXps[[2]])] <= filter_lfc } } @@ -80,16 +80,16 @@ filterData <- function(dataList, groups, sampleSet, filterBySamples, filter_aver resultObj$filter <- filter resultObj$numberOfPrecursors <- dataList$numberOfPrecursors resultObj$numberOfPrecursorsFiltered <- length(filter) - if(is.null(groups)){ - resultObj$groups <- list() + if(is.null(grouXXXps)){ + resultObj$grouXXXps <- list() resultObj$sampleSet <- list() resultObj$filterBySamples <- NA } else { - resultObj$groups <- groups + resultObj$grouXXXps <- grouXXXps resultObj$sampleSet <- sampleSet resultObj$filterBySamples <- filterBySamples } - #resultObj$groups <- ifelse(test = is.null(groups), yes = NA, no = groups) + #resultObj$grouXXXps <- ifelse(test = is.null(grouXXXps), yes = NA, no = grouXXXps) resultObj$filter_average <- ifelse(test = is.null(filter_average), yes = 0, no = filter_average) resultObj$filter_lfc <- ifelse(test = is.null(filter_lfc), yes = 0, no = filter_lfc) if(is.null(filterList_ms2_masses)){ @@ -974,7 +974,7 @@ performPca <- function(dataList, dataFrame2, ms1AnalysisMethod){ #.. ..$ : chr [1:5] "Dim.1" "Dim.2" "Dim.3" "Dim.4" ... #$ variance: num [1:18] 35.96 21.67 11.66 8.15 4.56 ... # - ## artificial data two groups + ## artificial data two grouXXXps #$ scores : num [1:2, 1] 0 0 #..- attr(*, "dimnames")=List of 2 #.. ..$ : chr [1:2] "A_1" "B_2" @@ -1054,7 +1054,7 @@ calculatePCA <- function(dataList, filterObj, ms1AnalysisMethod, scaling, logTra if(filterObj$filterBySamples){ dataFrame <- dataList$dataFrameMeasurements[filterObj$filter, filterObj$sampleSet] } else { - dataFrame <- dataList$dataFrameMeasurements[filterObj$filter, dataList$dataColumnsNameFunctionFromGroupNames(groups = filterObj$groups, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame))] + dataFrame <- dataList$dataFrameMeasurements[filterObj$filter, dataList$dataColumnsNameFunctionFromGroupNames(grouXXXps = filterObj$grouXXXps, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame))] } dataFrame <- t(dataFrame) diff --git a/R/Annotation.R b/R/Annotation.R index d5312f4..36dab12 100644 --- a/R/Annotation.R +++ b/R/Annotation.R @@ -822,6 +822,24 @@ evaluatePutativeMetaboliteFamiliesOfPrecursorSet_old <- function(dataList, precu return(printPutativeMetaboliteFamilies) } + + +#' Title +#' +#' @param dataList +#' @param precursorSet +#' @param classToSpectra_class +#' @param properties_class +#' @param classifierClass +#' @param mappingSpectraToClassDf +#' @param addClassifierConsensusSpectrum +#' +#' @return +#' @export +#' @importFrom grDevices colorRampPalette +#' +#' +#' @examples metaboliteFamilyVersusClass <- function(dataList, precursorSet, classToSpectra_class, properties_class, classifierClass, mappingSpectraToClassDf, addClassifierConsensusSpectrum){ returnObj <- getSpectrumStatistics(dataList = dataList, precursorSet = precursorSet) masses_spec <- returnObj$fragmentMasses diff --git a/R/DataProcessing.R b/R/DataProcessing.R index cc50c7c..b8858ee 100644 --- a/R/DataProcessing.R +++ b/R/DataProcessing.R @@ -1,3 +1,27 @@ +#' Convert data.frame columns to numeric +#' +#' The data.numericmatrix() function works similar to base::data.matrix() +#' before R-4.0.0 converting character columns to numeric without converting +#' to factor first, thus returning the actual numeric values. +#' +#' @param x The data.frame to convert +#' +#' @return A matrix with all columns converted to numeric +#' @export +#' +#' @examples +#' data.numericmatrix(data.frame(a = c("1", "2", "3"), +#' b = c("4", "5", "6"))) +#' +data.numericmatrix <- function(x) { + for (i in 1:ncol(x)) { + if (is.character(x[, i])) { + x[, i] <- as.numeric(as.character(x[, i])) + } + } + as.matrix(x) +} + ######################################################################################### ## annotate and process matrix @@ -22,10 +46,30 @@ sparseMatrixToString <- function(matrixRows, matrixCols, matrixVals, parameterSe return(lines) } -readClusterDataFromProjectFile <- function(file, progress = FALSE){ - if(!is.na(progress)) if(progress) setProgress(value = 0, detail = "Parsing") else print("Parsing") +#' Read MetFamily Project data saved by the export function +#' +#' Supports reading from plain and gzip'ed files +#' +#' @param file Path to file to read +#' @param progress Whether to update a shiny Progress bar +#' +#' @return A big dataList. +#' +#' @seealso [readProjectData] +#' @export +#' +#' @examples +readClusterDataFromProjectFile <- function(file, progress = FALSE) +{ + if(!is.na(progress)) + if(progress) + setProgress(value = 0, detail = "Parsing") + else + print("Parsing") + extension <- file_ext(file) - if(extension == "gz"){ + + if(extension == "gz") { file <- gzfile(file, "r") } else { file <- file(file, "r") @@ -41,13 +85,31 @@ readClusterDataFromProjectFile <- function(file, progress = FALSE){ return(dataList) } -readProjectData <- function(fileLines, progress = FALSE){ + + +#' Read MetFamily Project data saved by the export function +#' +#' @param fileLines Character vector with content of a project file +#' @param progress Whether to update a shiny Progress bar +#' +#' @return A big dataList. +#' +#' @seealso [processMS1data] +#' @export +#' +#' @examples +readProjectData <- function(fileLines, progress = FALSE) +{ allowedTags <- c("ID") allowedTagPrefixes <- c("AnnotationColors=") ################################################################################################## ## parse data - if(!is.na(progress)) if(progress) incProgress(amount = 0.1, detail = "Preprocessing") else print("Preprocessing") + if(!is.na(progress)) + if(progress) + incProgress(amount = 0.1, detail = "Preprocessing") + else + print("Preprocessing") numberOfRows <- length(fileLines) numberOfMS1features <- as.integer(numberOfRows - 3) @@ -84,11 +146,13 @@ readProjectData <- function(fileLines, progress = FALSE){ line3Tokens <- NULL if(any(duplicated(metaboliteProfileColumnNames))) - stop(paste("Duplicated column names in the metabolite profile: ", paste(sort(unique(metaboliteProfileColumnNames[duplicated(metaboliteProfileColumnNames)])), collapse = "; "))) + stop(paste("Duplicated column names in the metabolite profile: ", + paste(sort(unique(metaboliteProfileColumnNames[duplicated(metaboliteProfileColumnNames)])), collapse = "; "))) ######################################################################### ## extract metabolite profile and fragment matrix - metaboliteProfile <- as.data.frame(matrix(nrow = numberOfMS1features, ncol = numberOfMetaboliteProfileColumns)) + metaboliteProfile <- as.data.frame(matrix(nrow = numberOfMS1features, + ncol = numberOfMetaboliteProfileColumns)) colnames(metaboliteProfile) <- metaboliteProfileColumnNames listMatrixRows <- list() @@ -103,7 +167,12 @@ readProjectData <- function(fileLines, progress = FALSE){ lastOut <- time rowProgress <- (rowIdx - lastRow) / numberOfMS1features lastRow <- rowIdx - if(!is.na(progress)) if(progress) incProgress(amount = rowProgress*0.2, detail = paste("Preprocessing ", rowIdx, " / ", numberOfMS1features, sep = "")) else print(paste("Preprocessing ", rowIdx, " / ", numberOfMS1features, sep = "")) + if(!is.na(progress)) + if(progress) + incProgress(amount = rowProgress*0.2, + detail = paste("Preprocessing ", rowIdx, " / ", numberOfMS1features, sep = "")) + else + print(paste("Preprocessing ", rowIdx, " / ", numberOfMS1features, sep = "")) } lineIdx <- rowIdx + 3 @@ -133,15 +202,16 @@ readProjectData <- function(fileLines, progress = FALSE){ data.frame(rbind( c(importParameters, rep(x = "", times = numberOfMetaboliteProfileColumns - 1)), tagsSector, - metaboliteProfileColumnNames - ), stringsAsFactors = FALSE), + metaboliteProfileColumnNames), stringsAsFactors = FALSE), data.frame(rbind( fragmentGroupsNumberOfFramgents, fragmentGroupsAverageIntensity, fragmentGroupsAverageMass ), stringsAsFactors = FALSE) ) - headerLabels <- c("HeaderForFragmentCounts", "HeaderForGroupsAndFragmentIntensities", "Header") + headerLabels <- c("HeaderForFragmentCounts", + "HeaderForGroupsAndFragmentIntensities", + "Header") rownames(dataFrameHeader) <- headerLabels headerColumnNames <- c(metaboliteProfileColumnNames, fragmentGroupsAverageMass) colnames(dataFrameHeader) <- headerColumnNames @@ -171,7 +241,9 @@ readProjectData <- function(fileLines, progress = FALSE){ dataFrameHeader[, (target+1):numberOfColumns, drop=FALSE] ) numberOfMetaboliteProfileColumns <- numberOfMetaboliteProfileColumns + 1 - metaboliteProfileColumnNames <- c(metaboliteProfileColumnNames[seq_len(target)], annotationColumnName, metaboliteProfileColumnNames[(target+1):numberOfMetaboliteProfileColumns]) + metaboliteProfileColumnNames <- c(metaboliteProfileColumnNames[seq_len(target)], + annotationColumnName, + metaboliteProfileColumnNames[(target+1):numberOfMetaboliteProfileColumns]) colnames(metaboliteProfile) <- metaboliteProfileColumnNames headerColumnNames <- c(metaboliteProfileColumnNames, fragmentGroupsAverageMass) colnames(dataFrameHeader) <- headerColumnNames @@ -251,7 +323,6 @@ readProjectData <- function(fileLines, progress = FALSE){ mzAfter <- paste( mzAfter, paste(rep(x = "0", times = maximumNumberOfDecimalPlacesForMz - nchar(mzAfter)), collapse = ""), - #paste(rep(x = " ", times = maximumNumberOfDecimalPlacesForMz - nchar(mzAfter)), collapse = ""), sep = "" ) @@ -266,7 +337,6 @@ readProjectData <- function(fileLines, progress = FALSE){ if(nchar(rtAfter) < maximumNumberOfDecimalPlacesForRt) rtAfter <- paste( rtAfter, - #paste(rep(x = " ", times = maximumNumberOfDecimalPlacesForRt - nchar(rtAfter)), collapse = ""), paste(rep(x = "0", times = maximumNumberOfDecimalPlacesForRt - nchar(rtAfter)), collapse = ""), sep = "" ) @@ -313,27 +383,19 @@ readProjectData <- function(fileLines, progress = FALSE){ ## get features featureIndeces <- list() featureCount <- vector(mode = "numeric", length = numberOfMS1features) - #fragmentMassPresent <- rep(x = FALSE, times = length(fragmentGroupsAverageMass)) + for(i in seq_len(numberOfMS1features)){ - # if(numberOfMS1features >= 10 & ((i %% (as.integer(numberOfMS1features/10))) == 0)) - # if(progress) incProgress(amount = 0.3 / 10, detail = paste("Features ", i, " / ", numberOfMS1features, sep = "")) - ## data indecesHere <- which(matrixRows == i) featureIndecesHere <- matrixCols[indecesHere] numberOfFeatures <- length(featureIndecesHere) featureIndeces[[i]] <- featureIndecesHere featureCount[[i]] <- numberOfFeatures - #fragmentMassPresent[featureIndecesHere] <- TRUE } if(!is.na(progress)) if(progress) incProgress(amount = 0.1, detail = "Feature postprocessing") else print("Feature postprocessing") ## ms2 plot data - # resultObj <- getMS2plotData(matrixRows, matrixCols, matrixVals, fragmentMasses = fragmentGroupsAverageMass) - # ms2PlotDataNumberOfFragments <- resultObj$numberOfFragments - # ms2PlotDataAverageAbundance <- resultObj$averageAbundance - # ms2PlotDataFragmentMasses <- resultObj$masses ms2PlotDataNumberOfFragments <- fragmentGroupsNumberOfFramgents ms2PlotDataAverageAbundance <- fragmentGroupsAverageIntensity ms2PlotDataFragmentMasses <- fragmentGroupsAverageMass @@ -349,8 +411,6 @@ readProjectData <- function(fileLines, progress = FALSE){ matrixCols <- NULL matrixVals <- NULL - #fragmentGroupsAverageMass <- fragmentGroupsAverageMass[1:ncol(featureMatrix)] - #fragmentGroupsAverageMass <- fragmentGroupsAverageMass[fragmentMassPresent] rownames(featureMatrix) <- precursorLabels colnames(featureMatrix) <- fragmentGroupsAverageMass @@ -360,10 +420,6 @@ readProjectData <- function(fileLines, progress = FALSE){ for(i in seq_len(numberOfMS1features)) featureIndexMatrix[i, seq_len(length(featureIndeces[[i]]))] <- featureIndeces[[i]] - # ## remove columns without data - # fragmentThere <- apply(X = featureMatrix, MARGIN = 2, FUN = function(x){any(x != 0)}) - # minimumMass <- min(fragmentGroupsAverageMass[fragmentThere]) - # maximumMass <- max(fragmentGroupsAverageMass[fragmentThere]) minimumMass <- min(fragmentGroupsAverageMass) maximumMass <- max(fragmentGroupsAverageMass) @@ -379,33 +435,31 @@ readProjectData <- function(fileLines, progress = FALSE){ sampleColumns <- which(sampleColumns) sampleColumnsStartEnd <- c(min(sampleColumns), max(sampleColumns)) - groups <- unique(tagsSector[sampleColumns]) - numberOfGroups <- length(groups) + grouXXXps <- unique(tagsSector[sampleColumns]) + numberOfGroups <- length(grouXXXps) sampleNamesToExclude <- NULL dataColumnIndecesFunctionFromGroupIndex <- function(groupIdx, sampleNamesToExclude = NULL){ - which(tagsSector == groups[[groupIdx]] & !(metaboliteProfileColumnNames %in% sampleNamesToExclude)) + which(tagsSector == grouXXXps[[groupIdx]] & !(metaboliteProfileColumnNames %in% sampleNamesToExclude)) } dataColumnsNameFunctionFromGroupIndex <- function(groupIdx, sampleNamesToExclude = NULL){ - #sampleNames = paste(groups[[groupIdx]], "_", metaboliteProfileColumnNames[dataColumnIndecesFunctionFromGroupIndex(groupIdx = groupIdx, sampleNamesToExclude = sampleNamesToExclude)], sep = "") sampleNames = metaboliteProfileColumnNames[dataColumnIndecesFunctionFromGroupIndex(groupIdx = groupIdx, sampleNamesToExclude = sampleNamesToExclude)] - #sampleNames = sampleNames[!(sampleNames %in% sampleNamesToExclude)] return(sampleNames) } dataColumnsNameFunctionFromGroupName <- function(group, sampleNamesToExclude = NULL){ - dataColumnsNameFunctionFromGroupIndex(groupIdx = match(x = group, table = groups), sampleNamesToExclude = sampleNamesToExclude) + dataColumnsNameFunctionFromGroupIndex(groupIdx = match(x = group, table = grouXXXps), sampleNamesToExclude = sampleNamesToExclude) } - dataColumnsNameFunctionFromGroupNames <- function(groups, sampleNamesToExclude = NULL){ - unlist(lapply(X = groups, FUN = function(x){dataColumnsNameFunctionFromGroupName(group = x, sampleNamesToExclude = sampleNamesToExclude)})) + dataColumnsNameFunctionFromGroupNames <- function(grouXXXps, sampleNamesToExclude = NULL){ + unlist(lapply(X = grouXXXps, FUN = function(x){dataColumnsNameFunctionFromGroupName(group = x, sampleNamesToExclude = sampleNamesToExclude)})) } groupNameFunctionFromDataColumnName <- function(dataColumnName, sampleNamesToExclude = NULL){ - groupIdx <- which(unlist(lapply(X = groups, FUN = function(x){ + groupIdx <- which(unlist(lapply(X = grouXXXps, FUN = function(x){ dataColumnNames <- dataColumnsNameFunctionFromGroupName(group = x, sampleNamesToExclude = sampleNamesToExclude) any(dataColumnNames == dataColumnName) }))) - groups[[groupIdx]] + grouXXXps[[groupIdx]] } lfcColumnNameFunctionFromString <- function(columnName){ tokens <- strsplit(x = columnName, split = "_vs_")[[1]] @@ -437,7 +491,7 @@ readProjectData <- function(fileLines, progress = FALSE){ returnObj <- processMS1data( sampleNamesToExclude=sampleNamesToExclude, numberOfMS1features=numberOfMS1features, precursorLabels=precursorLabels, - groups=groups, metaboliteProfileColumnNames=metaboliteProfileColumnNames, tagsSector = tagsSector, + grouXXXps=grouXXXps, metaboliteProfileColumnNames=metaboliteProfileColumnNames, tagsSector = tagsSector, dataColumnIndecesFunctionFromGroupIndex=dataColumnIndecesFunctionFromGroupIndex, dataColumnsNameFunctionFromGroupIndex=dataColumnsNameFunctionFromGroupIndex, dataColumnsNameFunctionFromGroupName=dataColumnsNameFunctionFromGroupName, dataColumnsNameFunctionFromGroupNames=dataColumnsNameFunctionFromGroupNames, groupNameFunctionFromDataColumnName=groupNameFunctionFromDataColumnName, metaboliteProfile=metaboliteProfile, progress=progress ) @@ -469,17 +523,13 @@ readProjectData <- function(fileLines, progress = FALSE){ ## present annotations annotations <- vector(mode='list', length=numberOfMS1features) - #annotations[1:numberOfMS1features] <- dataFrame[, annotationColumnName] annoVals <- metaboliteProfile[, annotationColumnName] for(i in seq_len(numberOfMS1features)){ - #print(paste(i, annoVals[[i]], nchar(annoVals[[i]]), class(annoVals[[i]]))) if(nchar(annoVals[[i]]) > 0){ annotations[[i]] <- as.list(unlist(strsplit(x = annoVals[[i]], split = ", "))) - #print(paste("a1", i, annotations[[i]], length(annotations[[i]]), class(annotations[[i]]))) } else{ annotations[[i]] <- list() - #print(paste("a2", i, annotations[[i]], length(annotations[[i]]), class(annotations[[i]]))) } } @@ -494,8 +544,6 @@ readProjectData <- function(fileLines, progress = FALSE){ annotations[[i]] <- annotations[[i]][-idx] } annoArrayOfLists[[i]] <- annotations[[i]] - #print(paste("b", i, annoArrayOfLists[[i]], length(annoArrayOfLists[[i]]), class(annoArrayOfLists[[i]]))) - annoArrayIsArtifact[[i]] <- ignoreThere } @@ -507,20 +555,7 @@ readProjectData <- function(fileLines, progress = FALSE){ ) if(nchar(annotationColorsMapValue) > 0){ - #annotationColorsMapValuePairsTmp <- unlist(strsplit(x = annotationColorsMapValue, split = "=")) - #annotationColorsMapValues <- sapply(X = strsplit(x = annotationColorsMapValuePairsTmp[2:length(annotationColorsMapValuePairsTmp)], split = ", "), FUN = function(token){ - # token[[1]] - #}) - #if(length(annotationColorsMapValuePairsTmp) < 3){ - # annotationColorsMapKeys <- annotationColorsMapValuePairsTmp[[1]] - #}else{ - # annotationColorsMapKeys <- c(annotationColorsMapValuePairsTmp[[1]], substr( - # x = annotationColorsMapValuePairsTmp[2:(length(annotationColorsMapValuePairsTmp) - 1)], - # start = nchar(annotationColorsMapValues) + nchar(", ") + 1, - # stop = nchar(annotationColorsMapValuePairsTmp[2:length(annotationColorsMapValuePairsTmp)]) - # )) - #} - + annotationColorsMapValuePairs <- unlist(strsplit(x = annotationColorsMapValue, split = ", ")) annotationColorsMapValues <- unlist(strsplit(x = annotationColorsMapValuePairs, split = "=")) annotationColorsMapKeys <- annotationColorsMapValues[seq(from = 1, to = length(annotationColorsMapValues), by = 2)] @@ -561,7 +596,7 @@ readProjectData <- function(fileLines, progress = FALSE){ dataList$importParameterSet <- importParameterSet dataList$numberOfPrecursors <- numberOfMS1features dataList$numberOfDuplicatedPrecursors <- numberOfDuplicated - dataList$groups <- groups + dataList$grouXXXps <- grouXXXps dataList$columnGroupLabels <- columnGroupLabels dataList$groupSampleDataFrame <- groupSampleDataFrame dataList$metaboliteProfileColumnNames <- metaboliteProfileColumnNames @@ -621,32 +656,29 @@ readProjectData <- function(fileLines, progress = FALSE){ ## redefine MS1 column functions dataColumnIndecesFunctionFromGroupIndex <- function(groupIdx, sampleNamesToExclude){ - which(dataList$tagsSector == dataList$groups[[groupIdx]] & !(dataList$metaboliteProfileColumnNames %in% sampleNamesToExclude)) + which(dataList$tagsSector == dataList$grouXXXps[[groupIdx]] & !(dataList$metaboliteProfileColumnNames %in% sampleNamesToExclude)) } dataList$dataColumnIndecesFunctionFromGroupIndex <- dataColumnIndecesFunctionFromGroupIndex dataColumnsNameFunctionFromGroupIndex <- function(groupIdx, sampleNamesToExclude){ - #sampleNames = paste(dataList$groups[[groupIdx]], "_", metaboliteProfileColumnNames[dataColumnIndecesFunctionFromGroupIndex(groupIdx = groupIdx, sampleNamesToExclude = sampleNamesToExclude)], sep = "") dataList$metaboliteProfileColumnNames[dataList$dataColumnIndecesFunctionFromGroupIndex(groupIdx = groupIdx, sampleNamesToExclude = sampleNamesToExclude)] - #sampleNames = sampleNames[!(sampleNames %in% sampleNamesToExclude)] - #return(sampleNames) } dataList$dataColumnsNameFunctionFromGroupIndex <- dataColumnsNameFunctionFromGroupIndex dataColumnsNameFunctionFromGroupName <- function(group, sampleNamesToExclude){ - dataColumns <- dataList$dataColumnsNameFunctionFromGroupIndex(groupIdx = match(x = group, table = dataList$groups), sampleNamesToExclude = sampleNamesToExclude) + dataColumns <- dataList$dataColumnsNameFunctionFromGroupIndex(groupIdx = match(x = group, table = dataList$grouXXXps), sampleNamesToExclude = sampleNamesToExclude) } dataList$dataColumnsNameFunctionFromGroupName <- dataColumnsNameFunctionFromGroupName - dataColumnsNameFunctionFromGroupNames <- function(groups, sampleNamesToExclude){ - unlist(lapply(X = groups, FUN = function(x){ + dataColumnsNameFunctionFromGroupNames <- function(grouXXXps, sampleNamesToExclude){ + unlist(lapply(X = grouXXXps, FUN = function(x){ dataList$dataColumnsNameFunctionFromGroupName(group = x, sampleNamesToExclude = sampleNamesToExclude) })) } dataList$dataColumnsNameFunctionFromGroupNames <- dataColumnsNameFunctionFromGroupNames groupNameFunctionFromDataColumnName <- function(dataColumnName, sampleNamesToExclude){ - groupIdx <- which(unlist(lapply(X = dataList$groups, FUN = function(x){ + groupIdx <- which(unlist(lapply(X = dataList$grouXXXps, FUN = function(x){ dataColumnNames <- dataList$dataColumnsNameFunctionFromGroupName(group = x, sampleNamesToExclude = sampleNamesToExclude) any(dataColumnNames == dataColumnName) }))) - dataList$groups[[groupIdx]] + dataList$grouXXXps[[groupIdx]] } dataList$groupNameFunctionFromDataColumnName <- groupNameFunctionFromDataColumnName @@ -660,19 +692,17 @@ readProjectData <- function(fileLines, progress = FALSE){ dataList$orderColumnNames <- orderColumnNames ## define sample in-/exclusion functions - excludedSamples <- function(groupSampleDataFrame, groups = dataList$groups){ - #dataList$groupSampleDataFrame[, "Sample"][ dataList$groupSampleDataFrame[, "Exclude"]] + excludedSamples <- function(groupSampleDataFrame, grouXXXps = dataList$grouXXXps){ samples = groupSampleDataFrame[, "Sample"] isExcluded = groupSampleDataFrame[, "Exclude"] - isGroup = groupSampleDataFrame[, "Group"] %in% groups + isGroup = groupSampleDataFrame[, "Group"] %in% grouXXXps return(samples[isExcluded & isGroup]) } dataList$excludedSamples <- excludedSamples - includedSamples <- function(groupSampleDataFrame, groups = dataList$groups){ - #dataList$groupSampleDataFrame[, "Sample"][!dataList$groupSampleDataFrame[, "Exclude"]] + includedSamples <- function(groupSampleDataFrame, grouXXXps = dataList$grouXXXps){ samples = groupSampleDataFrame[, "Sample"] isIncluded = !groupSampleDataFrame[, "Exclude"] - isGroup = groupSampleDataFrame[, "Group"] %in% groups + isGroup = groupSampleDataFrame[, "Group"] %in% grouXXXps return(samples[isIncluded & isGroup]) } dataList$includedSamples <- includedSamples @@ -684,94 +714,137 @@ readProjectData <- function(fileLines, progress = FALSE){ } dataList$includedGroups <- includedGroups excludedGroups <- function(groupSampleDataFrame, samples = dataList$groupSampleDataFrame[, "Sample"]){ - setdiff(dataList$groups, dataList$includedGroups(groupSampleDataFrame, samples)) + setdiff(dataList$grouXXXps, dataList$includedGroups(groupSampleDataFrame, samples)) } dataList$excludedGroups <- excludedGroups - - ## 950 932 688 - ## 634 336 248 - ## 321 972 296 - ## 9 090 088 - ## 11 753 432 - ## 13 272 240 - #print(sort( sapply(ls(),function(x){object.size(get(x))}))) - #memory.profile() - return(dataList) } -## tagsSector <- dataFrameMS1Header[2, ] -## dataList$dataFrameInfos <- metaboliteProfile -processMS1data <- function( - sampleNamesToExclude, numberOfMS1features, precursorLabels, - groups, metaboliteProfileColumnNames, - dataColumnIndecesFunctionFromGroupIndex, dataColumnsNameFunctionFromGroupIndex, dataColumnsNameFunctionFromGroupName, dataColumnsNameFunctionFromGroupNames, groupNameFunctionFromDataColumnName, - tagsSector, metaboliteProfile, progress -){ - numberOfGroups <- length(groups) +#' Process MS-Dial-like MS1 data.frame +#' +#' Processing of MS-Dial-like MS1 data.frame. Includes calculation +#' of MS1 data mean and log-fold-change (LFC) data +#' +#' @param sampleNamesToExclude +#' @param numberOfMS1features +#' @param precursorLabels +#' @param grouXXXps +#' @param metaboliteProfileColumnNames +#' @param dataColumnIndecesFunctionFromGroupIndex +#' @param dataColumnsNameFunctionFromGroupIndex +#' @param dataColumnsNameFunctionFromGroupName +#' @param dataColumnsNameFunctionFromGroupNames +#' @param groupNameFunctionFromDataColumnName +#' @param tagsSector +#' @param metaboliteProfile +#' @param progress +#' +#' @return +#' @export +#' @importFrom grDevices colorRampPalette rainbow +#' +#' @examples +processMS1data <- function(sampleNamesToExclude, + numberOfMS1features, + precursorLabels, + grouXXXps, + metaboliteProfileColumnNames, + dataColumnIndecesFunctionFromGroupIndex, + dataColumnsNameFunctionFromGroupIndex, + dataColumnsNameFunctionFromGroupName, + dataColumnsNameFunctionFromGroupNames, + groupNameFunctionFromDataColumnName, + tagsSector, + metaboliteProfile, + progress=FALSE) +{ + numberOfGroups <- length(grouXXXps) #################### ## MS1 measurement data: mean and LFC - if(!is.na(progress)) if(progress) incProgress(amount = 0.1, detail = "Coloring") else print("Coloring") - if(!is.na(progress)) if(progress) incProgress(amount = 0, detail = "Coloring init") else print("Coloring init") + if(!is.na(progress)) + if(progress) + incProgress(amount = 0.1, detail = "Coloring") + else + print("Coloring") + + if(!is.na(progress)) + if(progress) + incProgress(amount = 0, detail = "Coloring init") + else + print("Coloring init") dataFrameMeasurements <- data.frame(matrix(nrow = numberOfMS1features, ncol = 0)) rownames(dataFrameMeasurements) <- precursorLabels ## column name functions - if(!is.na(progress)) if(progress) incProgress(amount = 0, detail = "Coloring naming functions") else print("Coloring naming functions") + if(!is.na(progress)) + if(progress) + incProgress(amount = 0, detail = "Coloring naming functions") + else + print("Coloring naming functions") - ## store data of groups + ## store data of grouXXXps dataColumnNames <- list() for(groupIdx in seq_len(numberOfGroups)){ - dataColumnNamesHere <- dataColumnsNameFunctionFromGroupIndex(groupIdx = groupIdx, sampleNamesToExclude = sampleNamesToExclude) + dataColumnNamesHere <- dataColumnsNameFunctionFromGroupIndex(groupIdx = groupIdx, + sampleNamesToExclude = sampleNamesToExclude) dataColumnNames <- c(dataColumnNames, dataColumnNamesHere) - dataFrameMeasurements[, dataColumnNamesHere] <- data.matrix(metaboliteProfile[, dataColumnIndecesFunctionFromGroupIndex(groupIdx = groupIdx, sampleNamesToExclude = sampleNamesToExclude), drop = FALSE]) + dataFrameMeasurements[, dataColumnNamesHere] <- data.numericmatrix(metaboliteProfile[, dataColumnIndecesFunctionFromGroupIndex(groupIdx = groupIdx, + sampleNamesToExclude = sampleNamesToExclude), + drop = FALSE]) } dataColumnNames <- unlist(dataColumnNames) dataMeanColumnNameFunctionFromName <- function(group){ return(paste(group, "_mean", sep = "")) } + dataMeanColumnNameFunctionFromIndex <- function(groupIdx){ - return(dataMeanColumnNameFunctionFromName(groups[[groupIdx]])) + return(dataMeanColumnNameFunctionFromName(grouXXXps[[groupIdx]])) } lfcColumnNameFunctionFromName <- function(groupOne, groupTwo){ return(paste("LFC", groupOne, "vs", groupTwo, sep = "_")) } + lfcColumnNameFunctionFromIndex <- function(groupIdxOne, groupIdxTwo){ - lfcColumnNameFunctionFromName(groups[[groupIdxOne]], groups[[groupIdxTwo]]) + lfcColumnNameFunctionFromName(grouXXXps[[groupIdxOne]], grouXXXps[[groupIdxTwo]]) } groupNameFromGroupIndex <- function(groupIdx){ - return(groups[[groupIdx]]) + return(grouXXXps[[groupIdx]]) } + groupIdxFromGroupName <- function(group){ - return(match(x = group, table = groups)) + return(match(x = group, table = grouXXXps)) } - if(!is.na(progress)) if(progress) incProgress(amount = 0, detail = "Coloring gather data") else print("Coloring gather data") + if(!is.na(progress)) + if(progress) incProgress(amount = 0, detail = "Coloring gather data") + else + print("Coloring gather data") + ## mean data columns dataMeanColumnNames <- list() for(groupIdx in seq_len(numberOfGroups)){ dataMeanColumnName <- dataMeanColumnNameFunctionFromIndex(groupIdx) dataMeanColumnNames[[groupIdx]] <- dataMeanColumnName - if(class(unlist(metaboliteProfile[, dataColumnIndecesFunctionFromGroupIndex(groupIdx = groupIdx, sampleNamesToExclude = sampleNamesToExclude)])) == "character") + if(is(unlist(metaboliteProfile[, dataColumnIndecesFunctionFromGroupIndex(groupIdx = groupIdx, sampleNamesToExclude = sampleNamesToExclude)]),"character")) for(colIdx in dataColumnIndecesFunctionFromGroupIndex(groupIdx = groupIdx, sampleNamesToExclude = sampleNamesToExclude)) metaboliteProfile[, colIdx] <- as.numeric(metaboliteProfile[, colIdx]) - dataFrameMeasurements[, dataMeanColumnName] <- apply(X = data.matrix(metaboliteProfile[, dataColumnIndecesFunctionFromGroupIndex(groupIdx = groupIdx, sampleNamesToExclude = sampleNamesToExclude), drop=FALSE]), MARGIN = 1, FUN = mean) + dataFrameMeasurements[, dataMeanColumnName] <- apply(X = data.numericmatrix(metaboliteProfile[, dataColumnIndecesFunctionFromGroupIndex(groupIdx = groupIdx, sampleNamesToExclude = sampleNamesToExclude), drop=FALSE]), MARGIN = 1, FUN = mean) dataFrameMeasurements[is.na(dataFrameMeasurements[, dataMeanColumnName]), dataMeanColumnName] <- 0 } dataMeanColumnNames <- unlist(dataMeanColumnNames) ## all replicates mean dataFrameMeasurements[, "meanAllNormed"] <- apply( - X = data.matrix(metaboliteProfile[, - unlist(lapply(X = seq_len(numberOfGroups), FUN = function(x) {dataColumnIndecesFunctionFromGroupIndex(groupIdx = x, sampleNamesToExclude = sampleNamesToExclude)})), - drop=FALSE]), + X = data.numericmatrix(metaboliteProfile[, + unlist(lapply(X = seq_len(numberOfGroups), FUN = function(x) {dataColumnIndecesFunctionFromGroupIndex(groupIdx = x, sampleNamesToExclude = sampleNamesToExclude)})), + drop=FALSE]), MARGIN = 1, FUN = mean ) @@ -779,7 +852,7 @@ processMS1data <- function( if(meanAllMax != 0) dataFrameMeasurements[, "meanAllNormed"] <- dataFrameMeasurements[, "meanAllNormed"] / meanAllMax - ## log fold change between groups + ## log fold change between grouXXXps lfcColumnNames <- list() for(groupIdx1 in seq_len(numberOfGroups)) for(groupIdx2 in seq_len(numberOfGroups)){ @@ -799,24 +872,24 @@ processMS1data <- function( ######################################################################################### ## MS1 measurement data to colors - if(!is.na(progress)) if(progress) incProgress(amount = 0, detail = "Coloring matrix") else print("Coloring matrix") + if(!is.na(progress)) + if(progress) + incProgress(amount = 0, detail = "Coloring matrix") + else + print("Coloring matrix") - matrixDataFrame <- data.matrix(dataFrameMeasurements) + matrixDataFrame <- data.numericmatrix(dataFrameMeasurements) matrixDataFrame[, dataColumnNames ][matrixDataFrame[, dataColumnNames ] < 1] <- 1 matrixDataFrame[, dataMeanColumnNames][matrixDataFrame[, dataMeanColumnNames] < 1] <- 1 - #matrixDataFrame[matrixDataFrame[, dataMeanColumnNames] < 1] <- 1 - + matrixDataFrame[, dataColumnNames] <- log10(matrixDataFrame[, dataColumnNames]) matrixDataFrame[, dataMeanColumnNames] <- log10(matrixDataFrame[, dataMeanColumnNames]) matrixDataFrame[is.infinite(matrixDataFrame)] <- 0 - #matrixDataFrame[matrixDataFrame < 0] <- 0 - + ## min / max logAbsMin <- min(0, min(matrixDataFrame[, dataMeanColumnNames])) - #logAbsMax <- max(matrixDataFrame[, dataMeanColumnNames]) logAbsMax <- max(matrixDataFrame[, c(dataColumnNames, dataMeanColumnNames)]) - #logAbsMax <- max(matrixDataFrame[, dataColumnsNameFunctionFromGroupNames(groups = groups)]) logFoldChangeMinMax <- c(min(matrixDataFrame[, lfcColumnNames]), max(matrixDataFrame[, lfcColumnNames])) logFoldChangeMax <- max(abs(logFoldChangeMinMax)) if(logFoldChangeMax < 1) @@ -825,7 +898,6 @@ processMS1data <- function( ## maps colorMapAbsoluteData <- makecmap( x = c(logAbsMin, logAbsMax), n = 100, - #colFn = colorRampPalette(c('white', 'black')) colFn = colorRampPalette(rainbow(18)[10:1]) ) colorMapLogFoldChange <- makecmap( @@ -833,10 +905,20 @@ processMS1data <- function( colFn = colorRampPalette(c('blue', 'white', 'red')) ) - columnGroupLabels <- sapply(X = groups, FUN = function(x){ rep(x = x, times = length(dataColumnsNameFunctionFromGroupName(group = x, sampleNamesToExclude = sampleNamesToExclude))) }) + columnGroupLabels <- sapply(X = grouXXXps, + FUN = function(x){ + rep(x = x, + times = length(dataColumnsNameFunctionFromGroupName(group = x, + sampleNamesToExclude = sampleNamesToExclude))) + }) ## translate and box colors - if(!is.na(progress)) if(progress) incProgress(amount = 0, detail = "Coloring box") else print("Coloring box") + if(!is.na(progress)) + if(progress) + incProgress(amount = 0, detail = "Coloring box") + else + print("Coloring box") + colorDataFrame <- dataFrameMeasurements colorDataFrame[, dataColumnNames ] <- cmap(x = matrixDataFrame[, dataColumnNames ], map = colorMapAbsoluteData) colorDataFrame[, dataMeanColumnNames] <- cmap(x = matrixDataFrame[, dataMeanColumnNames], map = colorMapAbsoluteData) @@ -845,10 +927,6 @@ processMS1data <- function( returnObj <- list( ## name functions - #dataColumnsNameFunctionFromGroupIndex=dataColumnsNameFunctionFromGroupIndex, - #dataColumnsNameFunctionFromGroupName=dataColumnsNameFunctionFromGroupName, - #dataColumnsNameFunctionFromGroupNames=dataColumnsNameFunctionFromGroupNames, - #groupNameFunctionFromDataColumnName=groupNameFunctionFromDataColumnName, dataMeanColumnNameFunctionFromIndex=dataMeanColumnNameFunctionFromIndex, dataMeanColumnNameFunctionFromName=dataMeanColumnNameFunctionFromName, lfcColumnNameFunctionFromIndex=lfcColumnNameFunctionFromIndex, @@ -857,11 +935,8 @@ processMS1data <- function( groupIdxFromGroupName=groupIdxFromGroupName, ## data and names dataFrameMeasurements=dataFrameMeasurements, - #dataMeanColumnNames=dataMeanColumnNames, - #lfcColumnNames=lfcColumnNames, ## colors colorMatrixDataFrame=colorMatrixDataFrame, - #matrixDataFrame=matrixDataFrame, colorMapAbsoluteData=colorMapAbsoluteData, colorMapLogFoldChange=colorMapLogFoldChange, columnGroupLabels=columnGroupLabels, @@ -872,7 +947,8 @@ processMS1data <- function( ) } -serializeSampleSelectionAndOrder <- function(groupSampleDataFrame){ +serializeSampleSelectionAndOrder <- function(groupSampleDataFrame) +{ ## wrap columns columnsSerialized <- sapply(X = seq_len(ncol(groupSampleDataFrame)), FUN = function(colIdx){ cellContent <- paste(groupSampleDataFrame[, colIdx], collapse = "; ") @@ -885,6 +961,7 @@ serializeSampleSelectionAndOrder <- function(groupSampleDataFrame){ return(groupSampleDataFrameFieldValue) } + deserializeSampleSelectionAndOrder <- function(groupSampleDataFrameFieldValue){ ## unbox groupSampleDataFrameName <- "SampleSelectionAndOrder" @@ -931,6 +1008,7 @@ serializeParameterSetFile <- function(importParameterSet, toolName, toolVersion) importParametersFileValue <- paste(comment, importParametersValue, sep = "\n") return(importParametersFileValue) } + deserializeParameterSetFile <- function(importParametersFileContent){ ## remove comments importParametersValuePairs <- importParametersFileContent[-grep(pattern = "#.*", x = importParametersFileContent)] @@ -938,6 +1016,7 @@ deserializeParameterSetFile <- function(importParametersFileContent){ importParameterSet <- deserializeParameterSetKeyValuePairs(importParametersValuePairs) return(importParameterSet) } + serializeParameterSet <- function(importParameterSet){ ## wrap importParametersValue <- paste(names(importParameterSet), importParameterSet, sep = "=", collapse = "; ") @@ -946,6 +1025,7 @@ serializeParameterSet <- function(importParameterSet){ importParametersFieldValue <- paste(importParametersName, "={", importParametersValue, "}", sep = "") return(importParametersFieldValue) } + deserializeParameterSet <- function(importParametersFieldValue){ ## unbox importParametersName <- "ImportParameters" @@ -960,6 +1040,7 @@ deserializeParameterSet <- function(importParametersFieldValue){ importParameterSet <- deserializeParameterSetKeyValuePairs(importParametersValuePairs) return(importParameterSet) } + deserializeParameterSetKeyValuePairs <- function(importParametersValuePairs){ ## unwrap importParametersValuePairsList <- strsplit(x = importParametersValuePairs, split = "=") @@ -982,6 +1063,20 @@ deserializeParameterSetKeyValuePairs <- function(importParametersValuePairs){ importParameterSet <- castListEntries(importParameterSet) return(importParameterSet) } + +#' Cast logical's and numeric's in a list or data.frame +#' +#' Tries to cast a list entry (or column in a data.frame) to logical's, +#' if that does not create any missing values, it is assumed +#' to be a logical will be replaced by `as.logical()` conversion. +#' Similarly for numeric entries (or columns). Everything else remains strings +#' +#' @param list +#' +#' @return list of the same lenght with logical's and numeric's casted +#' @export +#' +#' @examples castListEntries <- function(list){ ## cast logical's and numeric's suppressWarnings( @@ -1273,9 +1368,9 @@ getMS2spectrumInfoForCluster <- function(dataList, clusterDataList, treeLabel){ featuresIntersection <- clusterDataList$innerNodeFeaturesIntersection[[clusterIndex]] featuresUnion <- clusterDataList$innerNodeFeaturesUnion[[clusterIndex]] #fragmentsX <- dataList$fragmentMasses[featuresIntersection] - #fragmentsY <- apply(X = data.matrix(dataList$featureMatrix[clusterMembersPrecursors, featuresIntersection]), MARGIN = 2, FUN = mean) + #fragmentsY <- apply(X = data.numericmatrix(dataList$featureMatrix[clusterMembersPrecursors, featuresIntersection]), MARGIN = 2, FUN = mean) fragmentsX <- dataList$fragmentMasses[featuresUnion] - fragmentsY <- apply(X = data.matrix(dataList$featureMatrix[clusterMembersPrecursors, featuresUnion]), MARGIN = 2, FUN = mean) + fragmentsY <- apply(X = data.numericmatrix(dataList$featureMatrix[clusterMembersPrecursors, featuresUnion]), MARGIN = 2, FUN = mean) selectedPositive <- clusterDataList$innerNodeFeaturesCountsMatrix[clusterIndex, featuresUnion] coverageSelected <- selectedPositive / numberOfClusterMembers @@ -1366,7 +1461,7 @@ getTableFromPrecursorSet <- function(dataList, precursorSet){ numberOfPrecursors <- length(precursorSet) ## measurements - columnNames <- unlist(lapply(X = dataList$groups, FUN = dataList$dataMeanColumnNameFunctionFromName)) + columnNames <- unlist(lapply(X = dataList$grouXXXps, FUN = dataList$dataMeanColumnNameFunctionFromName)) dataFrameMeasurements <- data.frame(dataList$dataFrameMeasurements[precursorSet, columnNames, drop=FALSE]) colnames(dataFrameMeasurements) <- columnNames rownames(dataFrameMeasurements) <- dataList$precursorLabels[precursorSet] diff --git a/R/FragmentMatrixFunctions.R b/R/FragmentMatrixFunctions.R index 4e817f3..938d44e 100644 --- a/R/FragmentMatrixFunctions.R +++ b/R/FragmentMatrixFunctions.R @@ -1,45 +1,42 @@ - -#library("xcms") -library("Matrix") -library("stringr") - #################################################################################### ## aligned spectra -parsePeakAbundanceMatrix <- function(filePeakMatrix, doPrecursorDeisotoping, mzDeviationInPPM_precursorDeisotoping, mzDeviationAbsolute_precursorDeisotoping, maximumRtDifference, progress){ +parsePeakAbundanceMatrix <- function(filePeakMatrix, + doPrecursorDeisotoping, + mzDeviationInPPM_precursorDeisotoping, + mzDeviationAbsolute_precursorDeisotoping, + maximumRtDifference, + progress=FALSE) +{ ## read file if(!is.na(progress)) if(progress) incProgress(amount = 0.1, detail = paste("Parsing MS1 file content...", sep = "")) else print(paste("Parsing MS1 file content...", sep = "")) - ################ - #dataFrameAll <- read.table(filePeakMatrix, header=FALSE, sep = "\t", as.is=TRUE, quote = "\"", check.names = FALSE, comment.char = "") - #dataFrameAll <- read.table("/home/achimmir/Downloads/Height_1_202010311146_ave_removed.txt", header=FALSE, sep = "\t", as.is=TRUE, quote = "\"", check.names = FALSE, comment.char = "") - #dataFrameAll1 <- read.table(filePeakMatrix, header=TRUE, sep = "\t", as.is=TRUE, quote = "\"", check.names = FALSE, comment.char = "") - #dataFrameAll1 <- read.table("/home/achimmir/Downloads/Height_1_202010311146_ave_removed.txt", header=TRUE, sep = "\t", as.is=TRUE, quote = "\"", check.names = FALSE, comment.char = "") - ############### + dataFrameAll <- read.table(filePeakMatrix, header=FALSE, sep = "\t", as.is=TRUE, quote = "\"", check.names = FALSE, comment.char = "") dataFrameAll1 <- read.table(filePeakMatrix, header=TRUE, sep = "\t", as.is=TRUE, quote = "\"", check.names = FALSE, comment.char = "") - ######## + oldFormat <- max(which(dataFrameAll[1:5, 1] == "")) == 3 header_rowNumber <- ifelse(test = oldFormat, yes = 4, no = 5) dataFrameHeader <- dataFrameAll[1:header_rowNumber, ] - #################### + `%notin%` <- Negate(`%in%`) - TR<- c("Reference RT","Reference m/z","Comment","Manually modified for quantification","Total score","RT similarity","Average","Stdev") + TR<- c("Reference RT","Reference m/z","Comment", + "Manually modified for quantification", + "Total score","RT similarity","Average","Stdev") IN2<-which(unname(dataFrameHeader[4,]) %notin% TR) dataFrameHeader1 <-dataFrameHeader[IN2] - ############## + dataFrame <- dataFrameAll[(header_rowNumber + 1):nrow(dataFrameAll), ] dataFrame1 <- dataFrame[IN2] - ######### + colnames(dataFrame) <- dataFrameHeader[header_rowNumber, ] colnames(dataFrame1) <- dataFrameHeader1[header_rowNumber, ] - ########### + numberOfPrecursors <- nrow(dataFrame1) numberOfPrecursorsPrior <- numberOfPrecursors - ########### - columnIndexEndOfAnnotation <- max(match(x = "Class", table = dataFrameHeader1[1, ]), na.rm = TRUE) - print("entering the line ...line 26") - #print(columnIndexEndOfAnnotation) - #print(ncol(dataFrame)) - ######################### + + columnIndexEndOfAnnotation <- max(match(x = "Class", + table = dataFrameHeader1[1, ]), + na.rm = TRUE) + if(ncol(dataFrame1) > columnIndexEndOfAnnotation){ dataColumnStartEndIndeces <- c(columnIndexEndOfAnnotation + 1, ncol(dataFrame1)) numberOfDataColumns <- dataColumnStartEndIndeces[[2]] - dataColumnStartEndIndeces[[1]] + 1 @@ -62,49 +59,34 @@ parsePeakAbundanceMatrix <- function(filePeakMatrix, doPrecursorDeisotoping, mzD batchID <- NULL } - ######################## - commaNumbers <- sum(grepl(x = dataFrame1$"Average Mz", pattern = "^(\\d+,\\d+$)|(^\\d+$)")) decimalSeparatorIsComma <- commaNumbers == nrow(dataFrame1) if(decimalSeparatorIsComma){ - #print("entering the line..206") if(!is.null(dataFrame1$"Average Rt(min)")) dataFrame1$"Average Rt(min)" <- gsub(x = gsub(x = dataFrame1$"Average Rt(min)", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") - #if(!is.null(dataFrame$"Average.Rt.min.")) dataFrame$"Average.Rt.min." <- gsub(x = gsub(x = dataFrame$"Average.Rt.min.", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") if(!is.null(dataFrame1$"Average Mz")) dataFrame1$"Average Mz" <- gsub(x = gsub(x = dataFrame1$"Average Mz", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") - #if(!is.null(dataFrame$"Average.Mz")) dataFrame$"Average.Mz" <- gsub(x = gsub(x = dataFrame$"Average.Mz", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") if(!is.null(dataFrame1$"Fill %")) dataFrame1$"Fill %" <- gsub(x = gsub(x = dataFrame1$"Fill %", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") - #if(!is.null(dataFrame$"Fill..")) dataFrame$"Fill.." <- gsub(x = gsub(x = dataFrame$"Fill..", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") if(!is.null(dataFrame1$"Dot product")) dataFrame1$"Dot product" <- gsub(x = gsub(x = dataFrame1$"Dot product", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") - #if(!is.null(dataFrame$"Dot.product")) dataFrame$"Dot.product" <- gsub(x = gsub(x = dataFrame$"Dot.product", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") if(!is.null(dataFrame1$"Reverse dot product")) dataFrame1$"Reverse dot product" <- gsub(x = gsub(x = dataFrame1$"Reverse dot product", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") - #if(!is.null(dataFrame$"Reverse.dot.product")) dataFrame$"Reverse.dot.product" <- gsub(x = gsub(x = dataFrame$"Reverse.dot.product", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") if(!is.null(dataFrame1$"Fragment presence %")) dataFrame1$"Fragment presence %" <- gsub(x = gsub(x = dataFrame1$"Fragment presence %", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") - #if(!is.null(dataFrame$"Fragment.presence.%")) dataFrame$"Fragment.presence.%" <- gsub(x = gsub(x = dataFrame$"Fragment.presence.%", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") ## replace -1 by 0 - if(numberOfDataColumns > 0){ - + if(numberOfDataColumns > 0) { for(colIdx in dataColumnStartEndIndeces[[1]]:dataColumnStartEndIndeces[[2]]){ dataFrame1[ , colIdx] <- gsub(x = gsub(x = dataFrame1[ , colIdx], pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") } } } + ################### ## column formats if(!is.null(dataFrame1$"Average Rt(min)")) dataFrame1$"Average Rt(min)" <- as.numeric(dataFrame1$"Average Rt(min)") - #if(!is.null(dataFrame$"Average.Rt.min.")) dataFrame$"Average.Rt.min." <- as.numeric(dataFrame$"Average.Rt.min.") if(!is.null(dataFrame1$"Average Mz")) dataFrame1$"Average Mz" <- as.numeric(dataFrame1$"Average Mz") - #if(!is.null(dataFrame$"Average.Mz")) dataFrame$"Average.Mz" <- as.numeric(dataFrame$"Average.Mz") if(!is.null(dataFrame1$"Fill %")) dataFrame1$"Fill %" <- as.numeric(dataFrame1$"Fill %") - #if(!is.null(dataFrame$"Fill..")) dataFrame$"Fill.." <- as.numeric(dataFrame$"Fill..") if(!is.null(dataFrame1$"MS/MS included")) dataFrame1$"MS/MS included" <- as.logical(dataFrame1$"MS/MS included") - #if(!is.null(dataFrame$"MS.MS.included")) dataFrame$"MS.MS.included" <- as.logical(dataFrame$"MS.MS.included") if(!is.null(dataFrame1$"Dot product")) dataFrame1$"Dot product" <- as.numeric(dataFrame1$"Dot product") - #if(!is.null(dataFrame$"Dot.product")) dataFrame$"Dot.product" <- as.numeric(dataFrame$"Dot.product") if(!is.null(dataFrame1$"Reverse dot product")) dataFrame1$"Reverse dot product" <- as.numeric(dataFrame1$"Reverse dot product") - #if(!is.null(dataFrame$"Reverse.dot.product")) dataFrame$"Reverse.dot.product" <- as.numeric(dataFrame$"Reverse.dot.product") if(!is.null(dataFrame1$"Fragment presence %")) dataFrame1$"Fragment presence %" <- as.numeric(dataFrame1$"Fragment presence %") - #if(!is.null(dataFrame$"Fragment.presence.%")) dataFrame$"Fragment.presence.%" <- as.numeric(dataFrame$"Fragment.presence.%") + ##################### ## sorted by m/z (needed for deisotoping) if(!is.null(dataFrame1$"Average Mz")) @@ -118,12 +100,13 @@ parsePeakAbundanceMatrix <- function(filePeakMatrix, doPrecursorDeisotoping, mzD dataFrame1[(dataFrame1[,colIdx] == -1),colIdx] <- 0 } } + ## deisotoping numberOfRemovedIsotopePeaks <- 0 if(doPrecursorDeisotoping & !is.null(dataFrame1$"Average Mz")){ if(!is.na(progress)) if(progress) incProgress(amount = 0, detail = paste("Precursor deisotoping...", sep = "")) else print(paste("Precursor deisotoping...", sep = "")) - distance13Cminus12C <- 1.0033548378 + ## mark isotope precursors precursorsToRemove <- vector(mode = "logical", length = numberOfPrecursors) @@ -131,6 +114,7 @@ parsePeakAbundanceMatrix <- function(filePeakMatrix, doPrecursorDeisotoping, mzD intensities <- dataFrame1[ , dataColumnStartEndIndeces[[1]]:dataColumnStartEndIndeces[[2]]] medians <- apply(X = as.matrix(intensities), MARGIN = 1, FUN = median) } + for(precursorIdx in seq_len(numberOfPrecursors)){ if((precursorIdx %% (as.integer(numberOfPrecursors/10))) == 0) if(!is.na(progress)) if(progress) incProgress(amount = 0.0, detail = paste("Precursor deisotoping ", precursorIdx, " / ", numberOfPrecursors, sep = "")) else print(paste("Precursor deisotoping ", precursorIdx, " / ", numberOfPrecursors, sep = "")) @@ -140,10 +124,12 @@ parsePeakAbundanceMatrix <- function(filePeakMatrix, doPrecursorDeisotoping, mzD ## RT difference <= maximumRtDifference validPrecursorsInRt <- abs(dataFrame1$"Average Rt(min)"[[precursorIdx]] - dataFrame1$"Average Rt(min)"[-precursorIdx]) <= maximumRtDifference + ## MZ difference around 1.0033548378 (first isotope) or 1.0033548378 * 2 (second isotope) validPrecursorsInMz1 <- abs((dataFrame1$"Average Mz"[[precursorIdx]] - distance13Cminus12C * 1) - dataFrame1$"Average Mz"[-precursorIdx]) <= mzError validPrecursorsInMz2 <- abs((dataFrame1$"Average Mz"[[precursorIdx]] - distance13Cminus12C * 2) - dataFrame1$"Average Mz"[-precursorIdx]) <= mzError validPrecursorsInMz <- validPrecursorsInMz1 | validPrecursorsInMz2 + ## intensity gets smaller in the isotope spectrum if(numberOfDataColumns > 0){ validPrecursorsInIntensity <- (medians[-precursorIdx] - medians[[precursorIdx]]) > 0 @@ -165,356 +151,19 @@ parsePeakAbundanceMatrix <- function(filePeakMatrix, doPrecursorDeisotoping, mzD if(!is.na(progress)) if(progress) incProgress(amount = 0, detail = paste("Boxing...", sep = "")) else print(paste("Boxing...", sep = "")) returnObj <- list() returnObj$dataFrame1 <- dataFrame1 - ## meta - returnObj$oldFormat <- oldFormat - returnObj$numberOfPrecursors <- numberOfPrecursors - returnObj$dataColumnStartEndIndeces <- dataColumnStartEndIndeces - returnObj$numberOfDataColumns <- numberOfDataColumns - ## group anno - returnObj$sampleClass <- sampleClass - returnObj$sampleType <- sampleType - returnObj$sampleInjectionOrder <- sampleInjectionOrder - returnObj$batchID <- batchID - ## misc - returnObj$numberOfPrecursorsPrior <- numberOfPrecursorsPrior - returnObj$numberOfRemovedIsotopePeaks <- numberOfRemovedIsotopePeaks - - return (returnObj) - #print(returnObj) -} -########################################### -# parsePeakAbundanceMatrix <- function(filePeakMatrix, doPrecursorDeisotoping, mzDeviationInPPM_precursorDeisotoping, mzDeviationAbsolute_precursorDeisotoping, maximumRtDifference, progress){ -# ## read file -# if(!is.na(progress)) if(progress) incProgress(amount = 0.1, detail = paste("Parsing MS1 file content...", sep = "")) else print(paste("Parsing MS1 file content...", sep = "")) -# #dataFrameAll <- read.table(filePeakMatrix, header=FALSE, sep = "\t", as.is=TRUE, quote = "\"", check.names = FALSE, comment.char = "") -# dataFrameAll <- read.table("/home/achimmir/Downloads/Height_1_202010311146_ave_removed.txt", header=FALSE, sep = "\t", as.is=TRUE, quote = "\"", check.names = FALSE, comment.char = "") -# #dataFrameAll1 <- read.table(filePeakMatrix, header=TRUE, sep = "\t", as.is=TRUE, quote = "\"", check.names = FALSE, comment.char = "") -# ######## -# dataFrameAll1 <- read.table("/home/achimmir/Downloads/Height_1_202010311146_ave_removed.txt", header=TRUE, sep = "\t", as.is=TRUE, quote = "\"", check.names = FALSE, comment.char = "") -# ######## -# oldFormat <- max(which(dataFrameAll[1:5, 1] == "")) == 3 -# header_rowNumber <- ifelse(test = oldFormat, yes = 4, no = 5) -# dataFrameHeader <- dataFrameAll[1:header_rowNumber, ] -# #################### -# `%notin%` <- Negate(`%in%`) -# TR<- c("Reference RT","Reference m/z","Comment","Manually modified for quantification","Total score","RT similarity","Average","Stdev") -# IN2<-which(unname(dataFrameHeader[4,]) %notin% TR) -# dataFrameHeader1 <-dataFrameHeader[IN2] -# ############## -# dataFrame <- dataFrameAll[(header_rowNumber + 1):nrow(dataFrameAll), ] -# dataFrame1 <- dataFrame[IN2] -# ######### -# colnames(dataFrame) <- dataFrameHeader[header_rowNumber, ] -# colnames(dataFrame1) <- dataFrameHeader1[header_rowNumber, ] -# ########### -# numberOfPrecursors <- nrow(dataFrame1) -# numberOfPrecursorsPrior <- numberOfPrecursors -# ########### -# columnIndexEndOfAnnotation <- max(match(x = "Class", table = dataFrameHeader[1, ]), na.rm = TRUE) -# print("entering the line ...line 26") -# #print(columnIndexEndOfAnnotation) -# #print(ncol(dataFrame)) -# -# if(ncol(dataFrame) > columnIndexEndOfAnnotation){ -# dataColumnStartEndIndeces <- c(columnIndexEndOfAnnotation + 1, ncol(dataFrame)) -# numberOfDataColumns <- dataColumnStartEndIndeces[[2]] - dataColumnStartEndIndeces[[1]] + 1 -# dataColumnNames <- colnames(dataFrame)[dataColumnStartEndIndeces[[1]]:dataColumnStartEndIndeces[[2]]] -# -# sampleClass <- dataFrameHeader[1, (columnIndexEndOfAnnotation + 1):ncol(dataFrameHeader)] -# sampleType <- dataFrameHeader[2, (columnIndexEndOfAnnotation + 1):ncol(dataFrameHeader)] -# sampleInjectionOrder <- dataFrameHeader[3, (columnIndexEndOfAnnotation + 1):ncol(dataFrameHeader)] -# batchID <- NULL -# if(!oldFormat) -# batchID <- dataFrameHeader[4, (columnIndexEndOfAnnotation + 1):ncol(dataFrameHeader)] -# } else { -# dataColumnStartEndIndeces <- NULL -# numberOfDataColumns <- 0 -# dataColumnNames <- NULL -# -# sampleClass <- NULL -# sampleType <- NULL -# sampleInjectionOrder <- NULL -# batchID <- NULL -# } -# -# ######################## -# -# commaNumbers <- sum(grepl(x = dataFrame1$"Average Mz", pattern = "^(\\d+,\\d+$)|(^\\d+$)")) -# decimalSeparatorIsComma <- commaNumbers == nrow(dataFrame1) -# if(decimalSeparatorIsComma){ -# #print("entering the line..206") -# if(!is.null(dataFrame1$"Average Rt(min)")) dataFrame1$"Average Rt(min)" <- gsub(x = gsub(x = dataFrame1$"Average Rt(min)", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") -# #if(!is.null(dataFrame$"Average.Rt.min.")) dataFrame$"Average.Rt.min." <- gsub(x = gsub(x = dataFrame$"Average.Rt.min.", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") -# if(!is.null(dataFrame1$"Average Mz")) dataFrame1$"Average Mz" <- gsub(x = gsub(x = dataFrame1$"Average Mz", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") -# #if(!is.null(dataFrame$"Average.Mz")) dataFrame$"Average.Mz" <- gsub(x = gsub(x = dataFrame$"Average.Mz", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") -# if(!is.null(dataFrame1$"Fill %")) dataFrame1$"Fill %" <- gsub(x = gsub(x = dataFrame1$"Fill %", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") -# #if(!is.null(dataFrame$"Fill..")) dataFrame$"Fill.." <- gsub(x = gsub(x = dataFrame$"Fill..", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") -# if(!is.null(dataFrame1$"Dot product")) dataFrame1$"Dot product" <- gsub(x = gsub(x = dataFrame1$"Dot product", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") -# #if(!is.null(dataFrame$"Dot.product")) dataFrame$"Dot.product" <- gsub(x = gsub(x = dataFrame$"Dot.product", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") -# if(!is.null(dataFrame1$"Reverse dot product")) dataFrame1$"Reverse dot product" <- gsub(x = gsub(x = dataFrame1$"Reverse dot product", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") -# #if(!is.null(dataFrame$"Reverse.dot.product")) dataFrame$"Reverse.dot.product" <- gsub(x = gsub(x = dataFrame$"Reverse.dot.product", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") -# if(!is.null(dataFrame1$"Fragment presence %")) dataFrame1$"Fragment presence %" <- gsub(x = gsub(x = dataFrame1$"Fragment presence %", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") -# #if(!is.null(dataFrame$"Fragment.presence.%")) dataFrame$"Fragment.presence.%" <- gsub(x = gsub(x = dataFrame$"Fragment.presence.%", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") -# -# ## replace -1 by 0 -# if(numberOfDataColumns > 0){ -# -# for(colIdx in dataColumnStartEndIndeces[[1]]:dataColumnStartEndIndeces[[2]]){ -# dataFrame1[ , colIdx] <- gsub(x = gsub(x = dataFrame1[ , colIdx], pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") -# } -# } -# } -# ################### -# ## column formats -# if(!is.null(dataFrame1$"Average Rt(min)")) dataFrame1$"Average Rt(min)" <- as.numeric(dataFrame1$"Average Rt(min)") -# #if(!is.null(dataFrame$"Average.Rt.min.")) dataFrame$"Average.Rt.min." <- as.numeric(dataFrame$"Average.Rt.min.") -# if(!is.null(dataFrame1$"Average Mz")) dataFrame1$"Average Mz" <- as.numeric(dataFrame1$"Average Mz") -# #if(!is.null(dataFrame$"Average.Mz")) dataFrame$"Average.Mz" <- as.numeric(dataFrame$"Average.Mz") -# if(!is.null(dataFrame1$"Fill %")) dataFrame1$"Fill %" <- as.numeric(dataFrame1$"Fill %") -# #if(!is.null(dataFrame$"Fill..")) dataFrame$"Fill.." <- as.numeric(dataFrame$"Fill..") -# if(!is.null(dataFrame1$"MS/MS included")) dataFrame1$"MS/MS included" <- as.logical(dataFrame1$"MS/MS included") -# #if(!is.null(dataFrame$"MS.MS.included")) dataFrame$"MS.MS.included" <- as.logical(dataFrame$"MS.MS.included") -# if(!is.null(dataFrame1$"Dot product")) dataFrame1$"Dot product" <- as.numeric(dataFrame1$"Dot product") -# #if(!is.null(dataFrame$"Dot.product")) dataFrame$"Dot.product" <- as.numeric(dataFrame$"Dot.product") -# if(!is.null(dataFrame1$"Reverse dot product")) dataFrame1$"Reverse dot product" <- as.numeric(dataFrame1$"Reverse dot product") -# #if(!is.null(dataFrame$"Reverse.dot.product")) dataFrame$"Reverse.dot.product" <- as.numeric(dataFrame$"Reverse.dot.product") -# if(!is.null(dataFrame1$"Fragment presence %")) dataFrame1$"Fragment presence %" <- as.numeric(dataFrame1$"Fragment presence %") -# #if(!is.null(dataFrame$"Fragment.presence.%")) dataFrame$"Fragment.presence.%" <- as.numeric(dataFrame$"Fragment.presence.%") -# ##################### -# ## sorted by m/z (needed for deisotoping) -# if(!is.null(dataFrame1$"Average Mz")) -# dataFrame1 <- dataFrame1[order(dataFrame1$"Average Mz"), ] -# -# ## replace -1 by 0 -# if(numberOfDataColumns > 0){ -# for(colIdx in dataColumnStartEndIndeces[[1]]:dataColumnStartEndIndeces[[2]]){ -# dataFrame1[ , colIdx] <- as.numeric(dataFrame1[ , colIdx]) -# if(!is.na(sum(dataFrame1[,colIdx] == -1))) -# dataFrame1[(dataFrame1[,colIdx] == -1),colIdx] <- 0 -# } -# } -# ## deisotoping -# numberOfRemovedIsotopePeaks <- 0 -# if(doPrecursorDeisotoping & !is.null(dataFrame1$"Average Mz")){ -# if(!is.na(progress)) if(progress) incProgress(amount = 0, detail = paste("Precursor deisotoping...", sep = "")) else print(paste("Precursor deisotoping...", sep = "")) -# -# distance13Cminus12C <- 1.0033548378 -# ## mark isotope precursors -# precursorsToRemove <- vector(mode = "logical", length = numberOfPrecursors) -# -# if(numberOfDataColumns > 0){ -# intensities <- dataFrame1[ , dataColumnStartEndIndeces[[1]]:dataColumnStartEndIndeces[[2]]] -# medians <- apply(X = as.matrix(intensities), MARGIN = 1, FUN = median) -# } -# for(precursorIdx in seq_len(numberOfPrecursors)){ -# if((precursorIdx %% (as.integer(numberOfPrecursors/10))) == 0) -# if(!is.na(progress)) if(progress) incProgress(amount = 0.0, detail = paste("Precursor deisotoping ", precursorIdx, " / ", numberOfPrecursors, sep = "")) else print(paste("Precursor deisotoping ", precursorIdx, " / ", numberOfPrecursors, sep = "")) -# -# mzError <- dataFrame1$"Average Mz"[[precursorIdx]] * mzDeviationInPPM_precursorDeisotoping / 1000000 -# mzError <- max(mzError, mzDeviationAbsolute_precursorDeisotoping) -# -# ## RT difference <= maximumRtDifference -# validPrecursorsInRt <- abs(dataFrame1$"Average Rt(min)"[[precursorIdx]] - dataFrame1$"Average Rt(min)"[-precursorIdx]) <= maximumRtDifference -# ## MZ difference around 1.0033548378 (first isotope) or 1.0033548378 * 2 (second isotope) -# validPrecursorsInMz1 <- abs((dataFrame1$"Average Mz"[[precursorIdx]] - distance13Cminus12C * 1) - dataFrame1$"Average Mz"[-precursorIdx]) <= mzError -# validPrecursorsInMz2 <- abs((dataFrame1$"Average Mz"[[precursorIdx]] - distance13Cminus12C * 2) - dataFrame1$"Average Mz"[-precursorIdx]) <= mzError -# validPrecursorsInMz <- validPrecursorsInMz1 | validPrecursorsInMz2 -# ## intensity gets smaller in the isotope spectrum -# if(numberOfDataColumns > 0){ -# validPrecursorsInIntensity <- (medians[-precursorIdx] - medians[[precursorIdx]]) > 0 -# } else { -# validPrecursorsInIntensity <- TRUE -# } -# -# if(any(validPrecursorsInRt & validPrecursorsInMz & validPrecursorsInIntensity)) -# precursorsToRemove[[precursorIdx]] <- TRUE -# } -# -# ## remove isotopes -# dataFrame1 <- dataFrame1[!precursorsToRemove, ] -# -# numberOfRemovedIsotopePeaks <- sum(precursorsToRemove) -# numberOfPrecursors <- nrow(dataFrame1) -# } -# -# if(!is.na(progress)) if(progress) incProgress(amount = 0, detail = paste("Boxing...", sep = "")) else print(paste("Boxing...", sep = "")) -# returnObj <- list() -# returnObj$dataFrame1 <- dataFrame1 -# ## meta -# returnObj$oldFormat <- oldFormat -# returnObj$numberOfPrecursors <- numberOfPrecursors -# returnObj$dataColumnStartEndIndeces <- dataColumnStartEndIndeces -# returnObj$numberOfDataColumns <- numberOfDataColumns -# ## group anno -# returnObj$sampleClass <- sampleClass -# returnObj$sampleType <- sampleType -# returnObj$sampleInjectionOrder <- sampleInjectionOrder -# returnObj$batchID <- batchID -# ## misc -# returnObj$numberOfPrecursorsPrior <- numberOfPrecursorsPrior -# returnObj$numberOfRemovedIsotopePeaks <- numberOfRemovedIsotopePeaks -# -# return (returnObj) -# #print(returnObj) -# } -#print("entering the line ...318") -#################################################################################### -## - -################################################### -parsePeakAbundanceMatrix.OLD <- function(filePeakMatrix, doPrecursorDeisotoping, mzDeviationInPPM_precursorDeisotoping, mzDeviationAbsolute_precursorDeisotoping, maximumRtDifference, progress){ - ## read file - if(!is.na(progress)) if(progress) incProgress(amount = 0.1, detail = paste("Parsing MS1 file content...", sep = "")) else print(paste("Parsing MS1 file content...", sep = "")) - - dataFrameAll <- read.table(filePeakMatrix, header=FALSE, sep = "\t", as.is=TRUE, quote = "\"", check.names = FALSE, comment.char = "") - - oldFormat <- max(which(dataFrameAll[1:5, 1] == "")) == 3 - header_rowNumber <- ifelse(test = oldFormat, yes = 4, no = 5) - - dataFrameHeader <- dataFrameAll[1:header_rowNumber, ] - dataFrame <- dataFrameAll[(header_rowNumber + 1):nrow(dataFrameAll), ] - colnames(dataFrame) <- dataFrameHeader[header_rowNumber, ] - - numberOfPrecursors <- nrow(dataFrame) - numberOfPrecursorsPrior <- numberOfPrecursors - - ## columns - columnIndexEndOfAnnotation <- max(match(x = "Class", table = dataFrameHeader[1, ]), na.rm = TRUE) - - if(ncol(dataFrame) > columnIndexEndOfAnnotation){ - dataColumnStartEndIndeces <- c(columnIndexEndOfAnnotation + 1, ncol(dataFrame)) - numberOfDataColumns <- dataColumnStartEndIndeces[[2]] - dataColumnStartEndIndeces[[1]] + 1 - dataColumnNames <- colnames(dataFrame)[dataColumnStartEndIndeces[[1]]:dataColumnStartEndIndeces[[2]]] - - sampleClass <- dataFrameHeader[1, (columnIndexEndOfAnnotation + 1):ncol(dataFrameHeader)] - sampleType <- dataFrameHeader[2, (columnIndexEndOfAnnotation + 1):ncol(dataFrameHeader)] - sampleInjectionOrder <- dataFrameHeader[3, (columnIndexEndOfAnnotation + 1):ncol(dataFrameHeader)] - batchID <- NULL - if(!oldFormat) - batchID <- dataFrameHeader[4, (columnIndexEndOfAnnotation + 1):ncol(dataFrameHeader)] - } else { - dataColumnStartEndIndeces <- NULL - numberOfDataColumns <- 0 - dataColumnNames <- NULL - - sampleClass <- NULL - sampleType <- NULL - sampleInjectionOrder <- NULL - batchID <- NULL - } - - commaNumbers <- sum(grepl(x = dataFrame$"Average Mz", pattern = "^(\\d+,\\d+$)|(^\\d+$)")) - decimalSeparatorIsComma <- commaNumbers == nrow(dataFrame) - if(decimalSeparatorIsComma){ - if(!is.null(dataFrame$"Average Rt(min)")) dataFrame$"Average Rt(min)" <- gsub(x = gsub(x = dataFrame$"Average Rt(min)", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") - #if(!is.null(dataFrame$"Average.Rt.min.")) dataFrame$"Average.Rt.min." <- gsub(x = gsub(x = dataFrame$"Average.Rt.min.", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") - if(!is.null(dataFrame$"Average Mz")) dataFrame$"Average Mz" <- gsub(x = gsub(x = dataFrame$"Average Mz", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") - #if(!is.null(dataFrame$"Average.Mz")) dataFrame$"Average.Mz" <- gsub(x = gsub(x = dataFrame$"Average.Mz", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") - if(!is.null(dataFrame$"Fill %")) dataFrame$"Fill %" <- gsub(x = gsub(x = dataFrame$"Fill %", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") - #if(!is.null(dataFrame$"Fill..")) dataFrame$"Fill.." <- gsub(x = gsub(x = dataFrame$"Fill..", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") - if(!is.null(dataFrame$"Dot product")) dataFrame$"Dot product" <- gsub(x = gsub(x = dataFrame$"Dot product", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") - #if(!is.null(dataFrame$"Dot.product")) dataFrame$"Dot.product" <- gsub(x = gsub(x = dataFrame$"Dot.product", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") - if(!is.null(dataFrame$"Reverse dot product")) dataFrame$"Reverse dot product" <- gsub(x = gsub(x = dataFrame$"Reverse dot product", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") - #if(!is.null(dataFrame$"Reverse.dot.product")) dataFrame$"Reverse.dot.product" <- gsub(x = gsub(x = dataFrame$"Reverse.dot.product", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") - if(!is.null(dataFrame$"Fragment presence %")) dataFrame$"Fragment presence %" <- gsub(x = gsub(x = dataFrame$"Fragment presence %", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") - #if(!is.null(dataFrame$"Fragment.presence.%")) dataFrame$"Fragment.presence.%" <- gsub(x = gsub(x = dataFrame$"Fragment.presence.%", pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") - - ## replace -1 by 0 - if(numberOfDataColumns > 0){ - for(colIdx in dataColumnStartEndIndeces[[1]]:dataColumnStartEndIndeces[[2]]){ - dataFrame[ , colIdx] <- gsub(x = gsub(x = dataFrame[ , colIdx], pattern = "\\.", replacement = ""), pattern = ",", replacement = ".") - } - } - } - - ######################################## - ## column formats - if(!is.null(dataFrame$"Average Rt(min)")) dataFrame$"Average Rt(min)" <- as.numeric(dataFrame$"Average Rt(min)") - #if(!is.null(dataFrame$"Average.Rt.min.")) dataFrame$"Average.Rt.min." <- as.numeric(dataFrame$"Average.Rt.min.") - if(!is.null(dataFrame$"Average Mz")) dataFrame$"Average Mz" <- as.numeric(dataFrame$"Average Mz") - #if(!is.null(dataFrame$"Average.Mz")) dataFrame$"Average.Mz" <- as.numeric(dataFrame$"Average.Mz") - if(!is.null(dataFrame$"Fill %")) dataFrame$"Fill %" <- as.numeric(dataFrame$"Fill %") - #if(!is.null(dataFrame$"Fill..")) dataFrame$"Fill.." <- as.numeric(dataFrame$"Fill..") - if(!is.null(dataFrame$"MS/MS included")) dataFrame$"MS/MS included" <- as.logical(dataFrame$"MS/MS included") - #if(!is.null(dataFrame$"MS.MS.included")) dataFrame$"MS.MS.included" <- as.logical(dataFrame$"MS.MS.included") - if(!is.null(dataFrame$"Dot product")) dataFrame$"Dot product" <- as.numeric(dataFrame$"Dot product") - #if(!is.null(dataFrame$"Dot.product")) dataFrame$"Dot.product" <- as.numeric(dataFrame$"Dot.product") - if(!is.null(dataFrame$"Reverse dot product")) dataFrame$"Reverse dot product" <- as.numeric(dataFrame$"Reverse dot product") - #if(!is.null(dataFrame$"Reverse.dot.product")) dataFrame$"Reverse.dot.product" <- as.numeric(dataFrame$"Reverse.dot.product") - if(!is.null(dataFrame$"Fragment presence %")) dataFrame$"Fragment presence %" <- as.numeric(dataFrame$"Fragment presence %") - #if(!is.null(dataFrame$"Fragment.presence.%")) dataFrame$"Fragment.presence.%" <- as.numeric(dataFrame$"Fragment.presence.%") - - ## sorted by m/z (needed for deisotoping) - if(!is.null(dataFrame$"Average Mz")) - dataFrame <- dataFrame[order(dataFrame$"Average Mz"), ] - - ## replace -1 by 0 - if(numberOfDataColumns > 0){ - for(colIdx in dataColumnStartEndIndeces[[1]]:dataColumnStartEndIndeces[[2]]){ - dataFrame[ , colIdx] <- as.numeric(dataFrame[ , colIdx]) - if(!is.na(sum(dataFrame[,colIdx] == -1))) - dataFrame[(dataFrame[,colIdx] == -1),colIdx] <- 0 - } - } - - ######################################## - ## deisotoping - numberOfRemovedIsotopePeaks <- 0 - if(doPrecursorDeisotoping & !is.null(dataFrame$"Average Mz")){ - if(!is.na(progress)) if(progress) incProgress(amount = 0, detail = paste("Precursor deisotoping...", sep = "")) else print(paste("Precursor deisotoping...", sep = "")) - - distance13Cminus12C <- 1.0033548378 - ## mark isotope precursors - precursorsToRemove <- vector(mode = "logical", length = numberOfPrecursors) - - if(numberOfDataColumns > 0){ - intensities <- dataFrame[ , dataColumnStartEndIndeces[[1]]:dataColumnStartEndIndeces[[2]]] - medians <- apply(X = as.matrix(intensities), MARGIN = 1, FUN = median) - } - for(precursorIdx in seq_len(numberOfPrecursors)){ - if((precursorIdx %% (as.integer(numberOfPrecursors/10))) == 0) - if(!is.na(progress)) if(progress) incProgress(amount = 0.0, detail = paste("Precursor deisotoping ", precursorIdx, " / ", numberOfPrecursors, sep = "")) else print(paste("Precursor deisotoping ", precursorIdx, " / ", numberOfPrecursors, sep = "")) - - mzError <- dataFrame$"Average Mz"[[precursorIdx]] * mzDeviationInPPM_precursorDeisotoping / 1000000 - mzError <- max(mzError, mzDeviationAbsolute_precursorDeisotoping) - - ## RT difference <= maximumRtDifference - validPrecursorsInRt <- abs(dataFrame$"Average Rt(min)"[[precursorIdx]] - dataFrame$"Average Rt(min)"[-precursorIdx]) <= maximumRtDifference - ## MZ difference around 1.0033548378 (first isotope) or 1.0033548378 * 2 (second isotope) - validPrecursorsInMz1 <- abs((dataFrame$"Average Mz"[[precursorIdx]] - distance13Cminus12C * 1) - dataFrame$"Average Mz"[-precursorIdx]) <= mzError - validPrecursorsInMz2 <- abs((dataFrame$"Average Mz"[[precursorIdx]] - distance13Cminus12C * 2) - dataFrame$"Average Mz"[-precursorIdx]) <= mzError - validPrecursorsInMz <- validPrecursorsInMz1 | validPrecursorsInMz2 - ## intensity gets smaller in the isotope spectrum - if(numberOfDataColumns > 0){ - validPrecursorsInIntensity <- (medians[-precursorIdx] - medians[[precursorIdx]]) > 0 - } else { - validPrecursorsInIntensity <- TRUE - } - - if(any(validPrecursorsInRt & validPrecursorsInMz & validPrecursorsInIntensity)) - precursorsToRemove[[precursorIdx]] <- TRUE - } - - ## remove isotopes - dataFrame <- dataFrame[!precursorsToRemove, ] - - numberOfRemovedIsotopePeaks <- sum(precursorsToRemove) - numberOfPrecursors <- nrow(dataFrame) - } - if(!is.na(progress)) if(progress) incProgress(amount = 0, detail = paste("Boxing...", sep = "")) else print(paste("Boxing...", sep = "")) - returnObj <- list() - returnObj$dataFrame <- dataFrame ## meta returnObj$oldFormat <- oldFormat returnObj$numberOfPrecursors <- numberOfPrecursors returnObj$dataColumnStartEndIndeces <- dataColumnStartEndIndeces returnObj$numberOfDataColumns <- numberOfDataColumns + ## group anno returnObj$sampleClass <- sampleClass returnObj$sampleType <- sampleType returnObj$sampleInjectionOrder <- sampleInjectionOrder returnObj$batchID <- batchID + ## misc returnObj$numberOfPrecursorsPrior <- numberOfPrecursorsPrior returnObj$numberOfRemovedIsotopePeaks <- numberOfRemovedIsotopePeaks @@ -522,17 +171,34 @@ parsePeakAbundanceMatrix.OLD <- function(filePeakMatrix, doPrecursorDeisotoping, return (returnObj) } + #################################################################################### ## parse MS/MS spectra -parseMSP <- function(fileSpectra, minimumIntensityOfMaximalMS2peak, minimumProportionOfMS2peaks, neutralLossesPrecursorToFragments, neutralLossesFragmentsToFragments, progress = FALSE){ +parseMSP <- function(fileSpectra, + minimumIntensityOfMaximalMS2peak, + minimumProportionOfMS2peaks, + neutralLossesPrecursorToFragments, + neutralLossesFragmentsToFragments, + progress = FALSE){ fileLines <- readLines(con = fileSpectra) - returnObj <- parseMSP_chunk(fileLines, minimumIntensityOfMaximalMS2peak, minimumProportionOfMS2peaks, neutralLossesPrecursorToFragments, neutralLossesFragmentsToFragments, progress) + returnObj <- parseMSP_chunk(fileLines, + minimumIntensityOfMaximalMS2peak, + minimumProportionOfMS2peaks, + neutralLossesPrecursorToFragments, + neutralLossesFragmentsToFragments, + progress=FALSE) returnObj$fileSpectra <- fileSpectra return(returnObj) } -parseMSP_big <- function(fileSpectra, minimumIntensityOfMaximalMS2peak, minimumProportionOfMS2peaks, neutralLossesPrecursorToFragments, neutralLossesFragmentsToFragments, progress = FALSE){ + +parseMSP_big <- function(fileSpectra, + minimumIntensityOfMaximalMS2peak, + minimumProportionOfMS2peaks, + neutralLossesPrecursorToFragments, + neutralLossesFragmentsToFragments, + progress = FALSE){ if(progress) incProgress(amount = 0, detail = "MS/MS file: Read file") else print("MS/MS file: Read file") fileLines <- readLines(con = fileSpectra) @@ -565,6 +231,7 @@ parseMSP_big <- function(fileSpectra, minimumIntensityOfMaximalMS2peak, minimumP if(ncol(entryIntervals) > 0) entryIntervals <- entryIntervals - min(entryIntervals) + 1 } + numberOfChunks <- length(fileLineChunks) fileLineCountOfChunks <- unlist(lapply(X = fileLineChunks, FUN = length)) fileLineOffsetOfChunks <- c(0, sapply(X = seq_along(fileLineCountOfChunks)[-1], FUN = function(x){sum(fileLineCountOfChunks[1:(x-1)])})) @@ -590,8 +257,10 @@ parseMSP_big <- function(fileSpectra, minimumIntensityOfMaximalMS2peak, minimumP returnObj2 <- parseMSP_chunk( fileLines = fileLines, - minimumIntensityOfMaximalMS2peak = minimumIntensityOfMaximalMS2peak, minimumProportionOfMS2peaks = minimumProportionOfMS2peaks, - neutralLossesPrecursorToFragments = neutralLossesPrecursorToFragments, neutralLossesFragmentsToFragments = neutralLossesFragmentsToFragments, + minimumIntensityOfMaximalMS2peak = minimumIntensityOfMaximalMS2peak, + minimumProportionOfMS2peaks = minimumProportionOfMS2peaks, + neutralLossesPrecursorToFragments = neutralLossesPrecursorToFragments, + neutralLossesFragmentsToFragments = neutralLossesFragmentsToFragments, offset = fileLineOffsetOfChunks[[chunkIdx]], progress = NA ) @@ -612,8 +281,16 @@ parseMSP_big <- function(fileSpectra, minimumIntensityOfMaximalMS2peak, minimumP return(returnObj) } + ##################### -parseMSP_chunk <- function(fileLines, minimumIntensityOfMaximalMS2peak, minimumProportionOfMS2peaks, neutralLossesPrecursorToFragments, neutralLossesFragmentsToFragments, offset = 0, progress = FALSE){ +parseMSP_chunk <- function(fileLines, + minimumIntensityOfMaximalMS2peak, + minimumProportionOfMS2peaks, + neutralLossesPrecursorToFragments, + neutralLossesFragmentsToFragments, + offset = 0, + progress = FALSE) +{ ## LC-MS/MS entry: ## NAME: Unknown @@ -855,1618 +532,94 @@ parseMSP_chunk <- function(fileLines, minimumIntensityOfMaximalMS2peak, minimumP "[M+H]" = { mz <- mzTmp + 1.008 }, "[M+Na]+"= { mz <- mzTmp + 22.9898 }, "[M+Na]" = { mz <- mzTmp + 22.9898 }#, - #stop(paste("Unknown precursor ion type, pit, ")!", sep = "")) ) } if(any(isEMass2) & any(is.null(mz), is.na(mz), ifelse(test = is.null(mz), yes = FALSE, no = mz%%1==0))){ - #if(any(isEMass2) & any(is.null(mz), is.na(mz), ifelse(test = is.null(mz), yes = FALSE, no = mz%%1==0)) & any(isPrety2)){ mz <- parsedEMass2[[which(isEMass2)[[1]]]] - #mzTmp <- parsedEMass2[[which(isEMass2)[[1]]]] - #pit <- parsedPrety2[[which(isPrety2)[[1]]]] - #switch(pit, - # "[M-H]-" = { mz <- mzTmp - 1.008 }, - # "[M-H]" = { mz <- mzTmp - 1.008 }, - # "[M+H]+" = { mz <- mzTmp + 1.008 }, - # "[M+H]" = { mz <- mzTmp + 1.008 }, - # "[M+Na]+"= { mz <- mzTmp + 22.9898 }, - # "[M+Na]" = { mz <- mzTmp + 22.9898 }#, - # #stop(paste("Unknown precursor ion type, pit, ")!", sep = "")) - #) } if(any(isPMass2) & any(is.null(mz), is.na(mz), ifelse(test = is.null(mz), yes = FALSE, no = mz%%1==0))){ if(!is.na(parsedPMass2[[which(isPMass2)[[1]]]])){ mz <- parsedPMass2[[which(isPMass2)[[1]]]] } else { - tmp <- as.numeric(strsplit(x = trimws(substring(text = fileLines_022[[which(isPMass2)[[1]]]], first = nchar("PEPMASS=") + 1)), split = "[\t ]")[[1]]) - mz <- tmp[[1]] - if(length(tmp) > 1) - ms1Int <- tmp[[2]] - } - } - if(any(isE2Mass2) & any(is.null(mz), is.na(mz), ifelse(test = is.null(mz), yes = FALSE, no = mz%%1==0))){ - mz <- parsedE2Mass2[[which(isE2Mass2)[[1]]]] - } - if(any(isMetN2)) - metName <- parsedMetN2 [[which(isMetN2)[[1]]]] - if(any(isAddN2) & adduct == "Unknown") - ## adduct - adduct <- parsedAddN2 [[which(isAddN2)[[1]]]] - if(any(isPrety2) & adduct == "Unknown") - adduct <- parsedPrety2[[which(isPrety2)[[1]]]] - if(any(isPretyp2) & adduct == "Unknown") - adduct <- parsedPretyp2[[which(isPretyp2)[[1]]]] - if(any(isScanN2)) - scanNumber <- parsedScanN2[[which(isScanN2)[[1]]]] - if(any(isMIon2)) - quantMass <- parsedMIon2 [[which(isMIon2)[[1]]]] - if(any(isNumP2)) - ## #peaks - peakNumber <- parsedNumP2 [[which(isNumP2)[[1]]]] - if(any(isPeak2)){ - ## MS2 peaks: "178.88669\t230" - ms2Peaks_mz <- parsedms2Peaks_mz2 [which(isPeak2)] - ms2Peaks_int <- parsedms2Peaks_int2[which(isPeak2)] - } - if(any(isCoCl2)) - ## compound class - compoundClass <- parsedCoCl2 [[which(isCoCl2)[[1]]]] - if(any(isInty2)) - ## instrument type - instrumentType <- parsedInty2 [[which(isInty2)[[1]]]] - if(any(isIntype2)) - ## instrument type - instrumentType <- parsedIntype2 [[which(isIntype2)[[1]]]] - if(any(isIntyp2)) - ## instrument type - instrumentType <- parsedIntyp2 [[which(isIntyp2)[[1]]]] - if(all(any(isInt2), instrumentType %in% c("Unknown", "NA"))) - ## instrument - instrumentType <- parsedInt2 [[which(isInt2)[[1]]]] - if(any(isInchi2)) - ## structure - inchi <- parsedInchi2 [[which(isInchi2)[[1]]]] - if(any(isInchiKey2)) - ## structure - inchiKey <- parsedInchiKey2 [[which(isInchiKey2)[[1]]]] - if(any(isSmiles2)) - ## structure - smiles <- parsedSmiles2 [[which(isSmiles2)[[1]]]] - ## end of parsing - - if(is.null(rt)) - rt <- 0 - - #if(is.null(mz)) - # ## in case of gc - # mz <- max(ms2Peaks_mz) - ms2Peaks_mz_original <- ms2Peaks_mz - ms2Peaks_int_original <- ms2Peaks_int - - numberOfMS2PeaksOriginal <<- numberOfMS2PeaksOriginal + length(ms2Peaks_mz) - if(length(ms2Peaks_mz) == 0) - numberOfSpectraDiscardedDueToNoPeaks <<- numberOfSpectraDiscardedDueToNoPeaks + 1 - - ################################################################### - ## filter fragments with mass greater than precursor - numberOfTooHeavyFragmentsHere <- 0 - if(all(!is.null(mz), !is.na(mz))){ - tooHeavy <- ms2Peaks_mz > mz - ms2Peaks_mz <- ms2Peaks_mz [!tooHeavy] - ms2Peaks_int <- ms2Peaks_int[!tooHeavy] - numberOfTooHeavyFragmentsHere <- sum(tooHeavy) - - if(length(ms2Peaks_mz) == 0 & numberOfTooHeavyFragmentsHere > 0) - numberOfSpectraDiscardedDueToTooHeavy <<- numberOfSpectraDiscardedDueToTooHeavy + 1 - } - numberOfTooHeavyFragments <<- numberOfTooHeavyFragments + numberOfTooHeavyFragmentsHere - - ################################################################### - ## filter for ms2 peak intensity - peakNumber <- length(ms2Peaks_mz) - if(peakNumber > 0){ - ################################################################### - ## filter for ms2 peak intensity relative to maximum peak intensity in spectrum - maximumIntensity <- max(ms2Peaks_int) - if(maximumIntensity >= minimumIntensityOfMaximalMS2peak){ - ## spectrum is considered - intensityThreshold <- maximumIntensity * minimumProportionOfMS2peaks - fragmentsAboveThreshold <- ms2Peaks_int >= intensityThreshold - - ms2Peaks_mz <- ms2Peaks_mz [fragmentsAboveThreshold] - ms2Peaks_int <- ms2Peaks_int[fragmentsAboveThreshold] - numberOfMS2PeaksAboveThreshold <<- numberOfMS2PeaksAboveThreshold + sum( fragmentsAboveThreshold) - numberOfMS2PeaksBelowThreshold <<- numberOfMS2PeaksBelowThreshold + sum(!fragmentsAboveThreshold) - } else { - ## spectrum is not considered - numberOfMS2PeaksBelowThreshold <<- numberOfMS2PeaksBelowThreshold + length(ms2Peaks_mz) - numberOfSpectraDiscardedDueToMaxIntensity <<- numberOfSpectraDiscardedDueToMaxIntensity + 1 - ms2Peaks_mz <- vector(mode = "numeric") - ms2Peaks_int <- vector(mode = "numeric") - } - } - - ################################################################### - ## normalize ms2 peaks to maximum = 1 - peakNumber <- length(ms2Peaks_mz) - if(peakNumber > 0){ - max <- max(ms2Peaks_int) - ms2Peaks_int <- ms2Peaks_int / max - } - - ################################################################### - ## add neutral losses - if(peakNumber > 0){ - ################################# - ## neutral losses regarding the precursor - if(all(!is.null(mz), !is.na(mz), neutralLossesPrecursorToFragments)){ - ms2PeaksNLPF_mz <- ms2Peaks_mz - as.numeric(mz) - ms2PeaksNLPF_int <- ms2Peaks_int - } else { - ms2PeaksNLPF_mz <- vector(mode = "numeric") - ms2PeaksNLPF_int <- vector(mode = "numeric") - } - ################################# - ## neutral losses amongst fragments - if(neutralLossesFragmentsToFragments){ - m_mz <- outer(X = ms2Peaks_mz, Y = ms2Peaks_mz, FUN = function(x,y){x-y}) - m_int <- outer(X = ms2Peaks_int, Y = ms2Peaks_int, FUN = function(x,y){(x+y) / 2}) - upper <- upper.tri(x = m_mz) - ms2PeaksNLFF_mz <- m_mz [upper] - ms2PeaksNLFF_int <- m_int[upper] - } else { - ms2PeaksNLFF_mz <- vector(mode = "numeric") - ms2PeaksNLFF_int <- vector(mode = "numeric") - } - - ms2Peaks_mz <- c(ms2Peaks_mz, ms2PeaksNLPF_mz, ms2PeaksNLFF_mz) - ms2Peaks_int <- c(ms2Peaks_int, ms2PeaksNLPF_int, ms2PeaksNLFF_int) - } - - ################################################################### - ## precursor mz - #mz <- ifelse(test = !is.null(mz), yes = round(as.numeric(mz), digits = 4), no = ifelse(test = !is.null(scanNumber), yes = scanNumber, no = max(ms2Peaks_mz))) - if(all(!is.null(mz), !is.na(mz))){ - mz <- round(as.numeric(mz), digits = 4) - } else { - if(!is.na(quantMass)){ - mz <- quantMass - } else { - if(!is.na(scanNumber)){ - mz <- scanNumber - } else { - mz <- max(ms2Peaks_mz) - } - } - } - - ################################################################### - ## string representation of spectrum - spectrumString <- paste(ms2Peaks_mz_original, ms2Peaks_int_original, sep = " ", collapse = ";") - - ################################################################### - ## built ms set - spectrumItem <- list( - name = name, - ms1Int = ms1Int, - #rt = round(as.numeric(rt), digits = 2), - rt = rt, - mz = mz, - metName = metName, - adduct = adduct, - quantMass = quantMass, - compoundClass = compoundClass, - instrumentType = instrumentType, - inchi = inchi, - inchiKey = inchiKey, - smiles = smiles, - #peakNumber = as.numeric(peakNumber), - peakNumber = length(ms2Peaks_mz), - ms2Peaks_mz = ms2Peaks_mz, - ms2Peaks_int = ms2Peaks_int, - spectrumString = spectrumString, - entryInterval = x + offset - ) - if(spectrumItem$peakNumber > 0){ - ## add - numberOfMS2PeaksWithNeutralLosses <<- numberOfMS2PeaksWithNeutralLosses + spectrumItem$peakNumber - return(spectrumItem) - } else - return(NULL) - }) - )## suppressWarnings - - rm( - isName, - isNAME, - isRT, - isRt, - isMZ, - isMz, - isTotEm, - isEMass, - isE2Mass, - isPMass, - isMetN, - isAddN, - isScanN, - isMIon, - isPrety, - isNumP, - isPeak, - isCoCl, - isInty, - isInchi, - isInchiKey, - isSmiles, - parsedName, - parsedNAME, - parsedRT, - parsedRt, - parsedMZ, - parsedMz, - parsedTotEm, - parsedEMass, - parsedE2Mass, - parsedPMass, - parsedMetN, - parsedAddN, - parsedScanN, - parsedMIon, - parsedPrety, - parsedNumP, - parsedTokensTmp, - parsedms2Peaks_mz, - parsedms2Peaks_int, - parsedCoCl, - parsedInty, - parsedInchi, - parsedInchiKey, - parsedSmiles - ) - - if(!is.na(progress)) if(progress) incProgress(amount = 0.1, detail = "MS/MS file: Box") else print("MS/MS file: Box") - - ## remove NULL entries? - spectraList[unlist(lapply(X = spectraList, FUN = is.null))] <- NULL - - numberOfSpectra <- length(spectraList) - - ## postprocess - if(!is.na(progress)) if(progress) incProgress(amount = 0.01, detail = paste("MS/MS file postprocessing", sep = "")) else print(paste("MS/MS file postprocessing", sep = "")) - #precursorMz <- vector(mode = "numeric", length = numberOfSpectra) - #for(spectrumIdx in seq_len(length.out = numberOfSpectra)) - # precursorMz[[spectrumIdx]] <- spectraList[[spectrumIdx]]$mz - - #precursorRt <- vector(mode = "numeric", length = numberOfSpectra) - #for(spectrumIdx in seq_len(length.out = numberOfSpectra)) - # precursorRt[[spectrumIdx]] <- spectraList[[spectrumIdx]]$rt - - precursorMz <- unlist(lapply(X = spectraList, FUN = function(x){ - if(is.null(x)) return(NULL) - else return(x$mz) - })) - precursorRt <- unlist(lapply(X = spectraList, FUN = function(x){ - if(is.null(x)) return(NULL) - else return(x$rt) - })) - - if(!is.na(progress)) if(progress) incProgress(amount = 0.01, detail = paste("MS/MS file boxing", sep = "")) else print(paste("MS/MS file boxing", sep = "")) - returnObj <- list() - returnObj$fileSpectra <- NA - returnObj$spectraList <- spectraList - returnObj$numberOfSpectra <- numberOfSpectra - returnObj$numberOfSpectraOriginal <- numberOfSpectraOriginal - returnObj$numberOfMS2PeaksOriginal <- numberOfMS2PeaksOriginal - returnObj$numberOfMS2PeaksWithNeutralLosses <- numberOfMS2PeaksWithNeutralLosses - returnObj$numberOfMS2PeaksAboveThreshold <- numberOfMS2PeaksAboveThreshold - returnObj$numberOfMS2PeaksBelowThreshold <- numberOfMS2PeaksBelowThreshold - returnObj$numberOfTooHeavyFragments <- numberOfTooHeavyFragments - returnObj$numberOfSpectraDiscardedDueToNoPeaks <- numberOfSpectraDiscardedDueToNoPeaks - returnObj$numberOfSpectraDiscardedDueToMaxIntensity <- numberOfSpectraDiscardedDueToMaxIntensity - returnObj$numberOfSpectraDiscardedDueToTooHeavy <- numberOfSpectraDiscardedDueToTooHeavy - returnObj$precursorMz <- precursorMz - returnObj$precursorRt <- precursorRt - - return(returnObj) -} -parseMSP_attributes <- function(fileSpectra, progress = FALSE, flexiblePeakList = FALSE, multiplePeaksPerLine = FALSE, includeIDasRecordSeparator=TRUE, includeNAMEasRecordSeparator=TRUE, includeTITLEasRecordSeparator=TRUE, returnEmptySpectra = FALSE){ - fileLines <- readLines(con = fileSpectra) - - if(!is.na(progress)) if(progress) incProgress(amount = 0, detail = "MS/MS file: Read file") else print("MS/MS file: Read file") - - #fileLines <- readLines(con = fileSpectra) - numberOfFileLines <- length(fileLines) - - ## start with empty lines or not? - endOfRecord <- TRUE - if(numberOfFileLines > 0) - if(nchar(trimws(fileLines[[1]])) > 0) - endOfRecord <- FALSE - - ## check for pattern - if(!is.na(progress)) if(progress) incProgress(amount = 0, detail = "MS/MS file: Parse") else print("MS/MS file: Parse") - isID <- grepl(pattern = "^ID:", x = fileLines) - isBI <- grepl(pattern = "^BEGIN IONS$", x = fileLines) - isName <- grepl(pattern = "(^Name:)|(^NAME:)", x = fileLines) - isNAme <- grepl(pattern = "^NAME=", x = fileLines) - isTITLE <- grepl(pattern = "^TITLE=", x = fileLines) - isAccession <- grepl(pattern = "^ACCESSION:", x = fileLines) - #isNumP <- grepl(pattern = "^Num Peaks:", x = fileLines) - #isPeak <- grepl(pattern = "^\\d+(\\.\\d+)?[ \t]\\d+(\\.\\d+)?$", x = fileLines) - #isPeak <- grepl(pattern = "^[ \t]*\\d+(\\.\\d+)?[ \t]\\d+(\\.\\d+)?([ \t]+\\d+(\\.\\d+)?[ \t]\\d+(\\.\\d+)?)*[ \t]*$", x = fileLines) - - if(!includeIDasRecordSeparator) isID <- rep(x = F, times = length(isID)) - if(!includeNAMEasRecordSeparator){ - isName <- rep(x = F, times = length(isName)) - isNAme <- rep(x = F, times = length(isNAme)) - } - if(!includeTITLEasRecordSeparator) isTITLE <- rep(x = F, times = length(isTITLE)) - - numberSmall <- "\\d+(\\.\\d+)?" - numberBig <- "\\d+(\\.\\d+(E\\d+)?)?" - mzValueRegEx <- "(\\d+(\\.\\d+)?)" - intensityRegex <- "(\\d+((\\.\\d+)?([eE](-)?\\d+)?)?)" - annotationRegex <- "\".+\"" - if(flexiblePeakList){ - isPeak <- grepl(pattern = "^[ \t]*\\d+(\\.\\d+([eE](-)?\\d+)?)?([ \t]\\d+(\\.\\d+([eE](-)?\\d+)?)?)*[ \t]*$", x = fileLines) - } else { - #isPeak <- grepl(pattern = "^[ \t]*\\d+(\\.\\d+)?[ \t]\\d+(\\.\\d+([eE](-)?\\d+)?)?([ \t]+((\\d+(\\.\\d+)?[ \t]\\d+(\\.\\d+([eE](-)?\\d+)?)?)|(\".+\")))*[ \t]*$", x = fileLines) - isPeak <- grepl(pattern = paste("^[ \t]*", mzValueRegEx, "[ \t]", intensityRegex, "([ \t]+((", mzValueRegEx, "[ \t]", intensityRegex, ")|(", annotationRegex, ")))*[ \t]*$", sep = ""), x = fileLines) - } - isEmpty <- nchar(trimws(fileLines)) == 0 - - tagVector <- unlist(lapply(X = str_split(string = fileLines, pattern = "(:)|(=)"), FUN = function(x){x[[1]]})) - valueVector <- trimws(substr(x = fileLines, start = nchar(tagVector) + 1 + 1, stop = nchar(fileLines))) - - ## entry line intervals in file - entryBorders <- c(which(isName | isNAme | isTITLE | isBI | isID | isAccession), length(fileLines)+1) - entryIntervals <- matrix(data = unlist(lapply(X = seq_len(length(entryBorders) - 1), FUN = function(x){c(entryBorders[[x]], entryBorders[[x+1]] - 1)})), nrow=2) - - ## do it - if(!is.na(progress)) if(progress) incProgress(amount = 0, detail = "MS/MS file: Assemble spectra") else print("MS/MS file: Assemble spectra") - #suppressWarnings( - ## x <- entryIntervals[,1] - spectraList <- apply(X = entryIntervals, MARGIN = 2, FUN = function(x){ - #print(x) - fileLines2 <- fileLines [x[[1]]:x[[2]]] - isPeak2 <- isPeak [x[[1]]:x[[2]]] - isEmpty2 <- isEmpty [x[[1]]:x[[2]]] - - tagVector2 <- tagVector [x[[1]]:x[[2]]] - valueVector2 <- valueVector[x[[1]]:x[[2]]] - - ################################################################### - ## built ms set - spectrumItem <- list() - spectrumItem[tagVector2[!isPeak2 & !isEmpty2]] <- trimws(valueVector2[!isPeak2 & !isEmpty2]) - - if(!is.null(spectrumItem$"Num Peaks")) - if(spectrumItem$"Num Peaks" == "0" & !returnEmptySpectra) - return(NULL) - - #spectrumItem["peaks"] <- paste(fileLines2[isPeak2], collapse = "; ") - peakLines <- fileLines2[isPeak2] - peakLines <- trimws(gsub(x = peakLines, pattern = "\".*\"", replacement = "")) - - if(multiplePeaksPerLine){ - peakLines <- unlist(strsplit(x = peakLines, split = "[ \t]")) - } else { - peakLines <- unlist(lapply(X = strsplit(x = peakLines, split = "[ \t]"), FUN = function(peaktokens){peaktokens[1:2]})) - } - spectrumItem["peaks"] <- paste(peakLines, collapse = " ") - spectrumItem["peaks"] <- trimws(gsub(x = spectrumItem["peaks"], pattern = " ", replacement = " ")) - - ## check peaks - tokens <- strsplit(x = spectrumItem[["peaks"]], split = "[ \t]")[[1]] - if(length(tokens) == 0){#spectrumItem$"Num Peaks" == "0"){ - mzs <- character(0) - ints <- character(0) - warning(paste(basename(fileSpectra), ": Empty peak list for [", paste(x, collapse = ","), "]", sep = "")) - } else { - mzs <- tokens[seq(from=1, to=length(tokens), by = 2)] - ints <- tokens[seq(from=2, to=length(tokens), by = 2)] - } - if(length(mzs) != length(ints)) stop("error in parsing peaks") - - ## handle duplicated tags - duplicatedTags <- unique(names(spectrumItem)[duplicated(names(spectrumItem))]) - if(length(duplicatedTags) > 0){ - duplicated <- sapply(X = duplicatedTags, FUN = function(x){ - unlist(sapply(X = seq_along(spectrumItem), FUN = function(y){ if(names(spectrumItem[y])==x) return(y) })) - }, simplify = F) - - indecesToRemove <- vector(mode = "integer", length = 0) - for(idx in seq_along(duplicated)){ - indeces <- duplicated[[idx]] - representant <- indeces[[1]] - indecesToRemoveHere <- indeces[-1] - - spectrumItem[[representant]] <- paste(spectrumItem[indeces], sep = "; ") - indecesToRemove <- c(indecesToRemove, indecesToRemoveHere) - } - spectrumItem <- spectrumItem[-indecesToRemove] - } - - return(spectrumItem) - }) - #)## suppressWarnings - - rm( - isName, - isNAme, - isTITLE, - isBI, - isID, - isPeak, - tagVector, - valueVector - ) - - if(!is.na(progress)) if(progress) incProgress(amount = 0.1, detail = "MS/MS file: Box") else print("MS/MS file: Box") - - ## remove NULL entries? - spectraList[unlist(lapply(X = spectraList, FUN = is.null))] <- NULL - - numberOfSpectra <- length(spectraList) - - ## postprocess - if(!is.na(progress)) if(progress) incProgress(amount = 0.01, detail = paste("MS/MS file postprocessing", sep = "")) else print(paste("MS/MS file postprocessing", sep = "")) - - if(!is.na(progress)) if(progress) incProgress(amount = 0.01, detail = paste("MS/MS file boxing", sep = "")) else print(paste("MS/MS file boxing", sep = "")) - returnObj <- list() - returnObj$fileSpectra <- fileSpectra - returnObj$spectraList <- spectraList - returnObj$numberOfSpectra <- numberOfSpectra - - return(returnObj) -} -parseMSP_chunk_old <- function(fileLines, minimumIntensityOfMaximalMS2peak, minimumProportionOfMS2peaks, neutralLossesPrecursorToFragments, neutralLossesFragmentsToFragments, progress = FALSE){ - - ## LC-MS/MS entry: - ## NAME: Unknown - ## RETENTIONTIME: 3.215358 - ## PRECURSORMZ: 78.91963 - ## METABOLITENAME: - ## ADDUCTIONNAME: [M-H]- - ## Num Peaks: 2 - ## 76.97093 754 - ## 76.98951 754 - ## - ## GC-MS additional properties: - ## SCANNUMBER: 518 - ## MODELION: 59 - ## MODELIONHEIGHT: 924 - ## MODELIONAREA: 924 - ## INTEGRATEDHEIGHT: 924 - ## INTEGRATEDAREA: 924 - - if(progress) incProgress(amount = 0, detail = "MS/MS file: Read file") else print("MS/MS file: Read file") - - #fileLines <- readLines(con = fileSpectra) - numberOfFileLines <- length(fileLines) - - numberOfMS2PeaksOriginal <- 0 - numberOfMS2PeaksWithNeutralLosses <- 0 - numberOfMS2PeaksAboveThreshold <- 0 - numberOfMS2PeaksBelowThreshold <- 0 - - ## start with empty lines or not? - endOfRecord <- TRUE - if(numberOfFileLines > 0) - if(nchar(trimws(fileLines[[1]])) > 0) - endOfRecord <- FALSE - - ## check for pattern - if(progress) incProgress(amount = 0, detail = "MS/MS file: Parse") else print("MS/MS file: Parse") - isName <- grepl(pattern = "^Name:", x = fileLines) - isNAME <- grepl(pattern = "^NAME:", x = fileLines) - isRT <- grepl(pattern = "^RETENTIONTIME:", x = fileLines) - isRt <- grepl(pattern = "^retention time:", x = fileLines) - isMZ <- grepl(pattern = "^PRECURSORMZ:", x = fileLines) - isMz <- grepl(pattern = "^precursor m/z:", x = fileLines) - isTotEm <- grepl(pattern = "^total exact mass:", x = fileLines) - isEMass <- grepl(pattern = "^exact mass:", x = fileLines) - isMetN <- grepl(pattern = "^METABOLITENAME:", x = fileLines) - isADDN <- grepl(pattern = "^ADDUCTIONNAME:", x = fileLines) - isAddN <- grepl(pattern = "^Adductionname:", x = fileLines) - isScanN <- grepl(pattern = "^SCANNUMBER:", x = fileLines) - isMIon <- grepl(pattern = "^MODELION:", x = fileLines) - isPrety <- grepl(pattern = "^precursor type:", x = fileLines) - isNumP <- grepl(pattern = "^Num Peaks:", x = fileLines) - isPeak <- grepl(pattern = "^\\d+(\\.\\d+)?[ \t]\\d+(\\.\\d+)?$", x = fileLines) - isCoCl <- grepl(pattern = "^compound class:", x = fileLines) - isInty <- grepl(pattern = "^instrument type:", x = fileLines) - isInchi <- grepl(pattern = "^InChI:", x = fileLines) - isInchiKey <- grepl(pattern = "^InChIKey:", x = fileLines) - isSmiles <- grepl(pattern = "^SMILES:", x = fileLines) - - ## extract - suppressWarnings({ - parsedNAME <- trimws(substring(text = fileLines, first = nchar("NAME:") + 1)) - parsedName <- trimws(substring(text = fileLines, first = nchar("Name:") + 1)) - parsedRT <- as.numeric( trimws(substring(text = fileLines, first = nchar("RETENTIONTIME:") + 1))) - parsedRt <- as.numeric(unlist(lapply(X = strsplit(x = trimws(substring(text = fileLines, first = nchar("retention time:") + 1)), split = " "), FUN = function(x){if(length(x)==0) return(NA) else return(x[[1]])}))) - parsedMZ <- as.numeric( trimws(substring(text = fileLines, first = nchar("PRECURSORMZ:") + 1))) - parsedMz <- as.numeric( trimws(substring(text = fileLines, first = nchar("precursor m/z:") + 1))) - parsedTotEm <- as.numeric( trimws(substring(text = fileLines, first = nchar("total exact mass:") + 1))) - parsedEMass <- as.numeric( trimws(substring(text = fileLines, first = nchar("exact mass:") + 1))) - parsedMetN <- trimws(substring(text = fileLines, first = nchar("METABOLITENAME:") + 1)) - parsedADDN <- trimws(substring(text = fileLines, first = nchar("ADDUCTIONNAME:") + 1)) - parsedAddN <- trimws(substring(text = fileLines, first = nchar("Adductionname:") + 1)) - parsedScanN <- as.numeric( trimws(substring(text = fileLines, first = nchar("SCANNUMBER:") + 1))) - parsedMIon <- as.numeric( trimws(substring(text = fileLines, first = nchar("MODELION:") + 1))) - parsedPrety <- trimws(substring(text = fileLines, first = nchar("precursor type:") + 1)) - parsedNumP <- as.numeric( trimws(substring(text = fileLines, first = nchar("Num Peaks:") + 1))) - - parsedTokensTmp <- strsplit(x = trimws(fileLines), split = "[ \t]") - #parsedms2Peaks_mz <- as.numeric(unlist(lapply(X = parsedTokensTmp, FUN = function(x){head(x = x, n = 1)}))) - parsedms2Peaks_mz <- as.numeric(unlist(lapply(X = parsedTokensTmp, FUN = function(x){if(length(x)<1) return(NA) else return(x[[1]])}))) - parsedms2Peaks_int <- as.numeric(unlist(lapply(X = parsedTokensTmp, FUN = function(x){if(length(x)<2) return(NA) else return(x[[2]])}))) - - parsedCoCl <- trimws(substring(text = fileLines, first = nchar("compound class:") + 1)) - parsedInty <- trimws(substring(text = fileLines, first = nchar("instrument type:") + 1)) - parsedInchi <- trimws(substring(text = fileLines, first = nchar("InChI:") + 1)) - parsedInchiKey <- trimws(substring(text = fileLines, first = nchar("InChIKey:") + 1)) - parsedSmiles <- trimws(substring(text = fileLines, first = nchar("SMILES:") + 1)) - }) - - ## entry line intervals in file - entryBorders <- c(which(isName | isNAME), length(fileLines)+1) - entryIntervals <- matrix(data = unlist(lapply(X = seq_len(length(entryBorders) - 1), FUN = function(x){c(entryBorders[[x]], entryBorders[[x+1]] - 1)})), nrow=2) - - ## do it - if(progress) incProgress(amount = 0, detail = "MS/MS file: Assemble spectra") else print("MS/MS file: Assemble spectra") - suppressWarnings( - spectraList <- apply(X = entryIntervals, MARGIN = 2, FUN = function(x){ - parsedNAME2 <- parsedNAME [x[[1]]:x[[2]]] - parsedName2 <- parsedName [x[[1]]:x[[2]]] - parsedRT2 <- parsedRT [x[[1]]:x[[2]]] - parsedRt2 <- parsedRt [x[[1]]:x[[2]]] - parsedMZ2 <- parsedMZ [x[[1]]:x[[2]]] - parsedMz2 <- parsedMz [x[[1]]:x[[2]]] - parsedTotEm2 <- parsedTotEm [x[[1]]:x[[2]]] - parsedEMass2 <- parsedEMass [x[[1]]:x[[2]]] - parsedMetN2 <- parsedMetN [x[[1]]:x[[2]]] - parsedADDN2 <- parsedADDN [x[[1]]:x[[2]]] - parsedAddN2 <- parsedAddN [x[[1]]:x[[2]]] - parsedScanN2 <- parsedScanN [x[[1]]:x[[2]]] - parsedMIon2 <- parsedMIon [x[[1]]:x[[2]]] - parsedPrety2 <- parsedPrety [x[[1]]:x[[2]]] - parsedNumP2 <- parsedNumP [x[[1]]:x[[2]]] - parsedms2Peaks_mz2 <- parsedms2Peaks_mz [x[[1]]:x[[2]]] - parsedms2Peaks_int2 <- parsedms2Peaks_int [x[[1]]:x[[2]]] - parsedCoCl2 <- parsedCoCl [x[[1]]:x[[2]]] - parsedInty2 <- parsedInty [x[[1]]:x[[2]]] - parsedInchi2 <- parsedInchi [x[[1]]:x[[2]]] - parsedInchiKey2 <- parsedInchiKey [x[[1]]:x[[2]]] - parsedSmiles2 <- parsedSmiles [x[[1]]:x[[2]]] - - isNAME2 <- isNAME [x[[1]]:x[[2]]] - isName2 <- isName [x[[1]]:x[[2]]] - isRT2 <- isRT [x[[1]]:x[[2]]] - isRt2 <- isRt [x[[1]]:x[[2]]] - isMZ2 <- isMZ [x[[1]]:x[[2]]] - isMz2 <- isMz [x[[1]]:x[[2]]] - isTotEm2 <- isTotEm [x[[1]]:x[[2]]] - isEMass2 <- isEMass [x[[1]]:x[[2]]] - isMetN2 <- isMetN [x[[1]]:x[[2]]] - isADDN2 <- isADDN [x[[1]]:x[[2]]] - isAddN2 <- isAddN [x[[1]]:x[[2]]] - isScanN2 <- isScanN [x[[1]]:x[[2]]] - isMIon2 <- isMIon [x[[1]]:x[[2]]] - isPrety2 <- isPrety [x[[1]]:x[[2]]] - isNumP2 <- isNumP [x[[1]]:x[[2]]] - isPeak2 <- isPeak [x[[1]]:x[[2]]] - isCoCl2 <- isCoCl [x[[1]]:x[[2]]] - isInty2 <- isInty [x[[1]]:x[[2]]] - isInchi2 <- isInchi [x[[1]]:x[[2]]] - isInchiKey2 <- isInchiKey [x[[1]]:x[[2]]] - isSmiles2 <- isSmiles [x[[1]]:x[[2]]] - - name <- NULL - rt <- NULL - mz <- NULL - metName <- "Unknown" - adduct <- "Unknown" - scanNumber <- NA - quantMass <- NA - peakNumber <- NA - ms2Peaks_mz <- vector(mode = "numeric") - ms2Peaks_int <- vector(mode = "numeric") - compoundClass <- "Unknown" - instrumentType <- "Unknown" - inchi <- "" - inchiKey <- "" - smiles <- "" - - if(any(isName2)) - ## name - name <- parsedName2 [[which(isName2)[[1]]]] - if(any(isNAME2) & !is.null(name)) - ## name - name <- parsedNAME2 [[which(isNAME2)[[1]]]] - if(any(isRT2)) - ## retention time - rt <- parsedRT2 [[which(isRT2)[[1]]]] - if(any(isRt2) & any(is.null(rt), is.na(rt))) - rt <- parsedRt2 [[which(isRt2)[[1]]]] - if(any(isMZ2)) - ## precursor m/z - mz <- parsedMZ2 [[which(isMZ2)[[1]]]] - if(any(isMz2) & any(is.null(mz), is.na(mz), mz%%1==0)) - mz <- parsedMz2 [[which(isMz2)[[1]]]] - if(any(isTotEm2) & any(is.null(mz), is.na(mz), mz%%1==0) & any(isPrety2)){ - mzTmp <- parsedTotEm2[[which(isTotEm2)[[1]]]] - pit <- parsedPrety2[[which(isPrety2)[[1]]]] - switch(pit, - "[M-H]-" = { mz <- mzTmp - 1.008 }, - "[M-H]" = { mz <- mzTmp - 1.008 }, - "[M+H]+" = { mz <- mzTmp + 1.008 }, - "[M+H]" = { mz <- mzTmp + 1.008 }, - "[M+Na]+"= { mz <- mzTmp + 22.9898 }, - "[M+Na]" = { mz <- mzTmp + 22.9898 }#, - #stop(paste("Unknown precursor ion type, pit, ")!", sep = "")) - ) - } - if(any(isEMass2) & any(is.null(mz), is.na(mz), mz%%1==0) & any(isPrety2)){ - mzTmp <- parsedEMass2[[which(isEMass2)[[1]]]] - pit <- parsedPrety2[[which(isPrety2)[[1]]]] - switch(pit, - "[M-H]-" = { mz <- mzTmp - 1.008 }, - "[M-H]" = { mz <- mzTmp - 1.008 }, - "[M+H]+" = { mz <- mzTmp + 1.008 }, - "[M+H]" = { mz <- mzTmp + 1.008 }, - "[M+Na]+"= { mz <- mzTmp + 22.9898 }, - "[M+Na]" = { mz <- mzTmp + 22.9898 }#, - #stop(paste("Unknown precursor ion type, pit, ")!", sep = "")) - ) - } - if(any(isMetN2)) - metName <- parsedMetN2 [[which(isMetN2)[[1]]]] - if(any(isADDN2)) - ## adduct - adduct <- parsedADDN2 [[which(isADDN2)[[1]]]] - if(any(isAddN2) & adduct == "Unknown") - adduct <- parsedAddN2 [[which(isAddN2)[[1]]]] - if(any(isPrety2) & adduct == "Unknown") - adduct <- parsedPrety2[[which(isPrety2)[[1]]]] - if(any(isScanN2)) - scanNumber <- parsedScanN2[[which(isScanN2)[[1]]]] - if(any(isMIon2)) - quantMass <- parsedMIon2 [[which(isMIon2)[[1]]]] - if(any(isNumP2)) - ## #peaks - peakNumber <- parsedNumP2 [[which(isNumP2)[[1]]]] - if(any(isPeak2)){ - ## MS2 peaks: "178.88669\t230" - ms2Peaks_mz <- parsedms2Peaks_mz2 [which(isPeak2)] - ms2Peaks_int <- parsedms2Peaks_int2[which(isPeak2)] - } - if(any(isCoCl2)) - ## compound class - compoundClass <- parsedCoCl2 [[which(isCoCl2)[[1]]]] - if(any(isInty2)) - ## instrument type - instrumentType <- parsedInty2 [[which(isInty2)[[1]]]] - if(any(isInchi2)) - ## structure - inchi <- parsedInchi2 [[which(isInchi2)[[1]]]] - if(any(isInchiKey2)) - ## structure - inchiKey <- parsedInchiKey2 [[which(isInchiKey2)[[1]]]] - if(any(isSmiles2)) - ## structure - smiles <- parsedSmiles2 [[which(isSmiles2)[[1]]]] - ## end of parsing - - if(is.null(rt)) - rt <- 0 - - #if(is.null(mz)) - # ## in case of gc - # mz <- max(ms2Peaks_mz) - ms2Peaks_mz_original <- ms2Peaks_mz - ms2Peaks_int_original <- ms2Peaks_int - - ################################################################### - ## filter fragments with mass greater than precursor - if(!is.null(mz)){ - tooHeavy <- ms2Peaks_mz > mz - ms2Peaks_mz <- ms2Peaks_mz [!tooHeavy] - ms2Peaks_int <- ms2Peaks_int[!tooHeavy] - } - - ################################################################### - ## filter for ms2 peak intensity - peakNumber <- length(ms2Peaks_mz) - if(peakNumber > 0){ - ################################################################### - ## filter for ms2 peak intensity relative to maximum peak intensity in spectrum - maximumIntensity <- max(ms2Peaks_int) - if(maximumIntensity >= minimumIntensityOfMaximalMS2peak){ - ## spectrum is considered - intensityThreshold <- maximumIntensity * minimumProportionOfMS2peaks - fragmentsAboveThreshold <- ms2Peaks_int >= intensityThreshold - - ms2Peaks_mz <- ms2Peaks_mz [fragmentsAboveThreshold] - ms2Peaks_int <- ms2Peaks_int[fragmentsAboveThreshold] - numberOfMS2PeaksAboveThreshold <- numberOfMS2PeaksAboveThreshold + length(ms2Peaks_mz) - } else { - ## spectrum is not considered - ms2Peaks_mz <- vector(mode = "numeric") - ms2Peaks_int <- vector(mode = "numeric") - } - } - - ################################################################### - ## normalize ms2 peaks to maximum = 1 - peakNumber <- length(ms2Peaks_mz) - if(peakNumber > 0){ - max <- max(ms2Peaks_int) - ms2Peaks_int <- ms2Peaks_int / max - } - - ################################################################### - ## add neutral losses - if(peakNumber > 0){ - ################################# - ## neutral losses regarding the precursor - if(all(!is.null(mz), neutralLossesPrecursorToFragments)){ - ms2PeaksNLPF_mz <- ms2Peaks_mz - as.numeric(mz) - ms2PeaksNLPF_int <- ms2Peaks_int - } else { - ms2PeaksNLPF_mz <- vector(mode = "numeric") - ms2PeaksNLPF_int <- vector(mode = "numeric") - } - ################################# - ## neutral losses amongst fragments - if(neutralLossesFragmentsToFragments){ - m_mz <- outer(X = ms2Peaks_mz, Y = ms2Peaks_mz, FUN = function(x,y){x-y}) - m_int <- outer(X = ms2Peaks_int, Y = ms2Peaks_int, FUN = function(x,y){(x+y) / 2}) - upper <- upper.tri(x = m_mz) - ms2PeaksNLFF_mz <- m_mz [upper] - ms2PeaksNLFF_int <- m_int[upper] - } else { - ms2PeaksNLFF_mz <- vector(mode = "numeric") - ms2PeaksNLFF_int <- vector(mode = "numeric") - } - - ms2Peaks_mz <- c(ms2Peaks_mz, ms2PeaksNLPF_mz, ms2PeaksNLFF_mz) - ms2Peaks_int <- c(ms2Peaks_int, ms2PeaksNLPF_int, ms2PeaksNLFF_int) - } - - ################################################################### - ## precursor mz - #mz <- ifelse(test = !is.null(mz), yes = round(as.numeric(mz), digits = 4), no = ifelse(test = !is.null(scanNumber), yes = scanNumber, no = max(ms2Peaks_mz))) - if(!is.null(mz)){ - mz <- round(as.numeric(mz), digits = 4) - } else { - if(!is.na(quantMass)){ - mz <- quantMass - } else { - if(!is.na(scanNumber)){ - mz <- scanNumber - } else { - mz <- max(ms2Peaks_mz) - } - } - } - - ################################################################### - ## string representation of spectrum - spectrumString <- paste(ms2Peaks_mz_original, ms2Peaks_int_original, sep = " ", collapse = ";") - - ################################################################### - ## built ms set - spectrumItem <- list( - name = name, - #rt = round(as.numeric(rt), digits = 2), - rt = rt, - mz = mz, - metName = metName, - adduct = adduct, - quantMass = quantMass, - compoundClass = compoundClass, - instrumentType = instrumentType, - inchi = inchi, - inchiKey = inchiKey, - smiles = smiles, - #peakNumber = as.numeric(peakNumber), - peakNumber = length(ms2Peaks_mz), - ms2Peaks_mz = ms2Peaks_mz, - ms2Peaks_int = ms2Peaks_int, - spectrumString = spectrumString - ) - if(spectrumItem$peakNumber > 0){ - ## add - numberOfMS2PeaksWithNeutralLosses <- numberOfMS2PeaksWithNeutralLosses + spectrumItem$peakNumber - return(spectrumItem) - } else - return(NULL) - }) - )## suppressWarnings - - rm( - isName, - isNAME, - isRT, - isRt, - isMZ, - isMz, - isTotEm, - isEMass, - isMetN, - isADDN, - isAddN, - isScanN, - isMIon, - isPrety, - isNumP, - isPeak, - isCoCl, - isInty, - isInchi, - isInchiKey, - isSmiles, - parsedNAME, - parsedName, - parsedRT, - parsedRt, - parsedMZ, - parsedMz, - parsedTotEm, - parsedEMass, - parsedMetN, - parsedADDN, - parsedAddN, - parsedScanN, - parsedMIon, - parsedPrety, - parsedNumP, - parsedTokensTmp, - parsedms2Peaks_mz, - parsedms2Peaks_int, - parsedCoCl, - parsedInty, - parsedInchi, - parsedInchiKey, - parsedSmiles - ) - - if(progress) incProgress(amount = 0.1, detail = "MS/MS file: Box") else print("MS/MS file: Box") - - ## remove NULL entries? - spectraList[unlist(lapply(X = spectraList, FUN = is.null))] <- NULL - - numberOfSpectra <- length(spectraList) - - ## postprocess - if(!is.na(progress)) if(progress) incProgress(amount = 0.01, detail = paste("MS/MS file postprocessing", sep = "")) else print(paste("MS/MS file postprocessing", sep = "")) - #precursorMz <- vector(mode = "numeric", length = numberOfSpectra) - #for(spectrumIdx in seq_len(length.out = numberOfSpectra)) - # precursorMz[[spectrumIdx]] <- spectraList[[spectrumIdx]]$mz - - #precursorRt <- vector(mode = "numeric", length = numberOfSpectra) - #for(spectrumIdx in seq_len(length.out = numberOfSpectra)) - # precursorRt[[spectrumIdx]] <- spectraList[[spectrumIdx]]$rt - - precursorMz <- unlist(lapply(X = spectraList, FUN = function(x){ - if(is.null(x)) return(NULL) - else return(x$mz) - })) - precursorRt <- unlist(lapply(X = spectraList, FUN = function(x){ - if(is.null(x)) return(NULL) - else return(x$rt) - })) - - if(!is.na(progress)) if(progress) incProgress(amount = 0.01, detail = paste("MS/MS file boxing", sep = "")) else print(paste("MS/MS file boxing", sep = "")) - returnObj <- list() - returnObj$fileSpectra <- NA - returnObj$spectraList <- spectraList - returnObj$numberOfSpectra <- numberOfSpectra - returnObj$numberOfMS2PeaksOriginal <- numberOfMS2PeaksOriginal - returnObj$numberOfMS2PeaksWithNeutralLosses <- numberOfMS2PeaksWithNeutralLosses - returnObj$numberOfMS2PeaksAboveThreshold <- numberOfMS2PeaksAboveThreshold - returnObj$numberOfMS2PeaksBelowThreshold <- numberOfMS2PeaksBelowThreshold - returnObj$precursorMz <- precursorMz - returnObj$precursorRt <- precursorRt - - return(returnObj) -} - -parseMSPbig_ <- function(fileSpectra, minimumIntensityOfMaximalMS2peak, minimumProportionOfMS2peaks, neutralLossesPrecursorToFragments, neutralLossesFragmentsToFragments, progress = FALSE){ - if(progress) incProgress(amount = 0, detail = "MS/MS file: Read file") else print("MS/MS file: Read file") - - fileLines <- readLines(con = fileSpectra) - numberOfFileLines <- length(fileLines) - - numberOfMS2PeaksOriginal <- 0 - numberOfMS2PeaksWithNeutralLosses <- 0 - numberOfMS2PeaksAboveThreshold <- 0 - numberOfMS2PeaksBelowThreshold <- 0 - - ## start with empty lines or not? - endOfRecord <- TRUE - if(numberOfFileLines > 0) - if(nchar(trimws(fileLines[[1]])) > 0) - endOfRecord <- FALSE - - ## check for pattern - if(progress) incProgress(amount = 0, detail = "MS/MS file: Parse") else print("MS/MS file: Parse") - isName3 <- grepl(pattern = "^Name:", x = fileLines) - isNAME3 <- grepl(pattern = "^NAME:", x = fileLines) - - isName <- which(grepl(pattern = "^Name:", x = fileLines)) - isNAME <- which(grepl(pattern = "^NAME:", x = fileLines)) - isRT <- which(grepl(pattern = "^RETENTIONTIME:", x = fileLines)) - isRt <- which(grepl(pattern = "^retention time:", x = fileLines)) - isMZ <- which(grepl(pattern = "^PRECURSORMZ:", x = fileLines)) - isMz <- which(grepl(pattern = "^precursor m/z:", x = fileLines)) - isTotEm <- which(grepl(pattern = "^total exact mass:", x = fileLines)) - isEMass <- which(grepl(pattern = "^exact mass:", x = fileLines)) - isMetN <- which(grepl(pattern = "^METABOLITENAME:", x = fileLines)) - isADDN <- which(grepl(pattern = "^ADDUCTIONNAME:", x = fileLines)) - isAddN <- which(grepl(pattern = "^Adductionname:", x = fileLines)) - isScanN <- which(grepl(pattern = "^SCANNUMBER:", x = fileLines)) - isMIon <- which(grepl(pattern = "^MODELION:", x = fileLines)) - isPrety <- which(grepl(pattern = "^precursor type:", x = fileLines)) - isNumP <- which(grepl(pattern = "^Num Peaks:", x = fileLines)) - isPeak <- which(grepl(pattern = "^\\d+(\\.\\d+)?[ \t]\\d+(\\.\\d+)?$", x = fileLines)) - isCoCl <- which(grepl(pattern = "^compound class:", x = fileLines)) - isInty <- which(grepl(pattern = "^instrument type:", x = fileLines)) - isInchi <- which(grepl(pattern = "^InChI:", x = fileLines)) - isInchiKey <- which(grepl(pattern = "^InChIKey:", x = fileLines)) - isSmiles <- which(grepl(pattern = "^SMILES:", x = fileLines)) - - ## extract - suppressWarnings({ - parsedNAME <- trimws(substring(text = fileLines[isNAME], first = nchar("NAME:") + 1)) - parsedName <- trimws(substring(text = fileLines[isName], first = nchar("Name:") + 1)) - parsedRT <- as.numeric( trimws(substring(text = fileLines[isRT], first = nchar("RETENTIONTIME:") + 1))) - parsedRt <- as.numeric(unlist(lapply(X = strsplit(x = trimws(substring(text = fileLines[isRt], first = nchar("retention time:") + 1)), split = " "), FUN = function(x){if(length(x)==0) return(NA) else return(x[[1]])}))) - parsedMZ <- as.numeric( trimws(substring(text = fileLines[isMZ], first = nchar("PRECURSORMZ:") + 1))) - parsedMz <- as.numeric( trimws(substring(text = fileLines[isMz], first = nchar("precursor m/z:") + 1))) - parsedTotEm <- as.numeric( trimws(substring(text = fileLines[isTotEm], first = nchar("total exact mass:") + 1))) - parsedEMass <- as.numeric( trimws(substring(text = fileLines[isEMass], first = nchar("exact mass:") + 1))) - parsedMetN <- trimws(substring(text = fileLines[isMetN], first = nchar("METABOLITENAME:") + 1)) - parsedADDN <- trimws(substring(text = fileLines[isADDN], first = nchar("ADDUCTIONNAME:") + 1)) - parsedAddN <- trimws(substring(text = fileLines[isAddN], first = nchar("Adductionname:") + 1)) - parsedScanN <- as.numeric( trimws(substring(text = fileLines[isScanN], first = nchar("SCANNUMBER:") + 1))) - parsedMIon <- as.numeric( trimws(substring(text = fileLines[isMIon], first = nchar("MODELION:") + 1))) - parsedPrety <- trimws(substring(text = fileLines[isPrety], first = nchar("precursor type:") + 1)) - parsedNumP <- as.numeric( trimws(substring(text = fileLines[isNumP], first = nchar("Num Peaks:") + 1))) - - parsedTokensTmp <- strsplit(x = trimws(fileLines[isPeak]), split = "[ \t]") - #parsedms2Peaks_mz <- as.numeric(unlist(lapply(X = parsedTokensTmp, FUN = function(x){head(x = x, n = 1)}))) - parsedms2Peaks_mz <- as.numeric(unlist(lapply(X = parsedTokensTmp, FUN = function(x){if(length(x)<1) return(NA) else return(x[[1]])}))) - parsedms2Peaks_int <- as.numeric(unlist(lapply(X = parsedTokensTmp, FUN = function(x){if(length(x)<2) return(NA) else return(x[[2]])}))) - - parsedCoCl <- trimws(substring(text = fileLines[isCoCl], first = nchar("compound class:") + 1)) - parsedInty <- trimws(substring(text = fileLines[isInty], first = nchar("instrument type:") + 1)) - parsedInchi <- trimws(substring(text = fileLines[isInchi], first = nchar("InChI:") + 1)) - parsedInchiKey <- trimws(substring(text = fileLines[isInchiKey], first = nchar("InChIKey:") + 1)) - parsedSmiles <- trimws(substring(text = fileLines[isSmiles], first = nchar("SMILES:") + 1)) - }) - - numberOfFileLines <- length(fileLines) - fileLines <- NULL - - ## entry line intervals in file - entryBorders <- c(which(isName3 | isNAME3), numberOfFileLines+1) - #entryBorders <- c(sort(unique(union(isName, isNAME))), numberOfFileLines) - entryIntervals <- matrix(data = unlist(lapply(X = seq_len(length(entryBorders) - 1), FUN = function(x){c(entryBorders[[x]], entryBorders[[x+1]] - 1)})), nrow=2) - - ## do it - if(progress) incProgress(amount = 0, detail = "MS/MS file: Assemble spectra") else print("MS/MS file: Assemble spectra") - suppressWarnings( - spectraList <- apply(X = entryIntervals, MARGIN = 2, FUN = function(x){ - - parsedNAME2 <- tryCatch({parsedNAME [[which(isNAME >= x[[1]])[[1]]]]}, error = function(e) {NULL}) - parsedName2 <- tryCatch({parsedName [[which(isName >= x[[1]])[[1]]]]}, error = function(e) {NULL}) - parsedRT2 <- tryCatch({parsedRT [[which(isRT >= x[[1]])[[1]]]]}, error = function(e) {NULL}) - parsedRt2 <- tryCatch({parsedRt [[which(isRt >= x[[1]])[[1]]]]}, error = function(e) {NULL}) - parsedMZ2 <- tryCatch({parsedMZ [[which(isMZ >= x[[1]])[[1]]]]}, error = function(e) {NULL}) - parsedMz2 <- tryCatch({parsedMz [[which(isMz >= x[[1]])[[1]]]]}, error = function(e) {NULL}) - parsedTotEm2 <- tryCatch({parsedTotEm [[which(isTotEm >= x[[1]])[[1]]]]}, error = function(e) {NULL}) - parsedEMass2 <- tryCatch({parsedEMass [[which(isEMass >= x[[1]])[[1]]]]}, error = function(e) {NULL}) - parsedMetN2 <- tryCatch({parsedMetN [[which(isMetN >= x[[1]])[[1]]]]}, error = function(e) {NULL}) - parsedADDN2 <- tryCatch({parsedADDN [[which(isADDN >= x[[1]])[[1]]]]}, error = function(e) {NULL}) - parsedAddN2 <- tryCatch({parsedAddN [[which(isAddN >= x[[1]])[[1]]]]}, error = function(e) {NULL}) - parsedScanN2 <- tryCatch({parsedScanN [[which(isScanN >= x[[1]])[[1]]]]}, error = function(e) {NULL}) - parsedMIon2 <- tryCatch({parsedMIon [[which(isMIon >= x[[1]])[[1]]]]}, error = function(e) {NULL}) - parsedPrety2 <- tryCatch({parsedPrety [[which(isPrety >= x[[1]])[[1]]]]}, error = function(e) {NULL}) - parsedCoCl2 <- tryCatch({parsedCoCl [[which(isCoCl >= x[[1]])[[1]]]]}, error = function(e) {NULL}) - parsedInty2 <- tryCatch({parsedInty [[which(isInty >= x[[1]])[[1]]]]}, error = function(e) {NULL}) - parsedInchi2 <- tryCatch({parsedInchi [[which(isInchi >= x[[1]])[[1]]]]}, error = function(e) {NULL}) - parsedInchiKey2 <- tryCatch({parsedInchiKey [[which(isInchiKey >= x[[1]])[[1]]]]}, error = function(e) {NULL}) - parsedSmiles2 <- tryCatch({parsedSmiles [[which(isSmiles >= x[[1]])[[1]]]]}, error = function(e) {NULL}) - parsedNumP2 <- tryCatch({parsedNumP [[which(isNumP >= x[[1]])[[1]]]]}, error = function(e) {NULL}) - - parsedms2Peaks_mz2 = parsedms2Peaks_mz [ which(isPeak >= x[[1]] & isPeak <= x[[2]]) ] - parsedms2Peaks_int2 = parsedms2Peaks_int[ which(isPeak >= x[[1]] & isPeak <= x[[2]]) ] - - name <- NULL - rt <- NULL - mz <- NULL - metName <- "Unknown" - adduct <- "Unknown" - scanNumber <- NA - quantMass <- NA - peakNumber <- NA - ms2Peaks_mz <- vector(mode = "numeric") - ms2Peaks_int <- vector(mode = "numeric") - compoundClass <- "Unknown" - instrumentType <- "Unknown" - inchi <- "" - inchiKey <- "" - smiles <- "" - - if(!is.null(parsedName2)) - ## name - name <- parsedName2 - if(!is.null(parsedNAME2) & !is.null(name)) - ## name - name <- parsedNAME2 - if(!is.null(parsedRT2)) - ## retention time - rt <- parsedRT2 - if(!is.null(parsedRt2) & any(is.null(rt), is.na(rt))) - rt <- parsedRt2 - if(!is.null(parsedMZ2)) - ## precursor m/z - mz <- parsedMZ2 - if(!is.null(parsedMz2) & any(is.null(mz), is.na(mz), mz%%1==0)) - mz <- parsedMz2 - if(!is.null(parsedTotEm2) & any(is.null(mz), is.na(mz), mz%%1==0) & !is.null(parsedPrety2)){ - mzTmp <- parsedTotEm2 - pit <- parsedPrety2 - switch(pit, - "[M-H]-" = { mz <- mzTmp - 1.008 }, - "[M-H]" = { mz <- mzTmp - 1.008 }, - "[M+H]+" = { mz <- mzTmp + 1.008 }, - "[M+H]" = { mz <- mzTmp + 1.008 }, - "[M+Na]+"= { mz <- mzTmp + 22.9898 }, - "[M+Na]" = { mz <- mzTmp + 22.9898 }#, - #stop(paste("Unknown precursor ion type, pit, ")!", sep = "")) - ) - } - if(!is.null(parsedEMass2) & any(is.null(mz), is.na(mz), mz%%1==0) & !is.null(parsedPrety2)){ - mzTmp <- parsedEMass2 - pit <- parsedPrety2 - switch(pit, - "[M-H]-" = { mz <- mzTmp - 1.008 }, - "[M-H]" = { mz <- mzTmp - 1.008 }, - "[M+H]+" = { mz <- mzTmp + 1.008 }, - "[M+H]" = { mz <- mzTmp + 1.008 }, - "[M+Na]+"= { mz <- mzTmp + 22.9898 }, - "[M+Na]" = { mz <- mzTmp + 22.9898 }#, - #stop(paste("Unknown precursor ion type, pit, ")!", sep = "")) - ) - } - if(!is.null(parsedMetN2)) - metName <- parsedMetN2 - if(!is.null(parsedADDN2)) - ## adduct - adduct <- parsedADDN2 - if(!is.null(parsedAddN2) & adduct == "Unknown") - adduct <- parsedAddN2 - if(!is.null(parsedPrety2) & adduct == "Unknown") - adduct <- parsedPrety2 - if(!is.null(parsedScanN2)) - scanNumber <- parsedScanN2 - if(!is.null(parsedMIon2)) - quantMass <- parsedMIon2 - if(!is.null(parsedCoCl2)) - ## compound class - compoundClass <- parsedCoCl2 - if(!is.null(parsedInty2)) - ## instrument type - instrumentType <- parsedInty2 - if(!is.null(parsedInchi2)) - ## structure - inchi <- parsedInchi2 - if(!is.null(parsedInchiKey2)) - ## structure - inchiKey <- parsedInchiKey2 - if(!is.null(parsedSmiles2)) - ## structure - smiles <- parsedSmiles2 - if(!is.null(parsedNumP2)) - ## #peaks - peakNumber <- parsedNumP2 - if(!is.null(parsedms2Peaks_mz2)){ - ## MS2 peaks: "178.88669\t230" - ms2Peaks_mz <- parsedms2Peaks_mz2 - ms2Peaks_int <- parsedms2Peaks_int2 - } - - ## end of entry - - if(is.null(rt)) - rt <- 0 - - #if(is.null(mz)) - # ## in case of gc - # mz <- max(ms2Peaks_mz) - ms2Peaks_mz_original <- ms2Peaks_mz - ms2Peaks_int_original <- ms2Peaks_int - - ################################################################### - ## filter fragments with mass greater than precursor - if(!is.null(mz)){ - tooHeavy <- ms2Peaks_mz > mz - ms2Peaks_mz <- ms2Peaks_mz [!tooHeavy] - ms2Peaks_int <- ms2Peaks_int[!tooHeavy] - } - - ################################################################### - ## filter for ms2 peak intensity - peakNumber <- length(ms2Peaks_mz) - if(peakNumber > 0){ - ################################################################### - ## filter for ms2 peak intensity relative to maximum peak intensity in spectrum - maximumIntensity <- max(ms2Peaks_int) - if(maximumIntensity >= minimumIntensityOfMaximalMS2peak){ - ## spectrum is considered - intensityThreshold <- maximumIntensity * minimumProportionOfMS2peaks - fragmentsAboveThreshold <- ms2Peaks_int >= intensityThreshold - - ms2Peaks_mz <- ms2Peaks_mz [fragmentsAboveThreshold] - ms2Peaks_int <- ms2Peaks_int[fragmentsAboveThreshold] - numberOfMS2PeaksAboveThreshold <- numberOfMS2PeaksAboveThreshold + length(ms2Peaks_mz) - } else { - ## spectrum is not considered - ms2Peaks_mz <- vector(mode = "numeric") - ms2Peaks_int <- vector(mode = "numeric") - } - } - - ################################################################### - ## normalize ms2 peaks to maximum = 1 - peakNumber <- length(ms2Peaks_mz) - if(peakNumber > 0){ - max <- max(ms2Peaks_int) - ms2Peaks_int <- ms2Peaks_int / max - } - - ################################################################### - ## add neutral losses - if(peakNumber > 0){ - ################################# - ## neutral losses regarding the precursor - if(all(!is.null(mz), neutralLossesPrecursorToFragments)){ - ms2PeaksNLPF_mz <- ms2Peaks_mz - as.numeric(mz) - ms2PeaksNLPF_int <- ms2Peaks_int - } else { - ms2PeaksNLPF_mz <- vector(mode = "numeric") - ms2PeaksNLPF_int <- vector(mode = "numeric") - } - ################################# - ## neutral losses amongst fragments - if(neutralLossesFragmentsToFragments){ - m_mz <- outer(X = ms2Peaks_mz, Y = ms2Peaks_mz, FUN = function(x,y){x-y}) - m_int <- outer(X = ms2Peaks_int, Y = ms2Peaks_int, FUN = function(x,y){(x+y) / 2}) - upper <- upper.tri(x = m_mz) - ms2PeaksNLFF_mz <- m_mz [upper] - ms2PeaksNLFF_int <- m_int[upper] - } else { - ms2PeaksNLFF_mz <- vector(mode = "numeric") - ms2PeaksNLFF_int <- vector(mode = "numeric") - } - - ms2Peaks_mz <- c(ms2Peaks_mz, ms2PeaksNLPF_mz, ms2PeaksNLFF_mz) - ms2Peaks_int <- c(ms2Peaks_int, ms2PeaksNLPF_int, ms2PeaksNLFF_int) - } - - ################################################################### - ## precursor mz - #mz <- ifelse(test = !is.null(mz), yes = round(as.numeric(mz), digits = 4), no = ifelse(test = !is.null(scanNumber), yes = scanNumber, no = max(ms2Peaks_mz))) - if(!is.null(mz)){ - mz <- round(as.numeric(mz), digits = 4) - } else { - if(!is.na(quantMass)){ - mz <- quantMass - } else { - if(!is.na(scanNumber)){ - mz <- scanNumber - } else { - mz <- max(ms2Peaks_mz) - } - } - } - - ################################################################### - ## string representation of spectrum - spectrumString <- paste(ms2Peaks_mz_original, ms2Peaks_int_original, sep = " ", collapse = ";") - - ################################################################### - ## built ms set - spectrumItem <- list( - name = name, - #rt = round(as.numeric(rt), digits = 2), - rt = rt, - mz = mz, - metName = metName, - adduct = adduct, - quantMass = quantMass, - compoundClass = compoundClass, - instrumentType = instrumentType, - inchi = inchi, - inchiKey = inchiKey, - smiles = smiles, - #peakNumber = as.numeric(peakNumber), - peakNumber = length(ms2Peaks_mz), - ms2Peaks_mz = ms2Peaks_mz, - ms2Peaks_int = ms2Peaks_int, - spectrumString = spectrumString - ) - if(spectrumItem$peakNumber > 0){ - ## add - numberOfMS2PeaksWithNeutralLosses <- numberOfMS2PeaksWithNeutralLosses + spectrumItem$peakNumber - return(spectrumItem) - } else - return(NULL) - }) - )## suppressWarnings - - rm( - isName, - isNAME, - isRT, - isRt, - isMZ, - isMz, - isTotEm, - isEMass, - isMetN, - isADDN, - isAddN, - isScanN, - isMIon, - isPrety, - isNumP, - isPeak, - isCoCl, - isInty, - isInchi, - isInchiKey, - isSmiles, - parsedNAME, - parsedName, - parsedRT, - parsedRt, - parsedMZ, - parsedMz, - parsedTotEm, - parsedEMass, - parsedMetN, - parsedADDN, - parsedAddN, - parsedScanN, - parsedMIon, - parsedPrety, - parsedNumP, - parsedTokensTmp, - parsedms2Peaks_mz, - parsedms2Peaks_int, - parsedCoCl, - parsedInty, - parsedInchi, - parsedInchiKey, - parsedSmiles - ) - - if(progress) incProgress(amount = 0.1, detail = "MS/MS file: Box") else print("MS/MS file: Box") - - ## remove NULL entries? - spectraList[unlist(lapply(X = spectraList, FUN = is.null))] <- NULL - - numberOfSpectra <- length(spectraList) - - ## postprocess - if(!is.na(progress)) if(progress) incProgress(amount = 0.01, detail = paste("MS/MS file postprocessing", sep = "")) else print(paste("MS/MS file postprocessing", sep = "")) - #precursorMz <- vector(mode = "numeric", length = numberOfSpectra) - #for(spectrumIdx in seq_len(length.out = numberOfSpectra)) - # precursorMz[[spectrumIdx]] <- spectraList[[spectrumIdx]]$mz - - #precursorRt <- vector(mode = "numeric", length = numberOfSpectra) - #for(spectrumIdx in seq_len(length.out = numberOfSpectra)) - # precursorRt[[spectrumIdx]] <- spectraList[[spectrumIdx]]$rt - - precursorMz <- unlist(lapply(X = spectraList, FUN = function(x){ - if(is.null(x)) return(NULL) - else return(x$mz) - })) - precursorRt <- unlist(lapply(X = spectraList, FUN = function(x){ - if(is.null(x)) return(NULL) - else return(x$rt) - })) - - if(!is.na(progress)) if(progress) incProgress(amount = 0.01, detail = paste("MS/MS file boxing", sep = "")) else print(paste("MS/MS file boxing", sep = "")) - returnObj <- list() - returnObj$fileSpectra <- fileSpectra - returnObj$spectraList <- spectraList - returnObj$numberOfSpectra <- numberOfSpectra - returnObj$numberOfMS2PeaksOriginal <- numberOfMS2PeaksOriginal - returnObj$numberOfMS2PeaksWithNeutralLosses <- numberOfMS2PeaksWithNeutralLosses - returnObj$numberOfMS2PeaksAboveThreshold <- numberOfMS2PeaksAboveThreshold - returnObj$numberOfMS2PeaksBelowThreshold <- numberOfMS2PeaksBelowThreshold - returnObj$precursorMz <- precursorMz - returnObj$precursorRt <- precursorRt - - return(returnObj) -} -parseMSPbig2_ <- function(fileSpectra, minimumIntensityOfMaximalMS2peak, minimumProportionOfMS2peaks, neutralLossesPrecursorToFragments, neutralLossesFragmentsToFragments, progress = FALSE){ - if(progress) incProgress(amount = 0, detail = "MS/MS file: Read file") else print("MS/MS file: Read file") - - fileLines <- readLines(con = fileSpectra) - numberOfFileLines <- length(fileLines) - - numberOfMS2PeaksOriginal <- 0 - numberOfMS2PeaksWithNeutralLosses <- 0 - numberOfMS2PeaksAboveThreshold <- 0 - numberOfMS2PeaksBelowThreshold <- 0 - - ## start with empty lines or not? - endOfRecord <- TRUE - if(numberOfFileLines > 0) - if(nchar(trimws(fileLines[[1]])) > 0) - endOfRecord <- FALSE - - ## check for pattern - if(progress) incProgress(amount = 0, detail = "MS/MS file: Parse") else print("MS/MS file: Parse") - isName3 <- grepl(pattern = "^Name:", x = fileLines) - isNAME3 <- grepl(pattern = "^NAME:", x = fileLines) - - isName <- which(grepl(pattern = "^Name:", x = fileLines)) - isNAME <- which(grepl(pattern = "^NAME:", x = fileLines)) - isRT <- which(grepl(pattern = "^RETENTIONTIME:", x = fileLines)) - isRt <- which(grepl(pattern = "^retention time:", x = fileLines)) - isMZ <- which(grepl(pattern = "^PRECURSORMZ:", x = fileLines)) - isMz <- which(grepl(pattern = "^precursor m/z:", x = fileLines)) - isTotEm <- which(grepl(pattern = "^total exact mass:", x = fileLines)) - isEMass <- which(grepl(pattern = "^exact mass:", x = fileLines)) - isMetN <- which(grepl(pattern = "^METABOLITENAME:", x = fileLines)) - isADDN <- which(grepl(pattern = "^ADDUCTIONNAME:", x = fileLines)) - isAddN <- which(grepl(pattern = "^Adductionname:", x = fileLines)) - isScanN <- which(grepl(pattern = "^SCANNUMBER:", x = fileLines)) - isMIon <- which(grepl(pattern = "^MODELION:", x = fileLines)) - isPrety <- which(grepl(pattern = "^precursor type:", x = fileLines)) - isNumP <- which(grepl(pattern = "^Num Peaks:", x = fileLines)) - isPeak <- which(grepl(pattern = "^\\d+(\\.\\d+)?[ \t]\\d+(\\.\\d+)?$", x = fileLines)) - isCoCl <- which(grepl(pattern = "^compound class:", x = fileLines)) - isInty <- which(grepl(pattern = "^instrument type:", x = fileLines)) - isInchi <- which(grepl(pattern = "^InChI:", x = fileLines)) - isInchiKey <- which(grepl(pattern = "^InChIKey:", x = fileLines)) - isSmiles <- which(grepl(pattern = "^SMILES:", x = fileLines)) - - isAnyName <- length(isName ) > 0 - isAnyNAME <- length(isNAME ) > 0 - isAnyRT <- length(isRT ) > 0 - isAnyRt <- length(isRt ) > 0 - isAnyMZ <- length(isMZ ) > 0 - isAnyMz <- length(isMz ) > 0 - isAnyTotEm <- length(isTotEm ) > 0 - isAnyEMass <- length(isEMass ) > 0 - isAnyMetN <- length(isMetN ) > 0 - isAnyADDN <- length(isADDN ) > 0 - isAnyAddN <- length(isAddN ) > 0 - isAnyScanN <- length(isScanN ) > 0 - isAnyMIon <- length(isMIon ) > 0 - isAnyPrety <- length(isPrety ) > 0 - isAnyNumP <- length(isNumP ) > 0 - isAnyPeak <- length(isPeak ) > 0 - isAnyCoCl <- length(isCoCl ) > 0 - isAnyInty <- length(isInty ) > 0 - isAnyInchi <- length(isInchi ) > 0 - isAnyInchiKey <- length(isInchiKey) > 0 - isAnySmiles <- length(isSmiles ) > 0 - - - ## extract - suppressWarnings({ - parsedNAME <- trimws(substring(text = fileLines[isNAME], first = nchar("NAME:") + 1)) - parsedName <- trimws(substring(text = fileLines[isName], first = nchar("Name:") + 1)) - parsedRT <- as.numeric( trimws(substring(text = fileLines[isRT], first = nchar("RETENTIONTIME:") + 1))) - parsedRt <- as.numeric(unlist(lapply(X = strsplit(x = trimws(substring(text = fileLines[isRt], first = nchar("retention time:") + 1)), split = " "), FUN = function(x){if(length(x)==0) return(NA) else return(x[[1]])}))) - parsedMZ <- as.numeric( trimws(substring(text = fileLines[isMZ], first = nchar("PRECURSORMZ:") + 1))) - parsedMz <- as.numeric( trimws(substring(text = fileLines[isMz], first = nchar("precursor m/z:") + 1))) - parsedTotEm <- as.numeric( trimws(substring(text = fileLines[isTotEm], first = nchar("total exact mass:") + 1))) - parsedEMass <- as.numeric( trimws(substring(text = fileLines[isEMass], first = nchar("exact mass:") + 1))) - parsedMetN <- trimws(substring(text = fileLines[isMetN], first = nchar("METABOLITENAME:") + 1)) - parsedADDN <- trimws(substring(text = fileLines[isADDN], first = nchar("ADDUCTIONNAME:") + 1)) - parsedAddN <- trimws(substring(text = fileLines[isAddN], first = nchar("Adductionname:") + 1)) - parsedScanN <- as.numeric( trimws(substring(text = fileLines[isScanN], first = nchar("SCANNUMBER:") + 1))) - parsedMIon <- as.numeric( trimws(substring(text = fileLines[isMIon], first = nchar("MODELION:") + 1))) - parsedPrety <- trimws(substring(text = fileLines[isPrety], first = nchar("precursor type:") + 1)) - parsedNumP <- as.numeric( trimws(substring(text = fileLines[isNumP], first = nchar("Num Peaks:") + 1))) - - parsedTokensTmp <- strsplit(x = trimws(fileLines[isPeak]), split = "[ \t]") - #parsedms2Peaks_mz <- as.numeric(unlist(lapply(X = parsedTokensTmp, FUN = function(x){head(x = x, n = 1)}))) - parsedms2Peaks_mz <- as.numeric(unlist(lapply(X = parsedTokensTmp, FUN = function(x){if(length(x)<1) return(NA) else return(x[[1]])}))) - parsedms2Peaks_int <- as.numeric(unlist(lapply(X = parsedTokensTmp, FUN = function(x){if(length(x)<2) return(NA) else return(x[[2]])}))) - - parsedCoCl <- trimws(substring(text = fileLines[isCoCl], first = nchar("compound class:") + 1)) - parsedInty <- trimws(substring(text = fileLines[isInty], first = nchar("instrument type:") + 1)) - parsedInchi <- trimws(substring(text = fileLines[isInchi], first = nchar("InChI:") + 1)) - parsedInchiKey <- trimws(substring(text = fileLines[isInchiKey], first = nchar("InChIKey:") + 1)) - parsedSmiles <- trimws(substring(text = fileLines[isSmiles], first = nchar("SMILES:") + 1)) - }) - - numberOfFileLines <- length(fileLines) - fileLines <- NULL - - ## entry line intervals in file - entryBorders <- c(which(isName3 | isNAME3), numberOfFileLines+1) - #entryBorders <- c(sort(unique(union(isName, isNAME))), numberOfFileLines) - entryIntervals <- matrix(data = unlist(lapply(X = seq_len(length(entryBorders) - 1), FUN = function(x){c(entryBorders[[x]], entryBorders[[x+1]] - 1)})), nrow=2) - - ## do it - if(progress) incProgress(amount = 0, detail = "MS/MS file: Assemble spectra") else print("MS/MS file: Assemble spectra") - suppressWarnings( - spectraList <- apply(X = entryIntervals, MARGIN = 2, FUN = function(x){ - - if(FALSE){ - if(isAnyName ) parsedName2 <- parsedName [[which(isName >= x[[1]])[[1]]]] else parsedName2 <- NULL - if(isAnyNAME ) parsedNAME2 <- parsedNAME [[which(isNAME >= x[[1]])[[1]]]] else parsedNAME2 <- NULL - if(isAnyRT ) parsedRT2 <- parsedRT [[which(isRT >= x[[1]])[[1]]]] else parsedRT2 <- NULL - if(isAnyRt ) parsedRt2 <- parsedRt [[which(isRt >= x[[1]])[[1]]]] else parsedRt2 <- NULL - if(isAnyMZ ) parsedMZ2 <- parsedMZ [[which(isMZ >= x[[1]])[[1]]]] else parsedMZ2 <- NULL - if(isAnyMz ) parsedMz2 <- parsedMz [[which(isMz >= x[[1]])[[1]]]] else parsedMz2 <- NULL - if(isAnyTotEm ) parsedTotEm2 <- parsedTotEm [[which(isTotEm >= x[[1]])[[1]]]] else parsedTotEm2 <- NULL - if(isAnyEMass ) parsedEMass2 <- parsedEMass [[which(isEMass >= x[[1]])[[1]]]] else parsedEMass2 <- NULL - if(isAnyMetN ) parsedMetN2 <- parsedMetN [[which(isMetN >= x[[1]])[[1]]]] else parsedMetN2 <- NULL - if(isAnyADDN ) parsedADDN2 <- parsedADDN [[which(isADDN >= x[[1]])[[1]]]] else parsedADDN2 <- NULL - if(isAnyAddN ) parsedAddN2 <- parsedAddN [[which(isAddN >= x[[1]])[[1]]]] else parsedAddN2 <- NULL - if(isAnyScanN ) parsedScanN2 <- parsedScanN [[which(isScanN >= x[[1]])[[1]]]] else parsedScanN2 <- NULL - if(isAnyMIon ) parsedMIon2 <- parsedMIon [[which(isMIon >= x[[1]])[[1]]]] else parsedMIon2 <- NULL - if(isAnyPrety ) parsedPrety2 <- parsedPrety [[which(isPrety >= x[[1]])[[1]]]] else parsedPrety2 <- NULL - if(isAnyNumP ) parsedNumP2 <- parsedNumP [[which(isNumP >= x[[1]])[[1]]]] else parsedNumP2 <- NULL - if(isAnyCoCl ) parsedCoCl2 <- parsedCoCl [[which(isCoCl >= x[[1]])[[1]]]] else parsedCoCl2 <- NULL - if(isAnyInty ) parsedInty2 <- parsedInty [[which(isInty >= x[[1]])[[1]]]] else parsedInty2 <- NULL - if(isAnyInchi ) parsedInchi2 <- parsedInchi [[which(isInchi >= x[[1]])[[1]]]] else parsedInchi2 <- NULL - if(isAnyInchiKey) parsedInchiKey2 <- parsedInchiKey [[which(isInchiKey >= x[[1]])[[1]]]]else parsedInchiKey2 <- NULL - if(isAnySmiles ) parsedSmiles2 <- parsedSmiles [[which(isSmiles >= x[[1]])[[1]]]] else parsedSmiles2 <- NULL - } - - if(isAnyName ) parsedName2 <- parsedName [which(isName >= x[[1]] & isName < x[[2]])] else parsedName2 <- NULL - if(isAnyNAME ) parsedNAME2 <- parsedNAME [which(isNAME >= x[[1]] & isNAME < x[[2]])] else parsedNAME2 <- NULL - if(isAnyRT ) parsedRT2 <- parsedRT [which(isRT >= x[[1]] & isRT < x[[2]])] else parsedRT2 <- NULL - if(isAnyRt ) parsedRt2 <- parsedRt [which(isRt >= x[[1]] & isRt < x[[2]])] else parsedRt2 <- NULL - if(isAnyMZ ) parsedMZ2 <- parsedMZ [which(isMZ >= x[[1]] & isMZ < x[[2]])] else parsedMZ2 <- NULL - if(isAnyMz ) parsedMz2 <- parsedMz [which(isMz >= x[[1]] & isMz < x[[2]])] else parsedMz2 <- NULL - if(isAnyTotEm ) parsedTotEm2 <- parsedTotEm [which(isTotEm >= x[[1]] & isTotEm < x[[2]])] else parsedTotEm2 <- NULL - if(isAnyEMass ) parsedEMass2 <- parsedEMass [which(isEMass >= x[[1]] & isEMass < x[[2]])] else parsedEMass2 <- NULL - if(isAnyMetN ) parsedMetN2 <- parsedMetN [which(isMetN >= x[[1]] & isMetN < x[[2]])] else parsedMetN2 <- NULL - if(isAnyADDN ) parsedADDN2 <- parsedADDN [which(isADDN >= x[[1]] & isADDN < x[[2]])] else parsedADDN2 <- NULL - if(isAnyAddN ) parsedAddN2 <- parsedAddN [which(isAddN >= x[[1]] & isAddN < x[[2]])] else parsedAddN2 <- NULL - if(isAnyScanN ) parsedScanN2 <- parsedScanN [which(isScanN >= x[[1]] & isScanN < x[[2]])] else parsedScanN2 <- NULL - if(isAnyMIon ) parsedMIon2 <- parsedMIon [which(isMIon >= x[[1]] & isMIon < x[[2]])] else parsedMIon2 <- NULL - if(isAnyPrety ) parsedPrety2 <- parsedPrety [which(isPrety >= x[[1]] & isPrety < x[[2]])] else parsedPrety2 <- NULL - if(isAnyNumP ) parsedNumP2 <- parsedNumP [which(isNumP >= x[[1]] & isNumP < x[[2]])] else parsedNumP2 <- NULL - if(isAnyCoCl ) parsedCoCl2 <- parsedCoCl [which(isCoCl >= x[[1]] & isCoCl < x[[2]])] else parsedCoCl2 <- NULL - if(isAnyInty ) parsedInty2 <- parsedInty [which(isInty >= x[[1]] & isInty < x[[2]])] else parsedInty2 <- NULL - if(isAnyInchi ) parsedInchi2 <- parsedInchi [which(isInchi >= x[[1]] & isInchi < x[[2]])] else parsedInchi2 <- NULL - if(isAnyInchiKey) parsedInchiKey2 <- parsedInchiKey [which(isInchiKey >= x[[1]] & isInchiKey< x[[2]])] else parsedInchiKey2 <- NULL - if(isAnySmiles ) parsedSmiles2 <- parsedSmiles [which(isSmiles >= x[[1]] & isSmiles < x[[2]])] else parsedSmiles2 <- NULL - - parsedms2Peaks_mz2 = parsedms2Peaks_mz [which(isPeak >= x[[1]] & isPeak <= x[[2]]) ] - parsedms2Peaks_int2 = parsedms2Peaks_int[which(isPeak >= x[[1]] & isPeak <= x[[2]]) ] - - - name <- NULL - rt <- NULL - mz <- NULL - metName <- "Unknown" - adduct <- "Unknown" - scanNumber <- NA - quantMass <- NA - peakNumber <- NA - ms2Peaks_mz <- vector(mode = "numeric") - ms2Peaks_int <- vector(mode = "numeric") - compoundClass <- "Unknown" - instrumentType <- "Unknown" - inchi <- "" - inchiKey <- "" - smiles <- "" - - if(length(parsedName2) != 0) - ## name - name <- parsedName2 - if(length(parsedNAME2) != 0 & !is.null(name)) - ## name - name <- parsedNAME2 - if(length(parsedRT2) != 0) - ## retention time - rt <- parsedRT2 - if(length(parsedRt2) != 0 & any(is.null(rt), is.na(rt))) - rt <- parsedRt2 - if(length(parsedMZ2) != 0) - ## precursor m/z - mz <- parsedMZ2 - if(length(parsedMz2) != 0 & any(is.null(mz), is.na(mz), mz%%1==0)) - mz <- parsedMz2 - if(length(parsedTotEm2) != 0 & any(is.null(mz), is.na(mz), mz%%1==0) & length(parsedPrety2) != 0){ - mzTmp <- parsedTotEm2 - pit <- parsedPrety2 - switch(pit, - "[M-H]-" = { mz <- mzTmp - 1.008 }, - "[M-H]" = { mz <- mzTmp - 1.008 }, - "[M+H]+" = { mz <- mzTmp + 1.008 }, - "[M+H]" = { mz <- mzTmp + 1.008 }, - "[M+Na]+"= { mz <- mzTmp + 22.9898 }, - "[M+Na]" = { mz <- mzTmp + 22.9898 }#, - #stop(paste("Unknown precursor ion type, pit, ")!", sep = "")) - ) - } - if(length(parsedEMass2) != 0 & any(is.null(mz), is.na(mz), mz%%1==0) & length(parsedPrety2) != 0){ - mzTmp <- parsedEMass2 - pit <- parsedPrety2 - switch(pit, - "[M-H]-" = { mz <- mzTmp - 1.008 }, - "[M-H]" = { mz <- mzTmp - 1.008 }, - "[M+H]+" = { mz <- mzTmp + 1.008 }, - "[M+H]" = { mz <- mzTmp + 1.008 }, - "[M+Na]+"= { mz <- mzTmp + 22.9898 }, - "[M+Na]" = { mz <- mzTmp + 22.9898 }#, - #stop(paste("Unknown precursor ion type, pit, ")!", sep = "")) - ) + tmp <- as.numeric(strsplit(x = trimws(substring(text = fileLines_022[[which(isPMass2)[[1]]]], first = nchar("PEPMASS=") + 1)), split = "[\t ]")[[1]]) + mz <- tmp[[1]] + if(length(tmp) > 1) + ms1Int <- tmp[[2]] + } + } + if(any(isE2Mass2) & any(is.null(mz), is.na(mz), ifelse(test = is.null(mz), yes = FALSE, no = mz%%1==0))){ + mz <- parsedE2Mass2[[which(isE2Mass2)[[1]]]] } - if(length(parsedMetN2) != 0) - metName <- parsedMetN2 - if(length(parsedADDN2) != 0) + if(any(isMetN2)) + metName <- parsedMetN2 [[which(isMetN2)[[1]]]] + if(any(isAddN2) & adduct == "Unknown") ## adduct - adduct <- parsedADDN2 - if(length(parsedAddN2) != 0 & adduct == "Unknown") - adduct <- parsedAddN2 - if(length(parsedPrety2) != 0 & adduct == "Unknown") - adduct <- parsedPrety2 - if(length(parsedScanN2) != 0) - scanNumber <- parsedScanN2 - if(length(parsedMIon2) != 0) - quantMass <- parsedMIon2 - if(length(parsedCoCl2) != 0) + adduct <- parsedAddN2 [[which(isAddN2)[[1]]]] + if(any(isPrety2) & adduct == "Unknown") + adduct <- parsedPrety2[[which(isPrety2)[[1]]]] + if(any(isPretyp2) & adduct == "Unknown") + adduct <- parsedPretyp2[[which(isPretyp2)[[1]]]] + if(any(isScanN2)) + scanNumber <- parsedScanN2[[which(isScanN2)[[1]]]] + if(any(isMIon2)) + quantMass <- parsedMIon2 [[which(isMIon2)[[1]]]] + if(any(isNumP2)) + ## #peaks + peakNumber <- parsedNumP2 [[which(isNumP2)[[1]]]] + if(any(isPeak2)){ + ## MS2 peaks: "178.88669\t230" + ms2Peaks_mz <- parsedms2Peaks_mz2 [which(isPeak2)] + ms2Peaks_int <- parsedms2Peaks_int2[which(isPeak2)] + } + if(any(isCoCl2)) ## compound class - compoundClass <- parsedCoCl2 - if(length(parsedInty2) != 0) + compoundClass <- parsedCoCl2 [[which(isCoCl2)[[1]]]] + if(any(isInty2)) + ## instrument type + instrumentType <- parsedInty2 [[which(isInty2)[[1]]]] + if(any(isIntype2)) + ## instrument type + instrumentType <- parsedIntype2 [[which(isIntype2)[[1]]]] + if(any(isIntyp2)) ## instrument type - instrumentType <- parsedInty2 - if(length(parsedInchi2) != 0) + instrumentType <- parsedIntyp2 [[which(isIntyp2)[[1]]]] + if(all(any(isInt2), instrumentType %in% c("Unknown", "NA"))) + ## instrument + instrumentType <- parsedInt2 [[which(isInt2)[[1]]]] + if(any(isInchi2)) ## structure - inchi <- parsedInchi2 - if(length(parsedInchiKey2) != 0) + inchi <- parsedInchi2 [[which(isInchi2)[[1]]]] + if(any(isInchiKey2)) ## structure - inchiKey <- parsedInchiKey2 - if(length(parsedSmiles2) != 0) + inchiKey <- parsedInchiKey2 [[which(isInchiKey2)[[1]]]] + if(any(isSmiles2)) ## structure - smiles <- parsedSmiles2 - if(length(parsedNumP2) != 0) - ## #peaks - peakNumber <- parsedNumP2 - if(length(parsedms2Peaks_mz2) != 0){ - ## MS2 peaks: "178.88669\t230" - ms2Peaks_mz <- parsedms2Peaks_mz2 - ms2Peaks_int <- parsedms2Peaks_int2 - } - - ## end of entry + smiles <- parsedSmiles2 [[which(isSmiles2)[[1]]]] + ## end of parsing if(is.null(rt)) rt <- 0 - #if(is.null(mz)) - # ## in case of gc - # mz <- max(ms2Peaks_mz) ms2Peaks_mz_original <- ms2Peaks_mz ms2Peaks_int_original <- ms2Peaks_int + numberOfMS2PeaksOriginal <<- numberOfMS2PeaksOriginal + length(ms2Peaks_mz) + if(length(ms2Peaks_mz) == 0) + numberOfSpectraDiscardedDueToNoPeaks <<- numberOfSpectraDiscardedDueToNoPeaks + 1 + ################################################################### ## filter fragments with mass greater than precursor - if(!is.null(mz)){ + numberOfTooHeavyFragmentsHere <- 0 + if(all(!is.null(mz), !is.na(mz))){ tooHeavy <- ms2Peaks_mz > mz ms2Peaks_mz <- ms2Peaks_mz [!tooHeavy] ms2Peaks_int <- ms2Peaks_int[!tooHeavy] + numberOfTooHeavyFragmentsHere <- sum(tooHeavy) + + if(length(ms2Peaks_mz) == 0 & numberOfTooHeavyFragmentsHere > 0) + numberOfSpectraDiscardedDueToTooHeavy <<- numberOfSpectraDiscardedDueToTooHeavy + 1 } + numberOfTooHeavyFragments <<- numberOfTooHeavyFragments + numberOfTooHeavyFragmentsHere ################################################################### ## filter for ms2 peak intensity @@ -2482,9 +635,12 @@ parseMSPbig2_ <- function(fileSpectra, minimumIntensityOfMaximalMS2peak, minimum ms2Peaks_mz <- ms2Peaks_mz [fragmentsAboveThreshold] ms2Peaks_int <- ms2Peaks_int[fragmentsAboveThreshold] - numberOfMS2PeaksAboveThreshold <- numberOfMS2PeaksAboveThreshold + length(ms2Peaks_mz) + numberOfMS2PeaksAboveThreshold <<- numberOfMS2PeaksAboveThreshold + sum( fragmentsAboveThreshold) + numberOfMS2PeaksBelowThreshold <<- numberOfMS2PeaksBelowThreshold + sum(!fragmentsAboveThreshold) } else { ## spectrum is not considered + numberOfMS2PeaksBelowThreshold <<- numberOfMS2PeaksBelowThreshold + length(ms2Peaks_mz) + numberOfSpectraDiscardedDueToMaxIntensity <<- numberOfSpectraDiscardedDueToMaxIntensity + 1 ms2Peaks_mz <- vector(mode = "numeric") ms2Peaks_int <- vector(mode = "numeric") } @@ -2503,7 +659,7 @@ parseMSPbig2_ <- function(fileSpectra, minimumIntensityOfMaximalMS2peak, minimum if(peakNumber > 0){ ################################# ## neutral losses regarding the precursor - if(all(!is.null(mz), neutralLossesPrecursorToFragments)){ + if(all(!is.null(mz), !is.na(mz), neutralLossesPrecursorToFragments)){ ms2PeaksNLPF_mz <- ms2Peaks_mz - as.numeric(mz) ms2PeaksNLPF_int <- ms2Peaks_int } else { @@ -2530,7 +686,7 @@ parseMSPbig2_ <- function(fileSpectra, minimumIntensityOfMaximalMS2peak, minimum ################################################################### ## precursor mz #mz <- ifelse(test = !is.null(mz), yes = round(as.numeric(mz), digits = 4), no = ifelse(test = !is.null(scanNumber), yes = scanNumber, no = max(ms2Peaks_mz))) - if(!is.null(mz)){ + if(all(!is.null(mz), !is.na(mz))){ mz <- round(as.numeric(mz), digits = 4) } else { if(!is.na(quantMass)){ @@ -2552,6 +708,7 @@ parseMSPbig2_ <- function(fileSpectra, minimumIntensityOfMaximalMS2peak, minimum ## built ms set spectrumItem <- list( name = name, + ms1Int = ms1Int, #rt = round(as.numeric(rt), digits = 2), rt = rt, mz = mz, @@ -2567,16 +724,17 @@ parseMSPbig2_ <- function(fileSpectra, minimumIntensityOfMaximalMS2peak, minimum peakNumber = length(ms2Peaks_mz), ms2Peaks_mz = ms2Peaks_mz, ms2Peaks_int = ms2Peaks_int, - spectrumString = spectrumString + spectrumString = spectrumString, + entryInterval = x + offset ) if(spectrumItem$peakNumber > 0){ ## add - numberOfMS2PeaksWithNeutralLosses <- numberOfMS2PeaksWithNeutralLosses + spectrumItem$peakNumber + numberOfMS2PeaksWithNeutralLosses <<- numberOfMS2PeaksWithNeutralLosses + spectrumItem$peakNumber return(spectrumItem) } else return(NULL) }) - )## suppressWarnings + ) rm( isName, @@ -2587,8 +745,9 @@ parseMSPbig2_ <- function(fileSpectra, minimumIntensityOfMaximalMS2peak, minimum isMz, isTotEm, isEMass, + isE2Mass, + isPMass, isMetN, - isADDN, isAddN, isScanN, isMIon, @@ -2600,16 +759,17 @@ parseMSPbig2_ <- function(fileSpectra, minimumIntensityOfMaximalMS2peak, minimum isInchi, isInchiKey, isSmiles, - parsedNAME, parsedName, + parsedNAME, parsedRT, parsedRt, parsedMZ, parsedMz, parsedTotEm, parsedEMass, + parsedE2Mass, + parsedPMass, parsedMetN, - parsedADDN, parsedAddN, parsedScanN, parsedMIon, @@ -2625,7 +785,7 @@ parseMSPbig2_ <- function(fileSpectra, minimumIntensityOfMaximalMS2peak, minimum parsedSmiles ) - if(progress) incProgress(amount = 0.1, detail = "MS/MS file: Box") else print("MS/MS file: Box") + if(!is.na(progress)) if(progress) incProgress(amount = 0.1, detail = "MS/MS file: Box") else print("MS/MS file: Box") ## remove NULL entries? spectraList[unlist(lapply(X = spectraList, FUN = is.null))] <- NULL @@ -2634,13 +794,6 @@ parseMSPbig2_ <- function(fileSpectra, minimumIntensityOfMaximalMS2peak, minimum ## postprocess if(!is.na(progress)) if(progress) incProgress(amount = 0.01, detail = paste("MS/MS file postprocessing", sep = "")) else print(paste("MS/MS file postprocessing", sep = "")) - #precursorMz <- vector(mode = "numeric", length = numberOfSpectra) - #for(spectrumIdx in seq_len(length.out = numberOfSpectra)) - # precursorMz[[spectrumIdx]] <- spectraList[[spectrumIdx]]$mz - - #precursorRt <- vector(mode = "numeric", length = numberOfSpectra) - #for(spectrumIdx in seq_len(length.out = numberOfSpectra)) - # precursorRt[[spectrumIdx]] <- spectraList[[spectrumIdx]]$rt precursorMz <- unlist(lapply(X = spectraList, FUN = function(x){ if(is.null(x)) return(NULL) @@ -2653,39 +806,187 @@ parseMSPbig2_ <- function(fileSpectra, minimumIntensityOfMaximalMS2peak, minimum if(!is.na(progress)) if(progress) incProgress(amount = 0.01, detail = paste("MS/MS file boxing", sep = "")) else print(paste("MS/MS file boxing", sep = "")) returnObj <- list() - returnObj$fileSpectra <- fileSpectra + returnObj$fileSpectra <- NA returnObj$spectraList <- spectraList returnObj$numberOfSpectra <- numberOfSpectra + returnObj$numberOfSpectraOriginal <- numberOfSpectraOriginal returnObj$numberOfMS2PeaksOriginal <- numberOfMS2PeaksOriginal returnObj$numberOfMS2PeaksWithNeutralLosses <- numberOfMS2PeaksWithNeutralLosses returnObj$numberOfMS2PeaksAboveThreshold <- numberOfMS2PeaksAboveThreshold returnObj$numberOfMS2PeaksBelowThreshold <- numberOfMS2PeaksBelowThreshold + returnObj$numberOfTooHeavyFragments <- numberOfTooHeavyFragments + returnObj$numberOfSpectraDiscardedDueToNoPeaks <- numberOfSpectraDiscardedDueToNoPeaks + returnObj$numberOfSpectraDiscardedDueToMaxIntensity <- numberOfSpectraDiscardedDueToMaxIntensity + returnObj$numberOfSpectraDiscardedDueToTooHeavy <- numberOfSpectraDiscardedDueToTooHeavy returnObj$precursorMz <- precursorMz returnObj$precursorRt <- precursorRt return(returnObj) } +parseMSP_attributes <- function(fileSpectra, progress = FALSE, flexiblePeakList = FALSE, multiplePeaksPerLine = FALSE, includeIDasRecordSeparator=TRUE, includeNAMEasRecordSeparator=TRUE, includeTITLEasRecordSeparator=TRUE, returnEmptySpectra = FALSE){ + fileLines <- readLines(con = fileSpectra) + + if(!is.na(progress)) if(progress) incProgress(amount = 0, detail = "MS/MS file: Read file") else print("MS/MS file: Read file") + + numberOfFileLines <- length(fileLines) + + ## start with empty lines or not? + endOfRecord <- TRUE + if(numberOfFileLines > 0) + if(nchar(trimws(fileLines[[1]])) > 0) + endOfRecord <- FALSE + + ## check for pattern + if(!is.na(progress)) if(progress) incProgress(amount = 0, detail = "MS/MS file: Parse") else print("MS/MS file: Parse") + isID <- grepl(pattern = "^ID:", x = fileLines) + isBI <- grepl(pattern = "^BEGIN IONS$", x = fileLines) + isName <- grepl(pattern = "(^Name:)|(^NAME:)", x = fileLines) + isNAme <- grepl(pattern = "^NAME=", x = fileLines) + isTITLE <- grepl(pattern = "^TITLE=", x = fileLines) + isAccession <- grepl(pattern = "^ACCESSION:", x = fileLines) + + if(!includeIDasRecordSeparator) isID <- rep(x = F, times = length(isID)) + if(!includeNAMEasRecordSeparator){ + isName <- rep(x = F, times = length(isName)) + isNAme <- rep(x = F, times = length(isNAme)) + } + if(!includeTITLEasRecordSeparator) isTITLE <- rep(x = F, times = length(isTITLE)) + + numberSmall <- "\\d+(\\.\\d+)?" + numberBig <- "\\d+(\\.\\d+(E\\d+)?)?" + mzValueRegEx <- "(\\d+(\\.\\d+)?)" + intensityRegex <- "(\\d+((\\.\\d+)?([eE](-)?\\d+)?)?)" + annotationRegex <- "\".+\"" + if(flexiblePeakList){ + isPeak <- grepl(pattern = "^[ \t]*\\d+(\\.\\d+([eE](-)?\\d+)?)?([ \t]\\d+(\\.\\d+([eE](-)?\\d+)?)?)*[ \t]*$", x = fileLines) + } else { + isPeak <- grepl(pattern = paste("^[ \t]*", mzValueRegEx, "[ \t]", intensityRegex, "([ \t]+((", mzValueRegEx, "[ \t]", intensityRegex, ")|(", annotationRegex, ")))*[ \t]*$", sep = ""), x = fileLines) + } + isEmpty <- nchar(trimws(fileLines)) == 0 + + tagVector <- unlist(lapply(X = str_split(string = fileLines, pattern = "(:)|(=)"), FUN = function(x){x[[1]]})) + valueVector <- trimws(substr(x = fileLines, start = nchar(tagVector) + 1 + 1, stop = nchar(fileLines))) + + ## entry line intervals in file + entryBorders <- c(which(isName | isNAme | isTITLE | isBI | isID | isAccession), length(fileLines)+1) + entryIntervals <- matrix(data = unlist(lapply(X = seq_len(length(entryBorders) - 1), FUN = function(x){c(entryBorders[[x]], entryBorders[[x+1]] - 1)})), nrow=2) + + ## do it + if(!is.na(progress)) if(progress) incProgress(amount = 0, detail = "MS/MS file: Assemble spectra") else print("MS/MS file: Assemble spectra") + spectraList <- apply(X = entryIntervals, MARGIN = 2, FUN = function(x){ + #print(x) + fileLines2 <- fileLines [x[[1]]:x[[2]]] + isPeak2 <- isPeak [x[[1]]:x[[2]]] + isEmpty2 <- isEmpty [x[[1]]:x[[2]]] + + tagVector2 <- tagVector [x[[1]]:x[[2]]] + valueVector2 <- valueVector[x[[1]]:x[[2]]] + + ################################################################### + ## built ms set + spectrumItem <- list() + spectrumItem[tagVector2[!isPeak2 & !isEmpty2]] <- trimws(valueVector2[!isPeak2 & !isEmpty2]) + + if(!is.null(spectrumItem$"Num Peaks")) + if(spectrumItem$"Num Peaks" == "0" & !returnEmptySpectra) + return(NULL) + + peakLines <- fileLines2[isPeak2] + peakLines <- trimws(gsub(x = peakLines, pattern = "\".*\"", replacement = "")) + + if(multiplePeaksPerLine){ + peakLines <- unlist(strsplit(x = peakLines, split = "[ \t]")) + } else { + peakLines <- unlist(lapply(X = strsplit(x = peakLines, split = "[ \t]"), FUN = function(peaktokens){peaktokens[1:2]})) + } + spectrumItem["peaks"] <- paste(peakLines, collapse = " ") + spectrumItem["peaks"] <- trimws(gsub(x = spectrumItem["peaks"], pattern = " ", replacement = " ")) + + ## check peaks + tokens <- strsplit(x = spectrumItem[["peaks"]], split = "[ \t]")[[1]] + if(length(tokens) == 0){#spectrumItem$"Num Peaks" == "0"){ + mzs <- character(0) + ints <- character(0) + warning(paste(basename(fileSpectra), ": Empty peak list for [", paste(x, collapse = ","), "]", sep = "")) + } else { + mzs <- tokens[seq(from=1, to=length(tokens), by = 2)] + ints <- tokens[seq(from=2, to=length(tokens), by = 2)] + } + if(length(mzs) != length(ints)) stop("error in parsing peaks") + + ## handle duplicated tags + duplicatedTags <- unique(names(spectrumItem)[duplicated(names(spectrumItem))]) + if(length(duplicatedTags) > 0){ + duplicated <- sapply(X = duplicatedTags, FUN = function(x){ + unlist(sapply(X = seq_along(spectrumItem), FUN = function(y){ if(names(spectrumItem[y])==x) return(y) })) + }, simplify = F) + + indecesToRemove <- vector(mode = "integer", length = 0) + for(idx in seq_along(duplicated)){ + indeces <- duplicated[[idx]] + representant <- indeces[[1]] + indecesToRemoveHere <- indeces[-1] + + spectrumItem[[representant]] <- paste(spectrumItem[indeces], sep = "; ") + indecesToRemove <- c(indecesToRemove, indecesToRemoveHere) + } + spectrumItem <- spectrumItem[-indecesToRemove] + } + + return(spectrumItem) + }) + + rm( + isName, + isNAme, + isTITLE, + isBI, + isID, + isPeak, + tagVector, + valueVector + ) + + if(!is.na(progress)) if(progress) incProgress(amount = 0.1, detail = "MS/MS file: Box") else print("MS/MS file: Box") + + ## remove NULL entries? + spectraList[unlist(lapply(X = spectraList, FUN = is.null))] <- NULL + + numberOfSpectra <- length(spectraList) + + ## postprocess + if(!is.na(progress)) if(progress) incProgress(amount = 0.01, detail = paste("MS/MS file postprocessing", sep = "")) else print(paste("MS/MS file postprocessing", sep = "")) + + if(!is.na(progress)) if(progress) incProgress(amount = 0.01, detail = paste("MS/MS file boxing", sep = "")) else print(paste("MS/MS file boxing", sep = "")) + returnObj <- list() + returnObj$fileSpectra <- fileSpectra + returnObj$spectraList <- spectraList + returnObj$numberOfSpectra <- numberOfSpectra + + return(returnObj) +} + + + + + #################################################################################### ## built matrix -builtMatrix <- function(spectraList, mzDeviationAbsolute_grouping, mzDeviationInPPM_grouping, doMs2PeakGroupDeisotoping, mzDeviationAbsolute_ms2PeakGroupDeisotoping, mzDeviationInPPM_ms2PeakGroupDeisotoping, proportionOfMatchingPeaks_ms2PeakGroupDeisotoping, progress = FALSE){ +builtMatrix <- function(spectraList, + mzDeviationAbsolute_grouping, + mzDeviationInPPM_grouping, + doMs2PeakGroupDeisotoping, + mzDeviationAbsolute_ms2PeakGroupDeisotoping, + mzDeviationInPPM_ms2PeakGroupDeisotoping, + proportionOfMatchingPeaks_ms2PeakGroupDeisotoping, + progress = FALSE) +{ if(!is.na(progress)) if(progress) incProgress(amount = 0.005, detail = paste("Fragment grouping preprocessing...", sep = "")) else print(paste("Fragment grouping preprocessing...", sep = "")) numberOfSpectra <- length(spectraList) - # spectrumItem <- list( - # name = name, - # rt = round(as.numeric(rt), digits = 2), - # mz = round(as.numeric(mz), digits = 4), - # metName = metName, - # adduct = adduct, - # #peakNumber = as.numeric(peakNumber), - # peakNumber = length(ms2Peaks_mz), - # ms2Peaks_mz = ms2Peaks_mz, - # ms2Peaks_int = ms2Peaks_int - # ) - fragment_mz <- as.vector(unlist(lapply(X = spectraList, FUN = function(x){x$ms2Peaks_mz }))) fragment_int <- as.vector(unlist(lapply(X = spectraList, FUN = function(x){x$ms2Peaks_int}))) fragment_spec <- as.vector(unlist(lapply(X = spectraList, FUN = function(x){x$peakNumber }))) @@ -2696,7 +997,6 @@ builtMatrix <- function(spectraList, mzDeviationAbsolute_grouping, mzDeviationIn if(!is.na(progress)) if(progress) incProgress(amount = 0, detail = paste("Fragment grouping", sep = "")) else print(paste("Fragment grouping", sep = "")) startTime <- Sys.time() - #resultObj <- xcms:::mzClustGeneric( resultObj <- mzClustGeneric( p = matrix(data = c(fragment_mz, fragment_spec), nrow = numberOfMS2Peaks, ncol = 2), sampclass = NULL, @@ -2729,7 +1029,6 @@ builtMatrix <- function(spectraList, mzDeviationAbsolute_grouping, mzDeviationIn matrixVals <- c(matrixVals, fragment_int[groupMembers]) } - #rm(ms2PeakGroupList) numberOfCollisions <- sum(duplicated(cbind(matrixRows, matrixCols))) endTime <- Sys.time() if(!is.na(progress)) if(progress) incProgress(amount = 0, detail = paste("Fragment group postprocessing ready (", difftime(time1 = endTime, time2 = startTime, units = "secs"), "s)", sep = "")) else print(paste("Fragment group postprocessing ready (", difftime(time1 = endTime, time2 = startTime, units = "secs"), "s)", sep = "")) @@ -2755,11 +1054,6 @@ builtMatrix <- function(spectraList, mzDeviationAbsolute_grouping, mzDeviationIn if(!is.na(progress)) if(progress) incProgress(amount = 0, detail = paste("Boxing to matrix", sep = "")) else print(paste("Boxing to matrix", sep = "")) matrix <- sparseMatrix(i = matrixRows, j = matrixCols, x = matrixVals, dims = c(numberOfSpectra, numberOfMS2PeakGroups)) - #rm(matrixRows) - #rm(matrixCols) - #rm(matrixVals) - #gc() - orderTempCol <- order(fragmentMasses) matrix <- matrix[, orderTempCol] @@ -2782,8 +1076,7 @@ builtMatrix <- function(spectraList, mzDeviationAbsolute_grouping, mzDeviationIn if((ms2PeakGroupIdx %% (as.integer(numberOfMS2PeakGroups/10))) == 0) if(!is.na(progress)){ if(progress) incProgress(amount = 0.0, detail = paste("Fragment group deisotoping ", ms2PeakGroupIdx, " / ", numberOfMS2PeakGroups, sep = "")) else print(paste("Fragment group deisotoping ", ms2PeakGroupIdx, " / ", numberOfMS2PeakGroups, sep = "")) - #break - } + } mzError <- abs(fragmentMasses[[ms2PeakGroupIdx]] * mzDeviationInPPM_ms2PeakGroupDeisotoping / 1E6) mzError <- max(mzError, mzDeviationAbsolute_ms2PeakGroupDeisotoping) @@ -2796,9 +1089,7 @@ builtMatrix <- function(spectraList, mzDeviationAbsolute_grouping, mzDeviationIn distances <- (fragmentMasses[[ms2PeakGroupIdx]] + distance13Cminus12C) - fragmentMasses[-ms2PeakGroupIdx] } validInMz <- abs(distances) <= mzError - #validInMz1 <- abs((fragmentMasses[[ms2PeakGroupIdx]] - distance13Cminus12C) - fragmentMasses[-ms2PeakGroupIdx]) <= mzError - #validInMz2 <- abs((fragmentMasses[[ms2PeakGroupIdx]] - distance13Cminus12C * 2) - fragmentMasses[-ms2PeakGroupIdx]) <= mzError - #validInMz <- validInMz1 | validInMz2 + if(!any(validInMz)) next validInMz <- which(validInMz) @@ -2807,16 +1098,18 @@ builtMatrix <- function(spectraList, mzDeviationAbsolute_grouping, mzDeviationIn ## isotopic fragments are mainly in spectra with monoisotopic fragments fragmentIntensitiesHere <- matrix[, ms2PeakGroupIdx] - #if(TRUE) next + isotopicThere <- fragmentIntensitiesHere != 0 numberOfFragmentPeaksHere <- sum(isotopicThere) - validInOverlap <- apply(X = matrix(data = matrix[, validInMz], nrow = numberOfSpectra), MARGIN = 2, FUN = function(x){ - monoisotopicThere <- x != 0 - precursorInCommon <- isotopicThere & monoisotopicThere - validOverlap <- (sum(precursorInCommon) / sum(isotopicThere)) >= proportionOfMatchingPeaks_ms2PeakGroupDeisotoping - return(validOverlap) - }) + validInOverlap <- apply(X = matrix(data = matrix[, validInMz], + nrow = numberOfSpectra), + MARGIN = 2, FUN = function(x){ + monoisotopicThere <- x != 0 + precursorInCommon <- isotopicThere & monoisotopicThere + validOverlap <- (sum(precursorInCommon) / sum(isotopicThere)) >= proportionOfMatchingPeaks_ms2PeakGroupDeisotoping + return(validOverlap) + }) if(!any(validInOverlap)) next @@ -2825,7 +1118,6 @@ builtMatrix <- function(spectraList, mzDeviationAbsolute_grouping, mzDeviationIn ## intensity gets smaller in the isotope spectrum - #monoisotopicFragmentIntensities <- matrix[, monoisotopicFragmentColumn] monoisotopicThere <- matrix[, monoisotopicFragmentColumn] != 0 numberOfMonoisotopicFragmentPeaks <- sum(monoisotopicThere) precursorInCommon <- isotopicThere & monoisotopicThere @@ -2838,9 +1130,6 @@ builtMatrix <- function(spectraList, mzDeviationAbsolute_grouping, mzDeviationIn ms2PeakGroupsToRemove[[ms2PeakGroupIdx]] <- TRUE } - #endTime <- Sys.time() - #difftime(time1 = endTime, time2 = startTime, units = "secs") - ## remove matrix <- matrix[, !ms2PeakGroupsToRemove] @@ -2881,140 +1170,35 @@ builtMatrix <- function(spectraList, mzDeviationAbsolute_grouping, mzDeviationIn return(returnObj) } -builtMatrix_big_ <- function(spectraList, mzDeviationAbsolute_grouping, mzDeviationInPPM_grouping, doMs2PeakGroupDeisotoping, mzDeviationAbsolute_ms2PeakGroupDeisotoping, mzDeviationInPPM_ms2PeakGroupDeisotoping, proportionOfMatchingPeaks_ms2PeakGroupDeisotoping, progress = FALSE){ - if(!is.na(progress)) if(progress) incProgress(amount = 0.05, detail = paste("Fragment chunking...", sep = "")) else print(paste("Fragment chunking...", sep = "")) - - mzs <- sort(unlist(lapply(X = spectraList, FUN = function(x){x$ms2Peaks_mz}))) - mzDiff <- diff(mzs) - mzErrorPPM <- mzDiff * 1E6 / mzs[1:(length(mzs) - 1)] - - mzAbsThreshold <- parameterSet$mzDeviationAbsolute_grouping - mzPpmThreshold <- parameterSet$mzDeviationInPPM_grouping - - #mzErrorPPM[mzErrorPPM==0] <- 0.0001 - #plot(mzErrorPPM, log="y") - #segments(1,mzPpmThreshold,200000,mzPpmThreshold) - - bigMzJumps <- mzErrorPPM > mzPpmThreshold | mzDiff > mzAbsThreshold - - maximumNumberOfMzValues <- 50000 - mzThresholdsDown <- list() - mzThresholdsUp <- list() - mzCounts <- list() - - while(length(mzs) > 0){ - if(!is.na(progress)) if(progress) incProgress(amount = 0.4, detail = paste("Fragment chunking ", (length(mzThresholdsDown) + 1), "...", sep = "")) else print(paste("Fragment chunking ", (length(mzThresholdsDown) + 1), "...", sep = "")) - - if(length(mzs) <= maximumNumberOfMzValues){ - ## the end --> take all - #break - - end <- length(mzs) - } else { - ## compute next end - end <- maximumNumberOfMzValues - end <- suppressWarnings(max(which(bigMzJumps[1:end]))) - - if(any(is.infinite(end), end < maximumNumberOfMzValues / 10)){ - end <- min(which(bigMzJumps[(maximumNumberOfMzValues + 1):length(bigMzJumps)])) + maximumNumberOfMzValues - } - - #end <- end - 1 - } - - ## box threshold - mzThresholdsDown[length(mzThresholdsDown) + 1] <- mzs[[1]] - mzThresholdsUp [length(mzThresholdsUp ) + 1] <- mzs[[end]] - mzCounts [length(mzCounts ) + 1] <- end + 1 - #print(mzs[[end]]) - - ## for next iteration - if(end < length(mzs)){ - bigMzJumps <- bigMzJumps[(end + 1):length(bigMzJumps)] - mzs <- mzs [(end + 1):length(mzs )] - } else { - bigMzJumps <- logical() - mzs <- numeric() - } - } - - ## chunked mzClust - if(!is.na(progress)) if(progress) incProgress(amount = 0.4, detail = paste("Fragment group assembly...", sep = "")) else print(paste("Fragment group assembly...", sep = "")) - - numberOfSpectra <- length(spectraList) - - returnObj <- list() - returnObj$matrix <- matrix(nrow = numberOfSpectra, ncol = 0) - returnObj$numberOfSpectra <- numberOfSpectra - - returnObj$fragmentMasses <- vector(mode = "numeric", length = 0) - returnObj$numberOfCollisions <- 0 - - returnObj$numberOfMS2PeaksOriginal <- 0 - returnObj$numberOfMS2PeaksPrior <- 0 - returnObj$numberOfRemovedMS2IsotopePeaks <- 0 - returnObj$numberOfMS2PeakGroups <- 0 - returnObj$numberOfMS2PeakGroupsPrior <- 0 - returnObj$numberOfRemovedMS2PeakGroupIsotopeColumns <- 0 - - for(idx in seq_along(mzThresholds)){ - if(!is.na(progress)) if(progress) incProgress(amount = 0., detail = paste("Fragment group assembly ", (length(mzThresholdsDown) + 1), "...", sep = "")) else print(paste("Fragment group assembly ", (length(mzThresholdsDown) + 1), "...", sep = "")) - - mzThresholdDown <- mzThresholdsDown[[idx]] - mzThresholdUp <- mzThresholdsUp [[idx]] - #print(paste(idx, "[", mzThresholdDown, ", ", mzThresholdUp, "]", "-->", mzCounts[[idx]])) - - ## ms2Peaks_mz, ms2Peaks_int, peakNumber - - spectraList2 <- lapply(X = spectraList, FUN = function(x){ - these <- which(x$ms2Peaks_mz >= mzThresholdDown & x$ms2Peaks_mz <= mzThresholdUp) - ms2Peaks_mz <- x$ms2Peaks_mz [these] - ms2Peaks_int <- x$ms2Peaks_int[these] - peakNumber <- length(ms2Peaks_mz) - - spectrumList <- list( - "ms2Peaks_mz" = ms2Peaks_mz, "ms2Peaks_int" = ms2Peaks_int, "peakNumber" = peakNumber - ) - return(spectrumList) - }) - - # plot(sort(unlist(lapply(spectraList2, function(x){x$peakNumber})))) - - returnObj2 <- builtMatrix( - spectraList = spectraList2, - mzDeviationAbsolute_grouping = parameterSet$mzDeviationAbsolute_grouping, - mzDeviationInPPM_grouping = parameterSet$mzDeviationInPPM_grouping, - doMs2PeakGroupDeisotoping = parameterSet$doMs2PeakGroupDeisotoping, - mzDeviationAbsolute_ms2PeakGroupDeisotoping = parameterSet$mzDeviationAbsolute_ms2PeakGroupDeisotoping, - mzDeviationInPPM_ms2PeakGroupDeisotoping = parameterSet$mzDeviationInPPM_ms2PeakGroupDeisotoping, - proportionOfMatchingPeaks_ms2PeakGroupDeisotoping = parameterSet$proportionOfMatchingPeaks_ms2PeakGroupDeisotoping, - progress = NA - ) - - returnObj$matrix <- cbind(returnObj$matrix, returnObj2$matrix) - #returnObj$numberOfSpectra - returnObj$fragmentMasses <- c( returnObj$fragmentMasses, returnObj2$fragmentMasses) - returnObj$numberOfCollisions <- returnObj$numberOfCollisions + returnObj2$numberOfCollisions - - returnObj$numberOfMS2PeaksOriginal <- returnObj$numberOfMS2PeaksOriginal + returnObj2$numberOfMS2PeaksOriginal - returnObj$numberOfMS2PeaksPrior <- returnObj$numberOfMS2PeaksPrior + returnObj2$numberOfMS2PeaksPrior - returnObj$numberOfRemovedMS2IsotopePeaks <- returnObj$numberOfRemovedMS2IsotopePeaks + returnObj2$numberOfRemovedMS2IsotopePeaks - returnObj$numberOfMS2PeakGroups <- returnObj$numberOfMS2PeakGroups + returnObj2$numberOfMS2PeakGroups - returnObj$numberOfMS2PeakGroupsPrior <- returnObj$numberOfMS2PeakGroupsPrior + returnObj2$numberOfMS2PeakGroupsPrior - returnObj$numberOfRemovedMS2PeakGroupIsotopeColumns <- returnObj$numberOfRemovedMS2PeakGroupIsotopeColumns + returnObj2$numberOfRemovedMS2PeakGroupIsotopeColumns - } - if(!is.na(progress)) if(progress) incProgress(amount = 0.05, detail = paste("Fragment group assembly ready", sep = "")) else print(paste("Fragment group assembly ready", sep = "")) - - return(returnObj) -} -## adapted from R package xcms: xcms_1.44.0, package path: R/mzClust.R -## -## Reference: -## Alignment of high resolution mass spectra: development of a heuristic approach for metabolomics -## Metabolomics June 2006, Volume 2, Issue 2, pp 75-83 -## http://link.springer.com/article/10.1007%2Fs11306-006-0021-7 -mzClustGeneric <- function(p, sampclass=NULL, mzppm = 20, mzabs = 0, minsamp = 1, minfrac=0.5, progress = FALSE){ +#' Alignment of high resolution mass spectra +#' +#' adapted from R package xcms: xcms_1.44.0, package path: R/mzClust.R +#' Reference: +#' Alignment of high resolution mass spectra: development of a heuristic approach for metabolomics +#' Metabolomics June 2006, Volume 2, Issue 2, pp 75-83 +#' http://link.springer.com/article/10.1007%2Fs11306-006-0021-7 +#' +#' @param p +#' @param sampclass +#' @param mzppm +#' @param mzabs +#' @param minsamp +#' @param minfrac +#' @param progress +#' +#' @return +#' @export +#' +#' @examples +mzClustGeneric <- function(p, + sampclass=NULL, + mzppm = 20, + mzabs = 0, + minsamp = 1, + minfrac=0.5, + progress = FALSE) +{ makeBin <- function(pos){ if(pos > numpeaks) return(list(pos=pos,bin=c(-1))) @@ -3022,25 +1206,19 @@ mzClustGeneric <- function(p, sampclass=NULL, mzppm = 20, mzabs = 0, minsamp = 1 bin <- pord[pos] pos <- pos+1 basepeak <- p[bin[1],1] - #error_range <- c(basepeak, basepeak*error_window+basepeak+2*mzabs) + error_range <- c(basepeak, abs(basepeak)*error_window+basepeak+2*mzabs) while(pos < numpeaks && p[pord[pos],1] <= error_range[2]) { bin <- c(bin,pord[pos]) pos <- pos + 1 } - #if(pos %% (numpeaks%/%100+1) == 0) { - # cat(format(((pos-1)/numpeaks*100),digits=1,nsmall=2)," ") - # flush.console() - #} - lst <- list(pos=pos,bin=bin) lst } meanDeviationOverLimit <- function(bin){ bin_mz <- p[bin,1] m <- mean(bin_mz) - #error_range <- c(m-ppm_error*m-mzabs, ppm_error*m+m+mzabs) error_range <- c(m-ppm_error*abs(m)-mzabs, ppm_error*abs(m)+m+mzabs) if(length(bin_mz[(bin_mz > error_range[2]) | (bin_mz < error_range[1])]) > 0 ) { @@ -3103,8 +1281,6 @@ mzClustGeneric <- function(p, sampclass=NULL, mzppm = 20, mzabs = 0, minsamp = 1 loopCounter <- 0 while(TRUE){ loopCounter <- loopCounter + 1 - #print(loopCounter) - #if(loopCounter==618) break if(binNumber +4 > nrow(groupmat)){ groupmat <- rbind(groupmat, matrix(nrow = nrow(groupmat), ncol = ncol(groupmat))) @@ -3119,7 +1295,6 @@ mzClustGeneric <- function(p, sampclass=NULL, mzppm = 20, mzabs = 0, minsamp = 1 lastPos <- pos if(!is.na(progress)) if(progress) incProgress(amount = peakProgress * 0.2, detail = paste("Fragment grouping ", pos, " / ", numpeaks, sep = "")) else { print(paste("Fragment grouping ", pos, " / ", numpeaks, sep = "")) - #print(tail(x = sort( sapply(ls(),function(x){object.size(get(x))})), n = 4)) } } @@ -3141,8 +1316,8 @@ mzClustGeneric <- function(p, sampclass=NULL, mzppm = 20, mzabs = 0, minsamp = 1 min_binB <- min(p[binB,1]) binclust <- 0 - if(max_binA + abs(max_binA)*error_window+2*mzabs >= min_binB && min_binB - abs(min_binB)*error_window - 2*mzabs <= max_binA){ - #if(max_binA + max_binA*error_window+2*mzabs >= min_binB && min_binB - min_binB*error_window - 2*mzabs <= max_binA){ + if( max_binA + abs(max_binA)*error_window+2*mzabs >= min_binB + && min_binB - abs(min_binB)*error_window - 2*mzabs <= max_binA){ binC <- c(binA,binB) binclust <- 1 } else { @@ -3158,35 +1333,38 @@ mzClustGeneric <- function(p, sampclass=NULL, mzppm = 20, mzabs = 0, minsamp = 1 if(length(unique(p[binC,1])) > 10000){ ## debugging - stop(paste("Too many fragments for clustering:", ppm_error, mzabs, length(p[binC,1]), length(unique(p[binC,1])))) + stop(paste("Too many fragments for clustering:", + ppm_error, mzabs, length(p[binC,1]), length(unique(p[binC,1])))) } - #groups <- xcms:::mzClust_hclust(p[binC,1],ppm_error,mzabs) - bin <- p[binC,1] uniqueBin <- unique(bin) - groups <- xcms:::mzClust_hclust(uniqueBin,ppm_error,mzabs) - groups2 <- vector(mode = "integer", length = length(bin)) + + ## xcms:::mzClust_hclust is using + ## the fast C implementation: .C("R_mzClust_hclust" + mzFragmentGroups <- xcms:::mzClust_hclust(uniqueBin,ppm_error,mzabs) + mzFragmentGroups2 <- vector(mode = "integer", length = length(bin)) for(idx in seq_along(uniqueBin)) - groups2[bin==uniqueBin[[idx]]] <- groups[[idx]] + mzFragmentGroups2[bin==uniqueBin[[idx]]] <- mzFragmentGroups[[idx]] - groups <- groups2 + mzFragmentGroups <- mzFragmentGroups2 - last_group <- groups[which.max(p[binC,1])] - binA <- binC[which(groups == last_group)] + last_group <- mzFragmentGroups[which.max(p[binC,1])] + binA <- binC[which(mzFragmentGroups == last_group)] - ## bug fix where there were not enough empty rows in the matrix (in case of more than four new groups) + ## bug fix where there were not enough empty rows in the matrix + ## (in case of more than four new mzFragmentGroups) if(binNumber + last_group > nrow(groupmat)){ groupmat <- rbind(groupmat, matrix(nrow = nrow(groupmat), ncol = ncol(groupmat))) groupindex <- c(groupindex, vector("list", length(groupindex))) } - if(max(groups) >1){ - for(c in 1:max(groups)){ + if(max(mzFragmentGroups) >1){ + for(c in 1:max(mzFragmentGroups)){ if(c == last_group){ next } - tmp_grp <- which(groups == c) + tmp_grp <- which(mzFragmentGroups == c) tmp_c <- binC[tmp_grp] out <- bin2output(tmp_c) if(length(out) != 0){ @@ -3218,7 +1396,10 @@ mzClustGeneric <- function(p, sampclass=NULL, mzppm = 20, mzabs = 0, minsamp = 1 return(list(mat=groupmat,idx=groupindex)) } -convertToProjectFile <- function(filePeakMatrix, fileSpectra, parameterSet, progress = FALSE){ +convertToProjectFile <- function(filePeakMatrix, + fileSpectra, + parameterSet, + progress = FALSE){ #################################################################################### ## parse MS/MS spectra @@ -3251,15 +1432,6 @@ convertToProjectFile <- function(filePeakMatrix, fileSpectra, parameterSet, prog if(!is.na(progress)) if(progress) incProgress(amount = 0.01, detail = paste("Parsing MS/MS file ready", sep = "")) else print(paste("Parsing MS/MS file ready", sep = "")) - ## out - #print(paste("parsing file", fileSpectra, "finished")) - #print(paste("Number of MS2 spectra: ", numberOfSpectra)) - #print(paste("Avg number of MS2 peaks per MS2 spectrum: ", returnObj$numberOfMS2PeaksAboveThreshold / numberOfSpectra)) - #print(paste("Number of MS2 peaks: ", returnObj$numberOfMS2Peaks)) - #print(paste("Number of MS2 peaks with neutral losses: ", returnObj$numberOfMS2PeaksWithNeutralLosses)) - #print(paste("Number of MS2 peaks considered: ", returnObj$numberOfMS2PeaksAboveThreshold)) - #print(paste("Number of MS2 peaks not considered: ", returnObj$numberOfMS2PeaksBelowThreshold)) - rm(returnObj) returnObj <- convertToProjectFile2( @@ -3281,10 +1453,18 @@ convertToProjectFile <- function(filePeakMatrix, fileSpectra, parameterSet, prog return(returnObj) } -## metaboliteFamilies <- rep(x = "", times = numberOfSpectra) -## uniqueMetaboliteFamilies <- NULL -## metaboliteFamilyColors <- NULL -convertToProjectFile2 <- function(filePeakMatrix, spectraList, precursorMz, precursorRt, metaboliteFamilies, uniqueMetaboliteFamilies, metaboliteFamilyColors, furtherProperties = list(), parameterSet, progress = FALSE){ + +convertToProjectFile2 <- function(filePeakMatrix, + spectraList, + precursorMz, + precursorRt, + metaboliteFamilies, + uniqueMetaboliteFamilies, + metaboliteFamilyColors, + furtherProperties = list(), + parameterSet, + progress = FALSE) +{ numberOfSpectraParsed <- length(spectraList) #################################################################################### @@ -3306,7 +1486,6 @@ convertToProjectFile2 <- function(filePeakMatrix, spectraList, precursorMz, prec dataColumnStartEndIndeces <- returnObj$dataColumnStartEndIndeces numberOfDataColumns <- returnObj$numberOfDataColumns - #groupLabels <- returnObj$groupLabels groupLabels <- returnObj$sampleClass sampleType <- returnObj$sampleType sampleInjectionOrder <- returnObj$sampleInjectionOrder @@ -3332,7 +1511,6 @@ convertToProjectFile2 <- function(filePeakMatrix, spectraList, precursorMz, prec dataColumnStartEndIndeces <- c(5,5) + length(furtherProperties) numberOfDataColumns <- 1 - #groupLabels <- returnObj$groupLabels groupLabels <- c("Unknown") sampleType <- c("Sample") sampleInjectionOrder <- -1 @@ -3345,7 +1523,6 @@ convertToProjectFile2 <- function(filePeakMatrix, spectraList, precursorMz, prec } isGC <- "EI spectrum" %in% colnames(dataFrame) - #if(is.null(dataFrame$"Average Mz")){ if(isGC){ ## in case of GC-MS data set this value according to retention time #assignment <- unlist(lapply(X = precursorRt, FUN = function(x){ @@ -3360,7 +1537,6 @@ convertToProjectFile2 <- function(filePeakMatrix, spectraList, precursorMz, prec # return(precursorMz[[x[[1]]]]) #})) - #dataFrame$"Average Mz" <- seq_len(nrow(dataFrame)) dataFrame$"Average Mz" <- as.numeric(dataFrame$"Quant mass") if(nrow(dataFrame) == length(spectraList)){ @@ -3382,13 +1558,6 @@ convertToProjectFile2 <- function(filePeakMatrix, spectraList, precursorMz, prec } } - ## out - #print(paste("Parsing file ", filePeakMatrix, " finished", sep = "")) - #print(paste("Number of precursors:", numberOfPrecursors)) - #if(parameterSet$doPrecursorDeisotoping){ - # print(paste("Number of isotope peaks removed:", numberOfRemovedPrecursorIsotopePeaks, "/", numberOfPrecursorsPrior, "=", round(x = numberOfRemovedPrecursorIsotopePeaks / numberOfPrecursorsPrior * 100, digits = 1), "%")) - #} - ## remove redundant MS1 features precursorLabels <- paste(dataFrame$"Average Mz", dataFrame$"Average Rt(min)", sep = " / ") dupplicated <- which(duplicated(precursorLabels)) @@ -3404,21 +1573,6 @@ convertToProjectFile2 <- function(filePeakMatrix, spectraList, precursorMz, prec if(!is.na(progress)) if(progress) incProgress(amount = 0.1, detail = "Postprocessing matrix...") else print("Postprocessing matrix...") - ######################################## - ## map spectra to significant precursors by m/z and RT - #assignment <- unlist(lapply(X = seq_along(precursorMz), FUN = function(i){ - # mz <- precursorMz[[i]] - # indeces <- which(abs(mz-dataFrame$"Average Mz") <= parameterSet$mzDeviationAbsolute_mapping) - # if(length(indeces) == 0) - # return(NA) - # if(length(indeces) == 1) - # return(indeces) - # rt <- precursorRt[[i]] - # absoluteDifferences <- abs(dataFrame$"Average Rt(min)"[indeces] - rt) - # bestIdx <- which.min(absoluteDifferences) - # return(indeces[[bestIdx]]) - #})) - ## order rows by precursor m/z and order columns by fragment group m/z (needed for deisotoping) orderMS1features <- order(precursorMz) @@ -3442,31 +1596,28 @@ convertToProjectFile2 <- function(filePeakMatrix, spectraList, precursorMz, prec numberOfUnmappedPrecursorsMz <- 0 numberOfUnmappedPrecursorsRt <- 0 - #bestHits <- as.integer(rep(x = NA, times = numberOfPrecursors)) for(i in seq_len(numberOfPrecursors)){ numberOfItems <- length(allHits[[i]]) if(numberOfItems == 1) if(is.na(allHits[[i]])) numberOfItems <- 0 - - if(numberOfItems == 0){ ## no hit + + if(numberOfItems == 0){ ## no hit + allHits[[i]] <- NA + numberOfUnmappedPrecursorsMz <- numberOfUnmappedPrecursorsMz + 1 + } + else{ ## take hit with minimum absolute RT difference + absoluteDifferences <- abs(dataFrame$"Average Rt(min)"[i] - precursorRt[allHits[[i]]]) + bestIdx <- which.min(absoluteDifferences) + if(length(bestIdx) > 1) + bestIdx <- bestIdx[[1]] + if(absoluteDifferences[[bestIdx]] <= parameterSet$maximumRtDifference){ + allHits[[i]] <- allHits[[i]][[bestIdx]] + } else { allHits[[i]] <- NA - numberOfUnmappedPrecursorsMz <- numberOfUnmappedPrecursorsMz + 1 + numberOfUnmappedPrecursorsRt <- numberOfUnmappedPrecursorsRt + 1 } - else{ ## take hit with minimum absolute RT difference - absoluteDifferences <- abs(dataFrame$"Average Rt(min)"[i] - precursorRt[allHits[[i]]]) - bestIdx <- which.min(absoluteDifferences) - if(length(bestIdx) > 1) - bestIdx <- bestIdx[[1]] - if(absoluteDifferences[[bestIdx]] <= parameterSet$maximumRtDifference){ - allHits[[i]] <- allHits[[i]][[bestIdx]] - } else { - allHits[[i]] <- NA - numberOfUnmappedPrecursorsRt <- numberOfUnmappedPrecursorsRt + 1 - } - #allHits[[i]] <- allHits[[i]][[1]] - } - #print(allHits[[i]]) + } } ## apply hitsTempAll <- unlist(allHits) @@ -3477,8 +1628,6 @@ convertToProjectFile2 <- function(filePeakMatrix, spectraList, precursorMz, prec spectraList <- spectraList[hitsTempAll] numberOfSpectra <- length(spectraList) ## sub set - #precursorMzAll <- precursorMz[hitsTempAll] - #precursorRtAll <- precursorRt[hitsTempAll] metaboliteFamiliesAll <- metaboliteFamilies[hitsTempAll] furtherPropertiesAll <- lapply(X = furtherProperties, FUN = function(props){props[hitsTempAll]}) @@ -3490,12 +1639,9 @@ convertToProjectFile2 <- function(filePeakMatrix, spectraList, precursorMz, prec #################################################################################### ## built matrix - #print("") - #print("building matrix...") - if(!is.na(progress)) if(progress) incProgress(amount = 0.01, detail = paste("Building fragment groups...", sep = "")) else print(paste("Building fragment groups...", sep = "")) + if(!is.na(progress)) if(progress) incProgress(amount = 0.01, detail = paste("Building fragment mzFragmentGroups...", sep = "")) else print(paste("Building fragment mzFragmentGroups...", sep = "")) if(isGC){ - ##################################### ## round GC mzs to integer for(spectrumIdx in seq_len(length(spectraList))){ spectraList[[spectrumIdx]]$ms2Peaks_mz <- round(x = spectraList[[spectrumIdx]]$ms2Peaks_mz, digits = 0) @@ -3503,7 +1649,6 @@ convertToProjectFile2 <- function(filePeakMatrix, spectraList, precursorMz, prec } returnObj <- builtMatrix( - #returnObj <- builtMatrixOld( spectraList = spectraList, mzDeviationAbsolute_grouping = parameterSet$mzDeviationAbsolute_grouping, mzDeviationInPPM_grouping = parameterSet$mzDeviationInPPM_grouping, @@ -3517,25 +1662,16 @@ convertToProjectFile2 <- function(filePeakMatrix, spectraList, precursorMz, prec fragmentMasses <- returnObj$fragmentMasses numberOfMS2PeakGroups <- returnObj$numberOfMS2PeakGroups - if(!is.na(progress)) if(progress) incProgress(amount = 0.01, detail = paste("Building fragment groups ready", sep = "")) else print(paste("Building fragment groups ready", sep = "")) + if(!is.na(progress)) if(progress) incProgress(amount = 0.01, detail = paste("Building fragment mzFragmentGroups ready", sep = "")) else print(paste("Building fragment mzFragmentGroups ready", sep = "")) orderTempCol <- order(fragmentMasses) matrix <- matrix[, orderTempCol] fragmentMasses <- fragmentMasses[orderTempCol] - ## out - #print("building matrix finished") - #print(paste("Number of processed items: ", returnObj$numberOfMS2Peaks)) - #print(paste("Number of collisions within MS2 spectra:", returnObj$numberOfCollisions, "/", returnObj$numberOfMS2Peaks, "=", round(x = returnObj$numberOfCollisions / returnObj$numberOfMS2Peaks * 100, digits = 1), "%")) - #if(parameterSet$doMs2PeakGroupDeisotoping){ - # print(paste("Number of removed MS2 peaks:", returnObj$numberOfRemovedMS2IsotopePeaks, "/", returnObj$numberOfMS2PeaksPrior, "=", round(x = returnObj$numberOfRemovedMS2IsotopePeaks / returnObj$numberOfMS2PeaksPrior * 100, digits = 1), "%")) - # print(paste("Number of completely removed MS2 peak groups:", returnObj$numberOfRemovedMS2PeakGroupIsotopeColumns, "/", returnObj$numberOfMS2PeakGroupsPrior, "=", round(x = returnObj$numberOfRemovedMS2PeakGroupIsotopeColumns / returnObj$numberOfMS2PeakGroupsPrior * 100, digits = 1), "%")) - #} - rm(returnObj) ######################################## - ## filter empty fragment groups + ## filter empty fragment mzFragmentGroups numberOfFragments <- length(fragmentMasses) fragmentGroupNonEmpty <- vector(mode = "logical", length = numberOfFragments) for(colIdx in seq_len(numberOfFragments)) @@ -3575,7 +1711,6 @@ convertToProjectFile2 <- function(filePeakMatrix, spectraList, precursorMz, prec colIdx <- min(which(fragmentMasses > 0)) numberOfColumns <- length(fragmentMasses) - #matrix <- cbind(matrix[, colIdx:length(fragmentMzAll)], matrix[, 1:(colIdx - 1)]) matrixCols[matrixCols < colIdx] <- matrixCols[matrixCols < colIdx] + numberOfColumns matrixCols <- matrixCols - (colIdx - 1) @@ -3585,14 +1720,10 @@ convertToProjectFile2 <- function(filePeakMatrix, spectraList, precursorMz, prec numberOfAnnotationColumns <- ncol(dataFrame) rowOrder <- order(precursorMzAll) - #print("postprocessing matrix finished") - #################################################################################### ## matrix assembly with additional columns and column head if(!is.na(progress)) if(progress) incProgress(amount = 0.1, detail = "Boxing...") else print("Boxing...") - #print("") - #print("Boxing...") ## additional rows stuff matrixRows <- as.integer(matrixRows) @@ -3610,7 +1741,6 @@ convertToProjectFile2 <- function(filePeakMatrix, spectraList, precursorMz, prec ## add stuff numberOfRows <- length(precursorMzAll) - #numberOfPrimaryAnnotationColumns <- 4 numberOfPrimaryAnnotationColumns <- 3 columnOffset <- numberOfPrimaryAnnotationColumns + numberOfAnnotationColumns# + length(furtherPropertiesAll) matrixCols <- matrixCols + columnOffset @@ -3619,49 +1749,33 @@ convertToProjectFile2 <- function(filePeakMatrix, spectraList, precursorMz, prec ## additional columns ## precursor m/z - #matrix <- cbind(precursorMzAll, matrix) matrixRows <- c(matrixRows, seq_len(numberOfRows)) matrixCols <- c(matrixCols, rep(x = 1, times = numberOfRows)) matrixVals <- c(matrixVals, precursorMzAll) ## precursor RT - #matrix <- cbind(precursorRtAll, matrix) matrixRows <- c(matrixRows, seq_len(numberOfRows)) matrixCols <- c(matrixCols, rep(x = 2, times = numberOfRows)) matrixVals <- c(matrixVals, precursorRtAll) ## annotation column - #matrix <- cbind(precursorRtAll, matrix) matrixRows <- c(matrixRows, seq_len(numberOfRows)) matrixCols <- c(matrixCols, rep(x = 3, times = numberOfRows)) - #matrixVals <- c(matrixVals, rep(x = "", times = numberOfPrecursors)) matrixVals <- c(matrixVals, metaboliteFamiliesAll) ## all present stuff - #matrix <- cbind(dataFrameSignificants[, 1:numberOfAnnotationColumns], matrix) for(colIdx in seq_len(numberOfAnnotationColumns)){ matrixRows <- c(matrixRows, seq_len(numberOfRows)) matrixCols <- c(matrixCols, rep(x = numberOfPrimaryAnnotationColumns + colIdx, times = numberOfRows)) matrixVals <- c(matrixVals, dataFrame[, colIdx]) } - #for(colIdx in seq_along(furtherPropertiesAll)){ - # matrixRows <- c(matrixRows, seq_len(numberOfRows)) - # matrixCols <- c(matrixCols, rep(x = numberOfPrimaryAnnotationColumns + numberOfAnnotationColumns + colIdx, times = numberOfRows)) - # matrixVals <- c(matrixVals, furtherPropertiesAll[[colIdx]]) - #} - ## sort rows rowOrderReverse <- vector(mode = "numeric", length = numberOfRows) for(i in seq_len(numberOfRows)) rowOrderReverse[[i]] <- which(rowOrder == i) matrixRows <- rowOrderReverse[matrixRows] - # newMatrixRows <- vector(mode = "numeric", length = length(matrixRows)) - # for(i in 1:length(matrixRows)) - # newMatrixRows[[i]] <- which(rowOrder == matrixRows[[i]]) - # matrixRows <- newMatrixRows - ###################################### ## additional rows numberOfColumns <- numberOfFragments + columnOffset @@ -3669,7 +1783,6 @@ convertToProjectFile2 <- function(filePeakMatrix, spectraList, precursorMz, prec ################## ## additional row: head headRow <- c("m/z", "RT", "Annotation", names(dataFrame)[seq_len(numberOfAnnotationColumns)], fragmentMasses) - #matrix <- rbind(headRow, matrix) matrixRows <- matrixRows + 1 matrixRows <- c(matrixRows, rep(x = 1, times = numberOfColumns)) @@ -3704,7 +1817,6 @@ convertToProjectFile2 <- function(filePeakMatrix, spectraList, precursorMz, prec matrixRows <- c(matrixRows, rep(x = 1, times = 3 + numberOfDataColumns)) matrixCols <- c(matrixCols, 1, 2, 3, dataColumns) - #matrixVals <- c(matrixVals, "ID", "ID", "AnnotationColors={}", groupLabels) matrixVals <- c(matrixVals, "ID", "ID", annotationColorsFieldValue, groupLabels) ################## diff --git a/R/Plots.R b/R/Plots.R index ef3f8aa..7d57eea 100644 --- a/R/Plots.R +++ b/R/Plots.R @@ -311,14 +311,35 @@ calcPlotDendrogram <- function(dataList, filter, clusterDataList, annoPresentAnn } +#' Title +#' +#' @param dataList +#' @param filterObj +#' @param clusterDataList +#' @param distanceMeasure +#' @param showClusterLabels +#' @param hcaPrecursorLabels +#' @param selectionFragmentTreeNodeSet +#' @param selectionAnalysisTreeNodeSet +#' @param selectionSearchTreeNodeSet +#' @param selectedSelection +#' @param heatmapContent +#' @param heatmapOrdering +#' @param heatmapProportion +#' +#' @return +#' @importFrom grDevices colorRampPalette rainbow rgb +#' @export +#' +#' @examples calcPlotDendrogram_plotly <- function( dataList, filterObj, clusterDataList, #annoPresentAnnotationsList, annoPresentColorsList, distanceMeasure, showClusterLabels, hcaPrecursorLabels, selectionFragmentTreeNodeSet = NULL, selectionAnalysisTreeNodeSet = NULL, selectionSearchTreeNodeSet = NULL, - selectedSelection, heatmapContent, heatmapOrdering, heatmapProportion -){ + selectedSelection, heatmapContent, heatmapOrdering, heatmapProportion) + { if(FALSE){ dataList_ <<- dataList @@ -613,35 +634,35 @@ calcPlotDendrogram_plotly <- function( ## heatmap ## heatmap data - groups <- dataList$groups - print(groups) + grouXXXps <- dataList$grouXXXps + print(grouXXXps) print("enetring this area ...plots.r..line 611") switch(heatmapContent, "Log-fold-change"={## log-fold-change columnsOfInterest <- c( - dataList$dataMeanColumnNameFunctionFromName(filterObj$groups[[1]]), - dataList$dataMeanColumnNameFunctionFromName(filterObj$groups[[2]]), - dataList$lfcColumnNameFunctionFromName(filterObj$groups[[1]], filterObj$groups[[2]]) + dataList$dataMeanColumnNameFunctionFromName(filterObj$grouXXXps[[1]]), + dataList$dataMeanColumnNameFunctionFromName(filterObj$grouXXXps[[2]]), + dataList$lfcColumnNameFunctionFromName(filterObj$grouXXXps[[1]], filterObj$grouXXXps[[2]]) ) columnsOfInterestAbs <- columnsOfInterest[1:2] columnsOfInterestLFC <- columnsOfInterest[3] - labels = c(filterObj$groups[[1]], filterObj$groups[[2]], "LFC") + labels = c(filterObj$grouXXXps[[1]], filterObj$grouXXXps[[2]], "LFC") labelsAbs <- labels[1:2] labelsLFC <- labels[3] }, - "Abundance by group"={## groups - columnsOfInterest <- unlist(lapply(X = groups, FUN = function(x){ + "Abundance by group"={## grouXXXps + columnsOfInterest <- unlist(lapply(X = grouXXXps, FUN = function(x){ dataList$dataMeanColumnNameFunctionFromName(x) })) columnsOfInterest <- rev(columnsOfInterest) ## plot is bottom to top columnsOfInterestAbs <- columnsOfInterest columnsOfInterestLFC <- NULL - labels <- groups + labels <- grouXXXps labelsAbs <- labels[1:2] labelsLFC <- NULL }, "Abundance by sample"={## samples - columnsOfInterest <- dataList$dataColumnsNameFunctionFromGroupNames(groups = groups, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) + columnsOfInterest <- dataList$dataColumnsNameFunctionFromGroupNames(grouXXXps = grouXXXps, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) columnsOfInterest <- dataList$orderColumnNames(groupSampleDataFrame = dataList$groupSampleDataFrame, columnNames = columnsOfInterest) columnsOfInterest <- rev(columnsOfInterest) ## plot is bottom to top columnsOfInterestAbs <- columnsOfInterest @@ -1204,25 +1225,25 @@ calcPlotHeatmap <- function(dataList, filterObj, clusterDataList, selectedTreeNo #################### ## heatmap - groups <- dataList$groups + grouXXXps <- dataList$grouXXXps switch(heatmapContent, "Log-fold-change"={## log-fold-change columnsOfInterest <- c( - dataList$dataMeanColumnNameFunctionFromName(filterObj$groups[[1]]), - dataList$dataMeanColumnNameFunctionFromName(filterObj$groups[[2]]), - dataList$lfcColumnNameFunctionFromName(filterObj$groups[[1]], filterObj$groups[[2]]) + dataList$dataMeanColumnNameFunctionFromName(filterObj$grouXXXps[[1]]), + dataList$dataMeanColumnNameFunctionFromName(filterObj$grouXXXps[[2]]), + dataList$lfcColumnNameFunctionFromName(filterObj$grouXXXps[[1]], filterObj$grouXXXps[[2]]) ) - labels = c(filterObj$groups[[1]], filterObj$groups[[2]], "LFC") + labels = c(filterObj$grouXXXps[[1]], filterObj$grouXXXps[[2]], "LFC") }, - "Abundance by group"={## groups - columnsOfInterest <- unlist(lapply(X = groups, FUN = function(x){ + "Abundance by group"={## grouXXXps + columnsOfInterest <- unlist(lapply(X = grouXXXps, FUN = function(x){ dataList$dataMeanColumnNameFunctionFromName(x) })) #columnsOfInterest <- rev(columnsOfInterest) ## plot is bottom to top - labels <- groups + labels <- grouXXXps }, "Abundance by sample"={## samples - columnsOfInterest <- dataList$dataColumnsNameFunctionFromGroupNames(groups = groups, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) + columnsOfInterest <- dataList$dataColumnsNameFunctionFromGroupNames(grouXXXps = grouXXXps, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) columnsOfInterest <- dataList$orderColumnNames(groupSampleDataFrame = dataList$groupSampleDataFrame, columnNames = columnsOfInterest) #columnsOfInterest <- rev(columnsOfInterest) ## plot is bottom to top ### this is original labels @@ -1436,8 +1457,8 @@ calcPlotHeatmapOld <- function(dataList, filterObj, clusterDataList, xInterval = #################### ## heatmap columnsOfInterest <- c( - dataList$dataMeanColumnNameFunctionFromName(filterObj$groups[[1]]), dataList$dataMeanColumnNameFunctionFromName(filterObj$groups[[2]]), - dataList$lfcColumnNameFunctionFromName(filterObj$groups[[1]], filterObj$groups[[2]]) + dataList$dataMeanColumnNameFunctionFromName(filterObj$grouXXXps[[1]]), dataList$dataMeanColumnNameFunctionFromName(filterObj$grouXXXps[[2]]), + dataList$lfcColumnNameFunctionFromName(filterObj$grouXXXps[[1]], filterObj$grouXXXps[[2]]) ) par(mar=c(0,4,0,0), mgp = c(3, 1, 0)) ## c(bottom, left, top, right) ## c(title, axis, label) @@ -1464,7 +1485,7 @@ calcPlotHeatmapOld <- function(dataList, filterObj, clusterDataList, xInterval = # rect(xleft = i - 0.5, xright = i + 0.5, ybottom = 1, ytop = 2, col = colorOne[[i]], border = NA) # rect(xleft = i - 0.5, xright = i + 0.5, ybottom = 0, ytop = 1, col = colorTwo[[i]], border = NA) #} - axis(side = 2, at = c(0.5, 1.5, 2.5), labels = c(filterObj$groups[[2]], filterObj$groups[[1]], "LFC"), las = 2, tick = TRUE) + axis(side = 2, at = c(0.5, 1.5, 2.5), labels = c(filterObj$grouXXXps[[2]], filterObj$grouXXXps[[1]], "LFC"), las = 2, tick = TRUE) } return(columnsOfInterest) @@ -1540,9 +1561,9 @@ calcPlotAnnoLegend1 <- function(annoLabels, annoColors){ } -calcPlotScoresGroupsLegend <- function(groups, colors){ +calcPlotScoresGroupsLegend <- function(grouXXXps, colors){ ## get and reorder annotations - calcPlotLegend(groups, colors, "Scores") + calcPlotLegend(grouXXXps, colors, "Scores") } @@ -1609,14 +1630,14 @@ calcPlotAnnoLegendForImage1 <- function(annoLabels, annoColors, maximumNumberOfL ################# -calcPlotScoresGroupsLegendForImage <- function(groups, colors, maximumNumberOfLines=20){ +calcPlotScoresGroupsLegendForImage <- function(grouXXXps, colors, maximumNumberOfLines=20){ ## get and reorder annotations - calcPlotLegendForImage(groups, colors, "Scores", maximumNumberOfLines) + calcPlotLegendForImage(grouXXXps, colors, "Scores", maximumNumberOfLines) } #### I am adding this new -calcPlotScoresGroupsLegendForImage1 <- function(groups, colors, maximumNumberOfLines=30){ +calcPlotScoresGroupsLegendForImage1 <- function(grouXXXps, colors, maximumNumberOfLines=30){ ## get and reorder annotations - calcPlotLegendForImage1(groups, colors, "Scores", maximumNumberOfLines) + calcPlotLegendForImage1(grouXXXps, colors, "Scores", maximumNumberOfLines) } ########### @@ -1750,14 +1771,14 @@ plotLegendWithBalls1 <- function(labels, xPositions, yPositions, circleXPosition palette <- colorPaletteScores() # if(filterObj$filterBySamples){ - # colorsForReplicates <- palette[unlist(lapply(X = filterObj$groups, FUN = function(x){ + # colorsForReplicates <- palette[unlist(lapply(X = filterObj$grouXXXps, FUN = function(x){ # groupIdx <- dataList$groupIdxFromGroupName(x) # samples <- dataList$dataColumnsNameFunctionFromGroupName(x, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) # samples <- intersect(samples, filterObj$sampleSet) # rep(x = groupIdx, times = length(samples)) # }))] # } else { - # colorsForReplicates <- palette[unlist(lapply(X = filterObj$groups, FUN = function(x){ + # colorsForReplicates <- palette[unlist(lapply(X = filterObj$grouXXXps, FUN = function(x){ # groupIdx <- dataList$groupIdxFromGroupName(x) # rep(x = groupIdx, times = length(dataList$dataColumnsNameFunctionFromGroupName(x, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)))) # }))] @@ -1894,6 +1915,16 @@ calcPlotDiscriminativityLegend <- function(){ #graphics::text(x = xPositions[2:length(xPositions)], y = yPositions[2:length(yPositions)], labels = labels[2:length(labels)], pos = 4, adj = 1) } + +#' Title +#' +#' @param dataList +#' +#' @return +#' @export +#' @importsFrom grDevices as.raster +#' +#' @examples calcPlotHeatmapLegend <- function(dataList){ #################### ## heatmap legend @@ -2211,14 +2242,14 @@ calcPlotPCAscores <- function(pcaObj, dataList, filterObj, pcaDimensionOne, pcaD palette <- colorPaletteScores() if(filterObj$filterBySamples){ - colorsForReplicates <- palette[unlist(lapply(X = filterObj$groups, FUN = function(x){ + colorsForReplicates <- palette[unlist(lapply(X = filterObj$grouXXXps, FUN = function(x){ groupIdx <- dataList$groupIdxFromGroupName(x) samples <- dataList$dataColumnsNameFunctionFromGroupName(x, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) samples <- intersect(samples, filterObj$sampleSet) rep(x = groupIdx, times = length(samples)) }))] } else { - colorsForReplicates <- palette[unlist(lapply(X = filterObj$groups, FUN = function(x){ + colorsForReplicates <- palette[unlist(lapply(X = filterObj$grouXXXps, FUN = function(x){ groupIdx <- dataList$groupIdxFromGroupName(x) rep(x = groupIdx, times = length(dataList$dataColumnsNameFunctionFromGroupName(x, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)))) }))] @@ -2273,10 +2304,10 @@ calcPlotPCAscores <- function(pcaObj, dataList, filterObj, pcaDimensionOne, pcaD if(showScoresLabels){ if(filterObj$filterBySamples){ - labels <- dataList$dataColumnsNameFunctionFromGroupNames(filterObj$groups, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) + labels <- dataList$dataColumnsNameFunctionFromGroupNames(filterObj$grouXXXps, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) labels <- intersect(labels, filterObj$sampleSet) } else { - labels <- dataList$dataColumnsNameFunctionFromGroupNames(filterObj$groups, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) + labels <- dataList$dataColumnsNameFunctionFromGroupNames(filterObj$grouXXXps, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) } graphics::text(x = dataDimOne, y = dataDimTwo, labels = labels, pos = 4) } @@ -2290,14 +2321,14 @@ calcPlotPCAscores1 <- function(pcaObj, dataList, filterObj, pcaDimensionOne, pca #### add this new line ######## if(filterObj$filterBySamples){ - colorsForReplicates <- palette[unlist(lapply(X = filterObj$groups, FUN = function(x){ + colorsForReplicates <- palette[unlist(lapply(X = filterObj$grouXXXps, FUN = function(x){ groupIdx <- dataList$groupIdxFromGroupName(x) samples <- dataList$dataColumnsNameFunctionFromGroupName(x, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) samples <- intersect(samples, filterObj$sampleSet) rep(x = groupIdx, times = length(samples)) }))] - symbolsforreplicates<-unlist(lapply(X = filterObj$groups, FUN = function(x){ + symbolsforreplicates<-unlist(lapply(X = filterObj$grouXXXps, FUN = function(x){ groupIdx <- dataList$groupIdxFromGroupName(x) samples <- dataList$dataColumnsNameFunctionFromGroupName(x, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) samples <- intersect(samples, filterObj$sampleSet) @@ -2305,12 +2336,12 @@ calcPlotPCAscores1 <- function(pcaObj, dataList, filterObj, pcaDimensionOne, pca })) } else { - colorsForReplicates <- palette[unlist(lapply(X = filterObj$groups, FUN = function(x){ + colorsForReplicates <- palette[unlist(lapply(X = filterObj$grouXXXps, FUN = function(x){ groupIdx <- dataList$groupIdxFromGroupName(x) rep(x = groupIdx, times = length(dataList$dataColumnsNameFunctionFromGroupName(x, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)))) }))] - symbolsforreplicates<-unlist(lapply(X = filterObj$groups, FUN = function(x){ + symbolsforreplicates<-unlist(lapply(X = filterObj$grouXXXps, FUN = function(x){ groupIdx <- dataList$groupIdxFromGroupName(x) rep(x = groupIdx, times = length(dataList$dataColumnsNameFunctionFromGroupName(x, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)))) })) @@ -2367,10 +2398,10 @@ calcPlotPCAscores1 <- function(pcaObj, dataList, filterObj, pcaDimensionOne, pca if(showScoresLabels){ if(filterObj$filterBySamples){ - labels <- dataList$dataColumnsNameFunctionFromGroupNames(filterObj$groups, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) + labels <- dataList$dataColumnsNameFunctionFromGroupNames(filterObj$grouXXXps, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) labels <- intersect(labels, filterObj$sampleSet) } else { - labels <- dataList$dataColumnsNameFunctionFromGroupNames(filterObj$groups, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) + labels <- dataList$dataColumnsNameFunctionFromGroupNames(filterObj$grouXXXps, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) } ### changing the pos= 3 from 4 and will see what happens graphics::text(x = dataDimOne, y = dataDimTwo, labels = labels, pos = 2) @@ -2383,14 +2414,14 @@ calcPlotPCAscores2 <- function(pcaObj, dataList, filterObj, pcaDimensionOne, pca palette <- colorPaletteScores() #dev.new(width=15,height=15) if(filterObj$filterBySamples){ - colorsForReplicates <- palette[unlist(lapply(X = filterObj$groups, FUN = function(x){ + colorsForReplicates <- palette[unlist(lapply(X = filterObj$grouXXXps, FUN = function(x){ groupIdx <- dataList$groupIdxFromGroupName(x) samples <- dataList$dataColumnsNameFunctionFromGroupName(x, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) samples <- intersect(samples, filterObj$sampleSet) rep(x = groupIdx, times = length(samples)) }))] } else { - colorsForReplicates <- palette[unlist(lapply(X = filterObj$groups, FUN = function(x){ + colorsForReplicates <- palette[unlist(lapply(X = filterObj$grouXXXps, FUN = function(x){ groupIdx <- dataList$groupIdxFromGroupName(x) rep(x = groupIdx, times = length(dataList$dataColumnsNameFunctionFromGroupName(x, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)))) }))] @@ -2438,10 +2469,10 @@ calcPlotPCAscores2 <- function(pcaObj, dataList, filterObj, pcaDimensionOne, pca xlim = xInterval, ylim = yInterval, xlab = xAxisLabel, ylab = yAxisLabel, main = "Scores", #### this is my blind test changing from 19 to 19:22 - #ntemp<-length(filterObj$groups), + #ntemp<-length(filterObj$grouXXXps), #ntemp1<-sum(ntemp,1), - ### I am testing the filterObj$groups... This is fine .. will test the - col = colorsForReplicates, pch=1:length(filterObj$groups),lty=1,lwd=2,cex=1. + ### I am testing the filterObj$grouXXXps... This is fine .. will test the + col = colorsForReplicates, pch=1:length(filterObj$grouXXXps),lty=1,lwd=2,cex=1. ### checking the colorsForReplicates ) @@ -2463,10 +2494,10 @@ calcPlotPCAscores2 <- function(pcaObj, dataList, filterObj, pcaDimensionOne, pca if(showScoresLabels){ if(filterObj$filterBySamples){ - labels <- dataList$dataColumnsNameFunctionFromGroupNames(filterObj$groups, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) + labels <- dataList$dataColumnsNameFunctionFromGroupNames(filterObj$grouXXXps, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) labels <- intersect(labels, filterObj$sampleSet) } else { - labels <- dataList$dataColumnsNameFunctionFromGroupNames(filterObj$groups, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) + labels <- dataList$dataColumnsNameFunctionFromGroupNames(filterObj$grouXXXps, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) } graphics::text(x = dataDimOne, y = dataDimTwo, labels = labels, pos = 4) } @@ -2521,7 +2552,7 @@ calcPlotPCAloadings <- function( ################################################### ########## this is new line I added################ TNF<-pcaObj$filterObj$filter_averageOriginal - TNF1<-apply(as.data.frame(dataList$dataFrameMeasurements[, sapply(X = as.vector(pcaObj$filterObj$groups), FUN = dataList$dataMeanColumnNameFunctionFromName)]),MARGIN = 1,FUN = mean) >= as.double(TNF) + TNF1<-apply(as.data.frame(dataList$dataFrameMeasurements[, sapply(X = as.vector(pcaObj$filterObj$grouXXXps), FUN = dataList$dataMeanColumnNameFunctionFromName)]),MARGIN = 1,FUN = mean) >= as.double(TNF) TNF2<-names(TNF1)[unname(TNF1)] TNF3<-trimws(TNF2) TNF4<-str_squish(TNF3) @@ -3058,7 +3089,7 @@ calcPlotPCAloadings <- function( # ########## this is new line I added################ # TNF<-pcaObj$filterObj$filter_averageOriginal # print("enter the line ...line 2524") -# TNF1<-apply(as.data.frame(dataList$dataFrameMeasurements[, sapply(X = as.vector(pcaObj$filterObj$groups), FUN = dataList$dataMeanColumnNameFunctionFromName)]),MARGIN = 1,FUN = mean) >= as.double(TNF) +# TNF1<-apply(as.data.frame(dataList$dataFrameMeasurements[, sapply(X = as.vector(pcaObj$filterObj$grouXXXps), FUN = dataList$dataMeanColumnNameFunctionFromName)]),MARGIN = 1,FUN = mean) >= as.double(TNF) # TNF2<-names(TNF1)[unname(TNF1)] # TNF3<-trimws(TNF2) # TNF4<-str_squish(TNF3) @@ -3575,7 +3606,7 @@ calcPlotPCAloadings <- function( # ############################## # ########## this is new line I added # TNF<-pcaObj$filterObj$filter_averageOriginal -# TNF1<-apply(as.data.frame(dataList$dataFrameMeasurements[, sapply(X = as.vector(pcaObj$filterObj$groups), FUN = dataList$dataMeanColumnNameFunctionFromName)]),MARGIN = 1,FUN = mean) >= as.double(TNF) +# TNF1<-apply(as.data.frame(dataList$dataFrameMeasurements[, sapply(X = as.vector(pcaObj$filterObj$grouXXXps), FUN = dataList$dataMeanColumnNameFunctionFromName)]),MARGIN = 1,FUN = mean) >= as.double(TNF) # TNF2<-names(TNF1)[unname(TNF1)] # TNF3<-trimws(TNF2) # TNF4<-str_squish(TNF3) @@ -4505,7 +4536,7 @@ calcPlotPCAloadings2 <- function( par(mar=c(3 + 0.5125, 3+0.125, 2, 1), mgp = c(2, 1, 0)) ## c(bottom, left, top, right) #plot(x = dataDimOne, y = dataDimTwo, xlim = xInterval, ylim = yInterval, xlab = xAxisLabel, ylab = yAxisLabel, main = "Loadings", pch=19, cex=0.7, col = nodeColors) plot(x = NULL, y = NULL, xlim = xInterval, ylim = yInterval, xlab = xAxisLabel, ylab = yAxisLabel, main = "Loadings") - ### changing this to 22 and will see what happens...1:length(filterObj$groups),lty=1,lwd=2 + ### changing this to 22 and will see what happens...1:length(filterObj$grouXXXps),lty=1,lwd=2 points(x = poisXpoints, y = poisYpoints, col = pointColors, pch=19, cex=pointSizes) ## axis diff --git a/R/R_packages.R b/R/R_packages.R index e983f3e..9926f7f 100644 --- a/R/R_packages.R +++ b/R/R_packages.R @@ -1,82 +1,46 @@ - -############################################################################################################## -## GUI -#install.packages("shiny") -library("shiny") -#devtools::install_github("rstudio/htmltools") -library("htmltools") -#install.packages("shinyjs") -library("shinyjs") -#install.packages("DT") -library("DT") -#install.packages("colourpicker") -library("colourpicker") -#install.packages("shinyBS") -library("shinyBS") -#install.packages("shinybusy") -library("shinybusy") -#install.packages("egg") -library(egg) -#install.packages("cowplot") -#library(cowplot) # not available at cran 2023-11-21 anymore -############################################################################################################## -## mass spectrometry -#sudo apt-get install libnetcdf-dev -#source("https://bioconductor.org/biocLite.R") -#biocLite("mzR") -library("mzR") -#biocLite("xmcs") -library("xcms") - - -############################################################################################################## -## MS1 analyses -#install.packages("FactoMineR") -library("FactoMineR") -#install.packages("mixOmics") -library("mixOmics") -#source("http://bioconductor.org/biocLite.R") -#biocLite("pcaMethods") -library("pcaMethods") -#install.packages("caret") -#library("caret") -################### -#install_github( "decisionpatterns/searchable" ) -library(searchable) -#install.packages("gdata") -library(gdata) -############################################################################################################## -## tools -#install.packages("matrixStats") -library("matrixStats") -library("Matrix") -library("tools") -#install.packages("stringi") -library("stringr") -#install_github('rCharts', 'ramnathv') -#library("rCharts") -#install.packages("shinydashboard") -#library("shinydashboard") -#install.packages("slam") -library("slam") - -############################################################################################################## -## pdf report -#install.packages("knitr") -library("knitr") -#install.packages("rmarkdown") -#library("rmarkdown") - -############################################################################################################## -## plot -#install.packages("cba") -library("cba") -#install.packages("squash") -library("squash") -#install.packages("plotrix") -library("plotrix") -#install.packages("plotly") -library("plotly") -#install.packages("RColorBrewer") -library("RColorBrewer") - + +load_metfamily_dependencies <- function() +{ + ############################################################################################################## + ## GUI + library("shiny") + library("htmltools") + library("shinyjs") + library("DT") + library("colourpicker") + library("shinyBS") + library("shinybusy") + library(egg) + + ############################################################################################################## + ## mass spectrometry + library("mzR") + library("xcms") + + ############################################################################################################## + ## MS1 analyses + library("FactoMineR") + library("mixOmics") + library("pcaMethods") + library(searchable) + library(gdata) + ############################################################################################################## + ## tools + library("matrixStats") + library("Matrix") + library("tools") + library("stringr") + library("slam") + + ############################################################################################################## + ## pdf report + library("knitr") + + ############################################################################################################## + ## plot + library("cba") + library("squash") + library("plotrix") + library("plotly") + library("RColorBrewer") +} diff --git a/inst/MetFamily/app_files/server_functionsDownloads.R b/inst/MetFamily/app_files/server_functionsDownloads.R index c0d609e..a330b82 100644 --- a/inst/MetFamily/app_files/server_functionsDownloads.R +++ b/inst/MetFamily/app_files/server_functionsDownloads.R @@ -137,106 +137,17 @@ createExportMatrix <- function(precursorSet){ return(lines) } -createExportMatrixOld <- function(precursorSet){ - numberOfRows <- length(precursorSet) - numberOfColumns <- ncol(dataList$featureMatrix) - - ########################################################### - ## built reduced MS2 matrix - fragmentMatrix <- dataList$featureMatrix[precursorSet, ] - fragmentCounts <- apply(X = fragmentMatrix, MARGIN = 2, FUN = function(x){ sum(x != 0) }) - fragmentIntensities <- apply(X = fragmentMatrix, MARGIN = 2, FUN = function(x){ sum(x) }) / fragmentCounts - fragmentMasses <- dataList$fragmentMasses - - fragmentSelection <- fragmentCounts != 0 - - fragmentMatrix <- fragmentMatrix[, fragmentSelection] - fragmentCounts <- fragmentCounts[fragmentSelection] - fragmentIntensities <- fragmentIntensities[fragmentSelection] - fragmentMasses <- fragmentMasses[fragmentSelection] - - ## fragment matrix - dgTMatrix <- as(fragmentMatrix, "dgTMatrix") - matrixRows <- dgTMatrix@i + 1 - matrixCols <- dgTMatrix@j + 1 - matrixVals <- dgTMatrix@x - - numberOfColumns2 <- ncol(fragmentMatrix) - - fragmentMatrix <- matrix(data = rep(x = "", times = numberOfRows * numberOfColumns2), nrow = numberOfRows, ncol = numberOfColumns2) - fragmentMatrix[cbind(matrixRows, matrixCols)] <- matrixVals - - ## box - ms2Matrix <- rbind( - fragmentCounts, - fragmentIntensities, - fragmentMasses, - fragmentMatrix - ) - - ########################################################### - ## built MS1 matrix - ms1Matrix <- rbind( - dataList$dataFrameMS1Header, - dataList$dataFrameInfos[precursorSet, ] - ) - ms1Matrix <- as.matrix(ms1Matrix) - - ########################################################### - ## export annotations - - ## process annotations - annotations <- dataList$annoArrayOfLists - for(i in 1:length(annotations)) - if(dataList$annoArrayIsArtifact[[i]]) - annotations[[i]] <- c(annotations[[i]], dataList$annotationValueIgnore) - - annotationStrings <- vector(mode = "character", length = length(annotations)) - for(i in 1:length(annotations)){ - if(length(annotations[[i]]) > 0) - annotationStrings[[i]] <- paste(annotations[[i]], sep = ", ") - else - annotationStrings[[i]] <- "" - } - annotationStrings <- annotationStrings[precursorSet] - - ## process annotaiotn-color-map - annoPresentAnnotations <- dataList$annoPresentAnnotationsList[-1] - annoPresentColors <- dataList$annoPresentColorsList[-1] - - if(length(annoPresentAnnotations) > 0){ - annotationColors <- paste(annoPresentAnnotations, annoPresentColors, sep = "=", collapse = ", ") - } else { - annotationColors <- "" - } - annotationColors <- paste(dataList$annotationColorsName, "={", annotationColors, "}", sep = "") - - ## box - annotationColumn <- c("", annotationColors, dataList$annotationColumnName, annotationStrings) - - ms1Matrix[, dataList$annotationColumnIndex] <- annotationColumn - - ########################################################### - ## assemble - dataFrame <- cbind( - ms1Matrix, - ms2Matrix - ) - - return(dataFrame) -} + writeTable <- function(precursorSet, file){ - #dataFrame <- createExportMatrix(precursorSet) - #gz1 <- gzfile(description = file, open = "w") - #write.table(x = dataFrame, file = gz1, sep = "\t", row.names = FALSE, col.names = FALSE, quote = FALSE) - #close(gz1) - show_modal_spinner(spin="scaling-squares", text="\nMerging project files to csv. This can take several minutes!") + show_modal_spinner(spin="scaling-squares", + text="\nMerging project files to csv. This can take several minutes!") lines <- createExportMatrix(precursorSet) gz1 <- gzfile(description = file, open = "w") writeLines(text = lines, con = gz1) close(gz1) remove_modal_spinner() } + ## individual downloads output$downloadGlobalMS2filteredPrecursors <- downloadHandler( filename = function() { @@ -288,15 +199,19 @@ output$downloadSelectedPrecursors <- downloadHandler( }, contentType = 'text/csv' ) + #Obvserve button for exporting the project observeEvent(input$prepareAllPrecursors, { ExportMatrixName <<- createExportMatrixName() precursorSet <- 1:dataList$numberOfPrecursors writeTable(precursorSet = precursorSet, file = file.path(tempdir(),ExportMatrixName)) - showModal(modalDialog(title = "Download", footer = NULL, size="s", fluidRow(column(12, p("Your download is ready."))), - fluidRow(column(3,downloadButton(outputId = "downloadAllpreparedPrecursors", label = "Download project"))), -)) + showModal(modalDialog(title = "Download", footer = NULL, size="s", + fluidRow(column(12, p("Your download is ready."))), + fluidRow(column(3,downloadButton(outputId = "downloadAllpreparedPrecursors", + label = "Download project"))), + )) }) + #Serving the modal with the download button to download the project output$downloadAllpreparedPrecursors <- downloadHandler( filename <- ExportMatrixName, @@ -307,6 +222,7 @@ output$downloadAllpreparedPrecursors <- downloadHandler( }, contentType = 'text/csv' ) + ## download selected output$downloadHcaSelectedPrecursors <- downloadHandler( filename = function() { @@ -325,6 +241,7 @@ output$downloadHcaSelectedPrecursors <- downloadHandler( }, contentType = 'text/csv' ) + output$downloadImportParameterSet <- downloadHandler( filename = function() { createImportParameterSetExportFileName() @@ -335,6 +252,7 @@ output$downloadImportParameterSet <- downloadHandler( }, contentType = 'text/csv' ) + ## download images output$downloadHcaImage <- downloadHandler( filename = function() { @@ -347,24 +265,9 @@ output$downloadHcaImage <- downloadHandler( }#, #contentType = 'image/png' ) + plotHCA <- function(file, fileType, plotMS2 = TRUE){ - ## 1 den ## 2 hea - ## 3 ms2 ## 4 l anno - ## 5 l sel ## 6 l hea - ## 7 l ms2 - ## - ## 1 4 - ## 1 5 - ## 1 6 - ## 2 7 - ## 3 7 - ## - ## parameters - ##widthInInch <- 10 - ##heigthInInch <- ifelse(test = plotMS2, yes = 7.5, no = (5.2-1.5)/5.2 * 7.5) - ##resolutionInDPI <- 600 - ### changing resolution DPI to 650 widthInInch <- 10 heigthInInch <- ifelse(test = plotMS2, yes = 7.5, no = (5.2-1.5)/5.2 * 7.5) resolutionInDPI <- 650 @@ -373,7 +276,10 @@ plotHCA <- function(file, fileType, plotMS2 = TRUE){ switch(fileType, "png"={ - png(filename = file, width = widthInPixel, height = heightInPixel, res = resolutionInDPI, bg = "white") + png(filename = file, + width = widthInPixel, + height = heightInPixel, + res = resolutionInDPI, bg = "white") }, "svg"={ svg(filename = file) @@ -384,32 +290,13 @@ plotHCA <- function(file, fileType, plotMS2 = TRUE){ stop(paste("Unknown file type (", fileType, ")!", sep = "")) ) - if(plotMS2){ - - ######################## + if(plotMS2) { graphics::layout( mat = matrix( - ### Just commenting this - ##data = c(1, 1, 1, 1, 2, 3, - ## 4, 5, 6, 7, 8, 8), - #data = c(1, 1, 1, 1, 2, 3), - #nrow = 6, ncol = 2), - ############################ - data = c(1, 1, 1, 1, 2, 2,3, 4, 5, 5,5, 5), nrow = 6, ncol = 2), - widths = c(4.58, 1.59), # this is working good - ##################### - ### This is original - #heights = c(0.002, 0.001, 0.002, 0.0012, 0.0013, 0.0001) - #heights = c(0.2, 0.1, 0.2, 0.2, 0.013, 0.11) # this working good - #heights = c(0.0052, 0.011, 0.01112, 0.01112, 0.001113, 0.0011) - ##heights = c(0.0052, 0.011, 0.01112, 0.0112, 0.0113, 0.011) # this is working well - ### - heights = c(0.0052, 0.011, 0.01112, 0.01112,0.001111, 0.0111) # this is working well - + data = c(1, 1, 1, 1, 2, 2,3, 4, 5, 5,5, 5), nrow = 6, ncol = 2), + widths = c(4.58, 1.59), + heights = c(0.0052, 0.011, 0.01112, 0.01112,0.001111, 0.0111) ) - ################################ - - } else { graphics::layout( mat = matrix( @@ -421,41 +308,17 @@ plotHCA <- function(file, fileType, plotMS2 = TRUE){ ) } - ### i am changing the cex to 0.4 to 0.3 - #cex <- par("cex") - #par(cex = 0.3) - ## 1 - #drawDendrogramPlotImpl() - #par(cex = cex) - ## 2 - #drawHeatmapPlotImpl() ## out for plotly and adapt layout - ## 3 - #if(plotMS2) drawMS2PlotImpl() - ## 4 - #drawDendrogramLegendImpl() - ## 5 - #drawHeatmapLegendImpl() - ## 6 - #if(plotMS2) drawMS2LegendImpl() - ## 7 - #drawFragmentDiscriminativityLegendImpl() - ## 8 - #drawAnnotationLegendForImageHCAimpl() - #drawAnnotationLegendImpl() - ############################### drawDendrogramPlotImpl() #1 - drawHeatmapPlotImpl() # 2 + drawHeatmapPlotImpl() #2 drawFragmentDiscriminativityLegendImpl() #4 drawHeatmapLegendImpl() #3 - drawAnnotationLegendForImageHCAimpl() #5 - - #################### - + dev.off() } + output$downloadPcaImage <- downloadHandler( filename = function() { fileType <- input$downloadPcaImageType @@ -464,34 +327,23 @@ output$downloadPcaImage <- downloadHandler( content = function(file) { fileType <- input$downloadPcaImageType plotPCA(file, fileType) - }#, - #contentType = 'image/png' + } ) + plotPCA <- function(file, fileType, plotMS2 = TRUE){ - ## 1 score ## 2 loadings - ## 3 ms2 ## 4 l anno - ## 5 l sel ## 6 l hea - ## 7 l ms2 - ## - ## 1 2 4 - ## 1 2 5 - ## 1 2 6 - ## 1 2 7 - ## 3 3 7 - ## - ## parameters widthInInch <- 10 - #widthInInch <- 10 * 4 / 5 heigthInInch <- ifelse(test = plotMS2, yes = 6, no = (5.2-1.5)/5.2 * 6) - #heigthInInch <- 6 * 4 / 5 resolutionInDPI <- 650 widthInPixel <- widthInInch * resolutionInDPI heightInPixel <- heigthInInch * resolutionInDPI switch(fileType, "png"={ - png(filename = file, width = widthInPixel, height = heightInPixel, res = resolutionInDPI, bg = "white") + png(filename = file, + width = widthInPixel, + height = heightInPixel, + res = resolutionInDPI, bg = "white") }, "svg"={ svg(filename = file) @@ -503,16 +355,12 @@ plotPCA <- function(file, fileType, plotMS2 = TRUE){ ) if(plotMS2){ - ############################# graphics::layout( - mat = matrix( - ################################ - data = c(1,1,2,2,1,1,2,2,3,3,4,4,3,3,4,4,5,5,5,5,6,6,6,6,7,7,7,7),nrow=4,ncol=7), - widths = c(0.85, 1.15), - heights = c(1.02,1.01) - ######################## - ) - ######################################### + mat = matrix( + data = c(1,1,2,2,1,1,2,2,3,3,4,4,3,3,4,4,5,5,5,5,6,6,6,6,7,7,7,7),nrow=4,ncol=7), + widths = c(0.85, 1.15), + heights = c(1.02,1.01) + ) } else { graphics::layout( mat = matrix( @@ -523,21 +371,15 @@ plotPCA <- function(file, fileType, plotMS2 = TRUE){ widths = c(2, 2, 1), heights = c(0.7, 0.6, 2.4) ) - ########## Adding this new - ############################ } - ############################################ drawPcaScoresPlotImpl1() - calcPlotScoresGroupsLegendForImage1(scoresGroups$groups, scoresGroups$colors, 30) + calcPlotScoresGroupsLegendForImage1(scoresGroups$grouXXXps, scoresGroups$colors, 30) drawPcaLoadingsPlotImpl() drawAnnotationLegendForImagePCAimpl() - ##plot.new() - #################### - #################### This is the end ####### dev.off() } -############################################## + output$downloadDistanceMatrix <- downloadHandler( filename = function() { createExportDistanceMatrixName(currentDistanceMatrixObj$distanceMeasure) @@ -547,6 +389,7 @@ output$downloadDistanceMatrix <- downloadHandler( }, contentType = 'text/csv' ) + ## download publication data output$downloadMsData <- downloadHandler( filename = function() { @@ -554,60 +397,74 @@ output$downloadMsData <- downloadHandler( }, content = function(file) { ## copy data for download - file.copy(getFile("Metabolite_profile_showcase.txt"), file) + file.copy(system.file("extdata/showcase/Metabolite_profile_showcase.txt", package = "MetFamily"), + file) }, contentType = "application/zip" ) + +## STN: should this really be a fixed fileName ?? output$downloadMsMsData <- downloadHandler( filename = function() { return("MSMS_library_showcase.msp") }, content = function(file) { ## copy data for download - file.copy(getFile("MSMS_library_showcase.msp"), file) + file.copy(system.file("extdata/showcase/Metabolite_profile_showcase.txt", package = "MetFamily"), + file) }, contentType = "application/zip" ) + +## STN: should this really be a fixed fileName ?? output$downloadFragmentMatrix <- downloadHandler( filename = function() { return("Fragment_matrix_showcase.csv") }, content = function(file) { ## copy data for download - file.copy(getFile("Fragment_matrix_showcase.csv"), file) + file.copy(system.file("Fragment_matrix_showcase.csv", package = "MetFamily"), + file) }, contentType = "application/zip" ) + output$downloadDocShowcaseProtocol <- downloadHandler( filename = function() { return("MetFamily_Showcase_protocol.pdf") }, content = function(file) { ## copy data for download - file.copy(getFile("MetFamily_Showcase_protocol.pdf"), file) + file.copy(system.file("extdata/showcase/MetFamily_Showcase_protocol.pdf", package = "MetFamily"), + file) }, contentType = "application/pdf" ) + output$downloadDocUserGuide <- downloadHandler( filename = function() { return("MetFamily_user_guide.pdf") }, content = function(file) { ## copy data for download - file.copy(getFile("MetFamily_user_guide.pdf"), file) + file.copy(system.file("extdata/showcase/MetFamily_user_guide.pdf", package = "MetFamily"), + file) }, contentType = "application/pdf" ) + output$downloadDocInputSpecification <- downloadHandler( filename = function() { return("MetFamily_Input_Specification.pdf") }, content = function(file) { ## copy data for download - file.copy(getFile("MetFamily_Input_Specification.pdf"), file) + file.copy(system.file("extdata/showcase/MetFamily_Input_Specification.pdf", package = "MetFamily"), + file) }, contentType = "application/pdf" ) + ######################################################################################### ## consensus spectrum for metabolite families output$downloadMetaboliteFamilyConsensusSpectrum <- downloadHandler( @@ -616,7 +473,8 @@ output$downloadMetaboliteFamilyConsensusSpectrum <- downloadHandler( }, content = function(file) { annotation <- allAnnotationNames[[input$familySelectionTable_rows_selected]] - precursorSet <- which(unlist(lapply(X = dataList$annoArrayOfLists, FUN = function(y){any(y==annotation)}))) + precursorSet <- which(unlist(lapply(X = dataList$annoArrayOfLists, + FUN = function(y){any(y==annotation)}))) returnObj <- getSpectrumStatistics(dataList = dataList, precursorSet = precursorSet) fragmentMasses = returnObj$fragmentMasses fragmentCounts = returnObj$fragmentCounts @@ -627,17 +485,20 @@ output$downloadMetaboliteFamilyConsensusSpectrum <- downloadHandler( "Frequency" = fragmentProportion ) - write.table(x = consensusSpectrumDf, file = file, sep = "\t", row.names = FALSE, quote = FALSE) + write.table(x = consensusSpectrumDf, file = file, sep = "\t", + row.names = FALSE, quote = FALSE) }, contentType = 'text/csv' ) + output$downloadMetaboliteFamilyFilteredPrecursors <- downloadHandler( filename = function() { createMetaboliteFamilyProjectFileName(allAnnotationNames[[input$familySelectionTable_rows_selected]]) }, content = function(file) { annotation <- allAnnotationNames[[input$familySelectionTable_rows_selected]] - precursorSet <- which(unlist(lapply(X = dataList$annoArrayOfLists, FUN = function(y){any(y==annotation)}))) + precursorSet <- which(unlist(lapply(X = dataList$annoArrayOfLists, + FUN = function(y){any(y==annotation)}))) writeTable(precursorSet = precursorSet, file = file) }, contentType = 'text/csv' @@ -667,9 +528,6 @@ createAnnotationResultTableAll <- function(){ if(!(precursorIndex %in% as.integer(names(classToSpectra_class[[classIdx]])))) next - #class <- names(classToSpectra_class)[[selectedRowIdx]] - #classToSpectra <- classToSpectra_class [[selectedRowIdx]] - class <- names(classToSpectra_class)[[classIdx]] pValue <- format(unname( classToSpectra_class[[classIdx]][[which( precursorIndex == as.integer(names(classToSpectra_class[[classIdx]])) )]] ), digits=4) rows[[length(rows)+1]] <- c( @@ -699,6 +557,7 @@ createAnnotationResultTableAll <- function(){ colnames(outputDf) <- head return(outputDf) } + createAnnotationResultTableForClass <- function(){ selectedRowIdx <- input$annotationResultTableClass_rows_selected @@ -711,15 +570,16 @@ createAnnotationResultTableForClass <- function(){ mzs <- dataList$dataFrameInfos[precursorIndeces, "m/z"] rts <- dataList$dataFrameInfos[precursorIndeces, "RT"] metaboliteNames <- dataList$dataFrameInfos[precursorIndeces, "Metabolite name"] - presentAnnotations <- unlist(lapply(X = dataList$annoArrayOfLists[precursorIndeces], FUN = function(x){ - if(length(x) == 0){ - return("") - } else { - x <- sort(unlist(x)) - s <- paste(x, collapse = "; ") - return(s) - } - })) + presentAnnotations <- unlist(lapply(X = dataList$annoArrayOfLists[precursorIndeces], + FUN = function(x){ + if(length(x) == 0){ + return("") + } else { + x <- sort(unlist(x)) + s <- paste(x, collapse = "; ") + return(s) + } + })) outputDf <- data.frame( "Index" = precursorIndeces, @@ -734,6 +594,7 @@ createAnnotationResultTableForClass <- function(){ return(outputDf) } + output$downloadAllAnnotationResults <- downloadHandler( filename = function() { createClassifierAnnotationName( "All" ) @@ -745,6 +606,7 @@ output$downloadAllAnnotationResults <- downloadHandler( }, contentType = 'text/csv' ) + output$downloadMetaboliteFamilyAnnotationResults <- downloadHandler( filename = function() { createClassifierAnnotationName( names(classToSpectra_class)[[input$annotationResultTableClass_rows_selected]] ) diff --git a/inst/MetFamily/app_files/server_functionsFilters.R b/inst/MetFamily/app_files/server_functionsFilters.R index 2f16d91..c8bcdd8 100644 --- a/inst/MetFamily/app_files/server_functionsFilters.R +++ b/inst/MetFamily/app_files/server_functionsFilters.R @@ -19,10 +19,10 @@ resetWorkspaceFunctions <- c(resetWorkspaceFunctions, function(){ ######################################################################################### ## update filter - sampleSet <- dataList$groupSampleDataFrame[, "Sample"][!dataList$groupSampleDataFrame[, "Exclude"]] - filter <- doPerformFiltering(dataList$groups, sampleSet, FALSE, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, TRUE)$filter - if(length(dataList$groups) == 1) - filter2 <- doPerformFiltering(c(dataList$groups[[1]], dataList$groups[[1]]), NULL, FALSE, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, TRUE)$filter + sampleSet <- dataList$grouXXXpsampleDataFrame[, "Sample"][!dataList$groupSampleDataFrame[, "Exclude"]] + filter <- doPerformFiltering(dataList$grouXXXps, sampleSet, FALSE, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, TRUE)$filter + if(length(dataList$grouXXXps) == 1) + filter2 <- doPerformFiltering(c(dataList$grouXXXps[[1]], dataList$grouXXXps[[1]]), NULL, FALSE, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, TRUE)$filter else filter2 <- filter @@ -48,18 +48,18 @@ resetWorkspaceFunctions <- c(resetWorkspaceFunctions, function(){ ######################################################################################### ## update filter input values - ## groups - switch(as.character(length(dataList$groups)), + ## grouXXXps + switch(as.character(length(dataList$grouXXXps)), "0"={ - stop("No groups available") + stop("No grouXXXps available") }, "1"={ - selectedOne <- dataList$groups[[1]] - selectedTwo <- dataList$groups[[1]] + selectedOne <- dataList$grouXXXps[[1]] + selectedTwo <- dataList$grouXXXps[[1]] }, { - selectedOne <- dataList$groups[[1]] - selectedTwo <- dataList$groups[[2]] + selectedOne <- dataList$grouXXXps[[1]] + selectedTwo <- dataList$grouXXXps[[2]] } ) @@ -72,14 +72,14 @@ resetWorkspaceFunctions <- c(resetWorkspaceFunctions, function(){ updateTextInput(session = session, inputId = "globalFilter_ms2_ppm", value = "20") ## input fields: HCA filter - updateRadioButtons(session = session, inputId = "hcaFilterGroupOne", choices = dataList$groups, selected = selectedOne) - updateRadioButtons(session = session, inputId = "hcaFilterGroupTwo", choices = dataList$groups, selected = selectedTwo) + updateRadioButtons(session = session, inputId = "hcaFilterGroupOne", choices = dataList$grouXXXps, selected = selectedOne) + updateRadioButtons(session = session, inputId = "hcaFilterGroupTwo", choices = dataList$grouXXXps, selected = selectedTwo) updateTextInput(session = session, inputId = "hcaFilter_average", value = "0") updateTextInput(session = session, inputId = "hcaFilter_lfc", value = "0") updateCheckboxInput(session = session, inputId = "hcaFilterIncludeIgnoredPrecursors", value = FALSE) ## input fields: PCA filter - updateCheckboxGroupInput(session = session, inputId = "pcaGroups", choices = dataList$groups, selected = dataList$groups) + updateCheckboxGroupInput(session = session, inputId = "pcaGroups", choices = dataList$grouXXXps, selected = dataList$grouXXXps) updateCheckboxGroupInput(session = session, inputId = "pcaSamples", choices = sampleNames, selected = sampleNames) updateTextInput(session = session, inputId = "pcaFilter_average", value = "0") updateTextInput(session = session, inputId = "pcaFilter_lfc", value = "0") @@ -295,7 +295,7 @@ doPerformFiltering_impl <- function(groupSet, sampleSet, filterBySamples, filter filterHere <- filterData( dataList = dataList, #groupOne = groupOne, groupTwo = groupTwo, - groups = groupSet, sampleSet, filterBySamples, filter_average = filter_average, filter_lfc = filter_lfc, + grouXXXps = groupSet, sampleSet, filterBySamples, filter_average = filter_average, filter_lfc = filter_lfc, filterList_ms2_masses = filterList_ms2_masses, filter_ms2_ppm = filter_ms2_ppm, filter_ms1_masses = filter_ms1_masses, filter_ms1_ppm = filter_ms1_ppm, includeIgnoredPrecursors = includeIgnoredPrecursors, @@ -382,7 +382,7 @@ checkPcaFilterValidity <- function(numberOfPrecursorsFiltered){ } applyGlobalMS2filters <- function(filter_ms2_masses1, filter_ms2_masses2, filter_ms2_masses3, filter_ms2_ppm){ - groupSet <- dataList$groups + groupSet <- dataList$grouXXXps filter_average <- NULL filter_lfc <- NULL includeIgnoredPrecursors <- TRUE @@ -504,10 +504,10 @@ obsApplyPcaFilters <- observeEvent(input$applyPcaFilters, { includeIgnoredPrecursors <- input$pcaFilterIncludeIgnoredPrecursors if(filterBySamples){ - ## update groups and samples mutually + ## update grouXXXps and samples mutually - ## groups which are covered by at least one sample - groupsFromSamples <- unlist(lapply(X = dataList$groups, FUN = function(x){ + ## grouXXXps which are covered by at least one sample + groupsFromSamples <- unlist(lapply(X = dataList$grouXXXps, FUN = function(x){ samplesOfGroups <- dataList$dataColumnsNameFunctionFromGroupName(group = x, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) if(any(samplesOfGroups %in% sampleSet)) return(x) @@ -516,7 +516,7 @@ obsApplyPcaFilters <- observeEvent(input$applyPcaFilters, { })) ## samples which ae covered by a group - samplesFromGroups <- dataList$dataColumnsNameFunctionFromGroupNames(groups = groupSet, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) + samplesFromGroups <- dataList$dataColumnsNameFunctionFromGroupNames(grouXXXps = groupSet, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) groupSet <- intersect(groupSet, groupsFromSamples) sampleSet <- intersect(sampleSet, samplesFromGroups) } else { @@ -543,7 +543,7 @@ obsClearPcaFilters <- observeEvent(input$clearPcaFilters, { ################################################# ## get inputs - groupSet <- dataList$groups + groupSet <- dataList$grouXXXps sampleSet <- dataList$groupSampleDataFrame[, "Sample"][!dataList$groupSampleDataFrame[, "Exclude"]] filterByPCAgroupSamples <- TRUE filter_average <- "" diff --git a/inst/MetFamily/app_files/server_functionsSerialization.R b/inst/MetFamily/app_files/server_functionsSerialization.R index 6f964d5..c4f46e8 100644 --- a/inst/MetFamily/app_files/server_functionsSerialization.R +++ b/inst/MetFamily/app_files/server_functionsSerialization.R @@ -93,10 +93,10 @@ deserialization <- function(serialization){ ## update parameter fields #updateTextInput(session = session, inputId = "globalFilter_ms2_masses1", value = "") - #updateRadioButtons(session = session, inputId = "hcaFilterGroupOne", choices = dataList$groups, selected = selectedOne) + #updateRadioButtons(session = session, inputId = "hcaFilterGroupOne", choices = dataList$grouXXXps, selected = selectedOne) #updateCheckboxInput(session = session, inputId = "hcaFilterIncludeIgnoredPrecursors", value = FALSE) #updateSelectInput(session = session, inputId = "presentAnnotationValue", choices = c("[init]"), selected = lalala) - #updateCheckboxGroupInput(session = session, inputId = "pcaGroups", choices = dataList$groups, selected = dataList$groups) + #updateCheckboxGroupInput(session = session, inputId = "pcaGroups", choices = dataList$grouXXXps, selected = dataList$grouXXXps) ## global MS2 filter updateTextInput( session = session, inputId = "globalFilter_ms2_masses1", value = paramsList$globalFilter_ms2_masses1) @@ -162,7 +162,7 @@ deserialization <- function(serialization){ filter_ms2_masses3 <- paramsList$globalFilter_ms2_masses3 filter_ms2_ppm <- paramsList$globalFilter_ms2_ppm - groupSet <- dataList$groups + groupSet <- dataList$grouXXXps sampleSet <- dataList$groupSampleDataFrame[, "Sample"][!dataList$groupSampleDataFrame[, "Exclude"]] filterBySamples <- TRUE filter_average <- NULL @@ -278,9 +278,9 @@ deserialization <- function(serialization){ filter_lfc <- NULL filter_average <- NULL - groupSet <- dataList$groups + groupSet <- dataList$grouXXXps sampleSet <- dataList$includedSamples(dataList$groupSampleDataFrame) - #sampleSet <- dataList$dataColumnsNameFunctionFromGroupNames(groups = groupSet, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) + #sampleSet <- dataList$dataColumnsNameFunctionFromGroupNames(grouXXXps = groupSet, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) filterBySamples <- TRUE includeIgnoredPrecursors <- paramsListsearchIncludeIgnoredPrecursors diff --git a/inst/MetFamily/app_files/server_guiPlots.R b/inst/MetFamily/app_files/server_guiPlots.R index f9db668..754fb41 100644 --- a/inst/MetFamily/app_files/server_guiPlots.R +++ b/inst/MetFamily/app_files/server_guiPlots.R @@ -258,7 +258,7 @@ drawAnnotationLegendForImagePCAimpl <- function(){ calcPlotAnnoLegendForImage(state_tabPca$annotationsPca$setOfAnnotations, state_tabPca$annotationsPca$setOfColors) } drawScoresGroupsLegendImpl <- function(){ - calcPlotScoresGroupsLegend(scoresGroups$groups, scoresGroups$colors) + calcPlotScoresGroupsLegend(scoresGroups$grouXXXps, scoresGroups$colors) } drawMS2PlotImpl <- function(){ diff --git a/inst/MetFamily/app_files/server_guiTabHca.R b/inst/MetFamily/app_files/server_guiTabHca.R index 4e570da..15e6bfc 100644 --- a/inst/MetFamily/app_files/server_guiTabHca.R +++ b/inst/MetFamily/app_files/server_guiTabHca.R @@ -104,10 +104,10 @@ drawDendrogramPlotImpl_forPlotly <- function(){ numberOfGroups <- 3 }, "Abundance by group"={## groups - numberOfGroups <- length(dataList$groups) + numberOfGroups <- length(dataList$grouXXXps) }, "Abundance by sample"={## samples - numberOfGroups <- length(dataList$dataColumnsNameFunctionFromGroupNames(groups = groups, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame))) + numberOfGroups <- length(dataList$dataColumnsNameFunctionFromGroupNames(grouXXXps = grouXXXps, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame))) }, {## unknown state stop(paste("Unknown heatmapContent value", heatmapContent)) @@ -637,8 +637,8 @@ obsHeatmaphover <- observeEvent(input$plotHeatmap_hover, { if(hoverY > 2){ ## lcf - groupOne <- filterHca$groups[[1]] - groupTwo <- filterHca$groups[[2]] + groupOne <- filterHca$grouXXXps[[1]] + groupTwo <- filterHca$grouXXXps[[2]] msg[[length(msg) + 1]] <- paste("log-fold-change = log_2( mean(group ", groupOne, ") / mean(group ", groupTwo, ") )", sep = "") valMeanOne <- dataList$dataFrameMeasurements[precursorIndex, dataList$dataMeanColumnNameFunctionFromName(groupOne)] @@ -654,17 +654,17 @@ obsHeatmaphover <- observeEvent(input$plotHeatmap_hover, { } else { if(hoverY > 1){ ## group 1 - groupHere <- filterHca$groups[[1]] + groupHere <- filterHca$grouXXXps[[1]] } else { ## hoverY <= 1 ## group 2 - groupHere <- filterHca$groups[[2]] + groupHere <- filterHca$grouXXXps[[2]] } msg[[length(msg) + 1]] <- paste("Mean abundance of group ", groupHere, ": ", sep = "") valMean <- dataList$dataFrameMeasurements[precursorIndex, dataList$dataMeanColumnNameFunctionFromName(groupHere)] msg[[length(msg) + 1]] <- as.numeric(format(x = valMean, digits = 2)) msg[[length(msg) + 1]] <- " = mean(" #columnNames <- dataList$dataColumnsNameFunctionFromGroupName(group = groupHere, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) - columnNames <- dataList$dataColumnsNameFunctionFromGroupName(group = groupHere, sampleNamesToExclude = dataList$excludedSamples(groupSampleDataFrame = dataList$groupSampleDataFrame, groups = groupHere)) + columnNames <- dataList$dataColumnsNameFunctionFromGroupName(group = groupHere, sampleNamesToExclude = dataList$excludedSamples(groupSampleDataFrame = dataList$groupSampleDataFrame, grouXXXps = groupHere)) vals <- dataList$dataFrameMeasurements[precursorIndex, columnNames] vals <- as.numeric(format(x = vals, digits = 2)) msg[[length(msg) + 1]] <- paste(vals, collapse = ", ") @@ -702,7 +702,7 @@ output$plotHeatmap_hover_info <- renderUI({ treeLeafIndex <- clusterDataList$cluster$order[[treeLeafIndex2]] precursorIndex <- filterHca$filter[[treeLeafIndex]] - ## differentiate heatmap content: LFC, samples, groups + ## differentiate heatmap content: LFC, samples, grouXXXps columnOfInterest <- columnsOfInterest[[ceiling(hoverY)]] #print("entering the line ...line 707") #print(columnOfInterest) @@ -714,13 +714,13 @@ output$plotHeatmap_hover_info <- renderUI({ ## lcf #print("enter the line ...line 714") ######## - groups <- dataList$lfcColumnNameFunctionFromString(columnOfInterest) + grouXXXps <- dataList$lfcColumnNameFunctionFromString(columnOfInterest) ####### #print(columnOfInterest) - #print(groups) + #print(grouXXXps) ####### - groupOne <- groups[[1]] - groupTwo <- groups[[2]] + groupOne <- grouXXXps[[1]] + groupTwo <- grouXXXps[[2]] #msg[[length(msg) + 1]] <- paste("log-fold-change = log_2( mean(group ", groupOne, ") / mean(group ", groupTwo, ") )", sep = "") valMeanOne <- dataList$dataFrameMeasurements[precursorIndex, dataList$dataMeanColumnNameFunctionFromName(groupOne)] @@ -737,8 +737,8 @@ output$plotHeatmap_hover_info <- renderUI({ lfc <- format(x = lfc, digits = 2) #msg[[length(msg) + 1]] <- lfc - #samplesOne <- dataList$dataColumnsNameFunctionFromGroupName(group = groupOne, sampleNamesToExclude = dataList$excludedSamples(groupSampleDataFrame = dataList$groupSampleDataFrame, groups = groupOne)) - #samplesTwo <- dataList$dataColumnsNameFunctionFromGroupName(group = groupTwo, sampleNamesToExclude = dataList$excludedSamples(groupSampleDataFrame = dataList$groupSampleDataFrame, groups = groupTwo)) + #samplesOne <- dataList$dataColumnsNameFunctionFromGroupName(group = groupOne, sampleNamesToExclude = dataList$excludedSamples(groupSampleDataFrame = dataList$groupSampleDataFrame, grouXXXps = groupOne)) + #samplesTwo <- dataList$dataColumnsNameFunctionFromGroupName(group = groupTwo, sampleNamesToExclude = dataList$excludedSamples(groupSampleDataFrame = dataList$groupSampleDataFrame, grouXXXps = groupTwo)) info <- paste( "MS\u00B9 feature: ", "", dataList$precursorLabels[[precursorIndex]], "
", @@ -765,7 +765,7 @@ output$plotHeatmap_hover_info <- renderUI({ #msg[[length(msg) + 1]] <- valMean #msg[[length(msg) + 1]] <- " = mean(" #columnNames <- dataList$dataColumnsNameFunctionFromGroupName(group = groupHere, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) - columnNames <- dataList$dataColumnsNameFunctionFromGroupName(group = groupHere, sampleNamesToExclude = dataList$excludedSamples(groupSampleDataFrame = dataList$groupSampleDataFrame, groups = groupHere)) + columnNames <- dataList$dataColumnsNameFunctionFromGroupName(group = groupHere, sampleNamesToExclude = dataList$excludedSamples(groupSampleDataFrame = dataList$groupSampleDataFrame, grouXXXps = groupHere)) vals <- dataList$dataFrameMeasurements[precursorIndex, columnNames] vals <- format(x = vals, digits = 2) #msg[[length(msg) + 1]] <- paste(vals, collapse = ", ") diff --git a/inst/MetFamily/app_files/server_guiTabInput.R b/inst/MetFamily/app_files/server_guiTabInput.R index 59fb8fb..b7a19c0 100644 --- a/inst/MetFamily/app_files/server_guiTabInput.R +++ b/inst/MetFamily/app_files/server_guiTabInput.R @@ -1,581 +1,582 @@ - -## data import: fixed parameters -proportionOfMatchingPeaks_ms2PeakGroupDeisotoping <- 0.9 -mzDeviationAbsolute_mapping <- 0.01 -#minimumNumberOfMS2PeaksPerGroup <- 1 - -## data -dataList <- NULL -state_tabInput <- reactiveValues( - importedOrLoadedFile_s_ = NULL -) -resetWorkspaceFunctions <- c(resetWorkspaceFunctions, function(){ - print("Reset tabInput state") - state_tabInput$importedOrLoadedFile_s_ <<- NULL -}) - -enableLoadButtons <- function(){ - #session$sendCustomMessage("enableButton", "loadProjectData") - #session$sendCustomMessage("enableButton", "loadExampleData") - #session$sendCustomMessage("enableButton", "importMs1Ms2Data") - #session$sendCustomMessage("enableButton", "importMs2Data") - shinyjs::enable("loadProjectData") - shinyjs::enable("loadExampleData") - shinyjs::enable("importMs1Ms2Data") - shinyjs::enable("importMs2Data") -} -disableLoadButtons <- function(){ - session$sendCustomMessage("disableButton", "loadProjectData") - session$sendCustomMessage("disableButton", "loadExampleData") - session$sendCustomMessage("disableButton", "importMs1Ms2Data") - session$sendCustomMessage("disableButton", "importMs2Data") - shinyjs::disable("loadProjectData") - shinyjs::disable("loadExampleData") - shinyjs::disable("importMs1Ms2Data") - shinyjs::disable("importMs2Data") -} -obsFile <- observeEvent(input$matrixFile$datapath, { - filePath <- input$matrixFile$datapath - fileName <- input$matrixFile$name - print(paste("Observe file for data", fileName)) - if(!is.null(filePath)) - shinyjs::enable("loadProjectData") - - updateFileInputInfo() -}) -obsLoadProjectData <- observeEvent(input$loadProjectData, { - disableLoadButtons() - loadProjectData <- as.numeric(input$loadProjectData) - print(paste("Observe loadProjectData", loadProjectData)) - - ################################################# - ## check if button was hit - #if(loadProjectData == loadProjectDataButtonValue) - # return() - #loadProjectDataButtonValue <<- loadProjectData - - ################################################# - ## files - filePath <- input$matrixFile$datapath - loadProjectFile(filePath = filePath) - enableLoadButtons() -}) -obsLoadExampleData <- observeEvent(input$loadExampleData, { - disableLoadButtons() - loadExampleData <- as.numeric(input$loadExampleData) - print(paste("Observe loadExampleData", loadExampleData)) - - ################################################# - ## check if button was hit - #if(loadExampleData == loadExampleDataButtonValue) - # return() - #loadExampleDataButtonValue <<- loadExampleData - - ################################################# - ## files - filePath <- getFile("Project_file_showcase_annotated.csv.gz") - loadProjectFile(filePath = filePath) - enableLoadButtons() -}) -loadProjectFile <- function(filePath){ - fileName <- basename(filePath) - ######################################################################################### - ## read data - - error <<- NULL - withProgress(message = 'Reading file...', value = 0, { - dataList <<- tryCatch( - { - readClusterDataFromProjectFile(file = filePath, progress = TRUE) - }, - error = function(e) { - print(e) - error <<- e - } - ) - }) - - if(!is.null(error)){ - print(paste("readClusterDataFromProjectFile resulted in error:", error)) - msg <- paste("An error occurred while reading the input files. Please check the file format and content and try again. The error was", error) - output$fileInfo <- renderText({msg}) - #session$sendCustomMessage("enableButton", buttonId) - #shinyBS::addPopover(session = session, id = "fileInputSelection", title = "Error", content = "huhu") - - msg <- paste( - "An error occurred while reading the input files.", - "Please check the file format and content and try again.", - "The error was:", - "
", - error - ) - showErrorDialog(msg) - - return() - } - print(paste("readClusterDataFromProjectFile finished", dataList$minimumMass)) - - resetWorkspace() - - state_tabInput$importedOrLoadedFile_s_ <<- fileName - updateFileInputInfo() -} -obsImportMs1DataFile <- observeEvent(input$ms1DataFile$datapath, { - fileMs1Path <- input$ms1DataFile$datapath - fileMs1Name <- input$ms1DataFile$name - fileMs2Path <- input$ms2DataFile$datapath - fileMs2Name <- input$ms2DataFile$name - print(paste("Observe import MS1 file", fileMs1Name)) - - if(all(!is.null(fileMs1Path), !is.null(fileMs2Path))) - shinyjs::enable("importMs1Ms2Data") - else - shinyjs::disable("importMs1Ms2Data") - - updateFileInputInfo() -}) -obsImportMs2DataFile <- observeEvent(input$ms2DataFile$datapath, { - setImportState() -}) -setImportState <- function(){ - fileMs1Path <- input$ms1DataFile$datapath - fileMs1Name <- input$ms1DataFile$name - fileMs2Path <- input$ms2DataFile$datapath - fileMs2Name <- input$ms2DataFile$name - print(paste("Observe import MS2 file", fileMs2Name)) - - if(all(!is.null(fileMs1Path), !is.null(fileMs2Path))) - shinyjs::enable("importMs1Ms2Data") - else - shinyjs::disable("importMs1Ms2Data") - - if(!is.null(fileMs2Path)) - shinyjs::enable("importMs2Data") - else - shinyjs::disable("importMs2Data") - - updateFileInputInfo() -} -obsImportMs1Ms2Data <- observeEvent(input$importMs1Ms2Data, { - disableLoadButtons() - importMs1Ms2Data <- as.numeric(input$importMs1Ms2Data) - - print(paste("Observe importMs1Ms2Data", importMs1Ms2Data)) - - ################################################# - ## check if button was hit - #if(importMs1Ms2Data == importMs1Ms2DataButtonValue) - # return() - #importMs1Ms2DataButtonValue <<- importMs1Ms2Data - - importData(TRUE) - enableLoadButtons() -}) -obsImportMs2Data <- observeEvent(input$importMs2Data, { - enableLoadButtons() - importMs2Data <- as.numeric(input$importMs2Data) - - print(paste("Observe importMs2Data", importMs2Data)) - - ################################################# - ## check if button was hit - #if(importMs2Data == importMs2DataButtonValue) - # return() - #importMs2DataButtonValue <<- importMs2Data - - importData(FALSE) - disableLoadButtons() -}) -importData <- function(importMS1andMS2data){ - ################################################# - ## files - if(importMS1andMS2data){ - fileMs1Path <- input$ms1DataFile$datapath - fileMs1Name <- input$ms1DataFile$name - } else { - fileMs1Path <- NULL - fileMs1Name <- NULL - } - fileMs2Path <- input$ms2DataFile$datapath - fileMs2Name <- input$ms2DataFile$name - - ################################################# - ## params - - ## project name - projectName <- input$projectName - projectName <- gsub(";", "_", gsub(",", "_", gsub("\t", "_", projectName))) - projectDescription <- input$projectDescription - projectDescription <- gsub(";", "_", gsub(",", "_", gsub("\t", "_", projectDescription))) - - ## minimum MS2 peak intensity - minimumIntensityOfMaximalMS2peak <- input$minimumIntensityOfMaximalMS2peak - minimumProportionOfMS2peaks <- input$minimumProportionOfMS2peaks - ## grouping of MS2 peaks - mzDeviationAbsolute_grouping <- input$mzDeviationAbsolute_grouping - mzDeviationInPPM_grouping <- input$mzDeviationInPPM_grouping - ## precursor deisotoping - doPrecursorDeisotoping <- input$doPrecursorDeisotoping - mzDeviationAbsolute_precursorDeisotoping <- input$mzDeviationAbsolute_precursorDeisotoping - mzDeviationInPPM_precursorDeisotoping <- input$mzDeviationInPPM_precursorDeisotoping - maximumRtDifference <- input$maximumRtDifference - ## fragment deisotoping - doMs2PeakGroupDeisotoping <- input$doMs2PeakGroupDeisotoping - mzDeviationAbsolute_ms2PeakGroupDeisotoping <- input$mzDeviationAbsolute_ms2PeakGroupDeisotoping - mzDeviationInPPM_ms2PeakGroupDeisotoping <- input$mzDeviationInPPM_ms2PeakGroupDeisotoping - ## neutral losses - neutralLossesPrecursorToFragments <- input$neutralLossesPrecursorToFragments - neutralLossesFragmentsToFragments <- input$neutralLossesFragmentsToFragments - #neutralLossesPrecursorToFragments <- TRUE - #neutralLossesFragmentsToFragments <- FALSE - - ## fixed - proportionOfMatchingPeaks_ms2PeakGroupDeisotopingHere <- proportionOfMatchingPeaks_ms2PeakGroupDeisotoping - mzDeviationAbsolute_mappingHere <- mzDeviationAbsolute_mapping - #minimumNumberOfMS2PeaksPerGroupHere <- minimumNumberOfMS2PeaksPerGroup - - ################################################# - ## check params - error <- FALSE - if(any(is.null(minimumIntensityOfMaximalMS2peak), length(minimumIntensityOfMaximalMS2peak) == 0, nchar(minimumIntensityOfMaximalMS2peak) == 0)) - error <- TRUE - else{ - minimumIntensityOfMaximalMS2peak <- as.numeric(minimumIntensityOfMaximalMS2peak) - error <- error | is.na(minimumIntensityOfMaximalMS2peak) - } - if(any(is.null(minimumProportionOfMS2peaks), length(minimumProportionOfMS2peaks) == 0, nchar(minimumProportionOfMS2peaks) == 0)) - error <- TRUE - else{ - minimumProportionOfMS2peaks <- as.numeric(minimumProportionOfMS2peaks) - error <- error | is.na(minimumProportionOfMS2peaks) - } - if(any(is.null(mzDeviationAbsolute_grouping), length(mzDeviationAbsolute_grouping) == 0, nchar(mzDeviationAbsolute_grouping) == 0)) - error <- TRUE - else{ - mzDeviationAbsolute_grouping <- as.numeric(mzDeviationAbsolute_grouping) - error <- error | is.na(mzDeviationAbsolute_grouping) - } - if(any(is.null(mzDeviationInPPM_grouping), length(mzDeviationInPPM_grouping) == 0, nchar(mzDeviationInPPM_grouping) == 0)) - error <- TRUE - else{ - mzDeviationInPPM_grouping <- as.numeric(mzDeviationInPPM_grouping) - error <- error | is.na(mzDeviationInPPM_grouping) - } - if(doPrecursorDeisotoping){ - if(any(is.null(mzDeviationAbsolute_precursorDeisotoping), length(mzDeviationAbsolute_precursorDeisotoping) == 0, nchar(mzDeviationAbsolute_precursorDeisotoping) == 0)) - error <- TRUE - else{ - mzDeviationAbsolute_precursorDeisotoping <- as.numeric(mzDeviationAbsolute_precursorDeisotoping) - error <- error | is.na(mzDeviationAbsolute_precursorDeisotoping) - } - if(any(is.null(mzDeviationInPPM_precursorDeisotoping), length(mzDeviationInPPM_precursorDeisotoping) == 0, nchar(mzDeviationInPPM_precursorDeisotoping) == 0)) - error <- TRUE - else{ - mzDeviationInPPM_precursorDeisotoping <- as.numeric(mzDeviationInPPM_precursorDeisotoping) - error <- error | is.na(mzDeviationInPPM_precursorDeisotoping) - } - } - if(any(is.null(maximumRtDifference), length(maximumRtDifference) == 0, nchar(maximumRtDifference) == 0)) - error <- TRUE - else{ - maximumRtDifference <- as.numeric(maximumRtDifference) - error <- error | is.na(maximumRtDifference) - } - if(doMs2PeakGroupDeisotoping){ - if(any(is.null(mzDeviationAbsolute_ms2PeakGroupDeisotoping), length(mzDeviationAbsolute_ms2PeakGroupDeisotoping) == 0, nchar(mzDeviationAbsolute_ms2PeakGroupDeisotoping) == 0)) - error <- TRUE - else{ - mzDeviationAbsolute_ms2PeakGroupDeisotoping <- as.numeric(mzDeviationAbsolute_ms2PeakGroupDeisotoping) - error <- error | is.na(mzDeviationAbsolute_ms2PeakGroupDeisotoping) - } - if(any(is.null(mzDeviationInPPM_ms2PeakGroupDeisotoping), length(mzDeviationInPPM_ms2PeakGroupDeisotoping) == 0, nchar(mzDeviationInPPM_ms2PeakGroupDeisotoping) == 0)) - error <- TRUE - else{ - mzDeviationInPPM_ms2PeakGroupDeisotoping <- as.numeric(mzDeviationInPPM_ms2PeakGroupDeisotoping) - error <- error | is.na(mzDeviationInPPM_ms2PeakGroupDeisotoping) - } - } - - if(any(is.null(proportionOfMatchingPeaks_ms2PeakGroupDeisotopingHere), length(proportionOfMatchingPeaks_ms2PeakGroupDeisotopingHere) == 0, nchar(proportionOfMatchingPeaks_ms2PeakGroupDeisotopingHere) == 0)) - error <- TRUE - else{ - proportionOfMatchingPeaks_ms2PeakGroupDeisotopingHere <- as.numeric(proportionOfMatchingPeaks_ms2PeakGroupDeisotopingHere) - error <- error | is.na(proportionOfMatchingPeaks_ms2PeakGroupDeisotopingHere) - } - if(any(is.null(mzDeviationAbsolute_mappingHere), length(mzDeviationAbsolute_mappingHere) == 0, nchar(mzDeviationAbsolute_mappingHere) == 0)) - error <- TRUE - else{ - mzDeviationAbsolute_mappingHere <- as.numeric(mzDeviationAbsolute_mappingHere) - error <- error | is.na(mzDeviationAbsolute_mappingHere) - } - # if(any(is.null(minimumNumberOfMS2PeaksPerGroupHere), length(minimumNumberOfMS2PeaksPerGroupHere) == 0, nchar(minimumNumberOfMS2PeaksPerGroupHere) == 0)) - # error <- TRUE - # else{ - # minimumNumberOfMS2PeaksPerGroupHere <- as.numeric(minimumNumberOfMS2PeaksPerGroupHere) - # error <- error | is.na(minimumNumberOfMS2PeaksPerGroupHere) - # } - - if(error){ - setImportState() - output$fileInfo <- renderText({paste("There are invalid parameter values. Please check the parameters and press 'Import MS\u00B9 and MS/MS data' again.")}) - return() - } - - ## box parameters - print(paste("Observe importMs1Ms2Data", "e", error, "mi", minimumIntensityOfMaximalMS2peak, "mp", minimumProportionOfMS2peaks, "ga", mzDeviationAbsolute_grouping, "gr", mzDeviationInPPM_grouping, "pd", doPrecursorDeisotoping, "pa", mzDeviationAbsolute_precursorDeisotoping, "pr", mzDeviationInPPM_precursorDeisotoping, "mr", maximumRtDifference, "fd", doMs2PeakGroupDeisotoping, "fa", mzDeviationAbsolute_ms2PeakGroupDeisotoping, "fr", mzDeviationInPPM_ms2PeakGroupDeisotoping, "pm", proportionOfMatchingPeaks_ms2PeakGroupDeisotopingHere, "ma", mzDeviationAbsolute_mappingHere)) - parameterSet <- list() - parameterSet$projectName <- projectName - parameterSet$projectDescription <- projectDescription - parameterSet$toolVersion <- paste(toolName, toolVersion, sep = " ") - parameterSet$minimumIntensityOfMaximalMS2peak <- minimumIntensityOfMaximalMS2peak - parameterSet$minimumProportionOfMS2peaks <- minimumProportionOfMS2peaks - parameterSet$mzDeviationAbsolute_grouping <- mzDeviationAbsolute_grouping - parameterSet$mzDeviationInPPM_grouping <- mzDeviationInPPM_grouping - parameterSet$doPrecursorDeisotoping <- doPrecursorDeisotoping - parameterSet$mzDeviationAbsolute_precursorDeisotoping <- mzDeviationAbsolute_precursorDeisotoping - parameterSet$mzDeviationInPPM_precursorDeisotoping <- mzDeviationInPPM_precursorDeisotoping - parameterSet$maximumRtDifference <- maximumRtDifference - parameterSet$doMs2PeakGroupDeisotoping <- doMs2PeakGroupDeisotoping - parameterSet$mzDeviationAbsolute_ms2PeakGroupDeisotoping <- mzDeviationAbsolute_ms2PeakGroupDeisotoping - parameterSet$mzDeviationInPPM_ms2PeakGroupDeisotoping <- mzDeviationInPPM_ms2PeakGroupDeisotoping - parameterSet$proportionOfMatchingPeaks_ms2PeakGroupDeisotoping <- proportionOfMatchingPeaks_ms2PeakGroupDeisotopingHere - parameterSet$mzDeviationAbsolute_mapping <- mzDeviationAbsolute_mappingHere - #parameterSet$minimumNumberOfMS2PeaksPerGroup <- minimumNumberOfMS2PeaksPerGroupHere - parameterSet$neutralLossesPrecursorToFragments <- neutralLossesPrecursorToFragments - parameterSet$neutralLossesFragmentsToFragments <- neutralLossesFragmentsToFragments - - ################################################# - ## convert to project file - - ## built matrix - error <- NULL - withProgress(message = 'Generating matrix...', value = 0, { - resultObj <- tryCatch( - { - convertToProjectFile( - filePeakMatrix = fileMs1Path, - fileSpectra = fileMs2Path, - parameterSet = parameterSet, - progress = TRUE - ) - }, error = function(e) { - error <<- e - } - ) - }) - - if(!is.null(error)){ - msg <- paste( - "There occurred an error while processing the input file. Please check the file format and content and try again.", "\n", - "Occurred error: ", error, sep = "" - ) - output$fileInfo <- renderText({msg}) - showErrorDialog(msg) - setImportState() - return() - } - if(length(resultObj) == 1){ - if(resultObj == "Number of spectra is zero"){ - msg <- paste("There are no MS/MS spectra which fulfill the given criteria. Please refine parameter 'Spectrum intensity' and try 'Import MS\u00B9 and MS/MS data' again.") - output$fileInfo <- renderText({msg}) - showErrorDialog(msg) - setImportState() - return() - } - } - - error <- NULL - withProgress(message = 'Processing matrix...', value = 0, { - lines <- sparseMatrixToString(matrixRows = resultObj$matrixRows, matrixCols = resultObj$matrixCols, matrixVals = resultObj$matrixVals, parameterSet = parameterSet) - - ################################################# - ## process project file - - dataList <<- tryCatch({ - readProjectData(fileLines = lines, progress = TRUE) - }, error = function(e) { - error <<- e - } - ) - }) - - if(!is.null(error)){ - msg <- paste( - "There occurred an error while processing the input file. Please check the file format and content and try again.", "\n", - "Occurred error: ", error, sep = "" - ) - output$fileInfo <- renderText({msg}) - showErrorDialog(msg) - setImportState() - return() - } - - print(paste("readProjectData do data finished", dataList$minimumMass)) - - spectraImport <- paste( - ## spectra - resultObj$numberOfParsedSpectra, " / ", resultObj$numberOfSpectraOriginal, " spectra were imported successfully.", - ifelse(test = resultObj$numberOfParsedSpectra < resultObj$numberOfSpectraOriginal, yes = paste(" (",paste( Filter(nchar, c( - ifelse(test = resultObj$numberOfSpectraDiscardedDueToNoPeaks > 0, yes = paste(resultObj$numberOfSpectraDiscardedDueToNoPeaks, " empty", sep = ""), no = ""), - ifelse(test = resultObj$numberOfSpectraDiscardedDueToMaxIntensity > 0, yes = paste(resultObj$numberOfSpectraDiscardedDueToMaxIntensity, " low intensity", sep = ""), no = ""), - ifelse(test = resultObj$numberOfSpectraDiscardedDueToTooHeavy > 0, yes = paste(resultObj$numberOfSpectraDiscardedDueToTooHeavy, " too heavy", sep = ""), no = "") - )), collapse = ", "), ")", sep = ""), no = ""), - sep = "" - ) - spectraMapping <- paste( - ## mapping - resultObj$numberOfPrecursors, " / ", resultObj$numberOfParsedSpectra, " spectra were successfully mapped to MS\u00B9 features.", - ifelse(test = resultObj$numberOfPrecursors < resultObj$numberOfParsedSpectra, yes = paste(" (",paste( Filter(nchar, c( - #ifelse(test = resultObj$numberOfUnmappedPrecursorsMz > 0, yes = paste(resultObj$numberOfUnmappedPrecursorsMz, " with m/z deviation", sep = ""), no = ""), - #ifelse(test = resultObj$numberOfUnmappedPrecursorsRt > 0, yes = paste(resultObj$numberOfUnmappedPrecursorsRt, " with RT deviation", sep = ""), no = "") - ifelse(test = resultObj$numberOfUnmappedSpectra > 0, yes = paste(resultObj$numberOfUnmappedSpectra, " unmapped", sep = ""), no = "") - )), collapse = ", "), ")", sep = ""), no = ""), - sep = "" - ) - fragmentImport <- paste( - ## fragments - resultObj$numberOfMS2PeaksAboveThreshold, " / ", resultObj$numberOfMS2PeaksOriginal, " fragments were successfully imported.", - ifelse(test = resultObj$numberOfMS2PeaksAboveThreshold < resultObj$numberOfMS2PeaksOriginal, yes = paste(" (",paste( Filter(nchar, c( - ifelse(test = resultObj$numberOfTooHeavyFragments > 0, yes = paste(resultObj$numberOfTooHeavyFragments, " too heavy", sep = ""), no = ""), - ifelse(test = resultObj$numberOfMS2PeaksBelowThreshold > 0, yes = paste(resultObj$numberOfMS2PeaksBelowThreshold, " low intensity", sep = ""), no = "") - )), collapse = ", "), ")", sep = ""), no = ""), - sep = "" - ) - featureImport <- paste( - ## MS1 features - resultObj$numberOfPrecursors, " / ", resultObj$numberOfParsedMs1Features, " MS\u00B9 features were successfully imported.", - ifelse(test = resultObj$numberOfPrecursors < resultObj$numberOfParsedMs1Features, yes = paste(" (",paste( Filter(nchar, c( - ifelse(test = resultObj$numberOfRemovedPrecursorIsotopePeaks > 0, yes = paste(resultObj$numberOfRemovedPrecursorIsotopePeaks, " were isotopes", sep = ""), no = ""), - ifelse(test = resultObj$numberOfUnmappedPrecursors > 0, yes = paste(resultObj$numberOfUnmappedPrecursors, " without spectra", sep = ""), no = ""), - ifelse(test = resultObj$numberOfDuplicatedPrecursors > 0, yes = paste(resultObj$numberOfDuplicatedPrecursors, " duplicated", sep = ""), no = "") - )), collapse = ", "), ")", sep = ""), no = ""), - sep = "" - ) - - msg <- paste( - "The data import was successful.", "
", - "
", - spectraImport, "
", - ifelse(test = resultObj$numberOfParsedMs1Features!=-1, yes = paste(spectraMapping, "
", sep = ""), no = ""), - fragmentImport, "
", - ifelse(test = resultObj$numberOfParsedMs1Features!=-1, yes = featureImport, no = ""), - sep = "" - ) - showInfoDialog(msg) - - ## MS2 - # + returnObj$numberOfSpectraOriginal - # + returnObj$numberOfMS2PeaksOriginal - # - returnObj$numberOfMS2PeaksWithNeutralLosses - # + returnObj$numberOfMS2PeaksAboveThreshold - # + returnObj$numberOfMS2PeaksBelowThreshold - # + returnObj$numberOfTooHeavyFragments - # + returnObj$numberOfSpectraDiscardedDueToNoPeaks - # + returnObj$numberOfSpectraDiscardedDueToMaxIntensity - # + returnObj$numberOfSpectraDiscardedDueToTooHeavy - # - ## MS1 - # + returnObj$numberOfPrecursors - # - # + returnObj$numberOfDuplicatedPrecursors - # + returnObj$numberOfUnmappedPrecursors - # + returnObj$numberOfUnmappedPrecursorsMz - # + returnObj$numberOfUnmappedPrecursorsRt - # + returnObj$numberOfParsedSpectra - # + returnObj$numberOfParsedMs1Features - # + returnObj$numberOfRemovedPrecursorIsotopePeaks - - - resetWorkspace() - - if(importMS1andMS2data) - state_tabInput$importedOrLoadedFile_s_ <<- c(fileMs1Name, fileMs2Name) - else - state_tabInput$importedOrLoadedFile_s_ <<- c(fileMs2Name) - updateFileInputInfo() - - setImportState() -} -obsFileInputSelection <- observeEvent(input$fileInputSelection, { - updateFileInputInfo() -}) -obsApplyImportParameterFile <- observeEvent(input$importParameterFileInput$datapath, { - filePath <- input$importParameterFileInput$datapath - fileName <- input$importParameterFileInput$name - print(paste("Observe importParameterFile", fileName)) - if(is.null(filePath)) - return() - - ## read and parse - fileContent <- readLines(con = filePath) - parameterSet <- deserializeParameterSetFile(fileContent) - - ## apply - #projectName2 <- parameterSet$projectName - #projectName2 <- paste(projectName2, " adopted", sep = "") - #updateTextInput (session = session, inputId = "projectName", value = projectName2) - #parameterSet$toolVersion - updateTextInput (session = session, inputId = "minimumIntensityOfMaximalMS2peak", value = parameterSet$minimumIntensityOfMaximalMS2peak) - updateTextInput (session = session, inputId = "minimumProportionOfMS2peaks", value = parameterSet$minimumProportionOfMS2peaks) - updateTextInput (session = session, inputId = "mzDeviationAbsolute_grouping", value = parameterSet$mzDeviationAbsolute_grouping) - updateTextInput (session = session, inputId = "mzDeviationInPPM_grouping", value = parameterSet$mzDeviationInPPM_grouping) - updateCheckboxInput(session = session, inputId = "doPrecursorDeisotoping", value = parameterSet$doPrecursorDeisotoping) - updateTextInput (session = session, inputId = "mzDeviationAbsolute_precursorDeisotoping", value = parameterSet$mzDeviationAbsolute_precursorDeisotoping) - updateTextInput (session = session, inputId = "mzDeviationInPPM_precursorDeisotoping", value = parameterSet$mzDeviationInPPM_precursorDeisotoping) - updateTextInput (session = session, inputId = "maximumRtDifference", value = parameterSet$maximumRtDifference) - updateCheckboxInput(session = session, inputId = "doMs2PeakGroupDeisotoping", value = parameterSet$doMs2PeakGroupDeisotoping) - updateTextInput (session = session, inputId = "mzDeviationAbsolute_ms2PeakGroupDeisotoping", value = parameterSet$mzDeviationAbsolute_ms2PeakGroupDeisotoping) - updateTextInput (session = session, inputId = "mzDeviationInPPM_ms2PeakGroupDeisotoping", value = parameterSet$mzDeviationInPPM_ms2PeakGroupDeisotoping) - #parameterSet$proportionOfMatchingPeaks_ms2PeakGroupDeisotoping - #parameterSet$mzDeviationAbsolute_mapping - #parameterSet$minimumNumberOfMS2PeaksPerGroup - updateCheckboxInput(session = session, inputId = "neutralLossesPrecursorToFragments", value = parameterSet$neutralLossesPrecursorToFragments) - updateCheckboxInput(session = session, inputId = "neutralLossesFragmentsToFragments", value = parameterSet$neutralLossesFragmentsToFragments) -}) -updateFileInputInfo <- function(){ - fileInputSelection <- input$fileInputSelection - filePath <- input$matrixFile$datapath - fileName <- input$matrixFile$name - fileMs1Path <- input$ms1DataFile$datapath - fileMs1Name <- input$ms1DataFile$name - fileMs2Path <- input$ms2DataFile$datapath - fileMs2Name <- input$ms2DataFile$name - #exampleDataSelection <- input$exampleDataSelection - - if(all(fileInputSelection == "Example data")) - output$fileInfo <- renderText({paste("Please press 'Load example data' to load the full example data set")}) - if(all(fileInputSelection == "Load project", is.null(filePath))) - output$fileInfo <- renderText({paste("Please select a project file and press 'Load project data'")}) - if(all(fileInputSelection == "Load project", !is.null(filePath), any(is.null(state_tabInput$importedOrLoadedFile_s_), fileName != state_tabInput$importedOrLoadedFile_s_))) - output$fileInfo <- renderText({paste("Please press 'Load project data'")}) - if(all(fileInputSelection == "Load project", !is.null(filePath), !is.null(state_tabInput$importedOrLoadedFile_s_), fileName == state_tabInput$importedOrLoadedFile_s_)) - output$fileInfo <- renderText({paste(fileName)}) - if(all(fileInputSelection == "Import data", is.null(fileMs1Path), is.null(fileMs2Path))) - output$fileInfo <- renderText({paste("Please select a metabolite profile and a MS/MS library and press 'Import MS1 and MS/MS data' or select a MS/MS library and press 'Import MS/MS data'")}) - if(all(fileInputSelection == "Import data", is.null(fileMs1Path), !is.null(fileMs2Path))) - output$fileInfo <- renderText({paste("Please press 'Import MS/MS data' or select a metabolite profile and press 'Import MS1 and MS/MS data'")}) - if(all(fileInputSelection == "Import data", !is.null(fileMs1Path), is.null(fileMs2Path))) - output$fileInfo <- renderText({paste("Please select a MS/MS library and press 'Import MS1 and MS/MS data'")}) - if(all(fileInputSelection == "Import data", !is.null(fileMs1Path), !is.null(fileMs2Path), any(is.null(state_tabInput$importedOrLoadedFile_s_), c(fileMs1Name,fileMs2Name) != state_tabInput$importedOrLoadedFile_s_))) - output$fileInfo <- renderText({paste("Please press 'Import MS1 and MS/MS data'")}) - if(all(fileInputSelection == "Import data", !is.null(fileMs1Path), !is.null(fileMs2Path), !is.null(state_tabInput$importedOrLoadedFile_s_), c(fileMs1Name,fileMs2Name) == state_tabInput$importedOrLoadedFile_s_)) - output$fileInfo <- renderText({paste(fileMs1Name, "\n", fileMs2Name, sep = "")}) -} - -output$fileInfo <- renderText({ - print(paste("init output$fileInfo")) - paste("Please select a project file and press 'Load project data'") -}) - -suspendOnExitFunctions <- c(suspendOnExitFunctions, function(){ - print("Suspending tabInput observers") - obsFile$suspend() - obsLoadProjectData$suspend() - obsLoadExampleData$suspend() - obsImportMs1DataFile$suspend() - obsImportMs2DataFile$suspend() - obsImportMs1Ms2Data$suspend() - obsImportMs2Data$suspend() - obsFileInputSelection$suspend() - obsApplyImportParameterFile$suspend() -}) + +## data import: fixed parameters +proportionOfMatchingPeaks_ms2PeakGroupDeisotoping <- 0.9 +mzDeviationAbsolute_mapping <- 0.01 +#minimumNumberOfMS2PeaksPerGroup <- 1 + +## data +dataList <- NULL +state_tabInput <- reactiveValues( + importedOrLoadedFile_s_ = NULL +) +resetWorkspaceFunctions <- c(resetWorkspaceFunctions, function(){ + print("Reset tabInput state") + state_tabInput$importedOrLoadedFile_s_ <<- NULL +}) + +enableLoadButtons <- function(){ + #session$sendCustomMessage("enableButton", "loadProjectData") + #session$sendCustomMessage("enableButton", "loadExampleData") + #session$sendCustomMessage("enableButton", "importMs1Ms2Data") + #session$sendCustomMessage("enableButton", "importMs2Data") + shinyjs::enable("loadProjectData") + shinyjs::enable("loadExampleData") + shinyjs::enable("importMs1Ms2Data") + shinyjs::enable("importMs2Data") +} +disableLoadButtons <- function(){ + session$sendCustomMessage("disableButton", "loadProjectData") + session$sendCustomMessage("disableButton", "loadExampleData") + session$sendCustomMessage("disableButton", "importMs1Ms2Data") + session$sendCustomMessage("disableButton", "importMs2Data") + shinyjs::disable("loadProjectData") + shinyjs::disable("loadExampleData") + shinyjs::disable("importMs1Ms2Data") + shinyjs::disable("importMs2Data") +} +obsFile <- observeEvent(input$matrixFile$datapath, { + filePath <- input$matrixFile$datapath + fileName <- input$matrixFile$name + print(paste("Observe file for data", fileName)) + if(!is.null(filePath)) + shinyjs::enable("loadProjectData") + + updateFileInputInfo() +}) +obsLoadProjectData <- observeEvent(input$loadProjectData, { + disableLoadButtons() + loadProjectData <- as.numeric(input$loadProjectData) + print(paste("Observe loadProjectData", loadProjectData)) + + ################################################# + ## check if button was hit + #if(loadProjectData == loadProjectDataButtonValue) + # return() + #loadProjectDataButtonValue <<- loadProjectData + + ################################################# + ## files + filePath <- input$matrixFile$datapath + loadProjectFile(filePath = filePath) + enableLoadButtons() +}) +obsLoadExampleData <- observeEvent(input$loadExampleData, { + disableLoadButtons() + loadExampleData <- as.numeric(input$loadExampleData) + print(paste("Observe loadExampleData", loadExampleData)) + + ################################################# + ## check if button was hit + #if(loadExampleData == loadExampleDataButtonValue) + # return() + #loadExampleDataButtonValue <<- loadExampleData + + ################################################# + ## files + filePath <- system.file("extdata/showcase/Project_file_showcase_annotated.csv.gz", package = "MetFamily") + + loadProjectFile(filePath = filePath) + enableLoadButtons() +}) +loadProjectFile <- function(filePath){ + fileName <- basename(filePath) + ######################################################################################### + ## read data + + error <<- NULL + withProgress(message = 'Reading file...', value = 0, { + dataList <<- tryCatch( + { + readClusterDataFromProjectFile(file = filePath, progress = TRUE) + }, + error = function(e) { + print(e) + error <<- e + } + ) + }) + + if(!is.null(error)){ + print(paste("readClusterDataFromProjectFile resulted in error:", error)) + msg <- paste("An error occurred while reading the input files. Please check the file format and content and try again. The error was", error) + output$fileInfo <- renderText({msg}) + #session$sendCustomMessage("enableButton", buttonId) + #shinyBS::addPopover(session = session, id = "fileInputSelection", title = "Error", content = "huhu") + + msg <- paste( + "An error occurred while reading the input files.", + "Please check the file format and content and try again.", + "The error was:", + "
", + error + ) + showErrorDialog(msg) + + return() + } + print(paste("readClusterDataFromProjectFile finished", dataList$minimumMass)) + + resetWorkspace() + + state_tabInput$importedOrLoadedFile_s_ <<- fileName + updateFileInputInfo() +} +obsImportMs1DataFile <- observeEvent(input$ms1DataFile$datapath, { + fileMs1Path <- input$ms1DataFile$datapath + fileMs1Name <- input$ms1DataFile$name + fileMs2Path <- input$ms2DataFile$datapath + fileMs2Name <- input$ms2DataFile$name + print(paste("Observe import MS1 file", fileMs1Name)) + + if(all(!is.null(fileMs1Path), !is.null(fileMs2Path))) + shinyjs::enable("importMs1Ms2Data") + else + shinyjs::disable("importMs1Ms2Data") + + updateFileInputInfo() +}) +obsImportMs2DataFile <- observeEvent(input$ms2DataFile$datapath, { + setImportState() +}) +setImportState <- function(){ + fileMs1Path <- input$ms1DataFile$datapath + fileMs1Name <- input$ms1DataFile$name + fileMs2Path <- input$ms2DataFile$datapath + fileMs2Name <- input$ms2DataFile$name + print(paste("Observe import MS2 file", fileMs2Name)) + + if(all(!is.null(fileMs1Path), !is.null(fileMs2Path))) + shinyjs::enable("importMs1Ms2Data") + else + shinyjs::disable("importMs1Ms2Data") + + if(!is.null(fileMs2Path)) + shinyjs::enable("importMs2Data") + else + shinyjs::disable("importMs2Data") + + updateFileInputInfo() +} +obsImportMs1Ms2Data <- observeEvent(input$importMs1Ms2Data, { + disableLoadButtons() + importMs1Ms2Data <- as.numeric(input$importMs1Ms2Data) + + print(paste("Observe importMs1Ms2Data", importMs1Ms2Data)) + + ################################################# + ## check if button was hit + #if(importMs1Ms2Data == importMs1Ms2DataButtonValue) + # return() + #importMs1Ms2DataButtonValue <<- importMs1Ms2Data + + importData(TRUE) + enableLoadButtons() +}) +obsImportMs2Data <- observeEvent(input$importMs2Data, { + enableLoadButtons() + importMs2Data <- as.numeric(input$importMs2Data) + + print(paste("Observe importMs2Data", importMs2Data)) + + ################################################# + ## check if button was hit + #if(importMs2Data == importMs2DataButtonValue) + # return() + #importMs2DataButtonValue <<- importMs2Data + + importData(FALSE) + disableLoadButtons() +}) +importData <- function(importMS1andMS2data){ + ################################################# + ## files + if(importMS1andMS2data){ + fileMs1Path <- input$ms1DataFile$datapath + fileMs1Name <- input$ms1DataFile$name + } else { + fileMs1Path <- NULL + fileMs1Name <- NULL + } + fileMs2Path <- input$ms2DataFile$datapath + fileMs2Name <- input$ms2DataFile$name + + ################################################# + ## params + + ## project name + projectName <- input$projectName + projectName <- gsub(";", "_", gsub(",", "_", gsub("\t", "_", projectName))) + projectDescription <- input$projectDescription + projectDescription <- gsub(";", "_", gsub(",", "_", gsub("\t", "_", projectDescription))) + + ## minimum MS2 peak intensity + minimumIntensityOfMaximalMS2peak <- input$minimumIntensityOfMaximalMS2peak + minimumProportionOfMS2peaks <- input$minimumProportionOfMS2peaks + ## grouping of MS2 peaks + mzDeviationAbsolute_grouping <- input$mzDeviationAbsolute_grouping + mzDeviationInPPM_grouping <- input$mzDeviationInPPM_grouping + ## precursor deisotoping + doPrecursorDeisotoping <- input$doPrecursorDeisotoping + mzDeviationAbsolute_precursorDeisotoping <- input$mzDeviationAbsolute_precursorDeisotoping + mzDeviationInPPM_precursorDeisotoping <- input$mzDeviationInPPM_precursorDeisotoping + maximumRtDifference <- input$maximumRtDifference + ## fragment deisotoping + doMs2PeakGroupDeisotoping <- input$doMs2PeakGroupDeisotoping + mzDeviationAbsolute_ms2PeakGroupDeisotoping <- input$mzDeviationAbsolute_ms2PeakGroupDeisotoping + mzDeviationInPPM_ms2PeakGroupDeisotoping <- input$mzDeviationInPPM_ms2PeakGroupDeisotoping + ## neutral losses + neutralLossesPrecursorToFragments <- input$neutralLossesPrecursorToFragments + neutralLossesFragmentsToFragments <- input$neutralLossesFragmentsToFragments + #neutralLossesPrecursorToFragments <- TRUE + #neutralLossesFragmentsToFragments <- FALSE + + ## fixed + proportionOfMatchingPeaks_ms2PeakGroupDeisotopingHere <- proportionOfMatchingPeaks_ms2PeakGroupDeisotoping + mzDeviationAbsolute_mappingHere <- mzDeviationAbsolute_mapping + #minimumNumberOfMS2PeaksPerGroupHere <- minimumNumberOfMS2PeaksPerGroup + + ################################################# + ## check params + error <- FALSE + if(any(is.null(minimumIntensityOfMaximalMS2peak), length(minimumIntensityOfMaximalMS2peak) == 0, nchar(minimumIntensityOfMaximalMS2peak) == 0)) + error <- TRUE + else{ + minimumIntensityOfMaximalMS2peak <- as.numeric(minimumIntensityOfMaximalMS2peak) + error <- error | is.na(minimumIntensityOfMaximalMS2peak) + } + if(any(is.null(minimumProportionOfMS2peaks), length(minimumProportionOfMS2peaks) == 0, nchar(minimumProportionOfMS2peaks) == 0)) + error <- TRUE + else{ + minimumProportionOfMS2peaks <- as.numeric(minimumProportionOfMS2peaks) + error <- error | is.na(minimumProportionOfMS2peaks) + } + if(any(is.null(mzDeviationAbsolute_grouping), length(mzDeviationAbsolute_grouping) == 0, nchar(mzDeviationAbsolute_grouping) == 0)) + error <- TRUE + else{ + mzDeviationAbsolute_grouping <- as.numeric(mzDeviationAbsolute_grouping) + error <- error | is.na(mzDeviationAbsolute_grouping) + } + if(any(is.null(mzDeviationInPPM_grouping), length(mzDeviationInPPM_grouping) == 0, nchar(mzDeviationInPPM_grouping) == 0)) + error <- TRUE + else{ + mzDeviationInPPM_grouping <- as.numeric(mzDeviationInPPM_grouping) + error <- error | is.na(mzDeviationInPPM_grouping) + } + if(doPrecursorDeisotoping){ + if(any(is.null(mzDeviationAbsolute_precursorDeisotoping), length(mzDeviationAbsolute_precursorDeisotoping) == 0, nchar(mzDeviationAbsolute_precursorDeisotoping) == 0)) + error <- TRUE + else{ + mzDeviationAbsolute_precursorDeisotoping <- as.numeric(mzDeviationAbsolute_precursorDeisotoping) + error <- error | is.na(mzDeviationAbsolute_precursorDeisotoping) + } + if(any(is.null(mzDeviationInPPM_precursorDeisotoping), length(mzDeviationInPPM_precursorDeisotoping) == 0, nchar(mzDeviationInPPM_precursorDeisotoping) == 0)) + error <- TRUE + else{ + mzDeviationInPPM_precursorDeisotoping <- as.numeric(mzDeviationInPPM_precursorDeisotoping) + error <- error | is.na(mzDeviationInPPM_precursorDeisotoping) + } + } + if(any(is.null(maximumRtDifference), length(maximumRtDifference) == 0, nchar(maximumRtDifference) == 0)) + error <- TRUE + else{ + maximumRtDifference <- as.numeric(maximumRtDifference) + error <- error | is.na(maximumRtDifference) + } + if(doMs2PeakGroupDeisotoping){ + if(any(is.null(mzDeviationAbsolute_ms2PeakGroupDeisotoping), length(mzDeviationAbsolute_ms2PeakGroupDeisotoping) == 0, nchar(mzDeviationAbsolute_ms2PeakGroupDeisotoping) == 0)) + error <- TRUE + else{ + mzDeviationAbsolute_ms2PeakGroupDeisotoping <- as.numeric(mzDeviationAbsolute_ms2PeakGroupDeisotoping) + error <- error | is.na(mzDeviationAbsolute_ms2PeakGroupDeisotoping) + } + if(any(is.null(mzDeviationInPPM_ms2PeakGroupDeisotoping), length(mzDeviationInPPM_ms2PeakGroupDeisotoping) == 0, nchar(mzDeviationInPPM_ms2PeakGroupDeisotoping) == 0)) + error <- TRUE + else{ + mzDeviationInPPM_ms2PeakGroupDeisotoping <- as.numeric(mzDeviationInPPM_ms2PeakGroupDeisotoping) + error <- error | is.na(mzDeviationInPPM_ms2PeakGroupDeisotoping) + } + } + + if(any(is.null(proportionOfMatchingPeaks_ms2PeakGroupDeisotopingHere), length(proportionOfMatchingPeaks_ms2PeakGroupDeisotopingHere) == 0, nchar(proportionOfMatchingPeaks_ms2PeakGroupDeisotopingHere) == 0)) + error <- TRUE + else{ + proportionOfMatchingPeaks_ms2PeakGroupDeisotopingHere <- as.numeric(proportionOfMatchingPeaks_ms2PeakGroupDeisotopingHere) + error <- error | is.na(proportionOfMatchingPeaks_ms2PeakGroupDeisotopingHere) + } + if(any(is.null(mzDeviationAbsolute_mappingHere), length(mzDeviationAbsolute_mappingHere) == 0, nchar(mzDeviationAbsolute_mappingHere) == 0)) + error <- TRUE + else{ + mzDeviationAbsolute_mappingHere <- as.numeric(mzDeviationAbsolute_mappingHere) + error <- error | is.na(mzDeviationAbsolute_mappingHere) + } + # if(any(is.null(minimumNumberOfMS2PeaksPerGroupHere), length(minimumNumberOfMS2PeaksPerGroupHere) == 0, nchar(minimumNumberOfMS2PeaksPerGroupHere) == 0)) + # error <- TRUE + # else{ + # minimumNumberOfMS2PeaksPerGroupHere <- as.numeric(minimumNumberOfMS2PeaksPerGroupHere) + # error <- error | is.na(minimumNumberOfMS2PeaksPerGroupHere) + # } + + if(error){ + setImportState() + output$fileInfo <- renderText({paste("There are invalid parameter values. Please check the parameters and press 'Import MS\u00B9 and MS/MS data' again.")}) + return() + } + + ## box parameters + print(paste("Observe importMs1Ms2Data", "e", error, "mi", minimumIntensityOfMaximalMS2peak, "mp", minimumProportionOfMS2peaks, "ga", mzDeviationAbsolute_grouping, "gr", mzDeviationInPPM_grouping, "pd", doPrecursorDeisotoping, "pa", mzDeviationAbsolute_precursorDeisotoping, "pr", mzDeviationInPPM_precursorDeisotoping, "mr", maximumRtDifference, "fd", doMs2PeakGroupDeisotoping, "fa", mzDeviationAbsolute_ms2PeakGroupDeisotoping, "fr", mzDeviationInPPM_ms2PeakGroupDeisotoping, "pm", proportionOfMatchingPeaks_ms2PeakGroupDeisotopingHere, "ma", mzDeviationAbsolute_mappingHere)) + parameterSet <- list() + parameterSet$projectName <- projectName + parameterSet$projectDescription <- projectDescription + parameterSet$toolVersion <- paste(toolName, toolVersion, sep = " ") + parameterSet$minimumIntensityOfMaximalMS2peak <- minimumIntensityOfMaximalMS2peak + parameterSet$minimumProportionOfMS2peaks <- minimumProportionOfMS2peaks + parameterSet$mzDeviationAbsolute_grouping <- mzDeviationAbsolute_grouping + parameterSet$mzDeviationInPPM_grouping <- mzDeviationInPPM_grouping + parameterSet$doPrecursorDeisotoping <- doPrecursorDeisotoping + parameterSet$mzDeviationAbsolute_precursorDeisotoping <- mzDeviationAbsolute_precursorDeisotoping + parameterSet$mzDeviationInPPM_precursorDeisotoping <- mzDeviationInPPM_precursorDeisotoping + parameterSet$maximumRtDifference <- maximumRtDifference + parameterSet$doMs2PeakGroupDeisotoping <- doMs2PeakGroupDeisotoping + parameterSet$mzDeviationAbsolute_ms2PeakGroupDeisotoping <- mzDeviationAbsolute_ms2PeakGroupDeisotoping + parameterSet$mzDeviationInPPM_ms2PeakGroupDeisotoping <- mzDeviationInPPM_ms2PeakGroupDeisotoping + parameterSet$proportionOfMatchingPeaks_ms2PeakGroupDeisotoping <- proportionOfMatchingPeaks_ms2PeakGroupDeisotopingHere + parameterSet$mzDeviationAbsolute_mapping <- mzDeviationAbsolute_mappingHere + #parameterSet$minimumNumberOfMS2PeaksPerGroup <- minimumNumberOfMS2PeaksPerGroupHere + parameterSet$neutralLossesPrecursorToFragments <- neutralLossesPrecursorToFragments + parameterSet$neutralLossesFragmentsToFragments <- neutralLossesFragmentsToFragments + + ################################################# + ## convert to project file + + ## built matrix + error <- NULL + withProgress(message = 'Generating matrix...', value = 0, { + resultObj <- tryCatch( + { + convertToProjectFile( + filePeakMatrix = fileMs1Path, + fileSpectra = fileMs2Path, + parameterSet = parameterSet, + progress = TRUE + ) + }, error = function(e) { + error <<- e + } + ) + }) + + if(!is.null(error)){ + msg <- paste( + "There occurred an error while processing the input file. Please check the file format and content and try again.", "\n", + "Occurred error: ", error, sep = "" + ) + output$fileInfo <- renderText({msg}) + showErrorDialog(msg) + setImportState() + return() + } + if(length(resultObj) == 1){ + if(resultObj == "Number of spectra is zero"){ + msg <- paste("There are no MS/MS spectra which fulfill the given criteria. Please refine parameter 'Spectrum intensity' and try 'Import MS\u00B9 and MS/MS data' again.") + output$fileInfo <- renderText({msg}) + showErrorDialog(msg) + setImportState() + return() + } + } + + error <- NULL + withProgress(message = 'Processing matrix...', value = 0, { + lines <- sparseMatrixToString(matrixRows = resultObj$matrixRows, matrixCols = resultObj$matrixCols, matrixVals = resultObj$matrixVals, parameterSet = parameterSet) + + ################################################# + ## process project file + + dataList <<- tryCatch({ + readProjectData(fileLines = lines, progress = TRUE) + }, error = function(e) { + error <<- e + } + ) + }) + + if(!is.null(error)){ + msg <- paste( + "There occurred an error while processing the input file. Please check the file format and content and try again.", "\n", + "Occurred error: ", error, sep = "" + ) + output$fileInfo <- renderText({msg}) + showErrorDialog(msg) + setImportState() + return() + } + + print(paste("readProjectData do data finished", dataList$minimumMass)) + + spectraImport <- paste( + ## spectra + resultObj$numberOfParsedSpectra, " / ", resultObj$numberOfSpectraOriginal, " spectra were imported successfully.", + ifelse(test = resultObj$numberOfParsedSpectra < resultObj$numberOfSpectraOriginal, yes = paste(" (",paste( Filter(nchar, c( + ifelse(test = resultObj$numberOfSpectraDiscardedDueToNoPeaks > 0, yes = paste(resultObj$numberOfSpectraDiscardedDueToNoPeaks, " empty", sep = ""), no = ""), + ifelse(test = resultObj$numberOfSpectraDiscardedDueToMaxIntensity > 0, yes = paste(resultObj$numberOfSpectraDiscardedDueToMaxIntensity, " low intensity", sep = ""), no = ""), + ifelse(test = resultObj$numberOfSpectraDiscardedDueToTooHeavy > 0, yes = paste(resultObj$numberOfSpectraDiscardedDueToTooHeavy, " too heavy", sep = ""), no = "") + )), collapse = ", "), ")", sep = ""), no = ""), + sep = "" + ) + spectraMapping <- paste( + ## mapping + resultObj$numberOfPrecursors, " / ", resultObj$numberOfParsedSpectra, " spectra were successfully mapped to MS\u00B9 features.", + ifelse(test = resultObj$numberOfPrecursors < resultObj$numberOfParsedSpectra, yes = paste(" (",paste( Filter(nchar, c( + #ifelse(test = resultObj$numberOfUnmappedPrecursorsMz > 0, yes = paste(resultObj$numberOfUnmappedPrecursorsMz, " with m/z deviation", sep = ""), no = ""), + #ifelse(test = resultObj$numberOfUnmappedPrecursorsRt > 0, yes = paste(resultObj$numberOfUnmappedPrecursorsRt, " with RT deviation", sep = ""), no = "") + ifelse(test = resultObj$numberOfUnmappedSpectra > 0, yes = paste(resultObj$numberOfUnmappedSpectra, " unmapped", sep = ""), no = "") + )), collapse = ", "), ")", sep = ""), no = ""), + sep = "" + ) + fragmentImport <- paste( + ## fragments + resultObj$numberOfMS2PeaksAboveThreshold, " / ", resultObj$numberOfMS2PeaksOriginal, " fragments were successfully imported.", + ifelse(test = resultObj$numberOfMS2PeaksAboveThreshold < resultObj$numberOfMS2PeaksOriginal, yes = paste(" (",paste( Filter(nchar, c( + ifelse(test = resultObj$numberOfTooHeavyFragments > 0, yes = paste(resultObj$numberOfTooHeavyFragments, " too heavy", sep = ""), no = ""), + ifelse(test = resultObj$numberOfMS2PeaksBelowThreshold > 0, yes = paste(resultObj$numberOfMS2PeaksBelowThreshold, " low intensity", sep = ""), no = "") + )), collapse = ", "), ")", sep = ""), no = ""), + sep = "" + ) + featureImport <- paste( + ## MS1 features + resultObj$numberOfPrecursors, " / ", resultObj$numberOfParsedMs1Features, " MS\u00B9 features were successfully imported.", + ifelse(test = resultObj$numberOfPrecursors < resultObj$numberOfParsedMs1Features, yes = paste(" (",paste( Filter(nchar, c( + ifelse(test = resultObj$numberOfRemovedPrecursorIsotopePeaks > 0, yes = paste(resultObj$numberOfRemovedPrecursorIsotopePeaks, " were isotopes", sep = ""), no = ""), + ifelse(test = resultObj$numberOfUnmappedPrecursors > 0, yes = paste(resultObj$numberOfUnmappedPrecursors, " without spectra", sep = ""), no = ""), + ifelse(test = resultObj$numberOfDuplicatedPrecursors > 0, yes = paste(resultObj$numberOfDuplicatedPrecursors, " duplicated", sep = ""), no = "") + )), collapse = ", "), ")", sep = ""), no = ""), + sep = "" + ) + + msg <- paste( + "The data import was successful.", "
", + "
", + spectraImport, "
", + ifelse(test = resultObj$numberOfParsedMs1Features!=-1, yes = paste(spectraMapping, "
", sep = ""), no = ""), + fragmentImport, "
", + ifelse(test = resultObj$numberOfParsedMs1Features!=-1, yes = featureImport, no = ""), + sep = "" + ) + showInfoDialog(msg) + + ## MS2 + # + returnObj$numberOfSpectraOriginal + # + returnObj$numberOfMS2PeaksOriginal + # - returnObj$numberOfMS2PeaksWithNeutralLosses + # + returnObj$numberOfMS2PeaksAboveThreshold + # + returnObj$numberOfMS2PeaksBelowThreshold + # + returnObj$numberOfTooHeavyFragments + # + returnObj$numberOfSpectraDiscardedDueToNoPeaks + # + returnObj$numberOfSpectraDiscardedDueToMaxIntensity + # + returnObj$numberOfSpectraDiscardedDueToTooHeavy + # + ## MS1 + # + returnObj$numberOfPrecursors + # + # + returnObj$numberOfDuplicatedPrecursors + # + returnObj$numberOfUnmappedPrecursors + # + returnObj$numberOfUnmappedPrecursorsMz + # + returnObj$numberOfUnmappedPrecursorsRt + # + returnObj$numberOfParsedSpectra + # + returnObj$numberOfParsedMs1Features + # + returnObj$numberOfRemovedPrecursorIsotopePeaks + + + resetWorkspace() + + if(importMS1andMS2data) + state_tabInput$importedOrLoadedFile_s_ <<- c(fileMs1Name, fileMs2Name) + else + state_tabInput$importedOrLoadedFile_s_ <<- c(fileMs2Name) + updateFileInputInfo() + + setImportState() +} +obsFileInputSelection <- observeEvent(input$fileInputSelection, { + updateFileInputInfo() +}) +obsApplyImportParameterFile <- observeEvent(input$importParameterFileInput$datapath, { + filePath <- input$importParameterFileInput$datapath + fileName <- input$importParameterFileInput$name + print(paste("Observe importParameterFile", fileName)) + if(is.null(filePath)) + return() + + ## read and parse + fileContent <- readLines(con = filePath) + parameterSet <- deserializeParameterSetFile(fileContent) + + ## apply + #projectName2 <- parameterSet$projectName + #projectName2 <- paste(projectName2, " adopted", sep = "") + #updateTextInput (session = session, inputId = "projectName", value = projectName2) + #parameterSet$toolVersion + updateTextInput (session = session, inputId = "minimumIntensityOfMaximalMS2peak", value = parameterSet$minimumIntensityOfMaximalMS2peak) + updateTextInput (session = session, inputId = "minimumProportionOfMS2peaks", value = parameterSet$minimumProportionOfMS2peaks) + updateTextInput (session = session, inputId = "mzDeviationAbsolute_grouping", value = parameterSet$mzDeviationAbsolute_grouping) + updateTextInput (session = session, inputId = "mzDeviationInPPM_grouping", value = parameterSet$mzDeviationInPPM_grouping) + updateCheckboxInput(session = session, inputId = "doPrecursorDeisotoping", value = parameterSet$doPrecursorDeisotoping) + updateTextInput (session = session, inputId = "mzDeviationAbsolute_precursorDeisotoping", value = parameterSet$mzDeviationAbsolute_precursorDeisotoping) + updateTextInput (session = session, inputId = "mzDeviationInPPM_precursorDeisotoping", value = parameterSet$mzDeviationInPPM_precursorDeisotoping) + updateTextInput (session = session, inputId = "maximumRtDifference", value = parameterSet$maximumRtDifference) + updateCheckboxInput(session = session, inputId = "doMs2PeakGroupDeisotoping", value = parameterSet$doMs2PeakGroupDeisotoping) + updateTextInput (session = session, inputId = "mzDeviationAbsolute_ms2PeakGroupDeisotoping", value = parameterSet$mzDeviationAbsolute_ms2PeakGroupDeisotoping) + updateTextInput (session = session, inputId = "mzDeviationInPPM_ms2PeakGroupDeisotoping", value = parameterSet$mzDeviationInPPM_ms2PeakGroupDeisotoping) + #parameterSet$proportionOfMatchingPeaks_ms2PeakGroupDeisotoping + #parameterSet$mzDeviationAbsolute_mapping + #parameterSet$minimumNumberOfMS2PeaksPerGroup + updateCheckboxInput(session = session, inputId = "neutralLossesPrecursorToFragments", value = parameterSet$neutralLossesPrecursorToFragments) + updateCheckboxInput(session = session, inputId = "neutralLossesFragmentsToFragments", value = parameterSet$neutralLossesFragmentsToFragments) +}) +updateFileInputInfo <- function(){ + fileInputSelection <- input$fileInputSelection + filePath <- input$matrixFile$datapath + fileName <- input$matrixFile$name + fileMs1Path <- input$ms1DataFile$datapath + fileMs1Name <- input$ms1DataFile$name + fileMs2Path <- input$ms2DataFile$datapath + fileMs2Name <- input$ms2DataFile$name + #exampleDataSelection <- input$exampleDataSelection + + if(all(fileInputSelection == "Example data")) + output$fileInfo <- renderText({paste("Please press 'Load example data' to load the full example data set")}) + if(all(fileInputSelection == "Load project", is.null(filePath))) + output$fileInfo <- renderText({paste("Please select a project file and press 'Load project data'")}) + if(all(fileInputSelection == "Load project", !is.null(filePath), any(is.null(state_tabInput$importedOrLoadedFile_s_), fileName != state_tabInput$importedOrLoadedFile_s_))) + output$fileInfo <- renderText({paste("Please press 'Load project data'")}) + if(all(fileInputSelection == "Load project", !is.null(filePath), !is.null(state_tabInput$importedOrLoadedFile_s_), fileName == state_tabInput$importedOrLoadedFile_s_)) + output$fileInfo <- renderText({paste(fileName)}) + if(all(fileInputSelection == "Import data", is.null(fileMs1Path), is.null(fileMs2Path))) + output$fileInfo <- renderText({paste("Please select a metabolite profile and a MS/MS library and press 'Import MS1 and MS/MS data' or select a MS/MS library and press 'Import MS/MS data'")}) + if(all(fileInputSelection == "Import data", is.null(fileMs1Path), !is.null(fileMs2Path))) + output$fileInfo <- renderText({paste("Please press 'Import MS/MS data' or select a metabolite profile and press 'Import MS1 and MS/MS data'")}) + if(all(fileInputSelection == "Import data", !is.null(fileMs1Path), is.null(fileMs2Path))) + output$fileInfo <- renderText({paste("Please select a MS/MS library and press 'Import MS1 and MS/MS data'")}) + if(all(fileInputSelection == "Import data", !is.null(fileMs1Path), !is.null(fileMs2Path), any(is.null(state_tabInput$importedOrLoadedFile_s_), c(fileMs1Name,fileMs2Name) != state_tabInput$importedOrLoadedFile_s_))) + output$fileInfo <- renderText({paste("Please press 'Import MS1 and MS/MS data'")}) + if(all(fileInputSelection == "Import data", !is.null(fileMs1Path), !is.null(fileMs2Path), !is.null(state_tabInput$importedOrLoadedFile_s_), c(fileMs1Name,fileMs2Name) == state_tabInput$importedOrLoadedFile_s_)) + output$fileInfo <- renderText({paste(fileMs1Name, "\n", fileMs2Name, sep = "")}) +} + +output$fileInfo <- renderText({ + print(paste("init output$fileInfo")) + paste("Please select a project file and press 'Load project data'") +}) + +suspendOnExitFunctions <- c(suspendOnExitFunctions, function(){ + print("Suspending tabInput observers") + obsFile$suspend() + obsLoadProjectData$suspend() + obsLoadExampleData$suspend() + obsImportMs1DataFile$suspend() + obsImportMs2DataFile$suspend() + obsImportMs1Ms2Data$suspend() + obsImportMs2Data$suspend() + obsFileInputSelection$suspend() + obsApplyImportParameterFile$suspend() +}) diff --git a/inst/MetFamily/app_files/server_guiTabPca.R b/inst/MetFamily/app_files/server_guiTabPca.R index 7e1b1bf..f5e09ab 100644 --- a/inst/MetFamily/app_files/server_guiTabPca.R +++ b/inst/MetFamily/app_files/server_guiTabPca.R @@ -171,10 +171,10 @@ calculatePca <- function(ms1AnalysisMethod, pcaScaling, pcaLogTransform, pcaDime drawAnnotationLegendPCA(consoleInfo = "init output$plotAnnoLegend") scoresGroups <<- list( - groups = filterPca$groups, - colors = colorPaletteScores()[unlist(lapply(X = filterPca$groups, FUN = dataList$groupIdxFromGroupName))] + grouXXXps = filterPca$grouXXXps, + colors = colorPaletteScores()[unlist(lapply(X = filterPca$grouXXXps, FUN = dataList$groupIdxFromGroupName))] ) - state_tabPca$scoresGroupsLegendHeight <<- scoresGroupsLegendEntryHeight * (length(scoresGroups$groups) + 1) + state_tabPca$scoresGroupsLegendHeight <<- scoresGroupsLegendEntryHeight * (length(scoresGroups$grouXXXps) + 1) drawScoresGroupsLegend(consoleInfo = "init output$plotScoresGroupsLegend") if(!state$anyPlotDrawn){ @@ -303,7 +303,7 @@ obsPCAscoresHover <- observeEvent(input$plotPcaScores_hover, { } else{ dataColumnName <- filterPca$sampleSet[[minimumIndex]] - #dataColumnName <- dataList$dataColumnsNameFunctionFromGroupNames(groups = filterPca$groups, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame))[[minimumIndex]] + #dataColumnName <- dataList$dataColumnsNameFunctionFromGroupNames(grouXXXps = filterPca$grouXXXps, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame))[[minimumIndex]] group <- dataList$groupNameFunctionFromDataColumnName(dataColumnName = dataColumnName, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) output$information <- renderText({ print(paste("update output$information PCA scores hover", sep = "")) @@ -543,14 +543,14 @@ observeGroupSet <- observeEvent(input$pcaGroups, { if(FALSE){ ## update samples - sampleNames <- dataList$dataColumnsNameFunctionFromGroupNames(groups = input$pcaGroups, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) + sampleNames <- dataList$dataColumnsNameFunctionFromGroupNames(grouXXXps = input$pcaGroups, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) updateCheckboxGroupInput(session = session, inputId = "pcaSamples", selected = sampleNames) } }) observeSampleSet <- observeEvent(input$pcaSamples, { print(paste("observe samples change", paste(input$pcaSamples, collapse = "-"), length(input$pcaSamples))) - groupsFromSamples <- unlist(lapply(X = dataList$groups, FUN = function(x){ + groupsFromSamples <- unlist(lapply(X = dataList$grouXXXps, FUN = function(x){ samplesOfGroups <- dataList$dataColumnsNameFunctionFromGroupName(group = x, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) if(any(samplesOfGroups %in% input$pcaSamples)) return(x) @@ -580,15 +580,15 @@ observeSampleSet <- observeEvent(input$pcaSamples, { observeSelectAllPCAGroups <- observeEvent(input$selectAllPCAGroups, { print(paste("observe selectAllPCAGroups")) - updateCheckboxGroupInput(session = session, inputId = "pcaGroups", choices = dataList$groups, selected = dataList$groups) + updateCheckboxGroupInput(session = session, inputId = "pcaGroups", choices = dataList$grouXXXps, selected = dataList$grouXXXps) }) observeSelectNoPCAGroups <- observeEvent(input$selectNoPCAGroups, { print(paste("observe selectNoPCAGroups")) - updateCheckboxGroupInput(session = session, inputId = "pcaGroups", choices = dataList$groups, selected = NULL) + updateCheckboxGroupInput(session = session, inputId = "pcaGroups", choices = dataList$grouXXXps, selected = NULL) }) observeSelectInvertedPCAGroups <- observeEvent(input$selectInvertedPCAGroups, { print(paste("observe selectInvertedPCAGroups")) - updateCheckboxGroupInput(session = session, inputId = "pcaGroups", choices = dataList$groups, selected = setdiff(dataList$groups, input$pcaGroups)) + updateCheckboxGroupInput(session = session, inputId = "pcaGroups", choices = dataList$grouXXXps, selected = setdiff(dataList$grouXXXps, input$pcaGroups)) }) output$plotPcaScores_hover_info <- renderUI({ @@ -620,7 +620,7 @@ output$plotPcaScores_hover_info <- renderUI({ ## compile information dataColumnName <- filterPca$sampleSet[[minimumIndex]] - #dataColumnName <- dataList$dataColumnsNameFunctionFromGroupNames(groups = filterPca$groups, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame))[[minimumIndex]] + #dataColumnName <- dataList$dataColumnsNameFunctionFromGroupNames(grouXXXps = filterPca$grouXXXps, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame))[[minimumIndex]] group <- dataList$groupNameFunctionFromDataColumnName(dataColumnName = dataColumnName, sampleNamesToExclude = dataList$excludedSamples(dataList$groupSampleDataFrame)) info <- paste( "Sample: ", "", dataColumnName, "
", diff --git a/inst/MetFamily/app_files/server_guiTabSampleFilter.R b/inst/MetFamily/app_files/server_guiTabSampleFilter.R index 17be61e..47e84df 100644 --- a/inst/MetFamily/app_files/server_guiTabSampleFilter.R +++ b/inst/MetFamily/app_files/server_guiTabSampleFilter.R @@ -81,7 +81,7 @@ updateSampleOrderAndExclusion <- function(){ sampleNamesToExclude <- dataList$groupSampleDataFrame[sampleExclusion_tmp, "Sample"] returnObj <- processMS1data(sampleNamesToExclude=sampleNamesToExclude, numberOfMS1features=dataList$numberOfPrecursors, precursorLabels=dataList$precursorLabels, - groups=dataList$groups, metaboliteProfileColumnNames=dataList$metaboliteProfileColumnNames, tagsSector = dataList$tagsSector, + grouXXXps=dataList$grouXXXps, metaboliteProfileColumnNames=dataList$metaboliteProfileColumnNames, tagsSector = dataList$tagsSector, dataColumnIndecesFunctionFromGroupIndex=dataList$dataColumnIndecesFunctionFromGroupIndex, dataColumnsNameFunctionFromGroupIndex=dataList$dataColumnsNameFunctionFromGroupIndex, dataColumnsNameFunctionFromGroupName=dataList$dataColumnsNameFunctionFromGroupName, dataColumnsNameFunctionFromGroupNames=dataList$dataColumnsNameFunctionFromGroupNames, groupNameFunctionFromDataColumnName=dataList$groupNameFunctionFromDataColumnName, metaboliteProfile=dataList$dataFrameInfos, progress=FALSE) diff --git a/inst/MetFamily/app_files/server_guiTabSearch.R b/inst/MetFamily/app_files/server_guiTabSearch.R index 7870798..8dbc213 100644 --- a/inst/MetFamily/app_files/server_guiTabSearch.R +++ b/inst/MetFamily/app_files/server_guiTabSearch.R @@ -81,7 +81,7 @@ obsApplySearch <- observeEvent(input$applySearch, { doApplySearch <- function(filter_ms2_masses1 = NULL, filter_ms2_masses2 = NULL, filter_ms2_masses3 = NULL, filter_ms2_ppm = NULL, filter_ms1_masses = NULL, filter_ms1_ppm = NULL, includeIgnoredPrecursors){ filter_lfc <- NULL filter_average <- NULL - groupSet <- dataList$groups + groupSet <- dataList$grouXXXps ################################################# ## do filtering diff --git a/inst/MetFamily/server.R b/inst/MetFamily/server.R index a4fd894..1123452 100755 --- a/inst/MetFamily/server.R +++ b/inst/MetFamily/server.R @@ -6,15 +6,13 @@ options(shiny.sanitize.errors = FALSE) ## libraries and functions sourceFolder <- getwd() -#isDevelopment <- TRUE -isDevelopment <- grepl(pattern = "htreutle", x = sourceFolder) | grepl(pattern = "Treutler", x = sourceFolder) +isDevelopment <- FALSE errorHunting <- FALSE hcaHeatMapNew <- TRUE ##################################################################################################### ## handling of errors and warnings if(errorHunting){ - #options(warn = 2) options(warn = 2, shiny.error = recover) options(shiny.trace=TRUE) options(shiny.fullstacktrace=TRUE) @@ -26,84 +24,10 @@ if(errorHunting){ options(shiny.testmode=FALSE) } -##################################################################################################### -## file paths -getFile <- function(files){ - isPackage <- "MetFamily" %in% rownames(installed.packages()) - #isPackage <- FALSE - - resultFiles <- vector(mode = "character", length = length(files)) - for(idx in seq_along(files)){ - switch(files[[idx]], - "logo_ipb_en.png"={ file <- ifelse(test = isPackage, yes = system.file("www/logo_ipb_en.png", package = "MetFamily", lib.loc=.libPaths()), no = paste(sourceFolder, "www/logo_ipb_en.png", sep = "/"))}, - - "MetFamily_Input_Specification.pdf"={ file <- ifelse(test = isPackage, yes = system.file("doc/MetFamily_Input_Specification.pdf", package = "MetFamily", lib.loc=.libPaths()), no = paste(sourceFolder, "inst/doc/MetFamily_Input_Specification.pdf", sep = "/"))}, - "MetFamily_user_guide.pdf"={ file <- ifelse(test = isPackage, yes = system.file("doc/MetFamily_user_guide.pdf", package = "MetFamily", lib.loc=.libPaths()), no = paste(sourceFolder, "inst/doc/MetFamily_user_guide.pdf", sep = "/"))}, - "MetFamily_Showcase_protocol.pdf"={ file <- ifelse(test = isPackage, yes = system.file("doc/MetFamily_Showcase_protocol.pdf", package = "MetFamily", lib.loc=.libPaths()), no = paste(sourceFolder, "inst/doc/MetFamily_Showcase_protocol.pdf", sep = "/"))}, - "Fragment_matrix_showcase.csv"={ file <- ifelse(test = isPackage, yes = system.file("data/showcase/Fragment_matrix_showcase.csv", package = "MetFamily", lib.loc=.libPaths()), no = paste(sourceFolder, "inst/data/showcase/Fragment_matrix_showcase.csv", sep = "/"))}, - "Metabolite_profile_showcase.txt"={ file <- ifelse(test = isPackage, yes = system.file("data/showcase/Metabolite_profile_showcase.txt", package = "MetFamily", lib.loc=.libPaths()), no = paste(sourceFolder, "inst/data/showcase/Metabolite_profile_showcase.txt", sep = "/"))}, - "MSMS_library_showcase.msp"={ file <- ifelse(test = isPackage, yes = system.file("data/showcase/MSMS_library_showcase.msp", package = "MetFamily", lib.loc=.libPaths()), no = paste(sourceFolder, "inst/data/showcase/MSMS_library_showcase.msp", sep = "/"))}, - "Project_file_showcase_annotated.csv.gz"={ file <- ifelse(test = isPackage, yes = system.file("data/showcase/Project_file_showcase_annotated.csv.gz", package = "MetFamily", lib.loc=.libPaths()), no = paste(sourceFolder, "inst/data/showcase/Project_file_showcase_annotated.csv.gz", sep = "/"))}, - "Project_file_showcase_annotated_reduced.csv.gz"={ file <- ifelse(test = isPackage, yes = system.file("data/showcase/Project_file_showcase_annotated_reduced.csv.gz", package = "MetFamily", lib.loc=.libPaths()), no = paste(sourceFolder, "inst/data/showcase/Project_file_showcase_annotated_reduced.csv.gz", sep = "/"))}, - "Project_file_showcase_reduced.csv.gz"={ file <- ifelse(test = isPackage, yes = system.file("data/showcase/Project_file_showcase_reduced.csv.gz", package = "MetFamily", lib.loc=.libPaths()), no = paste(sourceFolder, "inst/data/showcase/Project_file_showcase_reduced.csv.gz", sep = "/"))}, - - "Classifiers"={ file <- ifelse(test = isPackage, yes = system.file("data/classifier/", package = "MetFamily", lib.loc=.libPaths()), no = paste(sourceFolder, "inst/data/classifier/", sep = "/"))}, - - "Analysis.R"={ file <- ifelse(test = isPackage, yes = "", no = paste("R/Analysis.R", sep = "/"))}, - "DataProcessing.R"={ file <- ifelse(test = isPackage, yes = "", no = paste("R/DataProcessing.R", sep = "/"))}, - "FragmentMatrixFunctions.R"={ file <- ifelse(test = isPackage, yes = "", no = paste("R/FragmentMatrixFunctions.R", sep = "/"))}, - "Plots.R"={ file <- ifelse(test = isPackage, yes = "", no = paste("R/Plots.R", sep = "/"))}, - "R_packages.R"={ file <- ifelse(test = isPackage, yes = "", no = paste("R/R_packages.R", sep = "/"))}, - "StartApp.R"={ file <- ifelse(test = isPackage, yes = "", no = paste("R/StartApp.R", sep = "/"))}, - "Annotation.R"={ file <- ifelse(test = isPackage, yes = "", no = paste("R/Annotation.R", sep = "/"))}, - "Classifiers.R"={ file <- ifelse(test = isPackage, yes = "", no = paste("R/Classifiers.R", sep = "/"))}, - "TreeAlgorithms.R"={ file <- ifelse(test = isPackage, yes = "", no = paste("R/TreeAlgorithms.R", sep = "/"))}, - {## unknown state - stop(paste("Unknown file", file)) - } - ) - resultFiles[[idx]] <- file - } - return(resultFiles) -} - -##################################################################################################### -## source code -getSourceFileNames <- function(){ - return(c( - "R_packages.R", - "FragmentMatrixFunctions.R", - "DataProcessing.R", - "TreeAlgorithms.R", - "Analysis.R", - "Annotation.R", - "Classifiers.R", - "Plots.R" - )) -} -getSourceFiles <- function(){ - return(getFile(getSourceFileNames())) -} -sourceTheCode <- function(){ - print("Testing whether files must be sourced") - isPackage <- "MetFamily" %in% rownames(installed.packages()) - - if(!isPackage){ - sourceFiles <- getSourceFiles() - print(paste("Sourcing", length(sourceFiles), "files")) - - for(sourceFile in sourceFiles) - source(sourceFile) - } else { - library("MetFamily") - } -} -sourceTheCode() - -######################################################################################### -######################################################################################### -## global variables -# none +## +## Load dependency libraries. Formerly in sourceTheCode() +## +load_metfamily_dependencies() ######################################################################################### ######################################################################################### @@ -128,14 +52,15 @@ shinyServer( ExportMatrixName <- NULL ## GUI constants - ### I am changing RightColumnWidth 12 to 15 runRightColumnWidthFull <- 11 - ##runRightColumnWidthFull <- 12 + ### changing the legendcolumn width part 2 to 1.8 legendColumnWidthFull <- 1.8 runRightColumnWidthPart <- 8 + ### changing the legendcolumn width part 2 to 1.8 legendColumnWidthPart <- 1.8 + ### change the anno legend height ... 20 to 18 annoLegendEntryHeight <- 18 maximumNumberOfTableEntries <- 50 @@ -193,9 +118,7 @@ shinyServer( source(file = "app_files/server_guiTabExport.R", local = TRUE)$value source(file = "app_files/server_guiPlotControls.R", local = TRUE)$value source(file = "app_files/server_guiMs2plot.R", local = TRUE)$value - ## ui generation - #source(file = "app_files/ui_rightColumn.R", local = TRUE) - + ## Parse the input file resetWorkspace <- function(){ print(paste("resetWorkspace")) @@ -215,6 +138,7 @@ shinyServer( state$analysisType <<- "HCA" state$anyPlotDrawn <<- FALSE + ## plot controls showPlotControls <<- FALSE } @@ -270,14 +194,7 @@ shinyServer( if(tabId == "Input" & !initialGuiUpdatePerformed){ print(paste("update GUI initially", tabId)) - #shinyjs::disable("importMs1Ms2Data") - #shinyjs::disable("importMs2Data") - #shinyjs::disable("loadProjectData") - - ## annotation classifier selection - #session$sendCustomMessage("disableButton", "doAnnotation") - - filePath <- getFile("Classifiers") + filePath <- system.file("extdata/classifier/", package = "MetFamily") resultObj <- getAvailableClassifiers(filePath) availableClassifiersDf <<- resultObj$availableClassifiersDf availableClassifiersDfProperties <<- resultObj$availableClassifiersDfProperties @@ -299,15 +216,8 @@ shinyServer( preDrawCallback = JS('function() { Shiny.unbindAll(this.api().table().node()); }'), drawCallback = JS('function() { Shiny.bindAll( this.api().table().node()); }'), iDisplayLength=nrow(availableClassifiersDf), # initial number of records - #aLengthMenu = c(5,10), # records/page options - #bLengthChange =0, # show/hide records per page dropdown - #bFilter = 0, # global search box on/off - #bInfo = 0, # information on/off (how many records filtered, etc) - #bAutoWidth = 0, # automatic column width calculation, disable if passing column width via aoColumnDefs - #aoColumnDefs = list(list(sWidth="300px", aTargets=c(list(0),list(1)))) # custom column size ordering = F, # row ordering sDom = 't' - #sDom = '<"top">rt<"bottom">ip' ) ) } @@ -398,16 +308,11 @@ shinyServer( ifelse(test = state_tabPca$showLoadingsFeaturesSelected, yes = "Selected", no = NULL), ifelse(test = state_tabPca$showLoadingsFeaturesUnselected, yes = "Not Selected", no = NULL) )) - #updateRadioButtons( session = session, inputId = "showLoadingsFeaturesAnnotated", selected = state_tabPca$showLoadingsFeaturesAnnotated) - #updateRadioButtons( session = session, inputId = "showLoadingsFeaturesUnannotated", selected = state_tabPca$showLoadingsFeaturesUnannotated) - #updateRadioButtons( session = session, inputId = "showLoadingsFeaturesSelected", selected = state_tabPca$showLoadingsFeaturesSelected) - #updateRadioButtons( session = session, inputId = "showLoadingsFeaturesUnselected", selected = state_tabPca$showLoadingsFeaturesUnselected) updateCheckboxInput(session = session, inputId = "showLoadingsAbundance", value = state_tabPca$showLoadingsAbundance) }) ## display of tabs observe({ - #toggle(condition = !is.null(state_tabInput$importedOrLoadedFile_s_), selector = "#runTabs li a[data-value='Filter']") toggle(condition = !is.null(state_tabInput$importedOrLoadedFile_s_), selector = "#runTabs li a[data-value='MS/MS filter']") toggle(condition = !is.null(state_tabInput$importedOrLoadedFile_s_), selector = "#runTabs li a[data-value='Sample filter']") toggle(condition = !is.null(state_tabInput$importedOrLoadedFile_s_), selector = "#runTabs li a[data-value='PCA']") @@ -451,7 +356,7 @@ shinyServer( ) }) output$ipbImage <- renderImage({ - file <- getFile("logo_ipb_en.png") + file <- system.file("MetFamily/www/logo_ipb_en.png", package = "MetFamily") list(src = file, alt = "IPB Halle" @@ -512,11 +417,7 @@ shinyServer( print(paste("reactive update plotAnnotationShown", state$plotAnnotationShown)) return(state$plotAnnotationShown) }) - #output$putativeAnnotationsTableFromAnalysisRowSelected <- reactive({ - # print(paste("reactive update putativeAnnotationsTableFromAnalysisRowSelected", state$putativeAnnotationsTableFromAnalysisRowSelected)) - # return(state$putativeAnnotationsTableFromAnalysisRowSelected) - #}) - + updateChangePlotRadioButton <- function(){ if((sum(c(state$showHCAplotPanel, state$showPCAplotPanel, state$showAnnotationplotPanel)) > 1) & !is.null(state$analysisType)){ if(state$analysisType == "HCA") @@ -552,6 +453,5 @@ shinyServer( outputOptions(output, 'plotHcaShown', suspendWhenHidden=FALSE) outputOptions(output, 'plotPcaShown', suspendWhenHidden=FALSE) outputOptions(output, 'plotAnnotationShown', suspendWhenHidden=FALSE) - #outputOptions(output, 'putativeAnnotationsTableFromAnalysisRowSelected', suspendWhenHidden=FALSE) }## function(input, output, session) )## shinyServer diff --git a/inst/extdata/classifier/README b/inst/extdata/classifier/README new file mode 100644 index 0000000..e69de29 diff --git a/inst/data/showcase/Fragment_matrix_showcase.csv b/inst/extdata/showcase/Fragment_matrix_showcase.csv similarity index 100% rename from inst/data/showcase/Fragment_matrix_showcase.csv rename to inst/extdata/showcase/Fragment_matrix_showcase.csv diff --git a/inst/data/showcase/MSMS_library_showcase.msp b/inst/extdata/showcase/MSMS_library_showcase.msp similarity index 100% rename from inst/data/showcase/MSMS_library_showcase.msp rename to inst/extdata/showcase/MSMS_library_showcase.msp diff --git a/inst/data/showcase/Metabolite_profile_showcase.txt b/inst/extdata/showcase/Metabolite_profile_showcase.txt similarity index 100% rename from inst/data/showcase/Metabolite_profile_showcase.txt rename to inst/extdata/showcase/Metabolite_profile_showcase.txt diff --git a/inst/data/showcase/Project_file_showcase_annotated.csv.gz b/inst/extdata/showcase/Project_file_showcase_annotated.csv.gz similarity index 100% rename from inst/data/showcase/Project_file_showcase_annotated.csv.gz rename to inst/extdata/showcase/Project_file_showcase_annotated.csv.gz diff --git a/inst/data/showcase/Project_file_showcase_annotated_reduced.csv.gz b/inst/extdata/showcase/Project_file_showcase_annotated_reduced.csv.gz similarity index 100% rename from inst/data/showcase/Project_file_showcase_annotated_reduced.csv.gz rename to inst/extdata/showcase/Project_file_showcase_annotated_reduced.csv.gz diff --git a/inst/data/showcase/Project_file_showcase_reduced.csv.gz b/inst/extdata/showcase/Project_file_showcase_reduced.csv.gz similarity index 100% rename from inst/data/showcase/Project_file_showcase_reduced.csv.gz rename to inst/extdata/showcase/Project_file_showcase_reduced.csv.gz diff --git a/inst/extdata/testdata/processMS1data.Rdata b/inst/extdata/testdata/processMS1data.Rdata new file mode 100644 index 0000000..f187f0d Binary files /dev/null and b/inst/extdata/testdata/processMS1data.Rdata differ diff --git a/inst/extdata/testdata/readMSPreturnObj.Rdata b/inst/extdata/testdata/readMSPreturnObj.Rdata new file mode 100644 index 0000000..5d9af35 Binary files /dev/null and b/inst/extdata/testdata/readMSPreturnObj.Rdata differ diff --git a/tests/testthat/test_dataprocessing.R b/tests/testthat/test_dataprocessing.R new file mode 100644 index 0000000..37524b0 --- /dev/null +++ b/tests/testthat/test_dataprocessing.R @@ -0,0 +1,25 @@ +test_that("metaboliteProfileParsing works", { + load(system.file("extdata/testdata/processMS1data.Rdata", package = "MetFamily")) + + d <- dataColumnsNameFunctionFromGroupIndex(groupIdx = 2, sampleNamesToExclude = NA) + + ## Test dimensions + expect_equal(length(d), 3) + + result <- processMS1data(sampleNamesToExclude=sampleNamesToExclude, + numberOfMS1features=numberOfMS1features, + precursorLabels=precursorLabels, + grouXXXps=c("TRI", "LVS"), + metaboliteProfileColumnNames=metaboliteProfileColumnNames, + dataColumnIndecesFunctionFromGroupIndex=dataColumnIndecesFunctionFromGroupIndex, + dataColumnsNameFunctionFromGroupIndex=dataColumnsNameFunctionFromGroupIndex, + dataColumnsNameFunctionFromGroupName=dataColumnsNameFunctionFromGroupName, + dataColumnsNameFunctionFromGroupNames=dataColumnsNameFunctionFromGroupNames, + groupNameFunctionFromDataColumnName=groupNameFunctionFromDataColumnName, + tagsSector=tagsSector, + metaboliteProfile=metaboliteProfile, + progress=FALSE) + +expect_equal(min(result$dataFrameMeasurements[,1]), 0) + +}) diff --git a/tests/testthat/test_fileinput.R b/tests/testthat/test_fileinput.R index d2393f7..45287f4 100644 --- a/tests/testthat/test_fileinput.R +++ b/tests/testthat/test_fileinput.R @@ -3,7 +3,7 @@ test_that("trivial", { }) test_that("exampledata", { - filePeakMatrix <- system.file("data/showcase/Metabolite_profile_showcase.txt", package = "MetFamily") + filePeakMatrix <- system.file("extdata/showcase/Metabolite_profile_showcase.txt", package = "MetFamily") data <- parsePeakAbundanceMatrix(filePeakMatrix, doPrecursorDeisotoping=TRUE, mzDeviationInPPM_precursorDeisotoping=10, mzDeviationAbsolute_precursorDeisotoping=0.01, maximumRtDifference=0.05, @@ -19,7 +19,7 @@ test_that("exampledata", { }) test_that("MS-Dial 4.X", { -# filePeakMatrix <- system.file("data/showcase/Metabolite_profile_showcase.txt", package = "MetFamily") +# filePeakMatrix <- system.file("extdata/showcase/Metabolite_profile_showcase.txt", package = "MetFamily") # data <- parsePeakAbundanceMatrix(filePeakMatrix, doPrecursorDeisotoping, # mzDeviationInPPM_precursorDeisotoping, mzDeviationAbsolute_precursorDeisotoping, # maximumRtDifference, @@ -28,3 +28,70 @@ test_that("MS-Dial 4.X", { # expect_equal(ncol(data$dataFrame), 41) }) +test_that("MSP reading works", { + + if(FALSE) { + fileSpectra <- system.file("extdata/showcase/Metabolite_profile_showcase.msp", package = "MetFamily") + #load(system.file("extdata/testdata/readMSPreturnObj.Rdata", package = "MetFamily")) + + returnObj <- parseMSP(fileSpectra = fileSpectra, + minimumIntensityOfMaximalMS2peak = 2000, + minimumProportionOfMS2peaks = 0.05, + neutralLossesPrecursorToFragments = TRUE, + neutralLossesFragmentsToFragments = FALSE, + progress = FALSE) + + + expect_equal(returnObj$numberOfSpectra, 2640) + expect_equal(returnObj$numberOfSpectraOriginal, 5824) + expect_equal(returnObj$numberOfMS2PeaksOriginal, 145973) + expect_equal(returnObj$numberOfMS2PeaksWithNeutralLosses, 68738) + expect_equal(returnObj$numberOfMS2PeaksAboveThreshold, 34369) + expect_equal(returnObj$numberOfMS2PeaksBelowThreshold, 109797) + expect_equal(returnObj$numberOfTooHeavyFragments, 1807) + expect_equal(returnObj$numberOfSpectraDiscardedDueToNoPeaks, 15) + expect_equal(returnObj$numberOfSpectraDiscardedDueToMaxIntensity, 3163) + expect_equal(returnObj$numberOfSpectraDiscardedDueToTooHeavy, 5) + expect_equal(length(returnObj$precursorMz), 2640) + expect_equal(returnObj$precursorMz[1], 85) + expect_equal(length(returnObj$precursorRt), 2640) + expect_equal(returnObj$precursorRt[1], 10.98) + + } + + # > str(returnObj, max.level = 1) + # List of 14 + # $ fileSpectra : chr "/tmp/RtmpL82I4E/ff16bb92ac5e87cb202eec2e/0.msp" + # $ spectraList :List of 2640 + # $ numberOfSpectra : int 2640 + # $ numberOfSpectraOriginal : int 5824 + # $ numberOfMS2PeaksOriginal : num 145973 + # $ numberOfMS2PeaksWithNeutralLosses : num 68738 + # $ numberOfMS2PeaksAboveThreshold : num 34369 + # $ numberOfMS2PeaksBelowThreshold : num 109797 + # $ numberOfTooHeavyFragments : num 1807 + # $ numberOfSpectraDiscardedDueToNoPeaks : num 15 + # $ numberOfSpectraDiscardedDueToMaxIntensity: num 3163 + # $ numberOfSpectraDiscardedDueToTooHeavy : num 5 + # $ precursorMz : num [1:2640] 85 85 85 85 85 ... + # $ precursorRt : num [1:2640] 10.98 7.42 12.4 16.26 13 ... + # + + + + ## + ## returnObj contains the following elements: + ## + + # returnObj <- list() + # returnObj$fileSpectra <- fileSpectra + # returnObj$spectraList <- list() + # returnObj$numberOfSpectra <- 0 + # returnObj$numberOfMS2PeaksOriginal <- 0 + # returnObj$numberOfMS2PeaksWithNeutralLosses <- 0 + # returnObj$numberOfMS2PeaksAboveThreshold <- 0 + # returnObj$numberOfMS2PeaksBelowThreshold <- 0 + # returnObj$precursorMz <- vector(mode = "numeric") + # returnObj$precursorRt <- vector(mode = "numeric") + +})