Skip to content

Commit

Permalink
Merge pull request #83 from NorStorz/feature/qfeature
Browse files Browse the repository at this point in the history
WIP: support for Sirius annotations
  • Loading branch information
sneumann authored Nov 7, 2024
2 parents eb5311a + 660541a commit 7ae4c32
Show file tree
Hide file tree
Showing 11 changed files with 25,587 additions and 117 deletions.
51 changes: 8 additions & 43 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -106,43 +106,13 @@ jobs:
echo $sysreqs
sudo -s eval "$sysreqs"
# Step 9: Install macOS system dependencies
- name: Install macOS system dependencies
if: matrix.config.os == 'macOS-latest'
run: |
## Enable installing XML from source if needed
brew install libxml2
echo "XML_CONFIG=/usr/local/opt/libxml2/bin/xml2-config" >> $GITHUB_ENV
## Required to install magick as noted at
## https://github.com/r-lib/usethis/commit/f1f1e0d10c1ebc75fd4c18fa7e2de4551fd9978f#diff-9bfee71065492f63457918efcd912cf2
brew install imagemagick@6
## For textshaping, required by ragg, and required by pkgdown
brew install harfbuzz fribidi
## For installing usethis's dependency gert
brew install libgit2
## required for ncdf4
## brew install netcdf ## Does not work as it is compiled with gcc
## Use pre-compiled libraries from https://mac.r-project.org/libs-4/
curl -O https://mac.r-project.org/libs-4/netcdf-4.7.4-darwin.17-x86_64.tar.gz
tar fvxzm netcdf-4.7.4-darwin.17-x86_64.tar.gz -C /
rm netcdf-4.7.4-darwin.17-x86_64.tar.gz
curl -O https://mac.r-project.org/libs-4/hdf5-1.12.0-darwin.17-x86_64.tar.gz
tar fvxzm hdf5-1.12.0-darwin.17-x86_64.tar.gz -C /
rm hdf5-1.12.0-darwin.17-x86_64.tar.gz
curl -O https://mac.r-project.org/libs-4/szip-2.1.1-darwin.17-x86_64.tar.gz
tar fvxzm szip-2.1.1-darwin.17-x86_64.tar.gz -C /
rm szip-2.1.1-darwin.17-x86_64.tar.gz
# Step 10: Install Windows system dependencies
- name: Install Windows system dependencies
if: runner.os == 'Windows'
run: |
## Edit below if you have any Windows system dependencies
shell: Rscript {0}
# Steps 9 and 10 (macOS and Windows dependencies) removed
- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::rcmdcheck, testthat, openxlsx2
needs: check
dependencies: '"hard"'


# Step 11: Install BiocManager
- name: Install BiocManager
Expand Down Expand Up @@ -206,12 +176,7 @@ jobs:
BiocManager::install("BiocGenerics")
shell: Rscript {0}

# Step 16: Install covr
- name: Install covr
if: github.ref == 'refs/heads/devel' && env.run_covr == 'true' && runner.os == 'Linux'
run: |
remotes::install_cran("covr")
shell: Rscript {0}
# Skipping Step 16: Install covr

# Step 17: Install pkgdown
- name: Install pkgdown
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
repopack-output.txt
.Rproj.user
.Rhistory
.RData
Expand Down
5 changes: 4 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,10 @@ Imports:
methods,
QFeatures,
stats,
utils
utils,
S4Vectors,
SummarizedExperiment,
openxlsx2
Remotes:
decisionpatterns/searchable
Suggests:
Expand Down
91 changes: 35 additions & 56 deletions R/DataProcessing.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ readClusterDataFromProjectFile <- function(file, progress = FALSE)
fileLines <- readLines(con = file)
)
base::close(con = file)
dataList <- readProjectData(fileLines = fileLines, progress = progress)

dataList <- readProjectData(fileLines = fileLines, progress = progress, qfeatures = qfeatures)
fileLines <- NULL

return(dataList)
Expand All @@ -98,7 +98,7 @@ readClusterDataFromProjectFile <- function(file, progress = FALSE)
#' @export
#'
#' @examples
readProjectData <- function(fileLines, progress = FALSE)
readProjectData <- function(fileLines, progress = FALSE, qfeatures = NULL)
{
allowedTags <- c("ID")
allowedTagPrefixes <- c("AnnotationColors=")
Expand Down Expand Up @@ -196,70 +196,43 @@ readProjectData <- function(fileLines, progress = FALSE)
listMatrixRows <- NULL
listMatrixCols <- NULL

## Disable command line reading of answer
if (FALSE) {

################################################################################
#Start of importing annotation part1 from two
# Display the message and give the user the option to choose whether to upload the annotation file or not.
#If Y shows selection window for annotation file. if N ignores annotation process
#message("Do you want to upload the annotation file? (Y/N)")
#user_choice <- readline()
user_choice <- "N"

if (toupper(user_choice) == "Y") {


# Read the annotation_file file (if needed)
annotation_file <- read.delim(file.choose(), header = TRUE, check.names = FALSE) # select interactively

# Display the available columns in annotation_file
message("Available columns in annotation_file:")
available_columns <- colnames(annotation_file)
for (i in 1:length(available_columns)) {
message(paste(i, "-", available_columns[i]))
######debugging
tryCatch(
{
rowData(qfeatures)
},
error = function(e) {
message("Error: ", e$message)
traceback()
}
)
######debugging

if (!is.null(attr(rowData(qfeatures[[1]]), "annotation column"))) {

# Prompt the user to select the column containing IDs
message("Enter the number corresponding to the column containing IDs:")
selected_column_id <- as.integer(readline())
# Extract the relevant data: Alignment ID and the annotation column from qfeatures
annot_colname <- attr(rowData(qfeatures[[1]]), "annotation column")
annotation_data <- rowData(qfeatures[[1]])[[annot_colname]]
alignment_ids <- rowData(qfeatures[[1]])[["Alignment ID"]]

# Check if the selected column index is valid
if (selected_column_id >= 1 && selected_column_id <= length(available_columns)) {
id_column <- available_columns[selected_column_id]

# Prompt the user to select the Annotation column to use
message("Enter the number corresponding to the annotation column:")
selected_column_annot <- as.integer(readline())

# Check if the selected column index is valid
if (selected_column_annot >= 1 && selected_column_annot <= length(available_columns)) {
selected_column <- available_columns[selected_column_annot]

# Iterate through all values in the "Annotation" column of metaboliteProfile, excluding first row
for (i in 1:nrow(metaboliteProfile)) {
# Perform the lookup based on metaboliteProfile's "Alignment ID" column and annotation_file's selected ID column
matching_indices <- which(annotation_file[[id_column]] == metaboliteProfile$'Alignment ID'[i])

# Check data types and unique values of IDs column in annotation_file

# Check if any matches were found
if (length(matching_indices) > 0) {
# Update the specified column (Annotation) in metaboliteProfile with the corresponding value from annotation_file
metaboliteProfile[i, "Annotation"] <- annotation_file[matching_indices[1], selected_column]
} else {
# Handle the case where no match was found (you can add custom logic here)
warning(paste("No match found for row", i, "in metaboliteProfile"))
}
}
} else {
message("Invalid column selection. Skipping annotation step.")
}
}
# Find the matching indices between metaboliteProfile and annotation_data
matching_indices <- match(metaboliteProfile[["Alignment ID"]], alignment_ids)

metaboliteProfile$Annotation[!is.na(matching_indices)] <- annotation_data[matching_indices[!is.na(matching_indices)]]
#eliminate NAs replace by "" so nchar(annoVals[[i]]) > 0 works in l. 597
metaboliteProfile$Annotation[is.na(metaboliteProfile$Annotation)] <- ""
}

#####################################################################################################################################
#end of importing annotation part1 from two
}

listMatrixVals <- NULL

Expand Down Expand Up @@ -319,13 +292,17 @@ readProjectData <- function(fileLines, progress = FALSE)
}

## STN: Disabled.
if (FALSE) {
if (!is.null(attr(rowData(qfeatures[[1]]), "annotation column"))) {
#Start of importing annotation part2 from two
################################################################################
#adding HEX color codes from external annotations to the annotationColorsMapInitValue of dataFrameHeader
if (toupper(user_choice) == "Y") {

# Copy the selected column by user, Remove duplicates and exclude the first row
uniqueAnnotations <- unique(unlist(strsplit(metaboliteProfile$Annotation, ",")))
###Debug
print("Unique Annotations Before Filtering:")
print(uniqueAnnotations)
###/Debug
uniqueAnnotations <- paste0(uniqueAnnotations, "=")
# Add a random string from the hex color list to each element of uniqueAnnotions
# strings_list <- c("#000000", "#FFFFFF", "#FF0000", "#00FF00", "#0000FF", "#FFFF00", "#FF00FF", "#00FFFF", "#800000", "#008000", "#000080", "#808000", "#800080", "#008080", "#808080", "#C0C0C0", "#FFA500", "#FFC0CB", "#FFD700", "#A52A2A")
Expand All @@ -338,7 +315,7 @@ readProjectData <- function(fileLines, progress = FALSE)
uniqueAnnotationsHexs <- gsub("AnnotationColors=\\{\\s+", "AnnotationColors={", paste("AnnotationColors={", paste(uniqueAnnotations1, collapse = ","), "}"))
# Assuming dataFrameHeader is your data frame
dataFrameHeader$Annotation[2] <- uniqueAnnotationsHexs
}

################################################################################
#End of importing annotation part2 from two
}
Expand Down Expand Up @@ -612,9 +589,11 @@ readProjectData <- function(fileLines, progress = FALSE)
annotationValueIgnore <- "Ignore"
annotationColorIgnore <- "red"


## present annotations
annotations <- vector(mode='list', length=numberOfMS1features)
annoVals <- metaboliteProfile[, annotationColumnName]

for(i in seq_len(numberOfMS1features)){
if(nchar(annoVals[[i]]) > 0){
annotations[[i]] <- as.list(unlist(strsplit(x = annoVals[[i]], split = ", ")))
Expand Down
15 changes: 11 additions & 4 deletions R/FragmentMatrixFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -1397,7 +1397,8 @@ mzClustGeneric <- function(p,
}

convertToProjectFile <- function(filePeakMatrixPath,
fileSpectra,
fileSpectra,
fileAnnotation,
parameterSet,
progress = FALSE){
####################################################################################
Expand Down Expand Up @@ -1436,6 +1437,10 @@ convertToProjectFile <- function(filePeakMatrixPath,


filePeakMatrixQF <- readMSDial(filePeakMatrixPath)
if (!is.null(fileAnnotation)){
# TODO: determine columns to merge by
filePeakMatrixQF <- addSiriusAnnotations(filePeakMatrixQF,fileAnnotation)
}

returnObj <- convertToProjectFile2(
filePeakMatrixQF = filePeakMatrixQF,
Expand All @@ -1453,7 +1458,7 @@ convertToProjectFile <- function(filePeakMatrixPath,
returnObj$numberOfSpectraDiscardedDueToNoPeaks <- numberOfSpectraDiscardedDueToNoPeaks
returnObj$numberOfSpectraDiscardedDueToMaxIntensity <- numberOfSpectraDiscardedDueToMaxIntensity
returnObj$numberOfSpectraDiscardedDueToTooHeavy <- numberOfSpectraDiscardedDueToTooHeavy

returnObj$qfeatures <- filePeakMatrixQF
return(returnObj)
}

Expand Down Expand Up @@ -1497,6 +1502,7 @@ convertToProjectFile2 <- function(filePeakMatrixQF,
numberOfParsedMs1Features <- returnObj$numberOfPrecursorsPrior
numberOfRemovedPrecursorIsotopePeaks <- returnObj$numberOfRemovedIsotopePeaks

qfeatures <- returnObj$qfeatures
rm(returnObj)
} else {
propList <- list(
Expand Down Expand Up @@ -1589,7 +1595,7 @@ convertToProjectFile2 <- function(filePeakMatrixQF,
#temporary fix
#filePeakMatrix <- NULL

if(!is.null(filePeakMatrix)){
if(!is.null(filePeakMatrixQF)){
## allHits: dataFrame$"Average Mz" --> precursorMz; allHits indexes the spectraList
diffAll <- abs(outer(X = precursorMz, Y = dataFrame$"Average Mz", FUN = function(x, y){abs(x-y)}))
allHits <- apply(X = diffAll, MARGIN = 2, FUN = function(x){which(x == min(x[x < parameterSet$mzDeviationAbsolute_mapping], Inf))})
Expand Down Expand Up @@ -1850,7 +1856,8 @@ convertToProjectFile2 <- function(filePeakMatrixQF,
numberOfUnmappedSpectra = numberOfUnmappedSpectra,
numberOfUnmappedPrecursors = numberOfUnmappedPrecursors,
numberOfUnmappedPrecursorsMz = numberOfUnmappedPrecursorsMz,
numberOfUnmappedPrecursorsRt = numberOfUnmappedPrecursorsRt
numberOfUnmappedPrecursorsRt = numberOfUnmappedPrecursorsRt,
qfeatures <- qfeatures
)

if(!is.na(progress)) if(progress) setProgress(1) else print("Ready")
Expand Down
51 changes: 50 additions & 1 deletion R/parsePeakAbundanceMatrixQF.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ parsePeakAbundanceMatrixQF <- function(qfeatures,
dataFrame <- cbind(rowData(qfeatures)[[1]][,cols_to_keep] ,assay(qfeatures))
#workaround for avoiding change in colnames during coercion
cnames <- colnames(dataFrame)
dataFrame <- as.data.frame(dataFrame)
dataFrame <- as.data.frame(dataFrame, check.names = FALSE)
colnames(dataFrame) <- cnames
oldFormat <- ncol(colData(qfeatures))==3
numRowDataCols <- ncol(rowData(qfeatures)[[1]])
Expand Down Expand Up @@ -161,6 +161,9 @@ parsePeakAbundanceMatrixQF <- function(qfeatures,
returnObj$dataFrame <- dataFrame
returnObj$vals <- vals

## qfeatures
returnObj$qfeatures <- qfeatures

## meta
returnObj$oldFormat <- oldFormat
returnObj$numberOfPrecursors <- numberOfPrecursors
Expand All @@ -179,3 +182,49 @@ parsePeakAbundanceMatrixQF <- function(qfeatures,

return (returnObj)
}

#' Add SIRIUS annotations to the feature metadata of the first assay
#'
#' Reads a tab-delimited SIRIUS result table and left-joins it onto the
#' `rowData` of `qfeatures[[1]]`. Every feature row is kept, in its original
#' order, so the metadata stays aligned with the assay rows; features without
#' a SIRIUS entry get `NA` in the added columns.
#'
#' Each merged column is tagged with a `"source"` attribute (`"sirius"` or
#' `"data"`), and the name of the column to use as annotation is stored in
#' the `"annotation column"` attribute of the merged `rowData`.
#'
#' @param qfeatures Object whose first element (`qfeatures[[1]]`) carries the
#'   feature metadata in its `rowData`, e.g. the result of `readMSDial()`.
#' @param siriusFile Path to the tab-delimited SIRIUS annotation table. It is
#'   read with `read.delim()` defaults, so characters that are invalid in R
#'   names are replaced by `.` in the column names.
#' @param rowData_col Name of the key column in the `rowData`.
#' @param sirius_col Name of the key column in the SIRIUS table.
#' @param annotation_col Name of the merged column to record as the
#'   `"annotation column"` attribute. If the column does not exist after
#'   merging, a warning is raised and the attribute is not set.
#'
#' @return `qfeatures` with the `rowData` of its first element replaced by
#'   the annotated version.
#' @export
#'
#' @examples
#' \dontrun{
#' qf <- readMSDial("peak_matrix.txt")
#' qf <- addSiriusAnnotations(qf, "sirius_annotations.tsv")
#' }
addSiriusAnnotations <- function(qfeatures,
                                 siriusFile,
                                 rowData_col = "Alignment ID",
                                 sirius_col = "featureId",
                                 annotation_col = "ClassyFire.subclass") {
  # TODO: specify more parameters in read.delim
  annotation <- read.delim(siriusFile)

  rowData <- rowData(qfeatures[[1]])

  # Fail early with a readable message instead of an opaque merge() error.
  if (!rowData_col %in% colnames(rowData)) {
    stop("Column '", rowData_col, "' not found in the feature metadata.",
         call. = FALSE)
  }
  if (!sirius_col %in% colnames(annotation)) {
    stop("Column '", sirius_col, "' not found in the SIRIUS table.",
         call. = FALSE)
  }

  # Several SIRIUS rows for one feature would multiply rows in the merge and
  # make the result incompatible with the assay; keep the first hit only.
  duplicatedKeys <- duplicated(annotation[[sirius_col]])
  if (any(duplicatedKeys)) {
    warning(sum(duplicatedKeys), " duplicated '", sirius_col,
            "' entries in the SIRIUS table; keeping the first of each.",
            call. = FALSE)
    annotation <- annotation[!duplicatedKeys, , drop = FALSE]
  }

  message("Merging by: ", sirius_col, " and ", rowData_col)

  # merge() does not guarantee the row order of its first argument, so carry
  # an explicit index through the merge and restore the order afterwards.
  allNames <- c(colnames(rowData), colnames(annotation))
  orderCol <- make.unique(c(allNames, ".rowOrder"))[length(allNames) + 1L]
  indexedRowData <- rowData
  indexedRowData[[orderCol]] <- seq_len(nrow(rowData))

  annotatedRowData <- S4Vectors::merge(indexedRowData, annotation,
                                       by.x = rowData_col, by.y = sirius_col,
                                       all.x = TRUE, sort = FALSE)
  annotatedRowData <-
    annotatedRowData[order(annotatedRowData[[orderCol]]), , drop = FALSE]
  annotatedRowData[[orderCol]] <- NULL
  stopifnot(nrow(annotatedRowData) == nrow(rowData))
  rownames(annotatedRowData) <- rownames(rowData)

  # TODO: ? check for duplicate columns ?
  # Column names present in both tables are suffixed by merge() and therefore
  # receive no "source" tag here.
  annotation_cols <- setdiff(colnames(annotation), sirius_col)
  rowData_cols <- colnames(rowData)

  for (col in colnames(annotatedRowData)) {
    if (col %in% annotation_cols) {
      attr(annotatedRowData[[col]], "source") <- "sirius"
    } else if (col %in% rowData_cols) {
      attr(annotatedRowData[[col]], "source") <- "data"
    }
  }

  # Record the annotation column only if it exists after merging.
  if (annotation_col %in% colnames(annotatedRowData)) {
    attr(annotatedRowData, "annotation column") <- annotation_col
  } else {
    warning("Annotation column '", annotation_col,
            "' not found after merging; no annotation column set.",
            call. = FALSE)
  }

  rowData(qfeatures[[1]]) <- annotatedRowData
  return(qfeatures)
}
Loading

0 comments on commit 7ae4c32

Please sign in to comment.