From 792b03c95ee091c406de8308ef23aa13c662189d Mon Sep 17 00:00:00 2001 From: Lucille Delisle Date: Thu, 11 Jul 2019 17:59:09 +0200 Subject: [PATCH] linting and completed imports --- DESCRIPTION | 7 +- NAMESPACE | 13 + R/plottingFunctions.R | 632 ++++++++++++++++++---------------- man/barplotFromNamedVector.Rd | 8 +- man/createMyGRs.Rd | 4 +- 5 files changed, 357 insertions(+), 307 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b4dd700..fb1f050 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: analysePeaks Title: Compare, annotate your peaks -Version: 0.0.1 +Version: 0.0.2 Authors@R: person(given = "Lucille", family = "Lopez-Delisle", @@ -13,6 +13,11 @@ Encoding: UTF-8 LazyData: true RoxygenNote: 6.1.1 Imports: + grDevices, + graphics, + utils, + usefulLDfunctions, + GenomicRanges, stats, pheatmap, reshape diff --git a/NAMESPACE b/NAMESPACE index 73b745e..af44312 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,6 +10,19 @@ export(plotCateComparisonSetAndRef) export(plotClassicalHistogramForMyGRs) export(plotPairwiseComparison) import(stats) +importFrom(GenomicRanges,distanceToNearest) +importFrom(GenomicRanges,end) +importFrom(GenomicRanges,findOverlaps) +importFrom(GenomicRanges,flank) importFrom(GenomicRanges,mcols) +importFrom(GenomicRanges,start) +importFrom(grDevices,rainbow) +importFrom(grDevices,rgb) +importFrom(graphics,barplot) +importFrom(graphics,hist) +importFrom(graphics,legend) +importFrom(graphics,par) +importFrom(graphics,plot) importFrom(pheatmap,pheatmap) importFrom(reshape,cast) +importFrom(utils,head) diff --git a/R/plottingFunctions.R b/R/plottingFunctions.R index 83bab68..b7a6544 100644 --- a/R/plottingFunctions.R +++ b/R/plottingFunctions.R @@ -3,14 +3,18 @@ #' @param data a named vector where values are number and names are category1(which can have dot).category2(no dot possible here) #' @param colorsForGraphs a named vector with the colors which should be used in the barplot (names should correspond to category2) #' @param orderForCategory1 a vector with the names of the category1 in the order it should be plotted +#' @param legend a logical to precise if the legend should be added (default is TRUE) +#' @param args.legend list of arguments for the leged (default is topright inset=(-0.2, 0), bg= "white") +#' @param las a numeric value between 0 and 3 to specify the style of axis label (default is 2) #' @param ... other arguments for barplot #' @details Will plot a barplot with one bar per category1 #' @return invisible x values for the barplot #' @importFrom reshape cast +#' @importFrom graphics par barplot #' @export #' @examples #'topTs <- c(5, 10, 15, 20) * 1e3 -#'colorsForGraphs<-rainbow(2+length(topTs)) +#'colorsForGraphs<-grDevices::rainbow(2+length(topTs)) #'names(colorsForGraphs)<-c(paste0("overlapTop",sort(topTs/1e3),"k"),"overlap","specific") #'v <- c(15809, rep(5000, 4), c(9316, 4762, 4327, 3831, 5051, 36638)) #'names(v) <- paste(c(rep("ChIP1", 5), rep("ChIP2", 6)), @@ -18,36 +22,44 @@ #' "specific"), #' sep = ".") #'barplotFromNamedVector(v, colorsForGraphs = colorsForGraphs) -barplotFromNamedVector <- function(data, colorsForGraphs = NULL, orderForCategory1 = NULL, legend=T, args.legend=list(x="topright", inset=c(-0.2,0),bg="white"), las=2,...){ +barplotFromNamedVector <- function(data, colorsForGraphs = NULL, orderForCategory1 = NULL, + legend = T, args.legend = list(x = "topright", inset = c(-0.2, 0), + bg = "white"), + las = 2, ...){ # require package reshape # Split the names of data: first row: everything before the last dot (category1) and second row: after the last dot (category2) - namesCate<-sapply(names(data),function(s){v=strsplit(s,"\\.")[[1]];n=length(v);return(c(paste(v[1:(n-1)],collapse = "."),v[n]))}) + namesCate <- sapply(names(data), function(s){ + v = strsplit(s,"\\.")[[1]] + n = length(v) + return(c(paste(v[1:(n - 1)], collapse = "."), v[n])) + }) # Put these names with the values in a dataframe - temp.df<-data.frame(value=data,t(namesCate)) + temp.df <- data.frame(value = data, t(namesCate)) # Reshape the dataframe to have a matrix - mat.tmp<-reshape::cast(temp.df, X1~X2) + mat.tmp <- reshape::cast(temp.df, X1 ~ X2) # Remove the first column which is the category1 and get the transpose - mat<-t(mat.tmp[,2:ncol(mat.tmp)]) + mat <- t(mat.tmp[, 2:ncol(mat.tmp)]) # Replace NA by 0 to be able to do the barplot - mat[is.na(mat)]<-0 + mat[is.na(mat)] <- 0 # Put right names - rownames(mat)<-colnames(mat.tmp)[-1] - colnames(mat)<-mat.tmp$X1 + rownames(mat) <- colnames(mat.tmp)[-1] + colnames(mat) <- mat.tmp$X1 if (!is.null(colorsForGraphs)){ # If the colors are provided, use them to select the rownames and order them. - newOrder<-intersect(names(colorsForGraphs),rownames(mat)) - mat<-mat[newOrder,] + newOrder <- intersect(names(colorsForGraphs), rownames(mat)) + mat <- mat[newOrder, ] myColor <- colorsForGraphs[rownames(mat)] } else { myColor <- NULL } if (! is.null(orderForCategory1)) { # If the order is provided, use it - mat<-mat[,intersect(orderForCategory1,colnames(mat))] + mat <- mat[, intersect(orderForCategory1, colnames(mat))] } maxChar<-max(sapply(colnames(mat), nchar)) - par(mar=c(4+maxChar/3,4,4,6),xpd=TRUE) - b<-barplot(mat, col = myColor, legend = legend, args.legend = args.legend, las = las, ...) + graphics::par(mar = c(4 + maxChar/3, 4, 4, 6), xpd = TRUE) + b <- graphics::barplot(mat, col = myColor, legend = legend, + args.legend = args.legend, las = las, ...) return(invisible(b)) } @@ -63,69 +75,102 @@ barplotFromNamedVector <- function(data, colorsForGraphs = NULL, orderForCategor #' The name of the item in the list should be name1____name2 #' @importFrom pheatmap pheatmap #' @import stats +#' @importFrom grDevices rainbow rgb +#' @importFrom graphics plot #' @export plotPairwiseComparison <- function(name1, name2, ovF, allExpe, step = 5000){ # e is the name of the experiment - e <- paste(sort(c(name1,name2)),collapse ="____") + e <- paste(sort(c(name1, name2)), collapse = "____") # temp.df contains the filtered overlap between name1 and name2 - temp.df<-ovF[[e]] - maxValue<-max(temp.df,na.rm = T) - steps<-seq(step,maxValue+step,step) - cate<-rep(paste0(steps/1e3,"k"),each=step) - cate<-factor(cate,levels = c(unique(cate),"nonOverlap")) + temp.df <- ovF[[e]] + maxValue <- max(temp.df, na.rm = T) + steps <- seq(step, maxValue + step, step) + cate <- rep(paste0(steps/1e3, "k"), each = step) + cate <- factor(cate, levels = c(unique(cate), "nonOverlap")) # To be able to make plots more easily - # The exact item index will be converted to a factor which contains its "binned" category. - temp.dfCate<-temp.df - for(i in 1:2){ - temp.dfCate[,i]<-cate[temp.dfCate[,i]] - temp.dfCate[is.na(temp.dfCate[,i]),i]<-"nonOverlap" + # The exact item index will be converted + # to a factor which contains its "binned" category. + temp.dfCate <- temp.df + for (i in 1:2){ + temp.dfCate[, i] <- cate[temp.dfCate[, i]] + temp.dfCate[is.na(temp.dfCate[, i]), i] <- "nonOverlap" } # We will now summary the number of time each duo appears: - t<-table(temp.dfCate) - t<-t[rowSums(t)>0,colSums(t)>0] - # Another statistics is the overlapping proportion taking only the first category, + t <- table(temp.dfCate) + t <- t[rowSums(t) > 0, colSums(t) > 0] + # Another statistics is the overlapping proportion taking + # only the first category, # The two first etc... - propOverlap<-sapply(1:(min(nrow(t),ncol(t))-2),function(i){sum(t[1:i,1:i]/(step*i))}) + propOverlap <- sapply(1:(min(nrow(t), ncol(t)) - 2), + function(i){ + sum(t[1:i, 1:i] / (step * i)) + }) # We plot it. # First keeping y extremities to 0 and 1 - plot((1:length(propOverlap))*step,propOverlap, type="b", - xlab="top peaks considered",ylab="Overlapping proportion", - ylim = c(0,1), - main="Overlapping proportion using top peaks") + graphics::plot((1:length(propOverlap)) * step, + propOverlap, type = "b", + xlab = "top peaks considered", + ylab = "Overlapping proportion", + ylim = c(0, 1), + main = "Overlapping proportion using top peaks") # Then letting free the y axis. - plot((1:length(propOverlap))*step,propOverlap, type="b", - xlab="top peaks considered",ylab="Overlapping proportion", - main="Overlapping proportion using top peaks") + graphics::plot((1:length(propOverlap)) * step, propOverlap, type="b", + xlab = "top peaks considered", + ylab = "Overlapping proportion", + main = "Overlapping proportion using top peaks") # We now plot the proportion of each category of name1 for each category of name2 - plot(t(t),color=c(rainbow((nrow(t)-1)),"white"),main=paste0("correlation between the ranking of\n",name1," and ",name2)) + graphics::plot(t(t), color = c(grDevices::rainbow((nrow(t) - 1)), "white"), + main = paste0("correlation between the ranking of\n", + name1, " and ", name2)) # We now plot the proportion of each category of name2 for each category of name1 - plot(t,color=c(rainbow((ncol(t)-1)),"white"),main=paste0("correlation between the ranking of\n",name1," and ",name2)) + graphics::plot(t, color = c(grDevices::rainbow((ncol(t) - 1)), "white"), + main = paste0("correlation between the ranking of\n", + name1, " and ", name2)) # We will now plot the same but without proportion just with colors with pheatmap - tWithNames<-t - rownames(tWithNames)<-paste0(name1,"_",rownames(tWithNames)) - colnames(tWithNames)<-paste0(name2,"_",colnames(tWithNames)) - pheatmap::pheatmap(t(tWithNames),cluster_cols = F,cluster_rows = F,display_numbers = T,number_format = "%d", - main=paste0("correlation between the ranking of\n",name1," and ",name2)) - pheatmap::pheatmap(tWithNames,cluster_cols = F,cluster_rows = F,display_numbers = T,number_format = "%d", - main=paste0("correlation between the ranking of\n",name1," and ",name2)) + tWithNames <- t + rownames(tWithNames) <- paste0(name1, "_", rownames(tWithNames)) + colnames(tWithNames) <- paste0(name2, "_", colnames(tWithNames)) + pheatmap::pheatmap(t(tWithNames), cluster_cols = F, cluster_rows = F, + display_numbers = T, number_format = "%d", + main = paste0("correlation between the ranking of\n", + name1, " and ", name2)) + pheatmap::pheatmap(tWithNames, cluster_cols = F, cluster_rows = F, + display_numbers = T, number_format = "%d", + main = paste0("correlation between the ranking of\n", + name1, " and ", name2)) # We will now plot the correlation without binning - temp.dfNoNA<-temp.df - for(c in colnames(temp.dfNoNA)){ - # When there is no overlap we arbitrary put the rank to 1.2 x the maximum rank. - temp.dfNoNA[is.na(temp.dfNoNA[,c]),c]<-max(temp.dfNoNA[,c],na.rm = T)*1.2 - } - corTest<-cor.test(temp.dfNoNA[,1],temp.dfNoNA[,2]) - plot(temp.dfNoNA[,c(name1,name2)],pch=16,col=rgb(0,0,0,0.03),main="rank correlation",sub=paste0("cor=",format(corTest$estimate,digits=2))) - plot(temp.dfNoNA[,c(name2,name1)],pch=16,col=rgb(0,0,0,0.03),main="rank correlation",sub=paste0("cor=",format(corTest$estimate,digits=2))) + temp.dfNoNA <- temp.df + for (c in colnames(temp.dfNoNA)){ + # When there is no overlap we arbitrary put + # the rank to 1.2 x the maximum rank. + temp.dfNoNA[is.na(temp.dfNoNA[, c]), c] <- max(temp.dfNoNA[, c], + na.rm = T) * 1.2 + } + corTest <- cor.test(temp.dfNoNA[, 1], temp.dfNoNA[, 2]) + graphics::plot(temp.dfNoNA[, c(name1, name2)], pch = 16, + col = grDevices::rgb(0, 0, 0, 0.03), + main = "rank correlation", + sub = paste0("cor=", format(corTest$estimate, digits = 2))) + graphics::plot(temp.dfNoNA[, c(name2, name1)], pch = 16, + col = grDevices::rgb(0, 0, 0, 0.03), + main = "rank correlation", + sub = paste0("cor=", format(corTest$estimate, digits = 2))) # We will now correlate the scores - temp.dfScore<-temp.df - temp.dfScore[,]<-0 + temp.dfScore <- temp.df + temp.dfScore[, ] <- 0 for(c in colnames(temp.dfNoNA)){ - temp.dfScore[!is.na(temp.df[,c]),c]<-allExpe[[c]]$score[temp.df[!is.na(temp.df[,c]),c]] - } - corTestS<-cor.test(temp.dfScore[,1],temp.dfScore[,2]) - plot(temp.dfScore[,c(name1,name2)],pch=16,col=rgb(0,0,0,0.03),main="score correlation",sub=paste0("cor=",format(corTestS$estimate,digits=2))) - plot(temp.dfScore[,c(name2,name1)],pch=16,col=rgb(0,0,0,0.03),main="score correlation",sub=paste0("cor=",format(corTestS$estimate,digits=2))) + temp.dfScore[!is.na(temp.df[, c]), c] <- + allExpe[[c]]$score[temp.df[!is.na(temp.df[, c]), c]] + } + corTestS <- cor.test(temp.dfScore[, 1], temp.dfScore[, 2]) + graphics::plot(temp.dfScore[, c(name1, name2)], pch = 16, + col = grDevices::rgb(0,0,0,0.03), + main = "score correlation", + sub = paste0("cor=", format(corTestS$estimate, digits = 2))) + graphics::plot(temp.dfScore[, c(name2, name1)], pch = 16, + col = grDevices::rgb(0,0,0,0.03), + main = "score correlation", + sub = paste0("cor=", format(corTestS$estimate, digits = 2))) } @@ -135,7 +180,7 @@ plotPairwiseComparison <- function(name1, name2, ovF, allExpe, step = 5000){ #'@param ovF a list which contains the filtered overlaps between the samples. Should at least contains a item named e. #'@param allExpe a GRangeList with at least each sample in `e` #'@param grLScores a GRangeList with annotations for which you would like to know the best score overlapping the GRanges of experiments -#'@param grDistance a GRangeList with annotations for which you would like to know the distance to the closest item +#'@param grLDistance a GRangeList with annotations for which you would like to know the distance to the closest item #'@param nameOfRef among the samples which one is the reference, the others will be merged as "replicates" #'@param useSummitPMFlanking logical value to specify if you prefer to use the full region of the peak or only the summit +/- the `flankingSize` #'@param flankingSize integer value used to extend the summit if `useSummitPMFlanking` is TRUE @@ -154,70 +199,96 @@ plotPairwiseComparison <- function(name1, name2, ovF, allExpe, step = 5000){ #'distanceToTheNearestNameOfTheGR for each GR in grLDistance #'The first item of myGRs will also have inUniqueRef which is the index of the merged item of the Reference. #'The second item of myGRs will also have inNoneOfTheSet +#'@importFrom GenomicRanges mcols start end flank findOverlaps distanceToNearest #'@export -createMyGRs<-function(e, ovF, allExpe, grLScores = NULL, grLDistance = NULL, - nameOfRef = "ChIP", useSummitPMFlanking = T, flankingSize = 150){ - if( ! e %in% names(ovF)){ +createMyGRs<-function(e, ovF, allExpe, + grLScores = NULL, grLDistance = NULL, + nameOfRef = "ChIP", + useSummitPMFlanking = T, flankingSize = 150){ + if ( ! e %in% names(ovF)){ stop(e, "is not part of the names of ovF") } # df contains the filtered overlaps - df<-ovF[[e]] - samplesToCheck<-colnames(df) - if(!nameOfRef %in% samplesToCheck){ - stop(nameOfRef," is not part of ",e) + df <- ovF[[e]] + samplesToCheck <- colnames(df) + if (!nameOfRef %in% samplesToCheck){ + stop(nameOfRef, " is not part of ", e) } # stringSet contains the name of other samples which are not the Ref - stringSet<-paste(setdiff(samplesToCheck,nameOfRef),collapse=",") - df$nbNA<-apply(df,1,function(v){sum(is.na(v))}) - df$RefisNA<-as.numeric(is.na(df[,nameOfRef])) + stringSet <- paste(setdiff(samplesToCheck, nameOfRef), collapse = ",") + df$nbNA <- apply(df, 1, function(v){ + sum(is.na(v)) + }) + df$RefisNA <- as.numeric(is.na(df[, nameOfRef])) # name1 is the name of the first non Ref sample - name1<-setdiff(samplesToCheck,nameOfRef)[1] + name1 <- setdiff(samplesToCheck, nameOfRef)[1] # We are now selecting the indices of name1 items # which overlap with all other samples which are not the Ref - i_fullOverlap<-df[,name1][(df$nbNA-df$RefisNA)==0] + i_fullOverlap <- df[, name1][(df$nbNA - df$RefisNA) == 0] # grSetRep contains the subset of the GRange of name1 which # Overlap with all other samples which are not the Ref - grSetRep<-allExpe[[name1]][i_fullOverlap] + grSetRep <- allExpe[[name1]][i_fullOverlap] # We store in this GRanges the index of the overlapped item in Ref # (in the metadata "inUniqueRef") - grSetRep$inUniqueRef<-df[match(i_fullOverlap,df[,name1]),nameOfRef] - # We now change the coordiantes of the GRange and keep the summit +/- the flankingSize + grSetRep$inUniqueRef <- df[match(i_fullOverlap, df[, name1]), nameOfRef] + # We now change the coordiantes of the GRange and keep + # the summit +/- the flankingSize grMySummitsExtended <- grSetRep - if(useSummitPMFlanking){ - grMySummits<-grSetRep - start(grMySummits)<-start(grSetRep)+grSetRep$relativeSummit - end(grMySummits)<-start(grMySummits) - grMySummitsExtended<-flank(grMySummits,width = flankingSize,both = T) - } - grRef<-allExpe[[nameOfRef]] + if (useSummitPMFlanking){ + grMySummits <- grSetRep + GenomicRanges::start(grMySummits) <- GenomicRanges::start(grSetRep) + + grSetRep$relativeSummit + GenomicRanges::end(grMySummits) <- GenomicRanges::start(grMySummits) + grMySummitsExtended <- GenomicRanges::flank(grMySummits, + width = flankingSize, + both = T) + } + grRef <- allExpe[[nameOfRef]] # We annotate the reference GRanges to # Specify when it is a totally specific peak - grRef$inNoneOfTheSet<-TRUE - grRef$inNoneOfTheSet[na.omit(df[df$nbNA!=(length(samplesToCheck)-1),nameOfRef])]<-F + grRef$inNoneOfTheSet <- TRUE + grRef$inNoneOfTheSet[na.omit(df[df$nbNA != (length(samplesToCheck) - 1), + nameOfRef])] <- F grRefSummitsExtended <- grRef - if(useSummitPMFlanking){ - grRefSummits<-grRef - start(grRefSummits)<-start(grRef)+grRef$relativeSummit - end(grRefSummits)<-start(grRefSummits) - grRefSummitsExtended<-flank(grRefSummits,width = flankingSize,both = T) - } - myGRs<-list(grMySummitsExtended,grRefSummitsExtended) + if (useSummitPMFlanking){ + grRefSummits <- grRef + GenomicRanges::start(grRefSummits) <- GenomicRanges::start(grRef) + + grRef$relativeSummit + GenomicRanges::end(grRefSummits) <- GenomicRanges::start(grRefSummits) + grRefSummitsExtended <- GenomicRanges::flank(grRefSummits, + width = flankingSize, + both = T) + } + myGRs <- list(grMySummitsExtended, grRefSummitsExtended) # We will annotate the GRanges with the features which are in parameters - for(iGR in 1:2){ - for(jGR in 1:length(grLScores)){ - annotScoreOverlap <- as.data.frame(findOverlaps(myGRs[[iGR]],grLScores[[jGR]])) - annotScoreOverlapByScore<-aggregate(list(score=grLScores[[jGR]]$score[annotScoreOverlap$subjectHits]),by=list(peak=annotScoreOverlap$queryHits),FUN=max) + for (iGR in 1:2){ + for (jGR in 1:length(grLScores)){ + annotScoreOverlap <- + as.data.frame(GenomicRanges::findOverlaps(myGRs[[iGR]], + grLScores[[jGR]])) + annotScoreOverlapByScore <- + aggregate(list( + score = grLScores[[jGR]]$score[annotScoreOverlap$subjectHits] + ), by = list(peak = annotScoreOverlap$queryHits), FUN = max) nameJ <- names(grLScores)[jGR] - mcols(myGRs[[iGR]])[, paste0("best", nameJ, "Score")] <- 0 - mcols(myGRs[[iGR]])[annotScoreOverlapByScore$peak, paste0("best", nameJ, "Score")] <- annotScoreOverlapByScore$score + GenomicRanges::mcols(myGRs[[iGR]])[, paste0("best", nameJ, "Score")] <- 0 + GenomicRanges::mcols(myGRs[[iGR]])[annotScoreOverlapByScore$peak, + paste0("best", nameJ, "Score")] <- + annotScoreOverlapByScore$score } - for(jGR in 1:length(grLDistance)){ + for (jGR in 1:length(grLDistance)){ nameJ <- names(grLDistance)[jGR] - mcols(myGRs[[iGR]])[, paste0("distanceToNearest", nameJ)] <- as.data.frame(distanceToNearest(myGRs[[iGR]],grLDistance[[jGR]]))$distance + GenomicRanges::mcols(myGRs[[iGR]])[, + paste0("distanceToNearest", + nameJ)] <- + as.data.frame( + GenomicRanges::distanceToNearest(myGRs[[iGR]], + grLDistance[[jGR]]))$distance } } - return(list("stringSet" = stringSet, "myGRs"= myGRs, "nameOfRef" = nameOfRef, - "namesOfGrLScores" = names(grLScores), "namesOfGrLDistance" = names(grLDistance), + return(list("stringSet" = stringSet, "myGRs" = myGRs, + "nameOfRef" = nameOfRef, "namesOfGrLScores" = names(grLScores), + "namesOfGrLDistance" = names(grLDistance), "useSummitPMFlanking" = useSummitPMFlanking, "flankingSize" = flankingSize)) } @@ -226,13 +297,16 @@ createMyGRs<-function(e, ovF, allExpe, grLScores = NULL, grLDistance = NULL, #' #' @param myGRAndAttributes Should be the output of \link[analysePeaks]{createMyGRs} #' @return Plot histograms but do not return anything +#' @importFrom GenomicRanges mcols +#' @importFrom graphics hist legend +#' @importFrom grDevices rgb #' @export -plotClassicalHistogramForMyGRs<-function(myGRAndAttributes){ +plotClassicalHistogramForMyGRs <- function(myGRAndAttributes){ myGRs <- myGRAndAttributes[["myGRs"]] # I deduce from the namesOfGrLScores and namesOfGrLDistance # The interests myInterests <- list() - for(nameJ in myGRAndAttributes[["namesOfGrLScores"]]){ + for (nameJ in myGRAndAttributes[["namesOfGrLScores"]]){ whatString = paste("score of", nameJ) if(myGRAndAttributes[["useSummitPMFlanking"]]){ whatString = paste(whatString, @@ -245,7 +319,7 @@ plotClassicalHistogramForMyGRs<-function(myGRAndAttributes){ columnName = paste0("best", nameJ, "Score") ) } - for(nameJ in myGRAndAttributes[["namesOfGrLDistance"]]){ + for (nameJ in myGRAndAttributes[["namesOfGrLDistance"]]){ whatString = paste("distance to the closest", nameJ) myInterests[[paste0("d_", nameJ)]] <- list( what = whatString, @@ -253,41 +327,49 @@ plotClassicalHistogramForMyGRs<-function(myGRAndAttributes){ ) } # For each interest we do histograms - for(i in 1:length(myInterests)){ + for (i in 1:length(myInterests)){ what <- myInterests[[i]][["what"]] myCol <- myInterests[[i]][["columnName"]] - allData<-unlist(sapply(myGRs,function(gr){mcols(gr)[,myCol]})) + allData <- unlist(sapply(myGRs, function(gr){ + GenomicRanges::mcols(gr)[, myCol] + })) # Usually I do not want to plot all values, only the lower 80% - pc80<-quantile(allData, probs=0.8) + pc80 <- quantile(allData, probs = 0.8) # But sometimes the data is full of 0 - if(median(allData)==0){ - pc80<-quantile(allData[allData>0], probs=0.8) + if (median(allData) == 0){ + pc80<-quantile(allData[allData > 0], probs = 0.8) } # And sometimes it is still very low - if(pc80==min(allData)){ - pc80<-max(allData) + if (pc80 == min(allData)){ + pc80 <- max(allData) } - h<-hist(allData[allData1){ - newAllExpe<-subsetByNamesOrIndices(allExpe,1:(positionOfGrNewName-1)) - newAllExpeToOverlap<-subsetByNamesOrIndices(allExpeToOverlap,names(allExpe)[1:(positionOfGrNewName-1)]) - } else { - newAllExpe<-list() - newAllExpeToOverlap<-list() - } - newAllExpe[[grNewName]]<-grNew - newAllExpeToOverlap[[grNewName]]<-grNew - i<-length(newAllExpe) - if(positionOfGrNewName<=length(allExpe)){ - newAllExpe<-c(newAllExpe,subsetByNamesOrIndices(allExpe,positionOfGrNewName:length(allExpe))) - newAllExpeToOverlap<-c(newAllExpeToOverlap, - subsetByNamesOrIndices(allExpeToOverlap,names(allExpe)[positionOfGrNewName:length(allExpe)])) - } - newAllExpeToOverlap<-c(newAllExpeToOverlap,subsetByNamesOrIndices(allExpeToOverlap,setdiff(names(allExpeToOverlap),names(allExpe)))) - cat(grNewName) - mcols(newAllExpe[[i]])$nearestGene<-tss$gene_name[nearest(newAllExpe[[i]],tss)] - for(j in 1:length(newAllExpeToOverlap)){ - cat(".") - v1<-countOverlaps(newAllExpe[[i]],newAllExpeToOverlap[[j]]) - mcols(newAllExpe[[i]])[,paste0("in",names(newAllExpeToOverlap)[j],"cate")]<-"specific" - mcols(newAllExpe[[i]])[,paste0("in",names(newAllExpeToOverlap)[j],"cate")][v1>0]<-"overlap" - if(names(newAllExpeToOverlap)[j]%in%names(newAllExpe)){ - for(topT in sort(topTs,decreasing = T)){ - # cat(topT) - v2<-countOverlaps(newAllExpe[[i]],newAllExpeToOverlap[[j]][1:min(topT,length(newAllExpeToOverlap[[j]]))]) - mcols(newAllExpe[[i]])[,paste0("in",names(newAllExpeToOverlap)[j],"cate")][v2>0]<-paste0("overlapTop",topT/1e3,"k") - } - } - } - cat("\n") - j<-i - for(k in setdiff(1:length(newAllExpe),i)){ - cat(names(newAllExpe)[k],".") - v1<-countOverlaps(newAllExpe[[k]],newAllExpeToOverlap[[j]]) - mcols(newAllExpe[[k]])[,paste0("in",names(newAllExpeToOverlap)[j],"cate")]<-"specific" - mcols(newAllExpe[[k]])[,paste0("in",names(newAllExpeToOverlap)[j],"cate")][v1>0]<-"overlap" - if(names(newAllExpeToOverlap)[j]%in%names(newAllExpe)){ - for(topT in sort(topTs,decreasing = T)){ - # cat(topT) - v2<-countOverlaps(newAllExpe[[k]],newAllExpeToOverlap[[j]][1:min(topT,length(newAllExpeToOverlap[[j]]))]) - mcols(newAllExpe[[k]])[,paste0("in",names(newAllExpeToOverlap)[j],"cate")][v2>0]<-paste0("overlapTop",topT/1e3,"k") - } - } - cat("\n") - } - return(list("allExpe" = newAllExpe, "allExpeToOverlap" = newAllExpeToOverlap)) -} - - - -addNewGRinAllExpe<-function(grNew, grNewName, allExpe, allExpeToOverlap, tss, positionOfGrNewName = (length(allExpe) + 1)){ - if(positionOfGrNewName>1){ - newAllExpe<-subsetByNamesOrIndices(allExpe,1:(positionOfGrNewName-1)) - newAllExpeToOverlap<-subsetByNamesOrIndices(allExpeToOverlap,names(allExpe)[1:(positionOfGrNewName-1)]) - } else { - newAllExpe<-list() - newAllExpeToOverlap<-list() - } - newAllExpe[[grNewName]]<-grNew - newAllExpeToOverlap[[grNewName]]<-grNew - i<-length(newAllExpe) - if(positionOfGrNewName<=length(allExpe)){ - newAllExpe<-c(newAllExpe,subsetByNamesOrIndices(allExpe,positionOfGrNewName:length(allExpe))) - newAllExpeToOverlap<-c(newAllExpeToOverlap, - subsetByNamesOrIndices(allExpeToOverlap,names(allExpe)[positionOfGrNewName:length(allExpe)])) - } - newAllExpeToOverlap<-c(newAllExpeToOverlap,subsetByNamesOrIndices(allExpeToOverlap,setdiff(names(allExpeToOverlap),names(allExpe)))) - cat(grNewName) - mcols(newAllExpe[[i]])$nearestGene<-tss$gene_name[nearest(newAllExpe[[i]],tss)] - for(j in 1:length(newAllExpeToOverlap)){ - cat(".") - v1<-countOverlaps(newAllExpe[[i]],newAllExpeToOverlap[[j]]) - mcols(newAllExpe[[i]])[,paste0("in",names(newAllExpeToOverlap)[j],"cate")]<-"specific" - mcols(newAllExpe[[i]])[,paste0("in",names(newAllExpeToOverlap)[j],"cate")][v1>0]<-"overlap" - if(names(newAllExpeToOverlap)[j]%in%names(newAllExpe)){ - for(topT in sort(topTs,decreasing = T)){ - # cat(topT) - v2<-countOverlaps(newAllExpe[[i]],newAllExpeToOverlap[[j]][1:min(topT,length(newAllExpeToOverlap[[j]]))]) - mcols(newAllExpe[[i]])[,paste0("in",names(newAllExpeToOverlap)[j],"cate")][v2>0]<-paste0("overlapTop",topT/1e3,"k") - } - } - } - cat("\n") - j<-i - for(k in setdiff(1:length(newAllExpe),i)){ - cat(names(newAllExpe)[k],".") - v1<-countOverlaps(newAllExpe[[k]],newAllExpeToOverlap[[j]]) - mcols(newAllExpe[[k]])[,paste0("in",names(newAllExpeToOverlap)[j],"cate")]<-"specific" - mcols(newAllExpe[[k]])[,paste0("in",names(newAllExpeToOverlap)[j],"cate")][v1>0]<-"overlap" - if(names(newAllExpeToOverlap)[j]%in%names(newAllExpe)){ - for(topT in sort(topTs,decreasing = T)){ - # cat(topT) - v2<-countOverlaps(newAllExpe[[k]],newAllExpeToOverlap[[j]][1:min(topT,length(newAllExpeToOverlap[[j]]))]) - mcols(newAllExpe[[k]])[,paste0("in",names(newAllExpeToOverlap)[j],"cate")][v2>0]<-paste0("overlapTop",topT/1e3,"k") - } + plotAllPheatmapsFor2CategoriesFromMyGR(myGRs, + allCates[[i]][["nameOfCol"]], + allCates[[i]][["cateNames"]], + allCates[[i]][["what"]], + allCates[[j]][["nameOfCol"]], + allCates[[j]][["cateNames"]], + allCates[[j]][["what"]], + myGRAndAttributes[["stringSet"]], + myGRAndAttributes[["nameOfRef"]]) } - cat("\n") } - return(list("allExpe" = newAllExpe, "allExpeToOverlap" = newAllExpeToOverlap)) } diff --git a/man/barplotFromNamedVector.Rd b/man/barplotFromNamedVector.Rd index d3a1d88..1660233 100644 --- a/man/barplotFromNamedVector.Rd +++ b/man/barplotFromNamedVector.Rd @@ -15,6 +15,12 @@ barplotFromNamedVector(data, colorsForGraphs = NULL, \item{orderForCategory1}{a vector with the names of the category1 in the order it should be plotted} +\item{legend}{a logical to precise if the legend should be added (default is TRUE)} + +\item{args.legend}{list of arguments for the leged (default is topright inset=(-0.2, 0), bg= "white")} + +\item{las}{a numeric value between 0 and 3 to specify the style of axis label (default is 2)} + \item{...}{other arguments for barplot} } \value{ @@ -28,7 +34,7 @@ Will plot a barplot with one bar per category1 } \examples{ topTs <- c(5, 10, 15, 20) * 1e3 -colorsForGraphs<-rainbow(2+length(topTs)) +colorsForGraphs<-grDevices::rainbow(2+length(topTs)) names(colorsForGraphs)<-c(paste0("overlapTop",sort(topTs/1e3),"k"),"overlap","specific") v <- c(15809, rep(5000, 4), c(9316, 4762, 4327, 3831, 5051, 36638)) names(v) <- paste(c(rep("ChIP1", 5), rep("ChIP2", 6)), diff --git a/man/createMyGRs.Rd b/man/createMyGRs.Rd index 6742358..cb943e2 100644 --- a/man/createMyGRs.Rd +++ b/man/createMyGRs.Rd @@ -16,13 +16,13 @@ createMyGRs(e, ovF, allExpe, grLScores = NULL, grLDistance = NULL, \item{grLScores}{a GRangeList with annotations for which you would like to know the best score overlapping the GRanges of experiments} +\item{grLDistance}{a GRangeList with annotations for which you would like to know the distance to the closest item} + \item{nameOfRef}{among the samples which one is the reference, the others will be merged as "replicates"} \item{useSummitPMFlanking}{logical value to specify if you prefer to use the full region of the peak or only the summit +/- the `flankingSize`} \item{flankingSize}{integer value used to extend the summit if `useSummitPMFlanking` is TRUE} - -\item{grDistance}{a GRangeList with annotations for which you would like to know the distance to the closest item} } \value{ A list with: