diff --git a/.gitignore b/.gitignore index 1128f66..f7db638 100644 --- a/.gitignore +++ b/.gitignore @@ -4,10 +4,12 @@ .Ruserdata .Rapp.history .DS_Store +.vscode *.Rproj *_cache workflow/logs/* .snakemake/* +data/* *bam *bam.bai *bdg diff --git a/config/config.yml b/config/config.yml index 2f98524..639e431 100644 --- a/config/config.yml +++ b/config/config.yml @@ -4,8 +4,6 @@ samples: paths: bam: "data/aligned" - macs2: "data/macs2" - bigwig: "data/bigwig" comparisons: fc: 1.2 diff --git a/config/samples.tsv b/config/samples.tsv index 89985cd..57f35b5 100644 --- a/config/samples.tsv +++ b/config/samples.tsv @@ -10,4 +10,4 @@ AR_Veh_2 AR Veh 2 AR_Input AR_Veh_3 AR Veh 3 AR_Input AR_DHT_1 AR DHT 1 AR_Input AR_DHT_2 AR DHT 2 AR_Input -AR_DHT_3 AR DHT 3 AR_Input \ No newline at end of file +AR_DHT_3 AR DHT 3 AR_Input diff --git a/workflow/Snakefile b/workflow/Snakefile index d496b89..aea48a9 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -12,6 +12,34 @@ configfile: "config/config.yml" ## Figure out how to run check_yaml from here if possible ## python scripts/check_yaml.py +## Set the here file as defined by here::here() for Rmarkdown files +## In a valid Rproj file is found, use that. Otherwise use .here +def get_here_file(): + wd = os.getcwd() + rproj = os.path.basename(wd) + ".Rproj" + rproj_path = os.path.join(wd, rproj) + here_file = os.path.join(wd, ".here") + if os.path.isfile(rproj_path): + ## Check contents of file for Version in line 1 + with open(rproj_path) as f: + ln = f.readline().rstrip() + if 'Version:' in ln: + here_file = rproj_path + return(here_file) + +#################################### +## Check all external files exist ## +#################################### +all_exist=True +# if config['external']['rnaseq'] != '': +# if not os.path.isfile(config['external']['rnaseq']): +# all_exist=False +# print(config['external']['rnaseq'] + " does not exist") + + +if not all_exist: + sys.exit(1) + def get_ucsc_genome(x): map = pd.Series(['hg19', 'hg38'], index = ['GRCh37', 'GRCh38']) if not x in map.keys(): @@ -88,13 +116,13 @@ pairs=make_pairwise(config['comparisons']['contrasts']) ############### ## Key Paths ## ############### +here_file = get_here_file() bam_path = config['paths']['bam'] -bw_path = config['paths']['bigwig'] -macs2_path = config['paths']['macs2'] -# ext_path = os.path.join("data", "external") rmd_path = "analysis" -## This still needs to be changed internally in the RMD files +# macs2_path = config['paths']['macs2'] +macs2_path = os.path.join("output", "macs2") annotation_path = os.path.join("output", "annotations") +log_path = os.path.join("workflow", "logs") ############################### ## External Annotation Files ## @@ -168,7 +196,7 @@ ALL_OUTPUTS.extend([os.path.join("docs", "index.html")]) ## Peaks generated from the Rmd files CONS_PEAKS = expand( - os.path.join("output", "{target}", "{file}"), + os.path.join(macs2_path, "{target}", "{file}"), target = targets, file = ['consensus_peaks.bed', 'oracle_peaks.rds'] ) @@ -202,13 +230,13 @@ ALL_OUTPUTS.extend(MERGED_PEAKS) ## BigWig Files ## ################## INDIV_BW = expand( - os.path.join(bw_path, "{path}_treat_pileup.{suffix}"), + os.path.join(macs2_path, "{path}_treat_pileup.{suffix}"), path = indiv_pre, suffix = ['bw']#, 'summary'] ) ALL_OUTPUTS.extend(INDIV_BW) MERGED_BW = expand( - os.path.join(bw_path, "{path}_merged_treat_pileup.{suffix}"), + os.path.join(macs2_path, "{path}_merged_treat_pileup.{suffix}"), path = merged_pre, suffix = ['bw']#, 'summary'] ) diff --git a/workflow/modules/annotation_description.Rmd b/workflow/modules/annotation_description.Rmd index e24887d..5685e34 100644 --- a/workflow/modules/annotation_description.Rmd +++ b/workflow/modules/annotation_description.Rmd @@ -5,7 +5,7 @@ editor_options: chunk_output_type: console --- -```{r set-knitr-opts, echo=FALSE, child = here::here('workflow/modules/setup_chunk.Rmd')} +```{r set-knitr-opts, echo=FALSE, child = here::here('analysis/setup_chunk.Rmd')} ``` ```{r packages} diff --git a/workflow/modules/differential_binding.Rmd b/workflow/modules/differential_binding.Rmd index 7f94a1f..27dbcd8 100644 --- a/workflow/modules/differential_binding.Rmd +++ b/workflow/modules/differential_binding.Rmd @@ -60,7 +60,8 @@ source(here::here("workflow", "scripts", "custom_functions.R")) ```{r load-config} params <- read_yaml(here::here("config", "params.yml")) config <- read_yaml(here::here("config", "config.yml")) -samples <- here::here("output", target, "qc_samples.tsv") %>% +macs2_path <- here::here("output", "macs2", target) +samples <- file.path(macs2_path, "qc_samples.tsv") %>% read_tsv() %>% dplyr::filter( qc == "pass", @@ -81,8 +82,6 @@ bam_path <- here::here(config$paths$bam) stopifnot(dir.exists(bam_path)) annotation_path <- here::here("output", "annotations") stopifnot(dir.exists(annotation_path)) -bw_path <- here::here(config$paths$bigwig, target) -stopifnot(dir.exists(bw_path)) out_path <- here::here("output", target) if (!dir.exists(out_path)) dir.create(out_path) ``` @@ -286,7 +285,7 @@ all_out <- list( # Data Preparation ```{r import-peaks} -consensus_peaks <- here::here("output", target, "consensus_peaks.bed") %>% +consensus_peaks <- here::here(macs2_path, "consensus_peaks.bed") %>% import.bed(seqinfo = sq) ``` @@ -370,8 +369,8 @@ Taking the first `r comma(ys)` alignments, a brief inspection of the Bam Files r ## Sliding Window Counts ```{r window-counts} -macs2_merged_logs <- here::here( - "data", "macs2", target, glue("{treat_levels}_merged_callpeak.log") +macs2_merged_logs <- file.path( + macs2_path, glue("{treat_levels}_merged_callpeak.log") ) %>% importNgsLogs() fl <- max(macs2_merged_logs$fragment_length) @@ -874,7 +873,7 @@ For visualisation purposes, only genes which were considered as detected in any ```{r make-heatmaps} profile_width <- 5e3 n_bins <- 100 -bwfl <- file.path(bw_path, glue("{treat_levels}_merged_treat_pileup.bw")) %>% +bwfl <- file.path(macs2_path, glue("{treat_levels}_merged_treat_pileup.bw")) %>% BigWigFileList() %>% setNames(treat_levels) sig_ranges <- merged_results %>% @@ -1655,7 +1654,7 @@ grl_to_plot <- GRangesList( ## The coverage bwfl <- list2( "{target}" := file.path( - bw_path, glue("{treat_levels}_merged_treat_pileup.bw") + macs2_path, glue("{treat_levels}_merged_treat_pileup.bw") ) %>% BigWigFileList() %>% setNames(treat_levels) @@ -2677,12 +2676,18 @@ file.create(all_out$de_genes) if (any_detected) write_csv(de_genes_db_regions, all_out$de_genes) file.create(all_out$up_regions) -if (length(sig_ranges$Up) > 0) - export.bed(granges(sig_ranges$Up), all_out$up_regions) +if (length(sig_ranges$Up) > 0) { + sig_ranges$Up %>% + granges() %>% + export.bed(all_out$up_regions) +} file.create(all_out$down_regions) -if (length(sig_ranges$Down) > 0) - export.bed(granges(sig_ranges$Down), all_out$down_regions) +if (length(sig_ranges$Down) > 0) { + sig_ranges$Down %>% + granges() %>% + export.bed(all_out$down_regions) +} save.image(all_out$renv) ``` diff --git a/workflow/modules/ihw_targets.Rmd b/workflow/modules/ihw_targets.Rmd index 10976ec..3f3df2e 100644 --- a/workflow/modules/ihw_targets.Rmd +++ b/workflow/modules/ihw_targets.Rmd @@ -7,7 +7,7 @@ other_targets <- here::here(config$samples$file) %>% ihw_all <- other_targets %>% sapply( function(x) { - here::here("output", x, "consensus_peaks.bed") %>% + here::here(dirname(macs2_path), x, "consensus_peaks.bed") %>% import.bed(seqinfo = sq) }, simplify = FALSE diff --git a/analysis/index.Rmd b/workflow/modules/index.Rmd similarity index 91% rename from analysis/index.Rmd rename to workflow/modules/index.Rmd index 4c45b1d..f7bdfb6 100644 --- a/analysis/index.Rmd +++ b/workflow/modules/index.Rmd @@ -34,14 +34,14 @@ Treatment groups and targets are specified using `config/config.yml`. ## Workflow -```{r plot-workflow, fig.height = 10, fig.width = 10, fig.cap = "*Summary of processing workflow. The primary data pipeline is shown in red, preparatory steps are shown in blue whilst collation of final output is in green.*"} +```{r plot-workflow, fig.height = 10, fig.width = 10, fig.cap = "*Summary of processing workflow. The primary data pipeline producing key outputs is shown in red, preparatory steps are shown in blue whilst steps involved in the collation of final output are in green.*"} here::here("workflow", "rules", "rulegraph.dot") %>% readLines() %>% rm_dot_node(node = "\"all\"") %>% add_input_node(node = "Alignments\n(Bam Files)", col = "red", ignore = "(download|define|macs2|create|install)") %>% - change_node_colour("(setup|index|macs2|bedgraph|identify|binding|install)", "red") %>% + change_node_colour("(index|macs2|annotations|bedgraph|compile)", "red") %>% + change_node_colour("(create|build)", "forestgreen") %>% change_node_colour("(download|write|copy|install)", "blue") %>% - change_node_colour("(site|create|build|differential|pairwise)", "forestgreen") %>% str_replace_all("_", "\n") %>% str_replace_all("snakemake\ndag", "snakemake_dag") %>% str_replace_all("fontsize=[0-9]+", "fontsize=16") %>% diff --git a/workflow/modules/macs2_summary.Rmd b/workflow/modules/macs2_summary.Rmd index 8a8c8e3..78ebba8 100644 --- a/workflow/modules/macs2_summary.Rmd +++ b/workflow/modules/macs2_summary.Rmd @@ -46,6 +46,7 @@ register(MulticoreParam(workers = threads)) source(here::here("workflow/scripts/custom_functions.R")) ## This has been pushed to the main repo of ggside, but is not yet ## incorporated into the packge. The functions are geom_*sidelabel +## Once ggside 0.2.1 is released, this can be deleted source(here::here("workflow/scripts/geom_sidelabel.R")) ``` @@ -55,11 +56,7 @@ config <- read_yaml( here::here("config", "config.yml") ) bam_path <- here::here(config$paths$bam, target) -stopifnot(dir.exists(bam_path)) -macs2_path <- here::here(config$paths$macs2, target) -stopifnot(dir.exists(macs2_path)) -out_path <- here::here("output", target) -if (!dir.exists(out_path)) dir.create(out_path) +macs2_path <- here::here("output", "macs2", target) annotation_path <- here::here("output", "annotations") colours <- read_rds( file.path(annotation_path, "colours.rds") @@ -68,7 +65,7 @@ colours <- read_rds( ``` ```{r read-samples} -samples <-read_tsv(file.path(out_path, "qc_samples.tsv")) +samples <-read_tsv(file.path(macs2_path, "qc_samples.tsv")) treat_levels <- unique(samples$treat) if (!is.null(config$comparisons$contrasts)) { ## Ensure levels respect those provided in contrasts @@ -325,7 +322,7 @@ suppressWarnings( ### Cross Correlations ```{r plot_correlation, fig.height=6, fig.cap = glue("*Cross Correlaton between alignments up to 1kb apart. The dashed, grey, vertical line is the fragment length estimated by `macs2 callpeak` for each sample. For speed, only the first 5 chromosomes were used for sample-specific estimates.*")} -file.path(out_path, "cross_correlations.tsv") %>% +file.path(macs2_path, "cross_correlations.tsv") %>% read_tsv() %>% left_join(samples, by = "sample") %>% ggplot(aes(fl, correlation, colour = treat)) + @@ -680,8 +677,8 @@ consensus_peaks %>% ```{r export} all_out <- list( - consensus_peaks = file.path(out_path, "consensus_peaks.bed"), - oracle_peaks = file.path(out_path, "oracle_peaks.rds"), + consensus_peaks = file.path(macs2_path, "consensus_peaks.bed"), + oracle_peaks = file.path(macs2_path, "oracle_peaks.rds"), renv = file.path( here::here("output/envs"), glue("{target}_macs2_summary.RData") @@ -693,6 +690,7 @@ oracle_peaks %>% lapply(select, -keep) %>% GRangesList() %>% write_rds(all_out$oracle_peaks, compress = "gz") +if (!dir.exists(dirname(all_out$renv))) dir.create(dirname(all_out$renv)) save.image(all_out$renv) ``` diff --git a/workflow/modules/pairwise_comparison.Rmd b/workflow/modules/pairwise_comparison.Rmd index db72a02..76de2be 100644 --- a/workflow/modules/pairwise_comparison.Rmd +++ b/workflow/modules/pairwise_comparison.Rmd @@ -50,7 +50,11 @@ theme_set( ```{r samples} +config <- read_yaml(here::here("config", "config.yml")) +params <- read_yaml(here::here("config", "params.yml")) targets <- names(pairs) +macs2_path <- here::here("output", "macs2", targets) +fdr_alpha <- config$comparisons$fdr comps <- targets if (length(unique(targets)) == 1) { comps <- seq_along(pairs) %>% @@ -63,14 +67,8 @@ if (length(unique(targets)) == 1) { character(1) ) } -samples <- seq_along(pairs) %>% - lapply( - function(x) { - fl <- here::here("output", names(pairs)[[x]], "qc_samples.tsv") - read_tsv(fl) %>% - dplyr::filter(treat %in% pairs[[x]]) - } - ) %>% +samples <- file.path(macs2_path, "qc_samples.tsv") %>% + lapply(read_tsv) %>% bind_rows() %>% dplyr::filter(qc == "pass") %>% unite(label, target, label, remove = FALSE) %>% @@ -87,14 +85,12 @@ rep_col <- setdiff( samples[[rep_col]] <- as.factor(samples[[rep_col]]) ``` -```{r load-config} -params <- read_yaml(here::here("config", "params.yml")) -config <- read_yaml(here::here("config", "config.yml")) -fdr_alpha <- config$comparisons$fdr +```{r load-colours} colours <- read_rds(here::here("output", "annotations", "colours.rds")) treat_colours <- unlist(colours$treat[levels(samples$treat)]) ``` + ```{r load-annotations} cb <- config$genome$build %>% str_to_lower() %>% @@ -103,8 +99,6 @@ data(list = cb) bands_df <- mget(cb)[[cb]] annotation_path <- here::here("output", "annotations") sq <- read_rds(file.path(annotation_path, "seqinfo.rds")) -bw_path <- here::here(config$paths$bigwig, targets) -stopifnot(all(dir.exists(bw_path))) all_gr <- file.path(annotation_path, "all_gr.rds") %>% read_rds() @@ -276,26 +270,12 @@ with_tooltip <- function(value, width = 30) { ```{r load-data} -oracle_peaks <- targets %>% - unique() %>% - lapply( - function(x) { - read_rds( - here::here("output", x, "oracle_peaks.rds") - ) - } - ) %>% - setNames(unique(targets)) -consensus_peaks <- targets %>% - unique() %>% - lapply( - function(x) { - import.bed( - here::here("output", x, "consensus_peaks.bed"), seqinfo = sq - ) - } - ) %>% - setNames(unique(targets)) +oracle_peaks <- file.path(macs2_path, "oracle_peaks.rds") %>% + lapply(read_rds) %>% + setNames(basename(macs2_path)) +consensus_peaks <- file.path(macs2_path, "consensus_peaks.bed") %>% + lapply(import.bed, seqinfo = sq) %>% + setNames(basename(macs2_path)) all_consensus <- consensus_peaks %>% GRangesList() %>% unlist() %>% @@ -310,6 +290,7 @@ db_results <- seq_along(pairs) %>% } ) %>% lapply(read_rds) %>% + lapply(mutate, fdr_mu0 = p.adjust(p_mu0, "fdr")) %>% setNames(comps) fdr_column <- ifelse( "fdr_ihw" %in% colnames(mcols(db_results[[1]])), @@ -410,12 +391,13 @@ samples %>% # Differentially Bound Windows {.tabset} ```{r all-windows} +lambda <- log2(config$comparisons$fc) all_windows <- db_results %>% lapply(granges) %>% GRangesList() %>% - unlist() %>% + unlist()%>% sort() %>% - reduce() %>% + GenomicRanges::reduce() %>% mutate( region = regions[ bestOverlap(., GRangesList(lapply(gene_regions, granges))) @@ -435,6 +417,7 @@ all_windows <- db_results %>% !!sym(glue("{comps[[1]]}_logFC")) := logFC, !!sym(glue("{comps[[1]]}_status")) := status, !!sym(glue("{comps[[1]]}_fdr")) := !!sym(fdr_column), + !!sym(glue("{comps[[1]]}_fdr_mu0")) := fdr_mu0, !!sym(glue("{comps[[1]]}_centre")) := peak_centre ) ) %>% @@ -451,22 +434,25 @@ all_windows <- db_results %>% !!sym(glue("{comps[[2]]}_logFC")) := logFC, !!sym(glue("{comps[[2]]}_status")) := status, !!sym(glue("{comps[[2]]}_fdr")) := !!sym(fdr_column), + !!sym(glue("{comps[[2]]}_fdr_mu0")) := fdr_mu0, !!sym(glue("{comps[[2]]}_centre")) := peak_centre ) ) %>% mutate( - ## Change the classification for unchanged sites to DB using 3 * fdr_alpha - ## if the other target is significantly DB + ## Change the classification for unchanged sites to DB if the other target + ## is DB, and the FDR using mu0 = 0 is < fdr_alpha + |logFC| > log2(lambda) "{comps[[1]]}_status" := case_when( (!!sym(glue("{comps[[2]]}_fdr")) < fdr_alpha) & - (!!sym(glue("{comps[[1]]}_fdr")) < fdr_alpha * 3) & - (!!sym(glue("{comps[[1]]}_logFC")) > 0) ~ "Up", + # (!!sym(glue("{comps[[1]]}_fdr")) < fdr_alpha * 3) & + (!!sym(glue("{comps[[1]]}_fdr_mu0")) < fdr_alpha) & + (!!sym(glue("{comps[[1]]}_logFC")) > lambda) ~ "Up", (!!sym(glue("{comps[[2]]}_fdr")) < fdr_alpha) & - (!!sym(glue("{comps[[1]]}_fdr")) < fdr_alpha * 3) & - (!!sym(glue("{comps[[1]]}_logFC")) < 0) ~ "Down", + # (!!sym(glue("{comps[[1]]}_fdr")) < fdr_alpha * 3) & + (!!sym(glue("{comps[[1]]}_fdr_mu0")) < fdr_alpha) & + (!!sym(glue("{comps[[1]]}_logFC")) < -1 * lambda) ~ "Down", TRUE ~ !!sym(glue("{comps[[1]]}_status")) ) %>% @@ -477,12 +463,14 @@ all_windows <- db_results %>% "{comps[[2]]}_status" := case_when( (!!sym(glue("{comps[[1]]}_fdr")) < fdr_alpha) & - (!!sym(glue("{comps[[2]]}_fdr")) < fdr_alpha * 3) & - (!!sym(glue("{comps[[2]]}_logFC")) > 0) ~ "Up", + # (!!sym(glue("{comps[[2]]}_fdr")) < fdr_alpha * 3) & + (!!sym(glue("{comps[[2]]}_fdr_mu0")) < fdr_alpha) & + (!!sym(glue("{comps[[2]]}_logFC")) > lambda) ~ "Up", (!!sym(glue("{comps[[1]]}_fdr")) < fdr_alpha) & - (!!sym(glue("{comps[[2]]}_fdr")) < fdr_alpha * 3) & - (!!sym(glue("{comps[[2]]}_logFC")) < 0) ~ "Down", + # (!!sym(glue("{comps[[2]]}_fdr")) < fdr_alpha * 3) & + (!!sym(glue("{comps[[2]]}_fdr_mu0")) < fdr_alpha) & + (!!sym(glue("{comps[[2]]}_logFC")) < -1 * lambda) ~ "Down", TRUE ~ !!sym(glue("{comps[[2]]}_status")) ) %>% @@ -601,7 +589,7 @@ The median distance between peaks where found in both was `r median(all_windows `r comma(sum(all_windows$distance == 0, na.rm = TRUE))` peaks (`r percent(mean(all_windows$distance == 0, na.rm = TRUE), 0.1)`) were found to directly overlap. Merged windows were compared across both targets and in order to obtain the best classification for each window, the initial values of an FDR-adjusted p-value < `r fdr_alpha` was used. -A secondary step was then introduced for each window such that if one target was significant and the other target had received an FDR-adjusted p-value < `r 3 * fdr_alpha`, these windows were then considered significant in the second comparison. +A secondary step was then introduced for each window such that if one target was significant and the other target had 1) received an FDR-adjusted p-value < `r fdr_alpha` *using a point-based H~0~*, and 2) had an estimated logFC beyond the range specified for the range-based H~0~ ($0 < |\mu| < \lambda$ for $\lambda = $`r round(lambda, 2)`), these windows were then considered significant in the second comparison. This was to help *reduce the number of windows incorrectly classified as unchanged* whilst the alternative ChIP target was defined as changed. ```{r tbl-summary} @@ -831,6 +819,7 @@ all_windows %>% inherit.aes = FALSE ) + geom_hline(yintercept = 0) + + geom_hline(yintercept = c(1, -1) * lambda, linetype = 2, colour = "grey30") + scale_xsidey_discrete() + scale_ysidex_discrete(guide = guide_axis(angle = 90)) + scale_colour_manual( @@ -907,6 +896,7 @@ all_windows %>% inherit.aes = FALSE ) + geom_hline(yintercept = 0) + + geom_hline(yintercept = c(1, -1) * lambda, linetype = 2, colour = "grey30") + scale_xsidey_discrete() + scale_ysidex_discrete(guide = guide_axis(angle = 90)) + scale_colour_manual( @@ -972,12 +962,12 @@ all_windows %>% geom_point(alpha = 0.6) + geom_hline(yintercept = 0) + geom_hline( - yintercept = c(1, -1)*log2(config$comparisons$fc), + yintercept = c(1, -1) * lambda, linetype = 2, colour = "grey" ) + geom_vline(xintercept = 0) + geom_vline( - xintercept = c(1, -1)*log2(config$comparisons$fc), + xintercept = c(1, -1)* lambda, linetype = 2, colour = "grey" ) + geom_label_repel( @@ -1062,12 +1052,12 @@ all_windows %>% geom_point(alpha = 0.6) + geom_hline(yintercept = 0) + geom_hline( - yintercept = c(1, -1)*log2(config$comparisons$fc), + yintercept = c(1, -1) * lambda, linetype = 2, colour = "grey" ) + geom_vline(xintercept = 0) + geom_vline( - xintercept = c(1, -1)*log2(config$comparisons$fc), + xintercept = c(1, -1) * lambda, linetype = 2, colour = "grey" ) + geom_label_repel( @@ -1144,12 +1134,12 @@ all_windows %>% geom_point(alpha = 0.6) + geom_hline(yintercept = 0) + geom_hline( - yintercept = c(1, -1)*log2(config$comparisons$fc), + yintercept = c(1, -1) * lambda, linetype = 2, colour = "grey" ) + geom_vline(xintercept = 0) + geom_vline( - xintercept = c(1, -1)*log2(config$comparisons$fc), + xintercept = c(1, -1) * lambda, linetype = 2, colour = "grey" ) + geom_label_repel( @@ -1673,7 +1663,7 @@ div(class = "table", ## Comparison of Differentially Bound Windows {.tabset} -```{r goseq-groups} +```{r goseq-groups, fig.show='hide'} goseq_group_res <- group_ids %>% mclapply( function(x) { @@ -1879,7 +1869,7 @@ bwfl <- seq_along(pairs) %>% lapply( function(x) { file.path( - bw_path[[x]], glue("{pairs[[x]]}_merged_treat_pileup.bw") + macs2_path[[x]], glue("{pairs[[x]]}_merged_treat_pileup.bw") ) %>% BigWigFileList() %>% setNames(pairs[[x]]) diff --git a/workflow/rules/annotations.smk b/workflow/rules/annotations.smk index 997ffdc..f95b338 100644 --- a/workflow/rules/annotations.smk +++ b/workflow/rules/annotations.smk @@ -10,7 +10,7 @@ rule download_gtf: '', '', '' ) ) - log: "workflow/logs/downloads/download_gtf.log" + log: log_path + "/downloads/download_gtf.log" shell: """ curl \ @@ -32,7 +32,7 @@ rule download_blacklist: git = git_add, interval = random.uniform(0, 1), tries = 10 - log: "workflow/logs/downloads/download_blacklist.log" + log: log_path + "/downloads/download_blacklist.log" shell: """ curl {params.url} --output {output} 2> {log} @@ -72,7 +72,7 @@ rule create_annotations: tries = 10 conda: "../envs/rmarkdown.yml" threads: 16 - log: "workflow/logs/scripts/create_annotations.log" + log: log_path + "/scripts/create_annotations.log" shell: """ Rscript --vanilla {input.r} {input.gtf} {threads} &>> {log} diff --git a/workflow/rules/differential_binding.smk b/workflow/rules/differential_binding.smk index 0373538..10a1dcd 100644 --- a/workflow/rules/differential_binding.smk +++ b/workflow/rules/differential_binding.smk @@ -1,4 +1,54 @@ -rule differential_binding: +rule create_differential_binding_rmd: + input: + db_mod = os.path.join( + "workflow", "modules", "differential_binding.Rmd" + ), + r = "workflow/scripts/create_differential_binding.R" + output: + rmd = os.path.join( + rmd_path, "{target}_{ref}_{treat}_differential_binding.Rmd" + ) + params: + git = git_add, + interval = random.uniform(0, 1), + threads = lambda wildcards: len(df[ + (df['target'] == wildcards.target) & + ((df['treat'] == wildcards.ref) | (df['treat'] == wildcards.treat)) + ]), + tries = 10 + conda: "../envs/rmarkdown.yml" + log: log_path + "/create_rmd/{target}_{ref}_{treat}_differential_binding.log" + threads: 1 + shell: + """ + ## Create the generic markdown header + Rscript --vanilla \ + {input.r} \ + {wildcards.target} \ + {wildcards.ref} \ + {wildcards.treat} \ + {params.threads} \ + {output.rmd} &>> {log} + + ## Add the remainder of the module as literal text + cat {input.db_mod} >> {output.rmd} + + if [[ {params.git} == "True" ]]; then + TRIES={params.tries} + while [[ -f .git/index.lock ]] + do + if [[ "$TRIES" == 0 ]]; then + echo "ERROR: Timeout while waiting for removal of git index.lock" &>> {log} + exit 1 + fi + sleep {params.interval} + ((TRIES--)) + done + git add {output.rmd} + fi + """ + +rule compile_differential_binding_html: input: annotations = ALL_RDS, aln = lambda wildcards: expand( @@ -20,17 +70,18 @@ rule differential_binding: ), merged_bw = lambda wildcards: expand( os.path.join( - bw_path, "{{target}}", "{pre}_merged_treat_pileup.bw" + macs2_path, "{{target}}", "{pre}_merged_treat_pileup.bw" ), pre = [wildcards.ref, wildcards.treat] ), peaks = expand( - os.path.join("output", "{target}", "consensus_peaks.bed"), + os.path.join(macs2_path, "{target}", "consensus_peaks.bed"), target = targets ), + here = here_file, indiv_bw = lambda wildcards: expand( os.path.join( - bw_path, "{{target}}", "{sample}_treat_pileup.bw" + macs2_path, "{{target}}", "{sample}_treat_pileup.bw" ), sample = df['sample'][ (df['target'] == wildcards.target) & @@ -40,25 +91,21 @@ rule differential_binding: ) ] ), - samples = os.path.join("output", "{target}", "qc_samples.tsv"), pkgs = rules.install_packages.output, - r = "workflow/scripts/create_differential_binding.R", + rmd = os.path.join( + rmd_path, "{target}_{ref}_{treat}_differential_binding.Rmd" + ), + samples = os.path.join(macs2_path, "{target}", "qc_samples.tsv"), setup = rules.create_setup_chunk.output, site_yaml = rules.create_site_yaml.output, yml = expand( os.path.join("config", "{file}.yml"), file = ['config', 'params'] ), - db_mod = os.path.join( - "workflow", "modules", "differential_binding.Rmd" - ), rnaseq_mod = os.path.join( "workflow", "modules", "rnaseq_differential_binding.Rmd" ) output: - rmd = os.path.join( - rmd_path, "{target}_{ref}_{treat}_differential_binding.Rmd" - ), html = "docs/{target}_{ref}_{treat}_differential_binding.html", fig_path = directory( os.path.join( @@ -93,23 +140,10 @@ rule differential_binding: (df['target'] == wildcards.target) & ((df['treat'] == wildcards.ref) | (df['treat'] == wildcards.treat)) ]) - log: - "workflow/logs/differential_binding/{target}_{ref}_{treat}_differential_binding.log" + log: log_path + "/differential_binding/{target}_{ref}_{treat}_differential_binding.log" shell: """ - ## Create the generic markdown header - Rscript --vanilla \ - {input.r} \ - {wildcards.target} \ - {wildcards.ref} \ - {wildcards.treat} \ - {threads} \ - {output.rmd} &>> {log} - - ## Add the remainder of the module as literal text - cat {input.db_mod} >> {output.rmd} - - R -e "rmarkdown::render_site('{output.rmd}')" &>> {log} + R -e "rmarkdown::render_site('{input.rmd}')" &>> {log} if [[ {params.git} == "True" ]]; then TRIES={params.tries} @@ -122,7 +156,6 @@ rule differential_binding: sleep {params.interval} ((TRIES--)) done - git add {output.rmd} git add {output.html} git add {output.fig_path} git add {output.outs} diff --git a/workflow/rules/macs2.smk b/workflow/rules/macs2.smk index 6aee7b6..491da0d 100644 --- a/workflow/rules/macs2.smk +++ b/workflow/rules/macs2.smk @@ -55,21 +55,19 @@ rule macs2_individual: macs2_path, "{target}", "{sample}_treat_pileup.bdg" ) ), - log = os.path.join( - macs2_path, "{target}", "{sample}_callpeak.log" - ), + log = os.path.join(macs2_path, "{target}", "{sample}_callpeak.log"), + summits = os.path.join(macs2_path, "{target}", "{sample}_summits.bed"), other = temp( expand( os.path.join( macs2_path, "{{target}}", "{{sample}}{suffix}" ), suffix = [ - '_model.r', '_peaks.xls', '_summits.bed', - '_control_lambda.bdg' + '_model.r', '_peaks.xls', '_control_lambda.bdg' ] ) ) - log: "workflow/logs/macs2_individual/{target}/{sample}.log" + log: log_path + "/macs2_individual/{target}/{sample}.log" conda: "../envs/macs2.yml" params: outdir = os.path.join(macs2_path, "{target}"), @@ -131,15 +129,15 @@ rule macs2_qc: seqinfo = os.path.join(annotation_path, "seqinfo.rds"), r = "workflow/scripts/macs2_qc.R" output: - cors = os.path.join("output", "{target}", "cross_correlations.tsv"), - qc = os.path.join("output", "{target}", "qc_samples.tsv") + cors = os.path.join(macs2_path, "{target}", "cross_correlations.tsv"), + qc = os.path.join(macs2_path, "{target}", "qc_samples.tsv") params: git = git_add, interval = random.uniform(0, 1), tries = 10 conda: "../envs/rmarkdown.yml" threads: lambda wildcards: len(df[df['target'] == wildcards.target]) - log: "workflow/logs/macs2_individual/{target}/{target}_macs2_qc.log" + log: log_path + "/macs2_individual/{target}/{target}_macs2_qc.log" shell: """ ## Run the QC script @@ -169,20 +167,26 @@ rule macs2_merged: bai = get_merged_bai_from_treat_and_target, control = get_input_bam_from_treat_and_target, control_bai = get_input_bai_from_treat_and_target, - qc = os.path.join("output", "{target}", "qc_samples.tsv") + qc = os.path.join(macs2_path, "{target}", "qc_samples.tsv") output: narrow_peaks = os.path.join( macs2_path, "{target}", "{treat}_merged_peaks.narrowPeak" ), - bedgraph = temp( - os.path.join( - macs2_path, "{target}", "{treat}_merged_treat_pileup.bdg" + summits = os.path.join( + macs2_path, "{target}", "{treat}_merged_summits.bed" + ), + bedgraph = temp( + expand( + os.path.join( + macs2_path, "{{target}}", "{{treat}}_merged_{type}.bdg" + ), + type = ['treat_pileup', 'control_lambda'] ) ), log = os.path.join( macs2_path, "{target}", "{treat}_merged_callpeak.log" ), - log: "workflow/logs/macs2_merged/{target}/{treat}_merged.log" + log: log_path + "/macs2_merged/{target}/{treat}_merged.log" conda: "../envs/macs2.yml" params: bamdir = bam_path, @@ -237,9 +241,9 @@ rule bedgraph_to_bigwig: ), chrom_sizes = chrom_sizes output: - bigwig = os.path.join(bw_path, "{target}", "{sample}_treat_pileup.bw") + bigwig = os.path.join(macs2_path, "{target}", "{sample}_treat_pileup.bw") conda: "../envs/bedgraph_to_bigwig.yml" - log: "workflow/logs/bedgraph_to_bigwig/{target}/{sample}.log" + log: log_path + "/bedgraph_to_bigwig/{target}/{sample}.log" threads: 1 shell: """ @@ -262,14 +266,14 @@ rule bedgraph_to_bigwig: rule get_coverage_summary: input: rules.bedgraph_to_bigwig.output.bigwig - output: os.path.join(bw_path, "{target}", "{sample}_treat_pileup.summary") + output: os.path.join(macs2_path, "{target}", "{sample}_treat_pileup.summary") params: script = "workflow/scripts/get_bigwig_summary.R", git = git_add, interval = random.uniform(0, 1), tries = 10 conda: "../envs/rmarkdown.yml" - log: "workflow/logs/get_coverage_summary/{target}/{sample}.log" + log: log_path + "/get_coverage_summary/{target}/{sample}.log" threads: 1 shell: """ @@ -294,7 +298,49 @@ rule get_coverage_summary: fi """ -rule build_macs2_summary: +rule create_macs2_summary_rmd: + input: + module = "workflow/modules/macs2_summary.Rmd", + r = "workflow/scripts/create_macs2_summary.R" + output: + rmd = os.path.join(rmd_path, "{target}_macs2_summary.Rmd") + params: + git = git_add, + interval = random.uniform(0, 1), + threads = lambda wildcards: len(df[df['target'] == wildcards.target]), + tries = 10 + conda: "../envs/rmarkdown.yml" + threads: 1 + log: log_path + "/create_rmd/create_{target}_macs2_summary.log" + shell: + """ + ## Create the generic markdown + Rscript --vanilla \ + {input.r} \ + {wildcards.target} \ + {params.threads} \ + {output.rmd} &>> {log} + + ## Add the module directly as literal code + cat {input.module} >> {output.rmd} + + if [[ {params.git} == "True" ]]; then + TRIES={params.tries} + while [[ -f .git/index.lock ]] + do + if [[ "$TRIES" == 0 ]]; then + echo "ERROR: Timeout while waiting for removal of git index.lock" &>> {log} + exit 1 + fi + sleep {params.interval} + ((TRIES--)) + done + git add {output.rmd} + fi + """ + + +rule compile_macs2_summary_html: input: annotations = ALL_RDS, aln = lambda wildcards: expand( @@ -303,6 +349,9 @@ rule build_macs2_summary: suffix = ['bam', 'bam.bai'] ), blacklist = blacklist, + cors = os.path.join(macs2_path, "{target}", "cross_correlations.tsv"), + config = "config/config.yml", + here = here_file, indiv_macs2 = lambda wildcards: expand( os.path.join(macs2_path, "{{target}}", "{sample}_{suffix}"), sample = set(df[df.target == wildcards.target]['sample']), @@ -315,16 +364,12 @@ rule build_macs2_summary: treat = set(df[df.target == wildcards.target]['treat']), suffix = ['callpeak.log', 'peaks.narrowPeak'] ), - cors = os.path.join("output", "{target}", "cross_correlations.tsv"), - qc = os.path.join("output", "{target}", "qc_samples.tsv"), - config = "config/config.yml", pkgs = rules.install_packages.output, - r = "workflow/scripts/create_macs2_summary.R", + qc = os.path.join(macs2_path, "{target}", "qc_samples.tsv"), + rmd = os.path.join(rmd_path, "{target}_macs2_summary.Rmd"), setup = rules.create_setup_chunk.output, - yaml = rules.create_site_yaml.output, - module = "workflow/modules/macs2_summary.Rmd" + yaml = rules.create_site_yaml.output output: - rmd = os.path.join(rmd_path, "{target}_macs2_summary.Rmd"), html = "docs/{target}_macs2_summary.html", fig_path = directory( os.path.join( @@ -333,7 +378,10 @@ rule build_macs2_summary: ) ), peaks = expand( - "output/{{target}}/{file}", + os.path.join( + macs2_path, "{{target}}", "{file}" + ), + ## Should change export of oracle peaks to be bed files file = ['consensus_peaks.bed', 'oracle_peaks.rds'] ), renv = os.path.join("output", "envs", "{target}_macs2_summary.RData"), @@ -344,20 +392,10 @@ rule build_macs2_summary: tries = 10 conda: "../envs/rmarkdown.yml" threads: lambda wildcards: len(df[df['target'] == wildcards.target]) - log: "workflow/logs/macs2_summmary/build_{target}_macs2_summary.log" + log: log_path + "/macs2_summmary/compile_{target}_macs2_summary.log" shell: """ - ## Create the generic markdown - Rscript --vanilla \ - {input.r} \ - {wildcards.target} \ - {threads} \ - {output.rmd} &>> {log} - - ## Add the module directly as literal code - cat {input.module} >> {output.rmd} - - R -e "rmarkdown::render_site('{output.rmd}')" &>> {log} + R -e "rmarkdown::render_site('{input.rmd}')" &>> {log} if [[ {params.git} == "True" ]]; then TRIES={params.tries} @@ -370,10 +408,6 @@ rule build_macs2_summary: sleep {params.interval} ((TRIES--)) done - git add {output.rmd} \ - {output.html}\ - {output.fig_path} \ - {output.peaks} \ - {output.venn} + git add {output.html} {output.fig_path} {output.peaks} {output.venn} fi """ diff --git a/workflow/rules/pairwise_comparisons.smk b/workflow/rules/pairwise_comparisons.smk index eac006a..cd85914 100644 --- a/workflow/rules/pairwise_comparisons.smk +++ b/workflow/rules/pairwise_comparisons.smk @@ -1,17 +1,64 @@ -rule pairwise_comparisons: +rule create_pairwise_comparisons_rmd: + input: + module_pw = "workflow/modules/pairwise_comparison.Rmd", + r = "workflow/scripts/create_pairwise_comparison.R" + output: + rmd = expand( + os.path.join( + "analysis", + "{{t1}}_{{ref1}}_{{treat1}}_{{t2}}_{{ref2}}_{{treat2}}_{f}" + ), + f = "pairwise_comparison.Rmd" + ) + params: + git = git_add, + interval = random.uniform(0, 1), + threads = 4, + tries = 10 + conda: "../envs/rmarkdown.yml" + threads: 1 + log: log_path + "/create_rmd/create_{t1}_{ref1}_{treat1}_{t2}_{ref2}_{treat2}_pairwise_comparison_rmd" + shell: + """ + ## Create the generic markdown header + Rscript --vanilla \ + {input.r} \ + {wildcards.t1} \ + {wildcards.ref1} \ + {wildcards.treat1} \ + {wildcards.t2} \ + {wildcards.ref2} \ + {wildcards.treat2} \ + {params.threads} \ + {output.rmd} &>> {log} + + ## Add the remainder of the module as literal text + cat {input.module_pw} >> {output.rmd} + + if [[ {params.git} == "True" ]]; then + TRIES={params.tries} + while [[ -f .git/index.lock ]] + do + if [[ "$TRIES" == 0 ]]; then + echo "ERROR: Timeout while waiting for removal of git index lock" &>> {log} + exit 1 + fi + sleep {params.interval} + ((TRIES--)) + done + git add {output.rmd} + fi + """ + +rule compile_pairwise_comparisons_html: input: annotations = ALL_RDS, config = "config/config.yml", - pkgs = rules.install_packages.output, - r = "workflow/scripts/create_pairwise_comparison.R", - setup = rules.create_setup_chunk.output, - yaml = rules.create_site_yaml.output, - rmd_config = "config/rmarkdown.yml", - module_pw = "workflow/modules/pairwise_comparison.Rmd", + here = here_file, module_rna = "workflow/modules/rnaseq_pairwise.Rmd", + pkgs = rules.install_packages.output, results_t1 = "docs/{t1}_{ref1}_{treat1}_differential_binding.html", - results_t2 = "docs/{t2}_{ref2}_{treat2}_differential_binding.html" - output: + results_t2 = "docs/{t2}_{ref2}_{treat2}_differential_binding.html", rmd = expand( os.path.join( "analysis", @@ -19,6 +66,10 @@ rule pairwise_comparisons: ), f = "pairwise_comparison.Rmd" ), + rmd_config = "config/rmarkdown.yml", + setup = rules.create_setup_chunk.output, + yaml = rules.create_site_yaml.output + output: html = expand( os.path.join( "docs", @@ -45,22 +96,7 @@ rule pairwise_comparisons: log: "workflow/logs/pairwise/{t1}_{ref1}_{treat1}_{t2}_{ref2}_{treat2}_pairwise_comparison.log" shell: """ - ## Create the generic markdown header - Rscript --vanilla \ - {input.r} \ - {wildcards.t1} \ - {wildcards.ref1} \ - {wildcards.treat1} \ - {wildcards.t2} \ - {wildcards.ref2} \ - {wildcards.treat2} \ - {threads} \ - {output.rmd} &>> {log} - - ## Add the remainder of the module as literal text - cat {input.module_pw} >> {output.rmd} - - R -e "rmarkdown::render_site('{output.rmd}')" &>> {log} + R -e "rmarkdown::render_site('{input.rmd}')" &>> {log} if [[ {params.git} == "True" ]]; then TRIES={params.tries} @@ -73,7 +109,6 @@ rule pairwise_comparisons: sleep {params.interval} ((TRIES--)) done - git add {output.rmd} git add {output.html} git add {output.fig_path} git add {output.csv} diff --git a/workflow/rules/rmarkdown.smk b/workflow/rules/rmarkdown.smk index 01956ee..04dff81 100644 --- a/workflow/rules/rmarkdown.smk +++ b/workflow/rules/rmarkdown.smk @@ -3,7 +3,7 @@ rule install_packages: output: "output/packages.installed" conda: "../envs/rmarkdown.yml" threads: 1 - log: "workflow/logs/rmarkdown/install_packages.log" + log: log_path + "/rmarkdown/install_packages.log" shell: """ Rscript --vanilla {input} {output} &>> {log} @@ -20,7 +20,7 @@ rule create_site_yaml: tries = 10 conda: "../envs/rmarkdown.yml" threads: 1 - log: "workflow/logs/rmarkdown/create_site_yaml.log" + log: log_path + "/rmarkdown/create_site_yaml.log" shell: """ Rscript --vanilla {input.r} {output} &>> {log} @@ -44,14 +44,14 @@ rule create_setup_chunk: config = "config/rmarkdown.yml", r = "workflow/scripts/create_setup_chunk.R" output: - rmd = "workflow/modules/setup_chunk.Rmd" + rmd = "analysis/setup_chunk.Rmd" params: git = git_add, interval = random.uniform(0, 1), tries = 10 conda: "../envs/rmarkdown.yml" threads: 1 - log: "workflow/logs/rmarkdown/create_setup_chunk.log" + log: log_path + "/rmarkdown/create_setup_chunk.log" shell: """ Rscript --vanilla {input.r} {output.rmd} &>> {log} @@ -71,9 +71,18 @@ rule create_setup_chunk: fi """ -rule build_annotations_rmd: +rule create_here_file: + output: here_file + threads: 1 + shell: + """ + touch here_file + """ + +rule compile_annotations_html: input: blacklist = blacklist, + here = here_file, rmd = "workflow/modules/annotation_description.Rmd", rds = expand( os.path.join(annotation_path, "{file}.rds"), @@ -98,7 +107,7 @@ rule build_annotations_rmd: tries = 10 conda: "../envs/rmarkdown.yml" threads: 1 - log: "workflow/logs/rmarkdown/build_annotations_rmd.log" + log: log_path + "/rmarkdown/compile_annotations_html.log" shell: """ cp {input.rmd} {output.rmd} @@ -119,9 +128,39 @@ rule build_annotations_rmd: fi """ -rule build_site_index: +rule create_index_rmd: + input: + os.path.join("workflow", "modules", "index.Rmd") + output: + os.path.join(rmd_path, "index.Rmd") + threads: 1 + params: + git = git_add, + interval = random.uniform(0, 1), + tries = 10 + shell: + """ + cat {input} > {output} + + if [[ {params.git} == "True" ]]; then + TRIES={params.tries} + while [[ -f .git/index.lock ]] + do + if [[ "$TRIES" == 0 ]]; then + echo "ERROR: Timeout while waiting for removal of git index.lock" + exit 1 + fi + sleep {params.interval} + ((TRIES--)) + done + git add {output} + fi + """ + +rule compile_index_html: input: html = HTML_OUT, + here = here_file, rmd = os.path.join(rmd_path, "index.Rmd"), setup = rules.create_setup_chunk.output, site_yaml = rules.create_site_yaml.output, @@ -134,7 +173,7 @@ rule build_site_index: tries = 10 conda: "../envs/rmarkdown.yml" threads: 1 - log: "workflow/logs/rmarkdown/build_site_index.log" + log: log_path + "/rmarkdown/compile_index_html.log" shell: """ R -e "rmarkdown::render_site('{input.rmd}')" &>> {log} diff --git a/workflow/rules/rulegraph.dot b/workflow/rules/rulegraph.dot index 287e722..fd809b2 100644 --- a/workflow/rules/rulegraph.dot +++ b/workflow/rules/rulegraph.dot @@ -2,79 +2,93 @@ digraph snakemake_dag { graph[bgcolor=white, margin=0]; node[shape=box, style=rounded, fontname=sans, fontsize=10, penwidth=2]; edge[penwidth=2, color=grey]; - 0[label = "all", color = "0.07 0.6 0.85", style="rounded"]; - 1[label = "create_annotations", color = "0.63 0.6 0.85", style="rounded"]; - 2[label = "download_gtf", color = "0.33 0.6 0.85", style="rounded"]; - 3[label = "install_packages", color = "0.44 0.6 0.85", style="rounded"]; - 4[label = "build_annotations_rmd", color = "0.00 0.6 0.85", style="rounded"]; - 5[label = "download_blacklist", color = "0.37 0.6 0.85", style="rounded"]; - 6[label = "create_setup_chunk", color = "0.22 0.6 0.85", style="rounded"]; - 7[label = "create_site_yaml", color = "0.15 0.6 0.85", style="rounded"]; - 8[label = "build_macs2_summary", color = "0.56 0.6 0.85", style="rounded"]; - 9[label = "index_bam", color = "0.30 0.6 0.85", style="rounded"]; - 10[label = "macs2_individual", color = "0.11 0.6 0.85", style="rounded"]; - 11[label = "macs2_merged", color = "0.19 0.6 0.85", style="rounded"]; - 12[label = "macs2_qc", color = "0.26 0.6 0.85", style="rounded"]; - 13[label = "differential_binding", color = "0.41 0.6 0.85", style="rounded"]; - 14[label = "bedgraph_to_bigwig", color = "0.48 0.6 0.85", style="rounded"]; - 15[label = "pairwise_comparisons", color = "0.59 0.6 0.85", style="rounded"]; - 16[label = "build_site_index", color = "0.04 0.6 0.85", style="rounded"]; - 4 -> 0 - 1 -> 0 + 0[label = "all", color = "0.55 0.6 0.85", style="rounded"]; + 1[label = "create_annotations", color = "0.06 0.6 0.85", style="rounded"]; + 2[label = "download_gtf", color = "0.17 0.6 0.85", style="rounded"]; + 3[label = "install_packages", color = "0.58 0.6 0.85", style="rounded"]; + 4[label = "compile_annotations_rmd", color = "0.41 0.6 0.85", style="rounded"]; + 5[label = "download_blacklist", color = "0.38 0.6 0.85", style="rounded"]; + 6[label = "create_here_file", color = "0.03 0.6 0.85", style="rounded"]; + 7[label = "create_setup_chunk", color = "0.32 0.6 0.85", style="rounded"]; + 8[label = "create_site_yaml", color = "0.09 0.6 0.85", style="rounded"]; + 9[label = "compile_macs2_summary_html", color = "0.49 0.6 0.85", style="rounded"]; + 10[label = "index_bam", color = "0.61 0.6 0.85", style="rounded"]; + 11[label = "macs2_qc", color = "0.35 0.6 0.85", style="rounded"]; + 12[label = "macs2_individual", color = "0.14 0.6 0.85", style="rounded"]; + 13[label = "macs2_merged", color = "0.52 0.6 0.85", style="rounded"]; + 14[label = "create_macs2_summary_rmd", color = "0.29 0.6 0.85", style="rounded"]; + 15[label = "compile_differential_binding_html", color = "0.64 0.6 0.85", style="rounded"]; + 16[label = "bedgraph_to_bigwig", color = "0.00 0.6 0.85", style="rounded"]; + 17[label = "create_differential_binding_rmd", color = "0.46 0.6 0.85", style="rounded"]; + 18[label = "compile_pairwise_comparisons_html", color = "0.26 0.6 0.85", style="rounded"]; + 19[label = "create_pairwise_comparisons_rmd", color = "0.23 0.6 0.85", style="rounded"]; + 20[label = "compile_index_html", color = "0.43 0.6 0.85", style="rounded"]; + 21[label = "create_index_rmd", color = "0.12 0.6 0.85", style="rounded"]; 16 -> 0 - 10 -> 0 - 11 -> 0 13 -> 0 - 14 -> 0 - 8 -> 0 15 -> 0 + 12 -> 0 + 4 -> 0 + 20 -> 0 + 18 -> 0 + 1 -> 0 + 9 -> 0 2 -> 1 3 -> 1 7 -> 4 - 6 -> 4 5 -> 4 + 6 -> 4 1 -> 4 - 4 -> 8 - 10 -> 8 - 1 -> 8 - 11 -> 8 - 7 -> 8 - 12 -> 8 - 6 -> 8 - 5 -> 8 - 9 -> 8 - 3 -> 8 - 9 -> 10 + 8 -> 4 + 11 -> 9 + 13 -> 9 + 3 -> 9 + 10 -> 9 + 5 -> 9 + 12 -> 9 + 4 -> 9 + 6 -> 9 + 1 -> 9 + 14 -> 9 + 7 -> 9 + 8 -> 9 + 3 -> 11 + 10 -> 11 + 5 -> 11 12 -> 11 - 9 -> 11 - 1 -> 12 + 1 -> 11 10 -> 12 - 5 -> 12 - 9 -> 12 - 3 -> 12 - 4 -> 13 - 1 -> 13 11 -> 13 - 7 -> 13 - 12 -> 13 - 6 -> 13 - 9 -> 13 - 14 -> 13 - 3 -> 13 - 8 -> 13 - 11 -> 14 - 10 -> 14 - 1 -> 14 + 10 -> 13 + 16 -> 15 + 13 -> 15 + 11 -> 15 + 3 -> 15 + 10 -> 15 4 -> 15 + 17 -> 15 + 6 -> 15 1 -> 15 + 9 -> 15 7 -> 15 - 6 -> 15 - 13 -> 15 - 3 -> 15 - 4 -> 16 - 7 -> 16 - 6 -> 16 + 8 -> 15 13 -> 16 - 8 -> 16 - 15 -> 16 + 12 -> 16 + 1 -> 16 + 3 -> 18 + 15 -> 18 + 4 -> 18 + 6 -> 18 + 19 -> 18 + 1 -> 18 + 7 -> 18 + 8 -> 18 + 15 -> 20 + 4 -> 20 + 6 -> 20 + 18 -> 20 + 9 -> 20 + 7 -> 20 + 8 -> 20 + 21 -> 20 } diff --git a/workflow/rules/rulegraph.pdf b/workflow/rules/rulegraph.pdf index 2e918e2..71d810d 100644 Binary files a/workflow/rules/rulegraph.pdf and b/workflow/rules/rulegraph.pdf differ diff --git a/workflow/scripts/create_differential_binding.R b/workflow/scripts/create_differential_binding.R index 6b6359b..32ccf35 100644 --- a/workflow/scripts/create_differential_binding.R +++ b/workflow/scripts/create_differential_binding.R @@ -15,7 +15,7 @@ glue( date: \"`r format(Sys.Date(), '%d %B, %Y')`\" --- - ```{r set-knitr-opts, echo=FALSE, child = here::here('workflow/modules/setup_chunk.Rmd')} + ```{r set-knitr-opts, echo=FALSE, child = here::here('analysis/setup_chunk.Rmd')} ``` ```{r set-vals} diff --git a/workflow/scripts/create_macs2_summary.R b/workflow/scripts/create_macs2_summary.R index 9fb4e7e..008adff 100644 --- a/workflow/scripts/create_macs2_summary.R +++ b/workflow/scripts/create_macs2_summary.R @@ -15,7 +15,7 @@ glue( chunk_output_type: console --- - ```{r set-knitr-opts, echo=FALSE, child = here::here('workflow/modules/setup_chunk.Rmd')} + ```{r set-knitr-opts, echo=FALSE, child = here::here('analysis/setup_chunk.Rmd')} ``` ```{r set-vals} diff --git a/workflow/scripts/create_pairwise_comparison.R b/workflow/scripts/create_pairwise_comparison.R index d562c2e..d10808c 100644 --- a/workflow/scripts/create_pairwise_comparison.R +++ b/workflow/scripts/create_pairwise_comparison.R @@ -18,7 +18,7 @@ glue( date: \"`r format(Sys.Date(), '%d %B, %Y')`\" --- - ```{r set-knitr-opts, echo=FALSE, child = here::here('workflow/modules/setup_chunk.Rmd')} + ```{r set-knitr-opts, echo=FALSE, child = here::here('analysis/setup_chunk.Rmd')} ``` ```{r set-vals} diff --git a/workflow/scripts/macs2_qc.R b/workflow/scripts/macs2_qc.R index 9a5277b..3b7e403 100644 --- a/workflow/scripts/macs2_qc.R +++ b/workflow/scripts/macs2_qc.R @@ -58,10 +58,8 @@ samples <- samples %>% qc_prop <- config$peaks$qc$min_prop_peaks annotation_path <- here::here("output", "annotations") -macs2_path <- here::here(config$paths$macs2, target) +macs2_path <- here::here("output", "macs2", target) if (!dir.exists(macs2_path)) dir.create(macs2_path) -out_path <- here::here("output", target) -if (!dir.exists(out_path)) dir.create(out_path) sq <- read_rds(file.path(annotation_path, "seqinfo.rds")) blacklist <- file.path(annotation_path, "blacklist.bed.gz") %>% @@ -106,7 +104,7 @@ macs2_logs <- file.path(macs2_path, glue("{samples$sample}_callpeak.log")) %>% macs2_logs %>% dplyr::select(sample = name, any_of(colnames(samples)), qc) %>% write_tsv( - file.path(out_path, "qc_samples.tsv") + file.path(macs2_path, "qc_samples.tsv") ) ################## @@ -166,4 +164,4 @@ read_corrs <- bfl[samples$sample] %>% names_to = "sample", values_to = "correlation" ) -write_tsv(read_corrs, file.path(out_path, "cross_correlations.tsv")) +write_tsv(read_corrs, file.path(macs2_path, "cross_correlations.tsv"))