Skip to content

Commit

Permalink
Merged dev into main
Browse files Browse the repository at this point in the history
  • Loading branch information
smped committed May 30, 2022
2 parents cda61ce + a064a28 commit feca5f9
Show file tree
Hide file tree
Showing 21 changed files with 442 additions and 268 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
.Ruserdata
.Rapp.history
.DS_Store
.vscode
*.Rproj
*_cache
workflow/logs/*
.snakemake/*
data/*
*bam
*bam.bai
*bdg
Expand Down
2 changes: 0 additions & 2 deletions config/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@ samples:

paths:
bam: "data/aligned"
macs2: "data/macs2"
bigwig: "data/bigwig"

comparisons:
fc: 1.2
Expand Down
2 changes: 1 addition & 1 deletion config/samples.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ AR_Veh_2 AR Veh 2 AR_Input
AR_Veh_3 AR Veh 3 AR_Input
AR_DHT_1 AR DHT 1 AR_Input
AR_DHT_2 AR DHT 2 AR_Input
AR_DHT_3 AR DHT 3 AR_Input
AR_DHT_3 AR DHT 3 AR_Input
42 changes: 35 additions & 7 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,34 @@ configfile: "config/config.yml"
## Figure out how to run check_yaml from here if possible
## python scripts/check_yaml.py

## Set the here file as defined by here::here() for Rmarkdown files
## In a valid Rproj file is found, use that. Otherwise use .here
def get_here_file():
wd = os.getcwd()
rproj = os.path.basename(wd) + ".Rproj"
rproj_path = os.path.join(wd, rproj)
here_file = os.path.join(wd, ".here")
if os.path.isfile(rproj_path):
## Check contents of file for Version in line 1
with open(rproj_path) as f:
ln = f.readline().rstrip()
if 'Version:' in ln:
here_file = rproj_path
return(here_file)

####################################
## Check all external files exist ##
####################################
all_exist=True
# if config['external']['rnaseq'] != '':
# if not os.path.isfile(config['external']['rnaseq']):
# all_exist=False
# print(config['external']['rnaseq'] + " does not exist")


if not all_exist:
sys.exit(1)

def get_ucsc_genome(x):
map = pd.Series(['hg19', 'hg38'], index = ['GRCh37', 'GRCh38'])
if not x in map.keys():
Expand Down Expand Up @@ -88,13 +116,13 @@ pairs=make_pairwise(config['comparisons']['contrasts'])
###############
## Key Paths ##
###############
here_file = get_here_file()
bam_path = config['paths']['bam']
bw_path = config['paths']['bigwig']
macs2_path = config['paths']['macs2']
# ext_path = os.path.join("data", "external")
rmd_path = "analysis"
## This still needs to be changed internally in the RMD files
# macs2_path = config['paths']['macs2']
macs2_path = os.path.join("output", "macs2")
annotation_path = os.path.join("output", "annotations")
log_path = os.path.join("workflow", "logs")

###############################
## External Annotation Files ##
Expand Down Expand Up @@ -168,7 +196,7 @@ ALL_OUTPUTS.extend([os.path.join("docs", "index.html")])

## Peaks generated from the Rmd files
CONS_PEAKS = expand(
os.path.join("output", "{target}", "{file}"),
os.path.join(macs2_path, "{target}", "{file}"),
target = targets,
file = ['consensus_peaks.bed', 'oracle_peaks.rds']
)
Expand Down Expand Up @@ -202,13 +230,13 @@ ALL_OUTPUTS.extend(MERGED_PEAKS)
## BigWig Files ##
##################
INDIV_BW = expand(
os.path.join(bw_path, "{path}_treat_pileup.{suffix}"),
os.path.join(macs2_path, "{path}_treat_pileup.{suffix}"),
path = indiv_pre,
suffix = ['bw']#, 'summary']
)
ALL_OUTPUTS.extend(INDIV_BW)
MERGED_BW = expand(
os.path.join(bw_path, "{path}_merged_treat_pileup.{suffix}"),
os.path.join(macs2_path, "{path}_merged_treat_pileup.{suffix}"),
path = merged_pre,
suffix = ['bw']#, 'summary']
)
Expand Down
2 changes: 1 addition & 1 deletion workflow/modules/annotation_description.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ editor_options:
chunk_output_type: console
---

```{r set-knitr-opts, echo=FALSE, child = here::here('workflow/modules/setup_chunk.Rmd')}
```{r set-knitr-opts, echo=FALSE, child = here::here('analysis/setup_chunk.Rmd')}
```

```{r packages}
Expand Down
29 changes: 17 additions & 12 deletions workflow/modules/differential_binding.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ source(here::here("workflow", "scripts", "custom_functions.R"))
```{r load-config}
params <- read_yaml(here::here("config", "params.yml"))
config <- read_yaml(here::here("config", "config.yml"))
samples <- here::here("output", target, "qc_samples.tsv") %>%
macs2_path <- here::here("output", "macs2", target)
samples <- file.path(macs2_path, "qc_samples.tsv") %>%
read_tsv() %>%
dplyr::filter(
qc == "pass",
Expand All @@ -81,8 +82,6 @@ bam_path <- here::here(config$paths$bam)
stopifnot(dir.exists(bam_path))
annotation_path <- here::here("output", "annotations")
stopifnot(dir.exists(annotation_path))
bw_path <- here::here(config$paths$bigwig, target)
stopifnot(dir.exists(bw_path))
out_path <- here::here("output", target)
if (!dir.exists(out_path)) dir.create(out_path)
```
Expand Down Expand Up @@ -286,7 +285,7 @@ all_out <- list(
# Data Preparation

```{r import-peaks}
consensus_peaks <- here::here("output", target, "consensus_peaks.bed") %>%
consensus_peaks <- here::here(macs2_path, "consensus_peaks.bed") %>%
import.bed(seqinfo = sq)
```

Expand Down Expand Up @@ -370,8 +369,8 @@ Taking the first `r comma(ys)` alignments, a brief inspection of the Bam Files r
## Sliding Window Counts

```{r window-counts}
macs2_merged_logs <- here::here(
"data", "macs2", target, glue("{treat_levels}_merged_callpeak.log")
macs2_merged_logs <- file.path(
macs2_path, glue("{treat_levels}_merged_callpeak.log")
) %>%
importNgsLogs()
fl <- max(macs2_merged_logs$fragment_length)
Expand Down Expand Up @@ -874,7 +873,7 @@ For visualisation purposes, only genes which were considered as detected in any
```{r make-heatmaps}
profile_width <- 5e3
n_bins <- 100
bwfl <- file.path(bw_path, glue("{treat_levels}_merged_treat_pileup.bw")) %>%
bwfl <- file.path(macs2_path, glue("{treat_levels}_merged_treat_pileup.bw")) %>%
BigWigFileList() %>%
setNames(treat_levels)
sig_ranges <- merged_results %>%
Expand Down Expand Up @@ -1655,7 +1654,7 @@ grl_to_plot <- GRangesList(
## The coverage
bwfl <- list2(
"{target}" := file.path(
bw_path, glue("{treat_levels}_merged_treat_pileup.bw")
macs2_path, glue("{treat_levels}_merged_treat_pileup.bw")
) %>%
BigWigFileList() %>%
setNames(treat_levels)
Expand Down Expand Up @@ -2677,12 +2676,18 @@ file.create(all_out$de_genes)
if (any_detected) write_csv(de_genes_db_regions, all_out$de_genes)
file.create(all_out$up_regions)
if (length(sig_ranges$Up) > 0)
export.bed(granges(sig_ranges$Up), all_out$up_regions)
if (length(sig_ranges$Up) > 0) {
sig_ranges$Up %>%
granges() %>%
export.bed(all_out$up_regions)
}
file.create(all_out$down_regions)
if (length(sig_ranges$Down) > 0)
export.bed(granges(sig_ranges$Down), all_out$down_regions)
if (length(sig_ranges$Down) > 0) {
sig_ranges$Down %>%
granges() %>%
export.bed(all_out$down_regions)
}
save.image(all_out$renv)
```
Expand Down
2 changes: 1 addition & 1 deletion workflow/modules/ihw_targets.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ other_targets <- here::here(config$samples$file) %>%
ihw_all <- other_targets %>%
sapply(
function(x) {
here::here("output", x, "consensus_peaks.bed") %>%
here::here(dirname(macs2_path), x, "consensus_peaks.bed") %>%
import.bed(seqinfo = sq)
},
simplify = FALSE
Expand Down
6 changes: 3 additions & 3 deletions analysis/index.Rmd → workflow/modules/index.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,14 @@ Treatment groups and targets are specified using `config/config.yml`.

## Workflow

```{r plot-workflow, fig.height = 10, fig.width = 10, fig.cap = "*Summary of processing workflow. The primary data pipeline is shown in red, preparatory steps are shown in blue whilst collation of final output is in green.*"}
```{r plot-workflow, fig.height = 10, fig.width = 10, fig.cap = "*Summary of processing workflow. The primary data pipeline producing key outputs is shown in red, preparatory steps are shown in blue whilst steps involved in the collation of final output are in green.*"}
here::here("workflow", "rules", "rulegraph.dot") %>%
readLines() %>%
rm_dot_node(node = "\"all\"") %>%
add_input_node(node = "Alignments\n(Bam Files)", col = "red", ignore = "(download|define|macs2|create|install)") %>%
change_node_colour("(setup|index|macs2|bedgraph|identify|binding|install)", "red") %>%
change_node_colour("(index|macs2|annotations|bedgraph|compile)", "red") %>%
change_node_colour("(create|build)", "forestgreen") %>%
change_node_colour("(download|write|copy|install)", "blue") %>%
change_node_colour("(site|create|build|differential|pairwise)", "forestgreen") %>%
str_replace_all("_", "\n") %>%
str_replace_all("snakemake\ndag", "snakemake_dag") %>%
str_replace_all("fontsize=[0-9]+", "fontsize=16") %>%
Expand Down
16 changes: 7 additions & 9 deletions workflow/modules/macs2_summary.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ register(MulticoreParam(workers = threads))
source(here::here("workflow/scripts/custom_functions.R"))
## This has been pushed to the main repo of ggside, but is not yet
## incorporated into the packge. The functions are geom_*sidelabel
## Once ggside 0.2.1 is released, this can be deleted
source(here::here("workflow/scripts/geom_sidelabel.R"))
```

Expand All @@ -55,11 +56,7 @@ config <- read_yaml(
here::here("config", "config.yml")
)
bam_path <- here::here(config$paths$bam, target)
stopifnot(dir.exists(bam_path))
macs2_path <- here::here(config$paths$macs2, target)
stopifnot(dir.exists(macs2_path))
out_path <- here::here("output", target)
if (!dir.exists(out_path)) dir.create(out_path)
macs2_path <- here::here("output", "macs2", target)
annotation_path <- here::here("output", "annotations")
colours <- read_rds(
file.path(annotation_path, "colours.rds")
Expand All @@ -68,7 +65,7 @@ colours <- read_rds(
```

```{r read-samples}
samples <-read_tsv(file.path(out_path, "qc_samples.tsv"))
samples <-read_tsv(file.path(macs2_path, "qc_samples.tsv"))
treat_levels <- unique(samples$treat)
if (!is.null(config$comparisons$contrasts)) {
## Ensure levels respect those provided in contrasts
Expand Down Expand Up @@ -325,7 +322,7 @@ suppressWarnings(
### Cross Correlations
```{r plot_correlation, fig.height=6, fig.cap = glue("*Cross Correlaton between alignments up to 1kb apart. The dashed, grey, vertical line is the fragment length estimated by `macs2 callpeak` for each sample. For speed, only the first 5 chromosomes were used for sample-specific estimates.*")}
file.path(out_path, "cross_correlations.tsv") %>%
file.path(macs2_path, "cross_correlations.tsv") %>%
read_tsv() %>%
left_join(samples, by = "sample") %>%
ggplot(aes(fl, correlation, colour = treat)) +
Expand Down Expand Up @@ -680,8 +677,8 @@ consensus_peaks %>%

```{r export}
all_out <- list(
consensus_peaks = file.path(out_path, "consensus_peaks.bed"),
oracle_peaks = file.path(out_path, "oracle_peaks.rds"),
consensus_peaks = file.path(macs2_path, "consensus_peaks.bed"),
oracle_peaks = file.path(macs2_path, "oracle_peaks.rds"),
renv = file.path(
here::here("output/envs"),
glue("{target}_macs2_summary.RData")
Expand All @@ -693,6 +690,7 @@ oracle_peaks %>%
lapply(select, -keep) %>%
GRangesList() %>%
write_rds(all_out$oracle_peaks, compress = "gz")
if (!dir.exists(dirname(all_out$renv))) dir.create(dirname(all_out$renv))
save.image(all_out$renv)
```

Expand Down
Loading

0 comments on commit feca5f9

Please sign in to comment.