Skip to content

Commit

Permalink
Merge branch 'develop' into np_update_gatk_4.6.1.0_revert_BQSR
Browse files Browse the repository at this point in the history
  • Loading branch information
nikellepetrillo authored Nov 5, 2024
2 parents f82737d + 24be0e4 commit cee2af0
Show file tree
Hide file tree
Showing 12 changed files with 129 additions and 19 deletions.
18 changes: 11 additions & 7 deletions beta-pipelines/skylab/slidetags/SlideTags.wdl
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
version 1.0

import "scripts/spatial-count.wdl" as SpatialCount
import "scripts/positioning.wdl" as Positioning

workflow SlideTags {

Expand All @@ -10,25 +11,28 @@ workflow SlideTags {
String id
Array[String] fastq_paths
Array[String] pucks
Int mem_GiB = 64
Int disk_GiB = 128
String docker = "us.gcr.io/broad-gotc-prod/slide-tags:1.0.0"
Array[String] rna_paths
String sb_path
String docker = "us.gcr.io/broad-gotc-prod/slide-tags:1.1.0"
}

parameter_meta {
fastq_paths: "Array of paths to spatial fastq files"
pucks: "Array of paths to puck files"
mem_GiB: "Memory in GiB to allocate to the task"
disk_GiB: "Disk in GiB to allocate to the task"
docker: "Docker image to use"
}

call SpatialCount.count as spatial_count {
input:
fastq_paths = fastq_paths,
pucks = pucks,
mem_GiB = mem_GiB,
disk_GiB = disk_GiB,
docker = docker
}

call Positioning.generate_positioning as positioning {
input:
rna_paths = rna_paths,
sb_path = spatial_count.sb_counts,
docker = docker
}

Expand Down
4 changes: 4 additions & 0 deletions beta-pipelines/skylab/slidetags/scripts/CREDITS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ This project uses code from the following sources:
URL: [https://github.com/MacoskoLab/Macosko-Pipelines/blob/main/spatial-count/spatial-count.wdl](https://github.com/MacoskoLab/Macosko-Pipelines/blob/main/spatial-count/spatial-count.wdl)
This code was adapted and modified from the Macosko Lab's pipeline repository.

- **Positioning Workflow**
URL: [https://github.com/MacoskoLab/Macosko-Pipelines/blob/main/positioning/positioning.wdl](https://github.com/MacoskoLab/Macosko-Pipelines/blob/main/positioning/positioning.wdl)
This code was adapted and modified from the Macosko Lab's pipeline repository.

Additional modifications include output handling and script download changes.

Please refer to the original source for the full context of the workflow.
90 changes: 90 additions & 0 deletions beta-pipelines/skylab/slidetags/scripts/positioning.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
version 1.0

# Runs the Macosko Lab positioning R scripts on a set of RNA inputs and a
# spatial-barcode (SB) counts file, then packages the results.
#
# Steps performed by the command block:
#   1. Download three R scripts (each pinned to a specific commit) from GitHub.
#   2. Verify with `gsutil stat` that every RNA path and the SB path exist.
#   3. Copy the RNA inputs into ./RNA and the SB input into ./SB.
#   4. Run `Rscript run-positioning.R RNA SB output`.
#   5. Concatenate the task's stdout/stderr into positioning.log and tar the
#      output directory into output.tar.gz.
#
# The task exits non-zero if output/seurat.qs was not produced, but only after
# the log and tarball have been written so they remain available for debugging.
task generate_positioning {
  input {
    Array[String] rna_paths
    String sb_path
    Int mem_GiB = 128
    Int disk_GiB = 128
    Int nthreads = 16
    String docker
  }

  parameter_meta {
    rna_paths: "Array of cloud storage paths to the RNA inputs; each is checked with gsutil stat and copied into ./RNA"
    sb_path: "Cloud storage path to the spatial barcode (SB) counts file; checked with gsutil stat and copied into ./SB"
    mem_GiB: "Memory to allocate to the task (passed to the runtime as '<mem_GiB> GB')"
    disk_GiB: "Size of the local SSD disk to allocate to the task"
    nthreads: "Number of CPUs to request for the task"
    docker: "Docker image to use"
  }

  command <<<
    set -euo pipefail
    set -x
    echo "<< starting positioning >>"

    # Parallelism used by `gcloud storage` transfers.
    # NOTE(review): these values are hard-coded rather than derived from the
    # nthreads input -- confirm whether they should follow it.
    gcloud config set storage/process_count 16
    gcloud config set storage/thread_count 2

    # Download the scripts, each pinned to a specific upstream commit.
    # TODO: these need to be changed -- also need to add to docker
    wget https://raw.githubusercontent.com/MacoskoLab/Macosko-Pipelines/ee005109446f58764509ee47ff51c212ce8dabe3/positioning/positioning.R
    wget https://raw.githubusercontent.com/MacoskoLab/Macosko-Pipelines/6a78716aa08a9f2506c06844f7e3fd491b03aa8b/positioning/load_matrix.R
    wget https://raw.githubusercontent.com/MacoskoLab/Macosko-Pipelines/a7fc86abbdd3d46461c500e7d024315d88a97e9a/positioning/run-positioning.R

    echo "RNA: ~{sep=' ' rna_paths}"
    echo "SB: ~{sb_path}"

    # Assert that the RNA files exist
    rnas=(~{sep=' ' rna_paths})
    for rna in "${rnas[@]}" ; do
      if ! gsutil stat "$rna" &> /dev/null ; then
        echo "ERROR: gsutil stat command failed on file $rna"
        exit 1
      fi
    done

    # Download the RNA
    echo "Downloading RNA:"
    mkdir RNA
    gcloud storage cp ~{sep=' ' rna_paths} RNA

    # Assert that the SB file exists
    if ! gsutil stat "~{sb_path}" &> /dev/null ; then
      echo "ERROR: gsutil stat command failed on file ~{sb_path}"
      exit 1
    fi

    # Download the SB (path quoted, matching the gsutil stat check above)
    echo "Downloading SB:"
    mkdir SB
    gcloud storage cp "~{sb_path}" SB

    # Run the script
    echo ; echo "Running run-positioning.R"
    Rscript run-positioning.R RNA SB output

    # List the results. Do not let an empty output directory abort the script
    # here (set -e); the seurat.qs check below reports the problem.
    ls output/* || true

    # Check for the primary result. The failure is deferred so the size report,
    # log, and tarball below are still produced before the task exits non-zero.
    missing_result=false
    if [[ -f output/seurat.qs ]] ; then
      echo "true" > DONE
    else
      echo ; echo "ERROR: CANNOT FIND: seurat.qs"
      missing_result=true
    fi

    echo; echo "Writing logs:"
    echo; echo "RNA size:"; du -sh RNA
    echo; echo "SB size:"; du -sh SB
    echo; echo "output size:"; du -sh output
    echo; echo "FREE SPACE:"; df -h

    echo "tar files/logs"
    # Assumes the execution backend writes this task's stdout/stderr to files
    # named `stdout` and `stderr` in the working directory -- TODO confirm.
    cat stdout stderr > positioning.log
    tar -zcvf output.tar.gz output

    # Fail the task if the primary result was not produced.
    if [[ "$missing_result" == "true" ]] ; then
      exit 1
    fi

    echo "<< completed positioning >>"
  >>>

  output {
    # Gzipped tarball of the `output` directory written by run-positioning.R
    File output_file = "output.tar.gz"
    # Concatenation of the task's stdout and stderr files
    File positioning_log = "positioning.log"
  }

  runtime {
    docker: docker
    memory: "~{mem_GiB} GB"
    disks: "local-disk ~{disk_GiB} SSD"
    cpu: nthreads
  }

}
9 changes: 4 additions & 5 deletions beta-pipelines/skylab/slidetags/scripts/spatial-count.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ task count {
input {
Array[String] fastq_paths
Array[String] pucks
Int mem_GiB
Int disk_GiB
Int mem_GiB = 64
Int disk_GiB = 128
Int nthreads = 1
String docker
}
command <<<
Expand Down Expand Up @@ -74,7 +75,6 @@ task count {
>>>

output {
Boolean DONE = read_boolean("DONE")
File sb_counts = "SBcounts.h5"
File spatial_log = "spatial-count.log"

Expand All @@ -83,8 +83,7 @@ task count {
docker: docker
memory: "~{mem_GiB} GB"
disks: "local-disk ~{disk_GiB} SSD"
cpu: 1
preemptible: 0
cpu: nthreads
}
}

1 change: 1 addition & 0 deletions pipelines/skylab/atac/atac.changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
* Updated the ATAC library CSV to be consistent in file naming convention and to have similar case for metric names to the Optimus workflow library CSV
* Added a new metric to the ATAC library CSV to calculate percent_target, which is the number of estimated cells by SnapATAC2 divided by expected_cells input
* Updated the ATAC workflow so that the output fragment file is bgzipped by default
* Updated memory settings for PairedTag; does not impact the ATAC workflow


# 2.3.2
Expand Down
1 change: 1 addition & 0 deletions pipelines/skylab/multiome/Multiome.changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
* Updated the ATAC library CSV and the Gene Expression library CSV to be consistent in file naming convention and to have similar case for metric names
* Added a new metric to the ATAC library CSV to calculate percent_target, which is the number of estimated cells by SnapATAC2 divided by expected_cells input
* Updated the ATAC workflow so that the output fragment file is bgzipped by default
* Updated memory settings for PairedTag; does not impact the Multiome workflow

# 5.7.1
2024-10-18 (Date of Last Commit)
Expand Down
1 change: 1 addition & 0 deletions pipelines/skylab/optimus/Optimus.changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
* Updated gex_expected_cells to a required output
* Reformatted the library CSV output filename to remove an extra gex
* Updated the ATAC fragment file output so that it is bgzipped; this does not impact the Optimus workflow
* Updated memory settings for PairedTag; does not impact the Optimus workflow

# 7.7.0
2024-09-24 (Date of Last Commit)
Expand Down
1 change: 1 addition & 0 deletions pipelines/skylab/paired_tag/PairedTag.changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
* Updated the ATAC library CSV and the Gene Expression library CSV to be consistent in file naming convention and to have similar case for metric names
* Added a new metric to the ATAC library CSV to calculate percent_target, which is the number of estimated cells by SnapATAC2 divided by expected_cells input
* Updated the ATAC fragment file output so that it is bgzipped
* Updated memory settings for PairedTag Utils

# 1.7.1
2024-10-18 (Date of Last Commit)
Expand Down
1 change: 1 addition & 0 deletions pipelines/skylab/slideseq/SlideSeq.changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

* Updated the h5adUtils WDL to rename the gene expression library CSV filename; this does not impact slideseq
* Updated the ATAC fragment file output so that it is bgzipped; this does not impact the slideseq workflow
* Updated memory settings for PairedTag; does not impact the Slideseq workflow

# 3.4.2
2024-09-24 (Date of Last Commit)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

* Updated the h5adUtils WDL to rename the gene expression library CSV filename; this does not impact the Multi-snSS2 workflow
* Updated the ATAC fragment file output so that it is bgzipped; this does not impact the Multi-snSS2 workflow
* Updated memory settings for PairedTag; does not impact the Multi-snSS2 workflow

# 2.0.1
2024-09-24 (Date of Last Commit)
Expand Down
13 changes: 9 additions & 4 deletions tasks/skylab/H5adUtils.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -235,8 +235,8 @@ task JoinMultiomeBarcodes {

Int nthreads = 1
String cpuPlatform = "Intel Cascade Lake"
Int machine_mem_mb = ceil((size(atac_h5ad, "MiB") + size(gex_h5ad, "MiB") + size(atac_fragment, "MiB")) * 3) + 10000
Int disk = ceil((size(atac_h5ad, "GiB") + size(gex_h5ad, "GiB") + size(atac_fragment, "GiB")) * 5) + 10
Int machine_mem_mb = ceil((size(atac_h5ad, "MiB") + size(gex_h5ad, "MiB") + size(atac_fragment, "MiB")) * 6) + 10000
Int disk = ceil((size(atac_h5ad, "GiB") + size(gex_h5ad, "GiB") + size(atac_fragment, "GiB")) * 8) + 10
String docker_path
}
String gex_base_name = basename(gex_h5ad, ".h5ad")
Expand All @@ -255,8 +255,10 @@ task JoinMultiomeBarcodes {
set -e pipefail

# decompress the bgzipped fragment file
echo "Moving fragment file for bgzipping"
mv ~{atac_fragment} ~{atac_fragment_base}.sorted.tsv.gz
echo "Decompressing fragment file"
bgzip -d ~{atac_fragment} > "~{atac_fragment_base}.sorted.tsv"
bgzip -d "~{atac_fragment_base}.sorted.tsv.gz"
echo "Done decompressing"


Expand All @@ -276,12 +278,14 @@ task JoinMultiomeBarcodes {
print("Reading ATAC h5ad:")
print("~{atac_h5ad}")
print("Read ATAC fragment file:")
print("~{atac_fragment}")
print(atac_fragment)
print("Reading Optimus h5ad:")
print("~{gex_h5ad}")
atac_data = ad.read_h5ad("~{atac_h5ad}")
gex_data = ad.read_h5ad("~{gex_h5ad}")
atac_tsv = pd.read_csv(atac_fragment, sep="\t", names=['chr','start', 'stop', 'barcode','n_reads'])
print("Printing ATAC fragment tsv")
print(atac_tsv)
whitelist_gex = pd.read_csv("~{gex_whitelist}", header=None, names=["gex_barcodes"])
whitelist_atac = pd.read_csv("~{atac_whitelist}", header=None, names=["atac_barcodes"])
Expand Down Expand Up @@ -317,6 +321,7 @@ task JoinMultiomeBarcodes {
atac_data.write_h5ad("~{atac_base_name}.h5ad")
df_fragment.to_csv("~{atac_fragment_base}.tsv", sep='\t', index=False, header = False)
CODE
# sorting the file
echo "Sorting file"
sort -k1,1V -k2,2n "~{atac_fragment_base}.tsv" > "~{atac_fragment_base}.sorted.tsv"
Expand Down
8 changes: 5 additions & 3 deletions tasks/skylab/PairedTagUtils.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -205,13 +205,13 @@ task ParseBarcodes {
Int nthreads = 1
String cpuPlatform = "Intel Cascade Lake"
String docker_path
Int disk = ceil((size(atac_h5ad, "GiB") + size(atac_fragment, "GiB")) * 8) + 10
Int machine_mem_mb = ceil((size(atac_h5ad, "MiB") + size(atac_fragment, "MiB")) * 6) + 10000
}

String atac_base_name = basename(atac_h5ad, ".h5ad")
String atac_fragment_base = basename(atac_fragment, ".sorted.tsv.gz")

Int machine_mem_mb = ceil((size(atac_h5ad, "MiB") + size(atac_fragment, "MiB")) * 3) + 10000
Int disk = ceil((size(atac_h5ad, "GiB") + size(atac_fragment, "GiB")) * 5) + 10

parameter_meta {
atac_h5ad: "The resulting h5ad from the ATAC workflow."
Expand All @@ -222,8 +222,10 @@ task ParseBarcodes {
set -e pipefail

# decompress the bgzipped atac file
echo "Moving fragment tsv for decompression"
mv ~{atac_fragment} ~{atac_fragment_base}.sorted.tsv.gz
echo "Decompressing fragment file"
bgzip -d ~{atac_fragment} > "~{atac_fragment_base}.sorted.tsv"
bgzip -d "~{atac_fragment_base}.sorted.tsv.gz"
echo "Done decompressing"

python3 <<CODE
Expand Down

0 comments on commit cee2af0

Please sign in to comment.