Skip to content

Commit

Permalink
Merge branch 'develop' into aa-optimus-slidetag
Browse files Browse the repository at this point in the history
  • Loading branch information
aawdeh authored Dec 12, 2024
2 parents f9014db + 2fffa87 commit e4d6aee
Show file tree
Hide file tree
Showing 26 changed files with 284 additions and 114 deletions.
14 changes: 7 additions & 7 deletions pipeline_versions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@ ExternalExomeReprocessing 3.3.3 2024-11-04
CramToUnmappedBams 1.1.3 2024-08-02
WholeGenomeReprocessing 3.3.3 2024-11-04
ExomeReprocessing 3.3.3 2024-11-04
BuildIndices 3.0.0 2023-12-06
BuildIndices 3.1.0 2024-11-26
scATAC 1.3.2 2023-08-03
snm3C 4.0.4 2024-08-06
Multiome 5.9.2 2024-11-15
PairedTag 1.8.3 2024-11-15
Multiome 5.9.4 2024-12-05
PairedTag 1.9.0 2024-12-05
MultiSampleSmartSeq2 2.2.22 2024-09-11
MultiSampleSmartSeq2SingleNucleus 2.0.5 2024-11-15
Optimus 7.8.3 2024-11-15
atac 2.5.2 2024-11-12
MultiSampleSmartSeq2SingleNucleus 2.0.6 2024-11-15
Optimus 7.9.0 2024-12-05
atac 2.5.3 2024-11-22
SmartSeq2SingleSample 5.1.21 2024-09-11
SlideSeq 3.4.6 2024-11-15
SlideSeq 3.4.7 2024-12-3
5 changes: 5 additions & 0 deletions pipelines/skylab/atac/atac.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 2.5.3
2024-11-22 (Date of Last Commit)

* Updated the warp-tools docker; this update changes the way gene_names are identified when creating gene expression h5ad files and does not impact the ATAC workflow

# 2.5.2
2024-11-12 (Date of Last Commit)

Expand Down
4 changes: 2 additions & 2 deletions pipelines/skylab/atac/atac.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,15 @@ workflow ATAC {
String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG"
}

String pipeline_version = "2.5.2"
String pipeline_version = "2.5.3"

# Determine docker prefix based on cloud provider
String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
String acr_docker_prefix = "dsppipelinedev.azurecr.io/"
String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix

# Docker image names
String warp_tools_2_2_0 = "warp-tools:2.2.0"
String warp_tools_2_2_0 = "warp-tools:2.5.0"
String cutadapt_docker = "cutadapt:1.0.0-4.4-1686752919"
String samtools_docker = "samtools-dist-bwa:3.0.0"
String upstools_docker = "upstools:1.0.0-2023.03.03-1704300311"
Expand Down
5 changes: 5 additions & 0 deletions pipelines/skylab/build_indices/BuildIndices.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 3.1.0
2024-11-26 (Date of Last Commit)

* Added metadata.txt file as an output to the pipeline

# 3.0.0
2023-12-06 (Date of Last Commit)

Expand Down
73 changes: 72 additions & 1 deletion pipelines/skylab/build_indices/BuildIndices.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ workflow BuildIndices {
}

# version of this pipeline
String pipeline_version = "3.0.0"
String pipeline_version = "3.1.0"


parameter_meta {
Expand Down Expand Up @@ -49,12 +49,25 @@ workflow BuildIndices {
organism = organism
}

call RecordMetadata {
input:
pipeline_version = pipeline_version,
input_files = [annotations_gtf, genome_fa, biotypes],
output_files = [
BuildStarSingleNucleus.star_index,
BuildStarSingleNucleus.modified_annotation_gtf,
CalculateChromosomeSizes.chrom_sizes,
BuildBWAreference.reference_bundle
]
}

output {
File snSS2_star_index = BuildStarSingleNucleus.star_index
String pipeline_version_out = "BuildIndices_v~{pipeline_version}"
File snSS2_annotation_gtf_modified = BuildStarSingleNucleus.modified_annotation_gtf
File reference_bundle = BuildBWAreference.reference_bundle
File chromosome_sizes = CalculateChromosomeSizes.chrom_sizes
File metadata = RecordMetadata.metadata_file
}
}
Expand Down Expand Up @@ -195,3 +208,61 @@ String reference_name = "bwa-mem2-2.2.1-~{organism}-~{genome_source}-build-~{gen
}
}


# Writes a plain-text provenance record (metadata.txt) for a workflow run.
#
# Inputs:
#   pipeline_version - version string written on the first line of the record
#   input_files      - files whose path and md5sum are listed under "Input Files"
#   output_files     - files whose path and md5sum are listed under "Output Files";
#                      the first element is also parsed for bucket/submission/workflow IDs
#
# Output:
#   metadata_file    - the generated metadata.txt
task RecordMetadata {
  input {
    String pipeline_version
    Array[File] input_files
    Array[File] output_files
  }

  command <<<
    set -euo pipefail

    # create metadata file
    echo "Pipeline Version: ~{pipeline_version}" > metadata.txt
    echo "Date of Workflow Run: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> metadata.txt
    echo "" >> metadata.txt

    # echo paths and md5sums for input files
    echo "Input Files and their md5sums:" >> metadata.txt
    for file in ~{sep=" " input_files}; do
      # quote the path so it is passed to md5sum as a single, unglobbed argument
      echo "$file : $(md5sum "$file" | awk '{print $1}')" >> metadata.txt
    done
    echo "" >> metadata.txt

    # echo paths and md5sums for output files
    echo "Output Files and their md5sums:" >> metadata.txt
    for file in ~{sep=" " output_files}; do
      echo "$file : $(md5sum "$file" | awk '{print $1}')" >> metadata.txt
    done
    echo "" >> metadata.txt

    # The IDs below are taken positionally from the '/'-separated path of the
    # first output file (fields 3, 5 and 7).
    # NOTE(review): this presumably relies on a path layout of
    # /<root>/<bucket>/<dir>/<submission_id>/<workflow_name>/<workflow_id>/... --
    # confirm against the execution backend; other layouts yield wrong or empty values.

    # grab workspace bucket
    file="~{output_files[0]}"
    workspace_bucket=$(echo "$file" | awk -F'/' '{print $3}')
    echo "Workspace Bucket: $workspace_bucket" >> metadata.txt

    # grab submission ID
    submission_id=$(echo "$file" | awk -F'/' '{print $5}')
    echo "Submission ID: $submission_id" >> metadata.txt

    # grab workflow ID
    workflow_id=$(echo "$file" | awk -F'/' '{print $7}')
    echo "Workflow ID: $workflow_id" >> metadata.txt

    echo "" >> metadata.txt
  >>>

  output {
    File metadata_file = "metadata.txt"
  }

  runtime {
    docker: "ubuntu:20.04"
    memory: "5 GiB"
    disks: "local-disk 100 HDD"
    cpu: "1"
  }
}

13 changes: 12 additions & 1 deletion pipelines/skylab/multiome/Multiome.changelog.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,18 @@
# 5.9.4
2024-12-05 (Date of Last Commit)

* Moved the optional CellBender task to the Optimus.wdl

# 5.9.3
2024-12-03 (Date of Last Commit)

* Fixed a bug in the StarSoloFastq task that caused the pipeline to not output a UniqueAndMult-Uniform.mtx when --soloMultiMappers Uniform was passed to STAR

# 5.9.2
2024-11-15 (Date of Last Commit)
2024-11-22 (Date of Last Commit)

* Added bam validation in the StarSoloFastq task; this does not affect the outputs of the pipeline
* Updated the warp-tools docker; this update changes the way gene_names are identified when creating gene expression h5ad files

# 5.9.1
2024-11-12 (Date of Last Commit)
Expand Down
57 changes: 11 additions & 46 deletions pipelines/skylab/multiome/Multiome.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,11 @@ version 1.0
import "../../../pipelines/skylab/atac/atac.wdl" as atac
import "../../../pipelines/skylab/optimus/Optimus.wdl" as optimus
import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils
import "https://raw.githubusercontent.com/aawdeh/CellBender/aa-cbwithoutcuda/wdl/cellbender_remove_background_azure.wdl" as CellBender_no_cuda
import "https://raw.githubusercontent.com/broadinstitute/CellBender/v0.3.0/wdl/cellbender_remove_background.wdl" as CellBender
import "../../../tasks/broad/Utilities.wdl" as utils

workflow Multiome {

String pipeline_version = "5.9.2"
String pipeline_version = "5.9.4"

input {
String cloud_provider
Expand Down Expand Up @@ -103,7 +101,8 @@ workflow Multiome {
count_exons = count_exons,
soloMultiMappers = soloMultiMappers,
cloud_provider = cloud_provider,
gex_expected_cells = expected_cells
gex_expected_cells = expected_cells,
run_cellbender = run_cellbender
}

# Call the ATAC workflow
Expand Down Expand Up @@ -134,39 +133,6 @@ workflow Multiome {
atac_fragment = Atac.fragment_file
}

# Call CellBender
if (run_cellbender) {
if (cloud_provider == "gcp") {
call CellBender.run_cellbender_remove_background_gpu as CellBender {
input:
sample_name = input_id,
input_file_unfiltered = Optimus.h5ad_output_file,
hardware_boot_disk_size_GB = 20,
hardware_cpu_count = 4,
hardware_disk_size_GB = 50,
hardware_gpu_type = "nvidia-tesla-t4",
hardware_memory_GB = 32,
hardware_preemptible_tries = 2,
hardware_zones = "us-central1-a us-central1-c",
nvidia_driver_version = "470.82.01"
}
}
if (cloud_provider == "azure") {
call CellBender_no_cuda.run_cellbender_remove_background_gpu as CellBender_no_cuda {
input:
sample_name = input_id,
input_file_unfiltered = Optimus.h5ad_output_file,
hardware_boot_disk_size_GB = 20,
hardware_cpu_count = 4,
hardware_disk_size_GB = 50,
hardware_gpu_type = "nvidia-tesla-t4",
hardware_memory_GB = 32,
hardware_preemptible_tries = 2,
hardware_zones = "us-central1-a us-central1-c",
nvidia_driver_version = "470.82.01"
}
}
}

meta {
allowNestedInputs: true
Expand Down Expand Up @@ -201,15 +167,14 @@ workflow Multiome {
File? gex_aligner_metrics = Optimus.aligner_metrics
File? library_metrics = Optimus.library_metrics
File? mtx_files = Optimus.mtx_files
File? cell_barcodes_csv = Optimus.cell_barcodes_csv
File? checkpoint_file = Optimus.checkpoint_file
Array[File]? h5_array = Optimus.h5_array
Array[File]? html_report_array = Optimus.html_report_array
File? log = Optimus.log
Array[File]? metrics_csv_array = Optimus.metrics_csv_array
String? output_directory = Optimus.output_directory
File? summary_pdf = Optimus.summary_pdf

# cellbender outputs
File? cell_barcodes_csv = CellBender.cell_csv
File? checkpoint_file = CellBender.ckpt_file
Array[File]? h5_array = CellBender.h5_array
Array[File]? html_report_array = CellBender.report_array
File? log = CellBender.log
Array[File]? metrics_csv_array = CellBender.metrics_array
String? output_directory = CellBender.output_dir
File? summary_pdf = CellBender.pdf
}
}
13 changes: 12 additions & 1 deletion pipelines/skylab/optimus/Optimus.changelog.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,18 @@
# 7.9.0
2024-12-05 (Date of Last Commit)

* Added an optional task to the Optimus.wdl that will run CellBender on the Optimus output h5ad file

# 7.8.4
2024-12-03 (Date of Last Commit)

* Fixed a bug in the StarSoloFastq task that caused the pipeline to not output a UniqueAndMult-Uniform.mtx when --soloMultiMappers Uniform was passed to STAR

# 7.8.3
2024-11-15 (Date of Last Commit)
2024-11-22 (Date of Last Commit)

* Added bam validation in the StarSoloFastq task; this does not affect the outputs of the pipeline
* Updated the warp-tools docker; this update changes the way gene_names are identified when creating gene expression h5ad files

# 7.8.2
2024-11-12 (Date of Last Commit)
Expand Down
53 changes: 51 additions & 2 deletions pipelines/skylab/optimus/Optimus.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import "../../../tasks/skylab/CheckInputs.wdl" as OptimusInputChecks
import "../../../tasks/skylab/MergeSortBam.wdl" as Merge
import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils
import "../../../tasks/broad/Utilities.wdl" as utils
import "https://raw.githubusercontent.com/aawdeh/CellBender/aa-cbwithoutcuda/wdl/cellbender_remove_background_azure.wdl" as CellBender_no_cuda
import "https://raw.githubusercontent.com/broadinstitute/CellBender/v0.3.0/wdl/cellbender_remove_background.wdl" as CellBender

workflow Optimus {
meta {
Expand Down Expand Up @@ -38,6 +40,9 @@ workflow Optimus {
String? soloMultiMappers = "Uniform"
Int gex_expected_cells = 3000

# CellBender
Boolean run_cellbender = false

# Chemistry options include: 2 or 3
Int tenx_chemistry_version
# Whitelist is selected based on the input tenx_chemistry_version
Expand Down Expand Up @@ -75,7 +80,7 @@ workflow Optimus {
}

# version of this pipeline
String pipeline_version = "7.8.3"
String pipeline_version = "7.9.0"

# this is used to scatter matched [r1_fastq, r2_fastq, i1_fastq] arrays
Array[Int] indices = range(length(r1_fastq))
Expand All @@ -94,7 +99,7 @@ workflow Optimus {
String pytools_docker = "pytools:1.0.0-1661263730"
String empty_drops_docker = "empty-drops:1.0.1-4.2"
String star_docker = "star:1.0.1-2.7.11a-1692706072"
String warp_tools_docker_2_2_0 = "warp-tools:2.4.0"
String warp_tools_docker_2_2_0 = "warp-tools:2.5.0"
String star_merge_docker = "star-merge-npz:1.3.0"
String samtools_star = "samtools-star:1.0.0-1.11-2.7.11a-1731516196"

Expand Down Expand Up @@ -304,6 +309,40 @@ workflow Optimus {
}
}

# Call CellBender
if (run_cellbender) {
if (cloud_provider == "gcp") {
call CellBender.run_cellbender_remove_background_gpu as CellBender {
input:
sample_name = input_id,
input_file_unfiltered = final_h5ad_output,
hardware_boot_disk_size_GB = 20,
hardware_cpu_count = 4,
hardware_disk_size_GB = 50,
hardware_gpu_type = "nvidia-tesla-t4",
hardware_memory_GB = 32,
hardware_preemptible_tries = 2,
hardware_zones = "us-central1-a us-central1-c",
nvidia_driver_version = "470.82.01"
}
}
if (cloud_provider == "azure") {
call CellBender_no_cuda.run_cellbender_remove_background_gpu as CellBender_no_cuda {
input:
sample_name = input_id,
input_file_unfiltered = final_h5ad_output,
hardware_boot_disk_size_GB = 20,
hardware_cpu_count = 4,
hardware_disk_size_GB = 50,
hardware_gpu_type = "nvidia-tesla-t4",
hardware_memory_GB = 32,
hardware_preemptible_tries = 2,
hardware_zones = "us-central1-a us-central1-c",
nvidia_driver_version = "470.82.01"
}
}
}

File final_h5ad_output = select_first([OptimusH5adGenerationWithExons.h5ad_output, OptimusH5adGeneration.h5ad_output])
File final_library_metrics = select_first([OptimusH5adGenerationWithExons.library_metrics, OptimusH5adGeneration.library_metrics])

Expand Down Expand Up @@ -332,5 +371,15 @@ workflow Optimus {

# h5ad
File h5ad_output_file = final_h5ad_output

# cellbender outputs
File? cell_barcodes_csv = CellBender.cell_csv
File? checkpoint_file = CellBender.ckpt_file
Array[File]? h5_array = CellBender.h5_array
Array[File]? html_report_array = CellBender.report_array
File? log = CellBender.log
Array[File]? metrics_csv_array = CellBender.metrics_array
String? output_directory = CellBender.output_dir
File? summary_pdf = CellBender.pdf
}
}
13 changes: 12 additions & 1 deletion pipelines/skylab/paired_tag/PairedTag.changelog.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,18 @@
# 1.9.0
2024-12-05 (Date of Last Commit)

* Added an optional task to the Optimus.wdl that will run CellBender on the Optimus output h5ad file

# 1.8.4
2024-12-03 (Date of Last Commit)

* Fixed a bug in the StarSoloFastq task that caused the pipeline to not output a UniqueAndMult-Uniform.mtx when --soloMultiMappers Uniform was passed to STAR

# 1.8.3
2024-11-15 (Date of Last Commit)
2024-11-22 (Date of Last Commit)

* Added bam validation in the StarSoloFastq task; this does not affect the outputs of the pipeline
* Updated the warp-tools docker; this update changes the way gene_names are identified when creating gene expression h5ad files

# 1.8.2
2024-11-12 (Date of Last Commit)
Expand Down
Loading

0 comments on commit e4d6aee

Please sign in to comment.