dev --> staging #1395

Merged 8 commits from dev into staging on Oct 21, 2024

4 changes: 4 additions & 0 deletions .dockstore.yml
@@ -130,3 +130,7 @@ workflows:
- name: VariantCalling
subclass: WDL
primaryDescriptorPath: /pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl

- name: SlideTags
subclass: WDL
primaryDescriptorPath: /beta-pipelines/skylab/slidetags/SlideTags.wdl
36 changes: 36 additions & 0 deletions beta-pipelines/skylab/slidetags/SlideTags.wdl
@@ -0,0 +1,36 @@
version 1.0

import "scripts/spatial-count.wdl" as SpatialCount

workflow SlideTags {

String pipeline_version = "1.0.0"

input {
String id
Array[String] fastq_paths
Array[String] pucks
Int mem_GiB = 64
Int disk_GiB = 128
String docker = "us.gcr.io/broad-gotc-prod/slide-tags:1.0.0"
}

parameter_meta {
fastq_paths: "Array of paths to spatial fastq files"
pucks: "Array of paths to puck files"
mem_GiB: "Memory in GiB to allocate to the task"
disk_GiB: "Disk in GiB to allocate to the task"
docker: "Docker image to use"
}

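# Count spatial barcodes in the fastqs against the supplied puck files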
call SpatialCount.count as spatial_count {
input:
fastq_paths = fastq_paths,
pucks = pucks,
mem_GiB = mem_GiB,
disk_GiB = disk_GiB,
docker = docker
}

}

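For local testing, a workflow like this is typically driven by a Cromwell inputs JSON. A minimal sketch, assuming a local Cromwell jar and a run from the repo root; the bucket paths and sample name are hypothetical:

```bash
# Write a hypothetical inputs file; replace the gs:// paths with real ones.
cat > SlideTags.inputs.json <<'EOF'
{
  "SlideTags.id": "sample1",
  "SlideTags.fastq_paths": ["gs://my-bucket/sample1_R1.fastq.gz",
                            "gs://my-bucket/sample1_R2.fastq.gz"],
  "SlideTags.pucks": ["gs://my-bucket/puck1.csv"]
}
EOF

# Run with a local Cromwell jar (jar path assumed).
java -jar cromwell.jar run beta-pipelines/skylab/slidetags/SlideTags.wdl \
  --inputs SlideTags.inputs.json
```

The optional inputs (mem_GiB, disk_GiB, docker) fall back to the defaults declared in the input block.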
11 changes: 11 additions & 0 deletions beta-pipelines/skylab/slidetags/scripts/CREDITS.md
@@ -0,0 +1,11 @@
# Credits

This project uses code from the following sources:

- **Spatial Count Workflow**
URL: [https://github.com/MacoskoLab/Macosko-Pipelines/blob/main/spatial-count/spatial-count.wdl](https://github.com/MacoskoLab/Macosko-Pipelines/blob/main/spatial-count/spatial-count.wdl)
This code was adapted and modified from the Macosko Lab's pipeline repository.

Additional modifications include changes to output handling and to the script download step.

Please refer to the original source for the full context of the workflow.
90 changes: 90 additions & 0 deletions beta-pipelines/skylab/slidetags/scripts/spatial-count.wdl
@@ -0,0 +1,90 @@
version 1.0

task count {
input {
Array[String] fastq_paths
Array[String] pucks
Int mem_GiB
Int disk_GiB
String docker
}
command <<<
set -euo pipefail
set -x

echo "<< starting spatial-count >>"

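# Raise gcloud storage parallelism so the bulk downloads below run faster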
gcloud config set storage/process_count 16
gcloud config set storage/thread_count 2

# Download the processing script, pinned to a specific commit (TODO: bake this script into the docker image)
wget https://raw.githubusercontent.com/MacoskoLab/Macosko-Pipelines/5c74e9e6148102081827625b9ce91ec2b7ba3541/spatial-count/spatial-count.jl

echo "FASTQs: ~{length(fastq_paths)} paths provided"
echo "Pucks: ~{length(pucks)} puck(s) provided"

# Assert that the fastqs exist
fastqs=(~{sep=' ' fastq_paths})
for fastq in "${fastqs[@]}" ; do
if ! gsutil stat "$fastq" &> /dev/null ; then
echo "ERROR: gsutil stat command failed on fastq $fastq"
exit 1
fi
done

# Download the fastqs
echo "Downloading fastqs:"
mkdir fastqs
gcloud storage cp ~{sep=' ' fastq_paths} fastqs

# Assert that the pucks exist
pucks=(~{sep=' ' pucks})
for puck in "${pucks[@]}" ; do
if ! gsutil stat "$puck" &> /dev/null ; then
echo "ERROR: gsutil stat command failed on puck $puck"
exit 1
fi
done

# Download the pucks
echo "Downloading pucks:"
mkdir pucks
gcloud storage cp ~{sep=' ' pucks} pucks

# Run the script
echo ; echo "Running spatial-count.jl"
## julia --threads=4 /spatial-count.jl fastqs pucks .
julia --threads=4 spatial-count.jl fastqs pucks .

if [[ -f SBcounts.h5 ]] ; then
echo ; echo "Success, uploading counts"
echo "true" > DONE
else
echo ; echo "ERROR: CANNOT FIND: SBcounts.h5"
fi
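# Note: if SBcounts.h5 is missing, DONE is never written, so the
# read_boolean("DONE") in the output section fails the task at output collection.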

echo; echo "Writing logs:"
echo; echo "fastqs size:"; du -sh fastqs
echo; echo "pucks size:"; du -sh pucks
echo; echo "output size:"; du -sh SBcounts.h5
echo; echo "FREE SPACE:"; df -h

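# On Cromwell backends the command's stdout/stderr are captured in files named
# "stdout" and "stderr" in the execution directory, so they can be bundled here.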
cat stdout stderr > spatial-count.log
echo "<< completed spatial-count >>"
>>>

output {
Boolean DONE = read_boolean("DONE")
File sb_counts = "SBcounts.h5"
File spatial_log = "spatial-count.log"

}
runtime {
docker: docker
memory: "~{mem_GiB} GB"
disks: "local-disk ~{disk_GiB} SSD"
cpu: 1
preemptible: 0
}
}

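Both WDLs can be checked locally before review; a quick sketch, assuming womtool (Cromwell's companion jar) or miniwdl is installed:

```bash
# Static syntax/type validation with womtool; also resolves the relative import
java -jar womtool.jar validate beta-pipelines/skylab/slidetags/SlideTags.wdl

# Or lint with miniwdl for additional style warnings
miniwdl check beta-pipelines/skylab/slidetags/SlideTags.wdl
```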
@@ -0,0 +1,12 @@
# 1.0.1
2024-10-16 (Date of Last Commit)

* The CEMBA workflow is deprecated and is no longer supported. However, the CEMBA documentation is still available. See [CEMBA Pipeline Overview](https://broadinstitute.github.io/warp/docs/Pipelines/CEMBA_MethylC_Seq_Pipeline/README) on the [WARP documentation site](https://broadinstitute.github.io/warp/)!

# 1.0.0
2020-11-15 (Date of Last Commit)

### Initial release of BuildCembaReferences Pipeline
Builds genomic reference files for the [CEMBA pipeline](https://broadinstitute.github.io/warp/docs/Pipelines/CEMBA_MethylC_Seq_Pipeline/README)

* Added version number to the BuildCembaReferences workflow
@@ -1,3 +1,8 @@
# 1.1.8
2024-10-16 (Date of Last Commit)

* The CEMBA workflow is deprecated and is no longer supported. However, the CEMBA documentation is still available. See [CEMBA Pipeline Overview](https://broadinstitute.github.io/warp/docs/Pipelines/CEMBA_MethylC_Seq_Pipeline/README) on the [WARP documentation site](https://broadinstitute.github.io/warp/)!

# 1.1.7
2024-09-06 (Date of Last Commit)

@@ -57,7 +57,7 @@ workflow CEMBA {
}

# version of this pipeline
-  String pipeline_version = "1.1.7"
+  String pipeline_version = "1.1.8"

# trim off hardcoded sequence adapters
call Trim as TrimAdapters {
8 changes: 3 additions & 5 deletions pipeline_versions.txt
@@ -30,13 +30,11 @@ ExomeReprocessing 3.3.1 2024-09-17
BuildIndices 3.0.0 2023-12-06
scATAC 1.3.2 2023-08-03
snm3C 4.0.4 2024-08-06
-Multiome 5.7.0 2024-09-24
-PairedTag 1.7.0 2024-09-24
+Multiome 5.7.1 2024-10-18
+PairedTag 1.7.1 2024-10-18
MultiSampleSmartSeq2 2.2.22 2024-09-11
MultiSampleSmartSeq2SingleNucleus 2.0.1 2024-09-24
Optimus 7.7.0 2024-09-24
-atac 2.3.1 2024-09-11
+atac 2.3.2 2024-10-18
SmartSeq2SingleSample 5.1.21 2024-09-11
SlideSeq 3.4.2 2024-09-24
BuildCembaReferences 1.0.0 2020-11-15
CEMBA 1.1.7 2024-09-06

This file was deleted.

5 changes: 5 additions & 0 deletions pipelines/skylab/atac/atac.changelog.md
@@ -1,3 +1,8 @@
# 2.3.2
2024-10-18 (Date of Last Commit)

* Removed the underscore of the NHashID in the ATAC library metrics CSV

# 2.3.1
2024-09-11 (Date of Last Commit)

5 changes: 2 additions & 3 deletions pipelines/skylab/atac/atac.wdl
@@ -46,7 +46,7 @@ workflow ATAC {
String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG"
}

-  String pipeline_version = "2.3.1"
+  String pipeline_version = "2.3.2"

# Determine docker prefix based on cloud provider
String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
@@ -559,8 +559,7 @@ task CreateFragmentFile {
data = pp.recipe_10x_metrics("~{bam}", "~{bam_base_name}.fragments.tsv", "temp_metrics.h5ad", is_paired=True, barcode_tag="CB", chrom_sizes=chrom_size_dict, gene_anno=atac_gtf, peaks=None)

# Add NHashID to metrics
-    nhash_ID_value = "XXX"
-    data = OrderedDict({'NHash_ID': atac_nhash_id, **data})
+    data = OrderedDict({'NHashID': atac_nhash_id, **data})
# Flatten the dictionary
flattened_data = []
for category, metrics in data.items():
5 changes: 5 additions & 0 deletions pipelines/skylab/multiome/Multiome.changelog.md
@@ -1,3 +1,8 @@
# 5.7.1
2024-10-18 (Date of Last Commit)

* Removed the underscore of the NHashID in the ATAC library metrics CSV to match the gene expression library metrics

# 5.7.0
2024-09-24 (Date of Last Commit)
* Added a python implementation of DoubletFinder to calculate doublet scores in gene expression data; percent doublets are now available as a library-level metric and individual doublet scores for cell barcodes are in the h5ad
2 changes: 1 addition & 1 deletion pipelines/skylab/multiome/Multiome.wdl
@@ -9,7 +9,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils

workflow Multiome {

-  String pipeline_version = "5.7.0"
+  String pipeline_version = "5.7.1"


input {
5 changes: 5 additions & 0 deletions pipelines/skylab/paired_tag/PairedTag.changelog.md
@@ -1,3 +1,8 @@
# 1.7.1
2024-10-18 (Date of Last Commit)

* Removed the underscore of the NHashID in the ATAC library metrics CSV

# 1.7.0
2024-09-24 (Date of Last Commit)
* Added a python implementation of DoubletFinder to calculate doublet scores in gene expression data; percent doublets are now available as a library-level metric and individual doublet scores for cell barcodes are in the h5ad
2 changes: 1 addition & 1 deletion pipelines/skylab/paired_tag/PairedTag.wdl
@@ -8,7 +8,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils

workflow PairedTag {

-  String pipeline_version = "1.7.0"
+  String pipeline_version = "1.7.1"


input {
2 changes: 1 addition & 1 deletion pull_request_template.md
@@ -9,7 +9,7 @@ _You can delete these instructions once you have written your PR description._
----

### Checklist
-If you can answer "yes" to the following items, please add a checkmark next to the appropriate checklist item(s) **and** notify our WARP documentation team by tagging either @ekiernan or @kayleemathews in a comment on this PR.
+If you can answer "yes" to the following items, please add a checkmark next to the appropriate checklist item(s) **and** notify our WARP team by tagging @broadinstitute/warp-admins in a comment on this PR.

- [ ] Did you add inputs, outputs, or tasks to a workflow?
- [ ] Did you modify, delete or move: file paths, file names, input names, output names, or task names?
2 changes: 1 addition & 1 deletion verification/test-wdls/scripts/requirements.txt
@@ -1 +1 @@
-jinja2==3.1.2
+jinja2==3.1.3
@@ -5,9 +5,9 @@ slug: /Pipelines/CEMBA_MethylC_Seq_Pipeline/README
# CEMBA Overview

:::warning
-9/12/2024
+10/16/2024

-We are deprecating the CEMBA pipeline. Although the code will continue to be available, we are no longer supporting it. A possible alternative is the [Single-nucleus Methyl-seq and Chromatin Capture](../../Pipelines/snM3C/README.md) workflow.
+The CEMBA pipeline has been officially deprecated, with support ending on October 16, 2024. Users currently utilizing this pipeline are advised to transition to other options. A recommended alternative is the [Single-nucleus Methyl-seq and Chromatin Capture](../../Pipelines/snM3C/README.md) workflow, which offers similar capabilities for methylation and chromatin analysis.
:::

| Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
2 changes: 1 addition & 1 deletion website/docs/Pipelines/Optimus_Pipeline/README.md
@@ -260,7 +260,7 @@ The following table lists the output files produced from the pipeline. For sampl
| matrix_col_index | `<input_id>_sparse_counts_col_index.npy` | Index of genes in count matrix. | NPY |
| cell_metrics | `<input_id>.cell-metrics.csv.gz` | Matrix of metrics by cells. | Compressed CSV |
| gene_metrics | `<input_id>.gene-metrics.csv.gz` | Matrix of metrics by genes. | Compressed CSV |
-| aligner_metrics | `<input_id>.star_metrics.tar` | Tarred metrics files produced by the STARsolo aligner; contains align features, cell reads, summary, and UMI per cell metrics files. | TXT |
+| aligner_metrics | `<input_id>.star_metrics.tar` | Tarred metrics files produced by the STARsolo aligner; contains align features, cell reads, summary, and UMI per cell metrics files. See the [STARsolo metrics](./starsolo-metrics.md) for more information about these files. | TXT |
| library_metrics | `<input_id>_<gex_nash_id>_library_metrics.csv` | Optional CSV file containing all library-level metrics calculated with STARsolo for gene expression data. See the [Library-level metrics](./Library-metrics.md) for how metrics are calculated. | CSV |
| multimappers_EM_matrix | `UniqueAndMult-EM.mtx` | Optional output produced when `soloMultiMappers` is "EM"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information. | MTX |
| multimappers_Uniform_matrix | `UniqueAndMult-Uniform.mtx` | Optional output produced when `soloMultiMappers` is "Uniform"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information. | MTX |