diff --git a/.dockstore.yml b/.dockstore.yml index d7bf17f613..366840f7d4 100644 --- a/.dockstore.yml +++ b/.dockstore.yml @@ -130,3 +130,7 @@ workflows: - name: VariantCalling subclass: WDL primaryDescriptorPath: /pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl + + - name: SlideTags + subclass: WDL + primaryDescriptorPath: /beta-pipelines/skylab/slidetags/SlideTags.wdl diff --git a/beta-pipelines/skylab/slidetags/SlideTags.wdl b/beta-pipelines/skylab/slidetags/SlideTags.wdl new file mode 100644 index 0000000000..976133327a --- /dev/null +++ b/beta-pipelines/skylab/slidetags/SlideTags.wdl @@ -0,0 +1,36 @@ +version 1.0 + +import "scripts/spatial-count.wdl" as SpatialCount + +workflow SlideTags { # Wraps a single call to the count task imported from scripts/spatial-count.wdl + + String pipeline_version = "1.0.0" + + input { + String id # NOTE(review): declared but not referenced anywhere in this workflow and has no parameter_meta entry + Array[String] fastq_paths + Array[String] pucks + Int mem_GiB = 64 + Int disk_GiB = 128 + String docker = "us.gcr.io/broad-gotc-prod/slide-tags:1.0.0" + } + + parameter_meta { + fastq_paths: "Array of paths to spatial fastq files" + pucks: "Array of paths to puck files" + mem_GiB: "Memory in GiB to allocate to the task" + disk_GiB: "Disk in GiB to allocate to the task" + docker: "Docker image to use" + } + + call SpatialCount.count as spatial_count { # receives every workflow input except id + input: + fastq_paths = fastq_paths, + pucks = pucks, + mem_GiB = mem_GiB, + disk_GiB = disk_GiB, + docker = docker + } + +} # NOTE(review): no workflow-level output block is declared; confirm the call outputs are exposed as intended + diff --git a/beta-pipelines/skylab/slidetags/scripts/CREDITS.md b/beta-pipelines/skylab/slidetags/scripts/CREDITS.md new file mode 100644 index 0000000000..920e7cee08 --- /dev/null +++ b/beta-pipelines/skylab/slidetags/scripts/CREDITS.md @@ -0,0 +1,11 @@ +# Credits + +This project uses code from the following sources: + +- **Spatial Count Workflow** + URL: [https://github.com/MacoskoLab/Macosko-Pipelines/blob/main/spatial-count/spatial-count.wdl](https://github.com/MacoskoLab/Macosko-Pipelines/blob/main/spatial-count/spatial-count.wdl) + This code was adapted and modified from the Macosko Lab's pipeline repository. 
+ +Additional modifications include output handling and script download changes. + +Please refer to the original source for the full context of the workflow. diff --git a/beta-pipelines/skylab/slidetags/scripts/spatial-count.wdl b/beta-pipelines/skylab/slidetags/scripts/spatial-count.wdl new file mode 100644 index 0000000000..481da62fec --- /dev/null +++ b/beta-pipelines/skylab/slidetags/scripts/spatial-count.wdl @@ -0,0 +1,90 @@ +version 1.0 + +task count { # Checks and downloads the FASTQ and puck inputs, runs spatial-count.jl on them, and returns SBcounts.h5 plus a log + input { + Array[String] fastq_paths + Array[String] pucks + Int mem_GiB + Int disk_GiB + String docker + } + command <<< + set -euo pipefail + set -x + + echo "<< starting spatial-count >>" + + gcloud config set storage/process_count 16 + gcloud config set storage/thread_count 2 + + # Download the script -- put this script into a docker + wget https://raw.githubusercontent.com/MacoskoLab/Macosko-Pipelines/5c74e9e6148102081827625b9ce91ec2b7ba3541/spatial-count/spatial-count.jl # URL is pinned to a commit hash rather than a branch + + echo "FASTQs: ~{length(fastq_paths)} paths provided" + echo "Pucks: ~{length(pucks)} puck(s) provided" + + # Assert that the fastqs exist + fastqs=(~{sep=' ' fastq_paths}) + for fastq in "${fastqs[@]}" ; do + if ! gsutil stat "$fastq" &> /dev/null ; then + echo "ERROR: gsutil stat command failed on fastq $fastq" + exit 1 + fi + done + + # Download the fastqs + echo "Downloading fastqs:" + mkdir fastqs + gcloud storage cp ~{sep=' ' fastq_paths} fastqs + + # Assert that the pucks exist + pucks=(~{sep=' ' pucks}) + for puck in "${pucks[@]}" ; do + if ! gsutil stat "$puck" &> /dev/null ; then + echo "ERROR: gsutil stat command failed on puck $puck" + exit 1 + fi + done + + # Download the pucks + echo "Downloading pucks:" + mkdir pucks + gcloud storage cp ~{sep=' ' pucks} pucks + + # Run the script + echo ; echo "Running spatial-count.jl" + ## julia --threads=4 /spatial-count.jl fastqs pucks . + julia --threads=4 spatial-count.jl fastqs pucks . 
+ + if [[ -f SBcounts.h5 ]] ; then + echo ; echo "Success, uploading counts" + echo "true" > DONE + else + echo ; echo "ERROR: CANNOT FIND: SBcounts.h5" ; exit 1 # stop here rather than at the du on the missing file below + fi + + echo; echo "Writing logs:" + echo; echo "fastqs size:"; du -sh fastqs + echo; echo "pucks size:"; du -sh pucks + echo; echo "output size:"; du -sh SBcounts.h5 + echo; echo "FREE SPACE:"; df -h + + cat stdout stderr > spatial-count.log # NOTE(review): assumes files named stdout and stderr exist in the working directory; confirm for the target backend + echo "<< completed spatial-count >>" + >>> + + output { + Boolean DONE = read_boolean("DONE") # the DONE file is only written on the success path above + File sb_counts = "SBcounts.h5" + File spatial_log = "spatial-count.log" + + } + runtime { + docker: docker + memory: "~{mem_GiB} GiB" # binary units, matching the mem_GiB input name + disks: "local-disk ~{disk_GiB} SSD" + cpu: 1 # NOTE(review): julia above is started with --threads=4; confirm one CPU is intended + preemptible: 0 + } +} + diff --git a/deprecated/pipelines/cemba/build_cemba_references/BuildCembaReferences.changelog.md b/deprecated/pipelines/cemba/build_cemba_references/BuildCembaReferences.changelog.md new file mode 100644 index 0000000000..bfa1bee7dc --- /dev/null +++ b/deprecated/pipelines/cemba/build_cemba_references/BuildCembaReferences.changelog.md @@ -0,0 +1,12 @@ +# 1.0.1 +2024-10-16 (Date of Last Commit) + +* The CEMBA workflow is deprecated and is no longer supported. However, the CEMBA documentation is still available. See [CEMBA Pipeline Overview](https://broadinstitute.github.io/warp/docs/Pipelines/CEMBA_MethylC_Seq_Pipeline/README) on the [WARP documentation site](https://broadinstitute.github.io/warp/)! 
+ +# 1.0.0 +2020-11-15 (Date of Last Commit) + +###Initial release of BuildCembareferences Pipeline +Builds genomic reference files for [CEMBA pipeline](https://broadinstitute.github.io/warp/docs/Pipelines/CEMBA_MethylC_Seq_Pipeline/README) + +* Added version number to the BuildCembaReferences workflow diff --git a/pipelines/cemba/build_cemba_references/BuildCembaReferences.options.json b/deprecated/pipelines/cemba/build_cemba_references/BuildCembaReferences.options.json similarity index 100% rename from pipelines/cemba/build_cemba_references/BuildCembaReferences.options.json rename to deprecated/pipelines/cemba/build_cemba_references/BuildCembaReferences.options.json diff --git a/pipelines/cemba/build_cemba_references/BuildCembaReferences.wdl b/deprecated/pipelines/cemba/build_cemba_references/BuildCembaReferences.wdl similarity index 100% rename from pipelines/cemba/build_cemba_references/BuildCembaReferences.wdl rename to deprecated/pipelines/cemba/build_cemba_references/BuildCembaReferences.wdl diff --git a/pipelines/cemba/build_cemba_references/build_bisulfite_references.py b/deprecated/pipelines/cemba/build_cemba_references/build_bisulfite_references.py similarity index 100% rename from pipelines/cemba/build_cemba_references/build_bisulfite_references.py rename to deprecated/pipelines/cemba/build_cemba_references/build_bisulfite_references.py diff --git a/pipelines/cemba/build_cemba_references/input_files/BuildCembaReferences.GRCh38.inputs.json b/deprecated/pipelines/cemba/build_cemba_references/input_files/BuildCembaReferences.GRCh38.inputs.json similarity index 100% rename from pipelines/cemba/build_cemba_references/input_files/BuildCembaReferences.GRCh38.inputs.json rename to deprecated/pipelines/cemba/build_cemba_references/input_files/BuildCembaReferences.GRCh38.inputs.json diff --git a/pipelines/cemba/build_cemba_references/input_files/BuildCembaReferences.GRCm38.inputs.json 
b/deprecated/pipelines/cemba/build_cemba_references/input_files/BuildCembaReferences.GRCm38.inputs.json similarity index 100% rename from pipelines/cemba/build_cemba_references/input_files/BuildCembaReferences.GRCm38.inputs.json rename to deprecated/pipelines/cemba/build_cemba_references/input_files/BuildCembaReferences.GRCm38.inputs.json diff --git a/pipelines/cemba/cemba_methylcseq/CEMBA.changelog.md b/deprecated/pipelines/cemba/cemba_methylcseq/CEMBA.changelog.md similarity index 69% rename from pipelines/cemba/cemba_methylcseq/CEMBA.changelog.md rename to deprecated/pipelines/cemba/cemba_methylcseq/CEMBA.changelog.md index 0a7178369f..9dc8a0f319 100644 --- a/pipelines/cemba/cemba_methylcseq/CEMBA.changelog.md +++ b/deprecated/pipelines/cemba/cemba_methylcseq/CEMBA.changelog.md @@ -1,3 +1,8 @@ +# 1.1.8 +2024-10-16 (Date of Last Commit) + +* The CEMBA workflow is deprecated and is no longer supported. However, the CEMBA documentation is still available. See [CEMBA Pipeline Overview](https://broadinstitute.github.io/warp/docs/Pipelines/CEMBA_MethylC_Seq_Pipeline/README) on the [WARP documentation site](https://broadinstitute.github.io/warp/)! 
+ # 1.1.7 2024-09-06 (Date of Last Commit) diff --git a/pipelines/cemba/cemba_methylcseq/CEMBA.methods.md b/deprecated/pipelines/cemba/cemba_methylcseq/CEMBA.methods.md similarity index 100% rename from pipelines/cemba/cemba_methylcseq/CEMBA.methods.md rename to deprecated/pipelines/cemba/cemba_methylcseq/CEMBA.methods.md diff --git a/pipelines/cemba/cemba_methylcseq/CEMBA.png b/deprecated/pipelines/cemba/cemba_methylcseq/CEMBA.png similarity index 100% rename from pipelines/cemba/cemba_methylcseq/CEMBA.png rename to deprecated/pipelines/cemba/cemba_methylcseq/CEMBA.png diff --git a/pipelines/cemba/cemba_methylcseq/CEMBA.wdl b/deprecated/pipelines/cemba/cemba_methylcseq/CEMBA.wdl similarity index 99% rename from pipelines/cemba/cemba_methylcseq/CEMBA.wdl rename to deprecated/pipelines/cemba/cemba_methylcseq/CEMBA.wdl index 1779c89194..3c23723a02 100644 --- a/pipelines/cemba/cemba_methylcseq/CEMBA.wdl +++ b/deprecated/pipelines/cemba/cemba_methylcseq/CEMBA.wdl @@ -57,7 +57,7 @@ workflow CEMBA { } # version of this pipeline - String pipeline_version = "1.1.7" + String pipeline_version = "1.1.8" # trim off hardcoded sequence adapters call Trim as TrimAdapters { diff --git a/pipelines/cemba/cemba_methylcseq/README.md b/deprecated/pipelines/cemba/cemba_methylcseq/README.md similarity index 100% rename from pipelines/cemba/cemba_methylcseq/README.md rename to deprecated/pipelines/cemba/cemba_methylcseq/README.md diff --git a/pipelines/cemba/cemba_methylcseq/example_inputs/CEMBA.inputs.json b/deprecated/pipelines/cemba/cemba_methylcseq/example_inputs/CEMBA.inputs.json similarity index 100% rename from pipelines/cemba/cemba_methylcseq/example_inputs/CEMBA.inputs.json rename to deprecated/pipelines/cemba/cemba_methylcseq/example_inputs/CEMBA.inputs.json diff --git a/pipelines/cemba/cemba_methylcseq/example_inputs/CEMBA.options.json b/deprecated/pipelines/cemba/cemba_methylcseq/example_inputs/CEMBA.options.json similarity index 100% rename from 
pipelines/cemba/cemba_methylcseq/example_inputs/CEMBA.options.json rename to deprecated/pipelines/cemba/cemba_methylcseq/example_inputs/CEMBA.options.json diff --git a/pipeline_versions.txt b/pipeline_versions.txt index 58e9900074..66a514f5a9 100644 --- a/pipeline_versions.txt +++ b/pipeline_versions.txt @@ -30,13 +30,11 @@ ExomeReprocessing 3.3.1 2024-09-17 BuildIndices 3.0.0 2023-12-06 scATAC 1.3.2 2023-08-03 snm3C 4.0.4 2024-08-06 -Multiome 5.7.0 2024-09-24 -PairedTag 1.7.0 2024-09-24 +Multiome 5.7.1 2024-10-18 +PairedTag 1.7.1 2024-10-18 MultiSampleSmartSeq2 2.2.22 2024-09-11 MultiSampleSmartSeq2SingleNucleus 2.0.1 2024-09-24 Optimus 7.7.0 2024-09-24 -atac 2.3.1 2024-09-11 +atac 2.3.2 2024-10-18 SmartSeq2SingleSample 5.1.21 2024-09-11 SlideSeq 3.4.2 2024-09-24 -BuildCembaReferences 1.0.0 2020-11-15 -CEMBA 1.1.7 2024-09-06 diff --git a/pipelines/cemba/build_cemba_references/BuildCembaReferences.changelog.md b/pipelines/cemba/build_cemba_references/BuildCembaReferences.changelog.md deleted file mode 100644 index 7120bb57d4..0000000000 --- a/pipelines/cemba/build_cemba_references/BuildCembaReferences.changelog.md +++ /dev/null @@ -1,7 +0,0 @@ -# 1.0.0 -2020-11-15 (Date of Last Commit) - -###Initial release of BuildCembareferences Pipeline -Builds genomic reference files for [CEMBA pipeline](https://broadinstitute.github.io/warp/docs/Pipelines/CEMBA_MethylC_Seq_Pipeline/README) - -* Added version number to the BuildCembaReferences workflow diff --git a/pipelines/skylab/atac/atac.changelog.md b/pipelines/skylab/atac/atac.changelog.md index d64620354a..34b5704e59 100644 --- a/pipelines/skylab/atac/atac.changelog.md +++ b/pipelines/skylab/atac/atac.changelog.md @@ -1,3 +1,8 @@ +# 2.3.2 +2024-10-18 (Date of Last Commit) + +* Removed the underscore of the NHashID in the ATAC library metrics CSV + # 2.3.1 2024-09-11 (Date of Last Commit) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 8918a8d8ad..2acb133c2b 100644 --- 
a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -46,7 +46,7 @@ workflow ATAC { String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG" } - String pipeline_version = "2.3.1" + String pipeline_version = "2.3.2" # Determine docker prefix based on cloud provider String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/" @@ -559,8 +559,7 @@ task CreateFragmentFile { data = pp.recipe_10x_metrics("~{bam}", "~{bam_base_name}.fragments.tsv", "temp_metrics.h5ad", is_paired=True, barcode_tag="CB", chrom_sizes=chrom_size_dict, gene_anno=atac_gtf, peaks=None) # Add NHashID to metrics - nhash_ID_value = "XXX" - data = OrderedDict({'NHash_ID': atac_nhash_id, **data}) + data = OrderedDict({'NHashID': atac_nhash_id, **data}) # Flatten the dictionary flattened_data = [] for category, metrics in data.items(): diff --git a/pipelines/skylab/multiome/Multiome.changelog.md b/pipelines/skylab/multiome/Multiome.changelog.md index 40e399d729..378678f9ba 100644 --- a/pipelines/skylab/multiome/Multiome.changelog.md +++ b/pipelines/skylab/multiome/Multiome.changelog.md @@ -1,3 +1,8 @@ +# 5.7.1 +2024-10-18 (Date of Last Commit) + +* Removed the underscore of the NHashID in the ATAC library metrics CSV to match the gene expression library metrics + # 5.7.0 2024-09-24 (Date of Last Commit) * Added a python implementation of DoubletFinder to calculate doublet scores in gene expression data; percent doublets are now available as a library-level metric and individual doublet scores for cell barcodes are in the h5ad diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index ce14e6c476..821e5bead6 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -9,7 +9,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils workflow Multiome { - String pipeline_version = "5.7.0" + String pipeline_version = "5.7.1" input { diff --git a/pipelines/skylab/paired_tag/PairedTag.changelog.md 
b/pipelines/skylab/paired_tag/PairedTag.changelog.md index ab5104b3b8..f6ce64b4ca 100644 --- a/pipelines/skylab/paired_tag/PairedTag.changelog.md +++ b/pipelines/skylab/paired_tag/PairedTag.changelog.md @@ -1,3 +1,8 @@ +# 1.7.1 +2024-10-18 (Date of Last Commit) + +* Removed the underscore of the NHashID in the ATAC library metrics CSV + # 1.7.0 2024-09-24 (Date of Last Commit) * Added a python implementation of DoubletFinder to calculate doublet scores in gene expression data; percent doublets are now available as a library-level metric and individual doublet scores for cell barcodes are in the h5ad diff --git a/pipelines/skylab/paired_tag/PairedTag.wdl b/pipelines/skylab/paired_tag/PairedTag.wdl index 938acb24b9..83b470ba47 100644 --- a/pipelines/skylab/paired_tag/PairedTag.wdl +++ b/pipelines/skylab/paired_tag/PairedTag.wdl @@ -8,7 +8,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils workflow PairedTag { - String pipeline_version = "1.7.0" + String pipeline_version = "1.7.1" input { diff --git a/pull_request_template.md b/pull_request_template.md index cb89332bc9..59feb86a68 100644 --- a/pull_request_template.md +++ b/pull_request_template.md @@ -9,7 +9,7 @@ _You can delete these instructions once you have written your PR description._ ---- ### Checklist -If you can answer "yes" to the following items, please add a checkmark next to the appropriate checklist item(s) **and** notify our WARP documentation team by tagging either @ekiernan or @kayleemathews in a comment on this PR. +If you can answer "yes" to the following items, please add a checkmark next to the appropriate checklist item(s) **and** notify our WARP team by tagging @broadinstitute/warp-admins in a comment on this PR. - [ ] Did you add inputs, outputs, or tasks to a workflow? - [ ] Did you modify, delete or move: file paths, file names, input names, output names, or task names? 
diff --git a/verification/test-wdls/scripts/requirements.txt b/verification/test-wdls/scripts/requirements.txt index 58bc30385a..de9e3e0771 100644 --- a/verification/test-wdls/scripts/requirements.txt +++ b/verification/test-wdls/scripts/requirements.txt @@ -1 +1 @@ -jinja2==3.1.2 \ No newline at end of file +jinja2==3.1.3 \ No newline at end of file diff --git a/website/docs/Deprecated_Pipelines/CEMBA_MethylC_Seq_Pipeline/README.md b/website/docs/Deprecated_Pipelines/CEMBA_MethylC_Seq_Pipeline/README.md index af41088ee8..0938488887 100644 --- a/website/docs/Deprecated_Pipelines/CEMBA_MethylC_Seq_Pipeline/README.md +++ b/website/docs/Deprecated_Pipelines/CEMBA_MethylC_Seq_Pipeline/README.md @@ -5,9 +5,9 @@ slug: /Pipelines/CEMBA_MethylC_Seq_Pipeline/README # CEMBA Overview :::warning -9/12/2024 +10/16/2024 -We are deprecating the CEMBA pipeline. Although the code will continue to be available, we are no longer supporting it. A possible alternative is the [Single-nucleus Methyl-seq and Chromatin Capture](../../Pipelines/snM3C/README.md) workflow. +The CEMBA pipeline has been officially deprecated, with support ending on October 16, 2024. Users currently utilizing this pipeline are advised to transition to other options. A recommended alternative is the [Single-nucleus Methyl-seq and Chromatin Capture](../../Pipelines/snM3C/README.md) workflow, which offers similar capabilities for methylation and chromatin analysis. ::: | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback | diff --git a/website/docs/Pipelines/Optimus_Pipeline/README.md b/website/docs/Pipelines/Optimus_Pipeline/README.md index 6bbc24f6b7..9c1395ed4e 100644 --- a/website/docs/Pipelines/Optimus_Pipeline/README.md +++ b/website/docs/Pipelines/Optimus_Pipeline/README.md @@ -260,7 +260,7 @@ The following table lists the output files produced from the pipeline. For sampl | matrix_col_index | `_sparse_counts_col_index.npy` | Index of genes in count matrix. 
| NPY | | cell_metrics | `.cell-metrics.csv.gz` | Matrix of metrics by cells. | Compressed CSV | | gene_metrics | `.gene-metrics.csv.gz` | Matrix of metrics by genes. | Compressed CSV | -| aligner_metrics | `.star_metrics.tar` | Tarred metrics files produced by the STARsolo aligner; contains align features, cell reads, summary, and UMI per cell metrics files. | TXT | +| aligner_metrics | `.star_metrics.tar` | Tarred metrics files produced by the STARsolo aligner; contains align features, cell reads, summary, and UMI per cell metrics files. See the [STARsolo metrics](./starsolo-metrics.md) for more information about these files. | TXT | | library_metrics | `__library_metrics.csv` | Optional CSV file containing all library-level metrics calculated with STARsolo for gene expression data. See the [Library-level metrics](./Library-metrics.md) for how metrics are calculated. | CSV | | multimappers_EM_matrix | `UniqueAndMult-EM.mtx` | Optional output produced when `soloMultiMappers` is "EM"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information. | MTX | | multimappers_Uniform_matrix | `UniqueAndMult-Uniform.mtx` | Optional output produced when `soloMultiMappers` is "Uniform"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information. | MTX | diff --git a/website/docs/Pipelines/Optimus_Pipeline/starsolo-metrics.md b/website/docs/Pipelines/Optimus_Pipeline/starsolo-metrics.md new file mode 100644 index 0000000000..bd830a8d4c --- /dev/null +++ b/website/docs/Pipelines/Optimus_Pipeline/starsolo-metrics.md @@ -0,0 +1,104 @@ +# STAR Aligner Metrics +The STAR aligner produces multiple text files containing library-level summary metrics, cell-level metrics, and UMI metrics. The Optimus workflow compresses these files into a single TAR. 
These outputs are directly from the aligner as different batches of the data are analyzed in parallel. + +The STAR aligner metrics are supplemental to the [library-level metrics CSV](./Library-metrics.md) that is also produced by Optimus. Several of the calculations produced in the library metrics are directly based on the STAR aligner metrics. + +The following sections describe these outputs. + +## Align Features Metrics +The Align feature text file contains library-level metrics produced by the STARsolo alignment detailing the alignment of reads to genomic features during single-cell RNA-seq analysis. These metrics indicate how well reads map to specific genomic features or whether they failed to map due to various reasons. For example: +**noUnmapped** represents the number of reads that were not aligned to any feature in the genome. +**noNoFeature** reflects reads that were aligned but did not map to any specific feature such as exons or genes. +**MultiFeature** counts reads that were aligned to multiple features. +**yesWLmatch** and **yesCellBarcodes** track how well reads match the barcode whitelist, an important step in identifying valid cell barcodes, which helps demultiplex the single-cell RNA-seq data​. + +Each of the table metrics gives insights into different stages of read alignment, from barcode matching to gene feature mapping, allowing you to assess the quality and accuracy of the alignment step in the pipeline. + + +| Metrics name | Description | +| --- | --- | +| noUnmapped | Number of unmapped reads | +| noNoFeature | Number of reads not mapped to a feature. | +| MultiFeature | Number of reads aligned to multiple features. | +| subMultiFeatureMultiGenomic | Number of reads mapping to multiple genomic loci and multiple features. | +| noTooManyWLmatches | Number of reads not counted because their barcoded pair has too many matches to the whitelist. 
| +| noMMtoWLwithoutExact | Number of reads not counted because their barcoded pair has mismatches to the whitelist and there's no more reads supporting that barcode. | +| yesWLmatch | Number of reads whose barcoded pair has a match to the whitelist. | +| yessubWLmatchExact | Number of reads with cell barcode exactly matched to the whitelist (a subset of yesWLmatch). | +| yessubWLmatch_UniqueFeature | Number of reads matched to the WL and unique feature (a subset of yesWLmatch). | +| yesCellBarcodes | Number of reads associated with a valid cell barcode. | +| yesUMIs | Number of reads associated with a valid UMI. | + + + + + + +## Cell Read Metrics + +The **cell read metrics** text file provides cell barcode-level information about the reads; for instance: +**cbMatch** counts the number of reads that successfully matched the cell barcode. +**cbPerfect** gives the number of reads with a perfect match to a cell barcode, while **cbMMunique** and **cbMMmultiple** measure mismatches that still align uniquely or to multiple barcodes, respectively. +**genomeU** and **genomeM** count reads mapped to one or multiple loci in the genome, respectively. +**exonic** and **intronic** track reads mapping to annotated exons or introns, helping distinguish between different gene regions in the analysis. + +These metrics are important for assessing the quality of individual cell barcodes. + +| Metrics | Description | +| --- | --- | +| CB | Cell barcode | +| cbMatch | Number of reads that matched the cell barcode. | +| cbPerfect | Number of perfect matches on cell barcode. | +| cbMMunique | Number of reads with cell barcodes that map with mismatches to one barcode in the passlist. | +| cbMMmultiple | Number of reads with cell barcodes that map with mismatches to multiple barcodes in the passlist. | +| genomeU | Number of reads mapping to one locus in the genome. | +| genomeM | Number of reads mapping to multiple loci in the genome. 
| +| featureU | Number of reads mapping to one feature (Gene, GeneFull, etc). | +| featureM | Number of reads mapping to multiple features. | +| exonic | Number of reads mapping to annotated exons. | +| intronic | Number of reads mapping to annotated introns; these are only calculated for --soloFeatures GeneFull_Ex50pAS and/or GeneFull_ExonOverIntron. | +| exonicAS | Number of reads mapping antisense to annotated exons. | +| intronicAS | Number of reads mapping antisense to annotated introns; these are only calculated for --soloFeatures GeneFull_Ex50pAS. | +| mito | Number of reads mapping to the mitochondrial genome. | +| countedU | Number of unique-gene reads whose UMIs contributed to counts in the matrix.mtx (reads with valid CB/UMI/gene). | +| countedM | Number of multi-gene reads whose UMIs contributed to counts in the matrix.mtx. | +| nUMIunique | Total number of counted UMIs for unique-gene reads. | +| nGenesUnique | Number of genes for unique-gene reads. | +| nUMImulti | Total number of counted UMIs for multi-gene reads. | +| nGenesMulti | Number of genes for multi-gene reads. | + +## Summary.txt + +The **summary** text file contains additional library-level metrics produced by the STARsolo aligner, such as: +**Number of reads**, which reflects the total reads processed, and **reads with valid barcodes**, which indicates how many reads matched the barcode whitelist. +**Sequencing saturation** shows the completeness of sequencing, where higher values indicate fewer additional reads are needed to capture new UMIs. +Metrics like **Q30 Bases in CB+UMI** and **Q30 Bases in RNA read** give insights into sequencing quality, showing how many reads had high-quality base calls. +Other key metrics, such as **reads mapped to the genome: Unique+Multiple** and **estimated number of cells**, provide a sense of how well reads were mapped to the genome and how many cells were identified. 
+These summary metrics help users assess the overall quality and completeness of their single-cell RNA-seq data, serving as a useful checkpoint for determining whether the data is suitable for further analysis. + +| Metric | Description | +| --- | --- | +| Number of Reads | Number of reads in the library. | +| Reads With Valid Barcodes | Fraction of reads with valid barcodes. | +| Sequencing Saturation | Proportion of unique molecular identifiers (UMIs) that have been sequenced at least once compared to the total number of possible UMIs in the sample; calculated as: 1-(yesUMIs/yessubWLmatch_UniqueFeature). | +| Q30 Bases in CB+UMI | Fraction of high-quality reads in the cell barcode and UMI read. | +| Q30 Bases in RNA read | Fraction of high-quality reads in the RNA read. | +| Reads Mapped to Genome: Unique+Multiple | Fraction of unique and multimapped reads that mapped to the genome. | +| Reads Mapped to Genome: Unique | Fraction of unique reads that mapped to the genome. | +| Reads Mapped to genes: Unique+Multiple | Fraction of reads that mapped to genes as defined by the `--soloFeatures` parameter. | +| Reads Mapped to Genes: Unique | Fraction of unique reads that mapped to genes. | +| Estimated Number of Cells | Number of barcodes that STARsolo flagged as cells based on UMIs. | +| Unique Reads in Cells Mapped to genes | Total number of unique reads that mapped to genes across all cells. | +| Fraction of Unique Reads in Cells | Fraction of unique reads across all cells. | +| Mean Reads per Cell | Mean number of reads per cell. | +| Median Reads per Cell | Median number of reads per cell. | +| UMIs in Cells | Number of UMIs per cell. | +| Mean UMI per Cell | Mean number of UMIs per cell. | +| Median UMI per Cell | Median number of UMIs per cell. | +| Mean Genes per Cell | Mean number of genes expressed per cell. | +| Median Genes per Cell | Median number of genes per cell. | +| Total Genes Detected | Total number of genes detected in the overall library. 
| + + +## UMI per cell +The UMI per cell text file is a list of UMI counts for every cell. It contains two columns. The first column contains the number of UMIs for each barcode entry. The second column indicates whether a barcode was flagged as a cell. A value of 1 indicates that the barcode passed the filtering criteria to be considered a cell, and 0 indicates that it did not pass. diff --git a/website/docs/Pipelines/snM3C/README.md b/website/docs/Pipelines/snM3C/README.md index fe5e0fa2f9..733a443a53 100644 --- a/website/docs/Pipelines/snM3C/README.md +++ b/website/docs/Pipelines/snM3C/README.md @@ -6,8 +6,9 @@ slug: /Pipelines/snm3C/README | Pipeline Version | Date Updated | Documentation Authors | Questions or Feedback | | :----: | :---: | :----: | :--------------: | -| [snm3C_v4.0.1](https://github.com/broadinstitute/warp/releases) | March, 2024 | Kaylee Mathews | Please [file an issue in WARP](https://github.com/broadinstitute/warp/issues). | +| [snm3C_v4.0.1](https://github.com/broadinstitute/warp/releases) | October, 2024 | Kaylee Mathews | Please [file an issue in WARP](https://github.com/broadinstitute/warp/issues). 
| +![snm3C_diagram](snm3C_diagram.png) ## Introduction to snm3C diff --git a/website/docs/Pipelines/snM3C/snm3C_diagram.png b/website/docs/Pipelines/snM3C/snm3C_diagram.png new file mode 100644 index 0000000000..4dd55b6c2d Binary files /dev/null and b/website/docs/Pipelines/snM3C/snm3C_diagram.png differ diff --git a/website/yarn.lock b/website/yarn.lock index dd38b210a0..cd6182cb2b 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -5344,15 +5344,10 @@ flux@^4.0.1: fbemitter "^3.0.0" fbjs "^3.0.0" -follow-redirects@^1.0.0: - version "1.14.9" - resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.14.9.tgz#dd4ea157de7bfaf9ea9b3fbd85aa16951f78d8d7" - integrity sha512-MQDfihBQYMcyy5dhRDJUHcw7lb2Pv/TuE6xP1vyraLukNDHKbDxDNaOE3NbCAdKQApno+GPRyo1YAp89yCjK4w== - -follow-redirects@^1.14.7: - version "1.15.2" - resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.15.2.tgz#b460864144ba63f2681096f274c4e57026da2c13" - integrity sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA== +follow-redirects@^1.0.0, follow-redirects@^1.14.7: + version "1.15.6" + resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.15.6.tgz#7f815c0cda4249c74ff09e95ef97c23b5fd0399b" + integrity sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA== fork-ts-checker-webpack-plugin@^6.5.0: version "6.5.3" @@ -9257,9 +9252,9 @@ webpack-bundle-analyzer@^4.5.0: ws "^7.3.1" webpack-dev-middleware@^5.3.1: - version "5.3.3" - resolved "https://registry.yarnpkg.com/webpack-dev-middleware/-/webpack-dev-middleware-5.3.3.tgz#efae67c2793908e7311f1d9b06f2a08dcc97e51f" - integrity sha512-hj5CYrY0bZLB+eTO+x/j67Pkrquiy7kWepMHmUMoPsmcUaeEnQJqFzHJOyxgWlq746/wUuA64p9ta34Kyb01pA== + version "5.3.4" + resolved "https://registry.yarnpkg.com/webpack-dev-middleware/-/webpack-dev-middleware-5.3.4.tgz#eb7b39281cbce10e104eb2b8bf2b63fce49a3517" + integrity 
sha512-BVdTqhhs+0IfoeAf7EoH5WE+exCmqGerHfDM0IL096Px60Tq2Mn9MAbnaGUe6HiMa41KMCYF19gyzZmBcq/o4Q== dependencies: colorette "^2.0.10" memfs "^3.4.3"