Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/jw_update_reblock' into jw_updat…
Browse files Browse the repository at this point in the history
…e_reblock
  • Loading branch information
jessicaway committed Dec 5, 2024
2 parents 5badc4c + 46168f4 commit ed5064d
Show file tree
Hide file tree
Showing 18 changed files with 172 additions and 57 deletions.
18 changes: 9 additions & 9 deletions pipeline_versions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,19 @@ Pipeline Name Version Date of Last Commit
CheckFingerprint 1.0.22 2024-10-28
RNAWithUMIsPipeline 1.0.18 2024-11-04
AnnotationFiltration 1.2.7 2024-11-04
UltimaGenomicsWholeGenomeGermline 1.1.3 2024-12-03
UltimaGenomicsWholeGenomeGermline 1.1.3 2024-12-05
WholeGenomeGermlineSingleSample 3.3.3 2024-11-04
ExomeGermlineSingleSample 3.2.3 2024-11-04
JointGenotypingByChromosomePartTwo 1.5.2 2024-11-04
JointGenotypingByChromosomePartOne 1.5.2 2024-11-04
ReblockGVCF 2.3.3 2024-12-03
ReblockGVCF 2.3.3 2024-12-05
JointGenotyping 1.7.2 2024-11-04
UltimaGenomicsJointGenotyping 1.2.2 2024-11-04
VariantCalling 2.2.4 2024-11-04
UltimaGenomicsWholeGenomeCramOnly 1.0.23 2024-11-04
GDCWholeGenomeSomaticSingleSample 1.3.4 2024-11-04
BroadInternalRNAWithUMIs 1.0.36 2024-11-04
BroadInternalUltimaGenomics 1.1.3 2024-12-03
BroadInternalUltimaGenomics 1.1.3 2024-12-05
BroadInternalArrays 1.1.14 2024-11-04
BroadInternalImputation 1.1.14 2024-11-04
Arrays 2.6.30 2024-11-04
Expand All @@ -27,14 +27,14 @@ ExternalExomeReprocessing 3.3.3 2024-11-04
CramToUnmappedBams 1.1.3 2024-08-02
WholeGenomeReprocessing 3.3.3 2024-11-04
ExomeReprocessing 3.3.3 2024-11-04
BuildIndices 3.0.0 2023-12-06
BuildIndices 3.1.0 2024-11-26
scATAC 1.3.2 2023-08-03
snm3C 4.0.4 2024-08-06
Multiome 5.9.2 2024-11-22
PairedTag 1.8.3 2024-11-22
Multiome 5.9.3 2024-12-3
PairedTag 1.8.4 2024-12-3
MultiSampleSmartSeq2 2.2.22 2024-09-11
MultiSampleSmartSeq2SingleNucleus 2.0.5 2024-11-15
Optimus 7.8.3 2024-11-22
MultiSampleSmartSeq2SingleNucleus 2.0.6 2024-11-15
Optimus 7.8.4 2024-12-3
atac 2.5.3 2024-11-22
SmartSeq2SingleSample 5.1.21 2024-09-11
SlideSeq 3.4.6 2024-11-15
SlideSeq 3.4.7 2024-12-3
5 changes: 5 additions & 0 deletions pipelines/skylab/build_indices/BuildIndices.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 3.1.0
2024-11-26 (Date of Last Commit)

* Added metadata.txt file as an output to the pipeline

# 3.0.0
2023-12-06 (Date of Last Commit)

Expand Down
73 changes: 72 additions & 1 deletion pipelines/skylab/build_indices/BuildIndices.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ workflow BuildIndices {
}

# version of this pipeline
String pipeline_version = "3.0.0"
String pipeline_version = "3.1.0"


parameter_meta {
Expand Down Expand Up @@ -49,12 +49,25 @@ workflow BuildIndices {
organism = organism
}

call RecordMetadata {
input:
pipeline_version = pipeline_version,
input_files = [annotations_gtf, genome_fa, biotypes],
output_files = [
BuildStarSingleNucleus.star_index,
BuildStarSingleNucleus.modified_annotation_gtf,
CalculateChromosomeSizes.chrom_sizes,
BuildBWAreference.reference_bundle
]
}

output {
File snSS2_star_index = BuildStarSingleNucleus.star_index
String pipeline_version_out = "BuildIndices_v~{pipeline_version}"
File snSS2_annotation_gtf_modified = BuildStarSingleNucleus.modified_annotation_gtf
File reference_bundle = BuildBWAreference.reference_bundle
File chromosome_sizes = CalculateChromosomeSizes.chrom_sizes
File metadata = RecordMetadata.metadata_file
}
}
Expand Down Expand Up @@ -195,3 +208,61 @@ String reference_name = "bwa-mem2-2.2.1-~{organism}-~{genome_source}-build-~{gen
}
}


# Writes metadata.txt describing a workflow run: the pipeline version, the UTC
# timestamp at which this task ran, the path and md5sum of every file in
# input_files and output_files, and three identifiers (workspace bucket,
# submission ID, workflow ID) parsed from the path of the first output file.
#
# Inputs:
#   pipeline_version - version string written verbatim into the metadata file
#   input_files      - files listed under "Input Files and their md5sums:"
#   output_files     - files listed under "Output Files and their md5sums:";
#                      element 0 is also used to derive the identifiers
# Output:
#   metadata_file    - the generated metadata.txt
task RecordMetadata {
  input {
    String pipeline_version
    Array[File] input_files
    Array[File] output_files
  }

  command <<<
    set -euo pipefail

    # create metadata file
    echo "Pipeline Version: ~{pipeline_version}" > metadata.txt
    echo "Date of Workflow Run: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> metadata.txt
    echo "" >> metadata.txt

    # echo paths and md5sums for input files
    echo "Input Files and their md5sums:" >> metadata.txt
    for file in ~{sep=" " input_files}; do
      # Assign the checksum to a variable first: a failing md5sum inside a
      # substitution nested in an echo argument is ignored by `set -e`, which
      # would silently record an empty checksum. A plain assignment propagates
      # the pipeline's exit status, so the task fails instead.
      checksum=$(md5sum "$file" | awk '{print $1}')
      echo "$file : $checksum" >> metadata.txt
    done
    echo "" >> metadata.txt

    # echo paths and md5sums for output files
    echo "Output Files and their md5sums:" >> metadata.txt
    for file in ~{sep=" " output_files}; do
      checksum=$(md5sum "$file" | awk '{print $1}')
      echo "$file : $checksum" >> metadata.txt
    done
    echo "" >> metadata.txt

    # The identifiers below are the 3rd, 5th and 7th '/'-separated fields of the
    # first output file's path.
    # NOTE(review): this assumes the localized path looks like
    # /<root>/<bucket>/<dir>/<submission id>/<workflow name>/<workflow id>/...
    # -- confirm against the execution backend in use.

    # grab workspace bucket
    file="~{output_files[0]}"
    workspace_bucket=$(echo "$file" | awk -F'/' '{print $3}')
    echo "Workspace Bucket: $workspace_bucket" >> metadata.txt

    # grab submission ID
    submission_id=$(echo "$file" | awk -F'/' '{print $5}')
    echo "Submission ID: $submission_id" >> metadata.txt

    # grab workflow ID
    workflow_id=$(echo "$file" | awk -F'/' '{print $7}')
    echo "Workflow ID: $workflow_id" >> metadata.txt

    echo "" >> metadata.txt
  >>>

  output {
    File metadata_file = "metadata.txt"
  }
  runtime {
    docker: "ubuntu:20.04"
    memory: "5 GiB"
    disks: "local-disk 100 HDD"
    cpu: "1"
  }
}

5 changes: 5 additions & 0 deletions pipelines/skylab/multiome/Multiome.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 5.9.3
2024-12-03 (Date of Last Commit)

* Fixed a bug in the StarSoloFastq task that caused the pipeline to not output a UniqueAndMult-Uniform.mtx when --soloMultiMappers Uniform was passed to STAR

# 5.9.2
2024-11-22 (Date of Last Commit)

Expand Down
2 changes: 1 addition & 1 deletion pipelines/skylab/multiome/Multiome.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils

workflow Multiome {

String pipeline_version = "5.9.2"
String pipeline_version = "5.9.3"

input {
String cloud_provider
Expand Down
5 changes: 5 additions & 0 deletions pipelines/skylab/optimus/Optimus.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 7.8.4
2024-12-03 (Date of Last Commit)

* Fixed a bug in the StarSoloFastq task that caused the pipeline to not output a UniqueAndMult-Uniform.mtx when --soloMultiMappers Uniform was passed to STAR

# 7.8.3
2024-11-22 (Date of Last Commit)

Expand Down
2 changes: 1 addition & 1 deletion pipelines/skylab/optimus/Optimus.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ workflow Optimus {
# version of this pipeline

String pipeline_version = "7.8.3"
String pipeline_version = "7.8.4"


# this is used to scatter matched [r1_fastq, r2_fastq, i1_fastq] arrays
Expand Down
5 changes: 5 additions & 0 deletions pipelines/skylab/paired_tag/PairedTag.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 1.8.4
2024-12-03 (Date of Last Commit)

* Fixed a bug in the StarSoloFastq task that caused the pipeline to not output a UniqueAndMult-Uniform.mtx when --soloMultiMappers Uniform was passed to STAR

# 1.8.3
2024-11-22 (Date of Last Commit)

Expand Down
2 changes: 1 addition & 1 deletion pipelines/skylab/paired_tag/PairedTag.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils

workflow PairedTag {

String pipeline_version = "1.8.3"
String pipeline_version = "1.8.4"


input {
Expand Down
5 changes: 5 additions & 0 deletions pipelines/skylab/slideseq/SlideSeq.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 3.4.7
2024-12-03 (Date of Last Commit)

* Fixed a bug in the StarSoloFastq task that caused the pipeline to not output a UniqueAndMult-Uniform.mtx when --soloMultiMappers Uniform was passed to STAR; this does not affect the outputs of the pipeline

# 3.4.6
2024-11-15 (Date of Last Commit)

Expand Down
2 changes: 1 addition & 1 deletion pipelines/skylab/slideseq/SlideSeq.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils

workflow SlideSeq {

String pipeline_version = "3.4.6"
String pipeline_version = "3.4.7"

input {
Array[File] r1_fastq
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 2.0.6
2024-11-15 (Date of Last Commit)

* Fixed a bug in the StarSoloFastq task that caused the pipeline to not output a UniqueAndMult-Uniform.mtx when --soloMultiMappers Uniform was passed to STAR; this does not affect the outputs of the pipeline

# 2.0.5
2024-11-15 (Date of Last Commit)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ workflow MultiSampleSmartSeq2SingleNucleus {
}

# Version of this pipeline
String pipeline_version = "2.0.5"
String pipeline_version = "2.0.6"

if (false) {
String? none = "None"
Expand Down
44 changes: 24 additions & 20 deletions tasks/skylab/StarAlign.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -349,11 +349,12 @@ task STARsoloFastq {
then
SoloDirectory="Solo.out/Gene/raw"
echo "SoloDirectory is $SoloDirectory"
#find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} echo mv {} /cromwell_root/
#find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/
echo "list matrix files in $SoloDirectory"
ls "$SoloDirectory"/*.mtx
mv $SoloDirectory/matrix.mtx matrix.mtx
find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} echo mv {} /cromwell_root/
find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/

echo "Listing the files in the current directory:"
ls -l

mv "Solo.out/Gene/raw/barcodes.tsv" barcodes.tsv
mv "Solo.out/Gene/raw/features.tsv" features.tsv
mv "Solo.out/Gene/CellReads.stats" CellReads.stats
Expand All @@ -366,11 +367,12 @@ task STARsoloFastq {
then
SoloDirectory="Solo.out/GeneFull_Ex50pAS/raw"
echo "SoloDirectory is $SoloDirectory"
#find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} echo mv {} /cromwell_root/
#find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/
echo "list matrix files in $SoloDirectory"
ls "$SoloDirectory"/*.mtx
mv $SoloDirectory/matrix.mtx matrix.mtx
find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} echo mv {} /cromwell_root/
find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/

echo "Listing the files in the current directory"
ls -l

mv "Solo.out/GeneFull_Ex50pAS/raw/barcodes.tsv" barcodes.tsv
mv "Solo.out/GeneFull_Ex50pAS/raw/features.tsv" features.tsv
mv "Solo.out/GeneFull_Ex50pAS/CellReads.stats" CellReads.stats
Expand All @@ -380,18 +382,20 @@ task STARsoloFastq {
else
SoloDirectory="Solo.out/GeneFull_Ex50pAS/raw"
echo "SoloDirectory is $SoloDirectory"
#find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} echo mv {} /cromwell_root/
#find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/
echo "list matrix files in $SoloDirectory"
ls "$SoloDirectory"/*.mtx
mv $SoloDirectory/matrix.mtx matrix.mtx
find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} echo mv {} /cromwell_root/
find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/

echo "Listing the files in the current directory"
ls -l

SoloDirectory="Solo.out/Gene/raw"
echo "SoloDirectory is $SoloDirectory"
#find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx"; echo mv {} "/cromwell_root/$new_name"'
#find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx"; mv {} "/cromwell_root/$new_name"'
echo "list matrix files in $SoloDirectory"
ls "$SoloDirectory"/*.mtx
mv $SoloDirectory/matrix.mtx matrix_sn_rna.mtx
find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx"; echo mv {} "/cromwell_root/$new_name"'
find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx"; mv {} "/cromwell_root/$new_name"'

echo "Listing the files in the current directory"
ls -l

mv "Solo.out/GeneFull_Ex50pAS/raw/barcodes.tsv" barcodes.tsv
mv "Solo.out/GeneFull_Ex50pAS/raw/features.tsv" features.tsv
mv "Solo.out/GeneFull_Ex50pAS/CellReads.stats" CellReads.stats
Expand Down
7 changes: 7 additions & 0 deletions website/docs/Pipelines/ATAC/atac.methods.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# ATAC v2.3.1 Methods

# Methods

Data preprocessing and analysis for 10x chromatin accessibility was performed using the ATAC workflow v2.3.1 (RRID:SCR_025042). Briefly, FASTQ files were processed with a custom tool, fastqprocess, which corrects cell barcodes against a reference whitelist and splits reads by barcode to enable processing parallelization. Adaptor sequences were then removed from reads using Cutadapt v4.4. Reads were then aligned to the reference genome using BWA-MEM2 v2.2.1 with default parameters, which outputs corrected barcodes to a BAM in the CB:Z tag. The resulting BAM was then processed with SnapATAC2 v2.7.0 to produce a fragment file, index, and h5ad containing fragments as well as per-barcode quality metrics.

An overview of the pipeline is available in [WARP Documentation](https://broadinstitute.github.io/warp/docs/Pipelines/ATAC/README) and examples of genomic references, whitelists, and other inputs are available in the [WARP repository](https://github.com/broadinstitute/warp/tree/master/pipelines/skylab/multiome/test_inputs).
Loading

0 comments on commit ed5064d

Please sign in to comment.