Skip to content

Commit

Permalink
Np add metadata.txt file to build indices (#1435)
Browse files Browse the repository at this point in the history
* add pipeline metadata txt to BuildIndices.wdl

* more disk space

* more disk space

* more disk space

* more disk space

* more disk space

* more disk space

* more disk space

* more disk space

* more disk space

* more disk space

* more disk space

* more disk space

* need all as files

* need all as files

* need all as files

* need all as files

* grab the full paths

* changelogs

* Updated pipeline_versions.txt with all pipeline version information

---------

Co-authored-by: GitHub Action <[email protected]>
  • Loading branch information
nikellepetrillo and actions-user authored Dec 5, 2024
1 parent 39c6aac commit 9add5e9
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 2 deletions.
2 changes: 1 addition & 1 deletion pipeline_versions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ ExternalExomeReprocessing 3.3.3 2024-11-04
CramToUnmappedBams 1.1.3 2024-08-02
WholeGenomeReprocessing 3.3.3 2024-11-04
ExomeReprocessing 3.3.3 2024-11-04
BuildIndices 3.0.0 2023-12-06
BuildIndices 3.1.0 2024-11-26
scATAC 1.3.2 2023-08-03
snm3C 4.0.4 2024-08-06
Multiome 5.9.2 2024-11-22
Expand Down
5 changes: 5 additions & 0 deletions pipelines/skylab/build_indices/BuildIndices.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 3.1.0
2024-11-26 (Date of Last Commit)

* Added metadata.txt file as an output to the pipeline

# 3.0.0
2023-12-06 (Date of Last Commit)

Expand Down
73 changes: 72 additions & 1 deletion pipelines/skylab/build_indices/BuildIndices.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ workflow BuildIndices {
}

# version of this pipeline
String pipeline_version = "3.0.0"
String pipeline_version = "3.1.0"


parameter_meta {
Expand Down Expand Up @@ -49,12 +49,25 @@ workflow BuildIndices {
organism = organism
}

# Write a provenance report (metadata.txt) for this run: the pipeline version
# plus the path and md5sum of every file listed below.
call RecordMetadata {
input:
pipeline_version = pipeline_version,
# Workflow inputs whose checksums are recorded.
input_files = [annotations_gtf, genome_fa, biotypes],
# Generated files whose checksums are recorded. RecordMetadata also parses the
# path of the FIRST element to fill in the bucket / submission / workflow ID
# lines, so keep a real task output in position 0.
output_files = [
BuildStarSingleNucleus.star_index,
BuildStarSingleNucleus.modified_annotation_gtf,
CalculateChromosomeSizes.chrom_sizes,
BuildBWAreference.reference_bundle
]
}

# Workflow outputs: each generated file is passed through from the task that
# produced it.
output {
File snSS2_star_index = BuildStarSingleNucleus.star_index
# Pipeline version prefixed with the workflow name ("BuildIndices_v<version>").
String pipeline_version_out = "BuildIndices_v~{pipeline_version}"
File snSS2_annotation_gtf_modified = BuildStarSingleNucleus.modified_annotation_gtf
File reference_bundle = BuildBWAreference.reference_bundle
File chromosome_sizes = CalculateChromosomeSizes.chrom_sizes
# metadata.txt written by RecordMetadata (version, run date, file md5sums).
File metadata = RecordMetadata.metadata_file
}
}
Expand Down Expand Up @@ -195,3 +208,61 @@ String reference_name = "bwa-mem2-2.2.1-~{organism}-~{genome_source}-build-~{gen
}
}


# Writes metadata.txt, a provenance report for one workflow run containing:
#   * the pipeline version and the UTC timestamp of the run,
#   * the localized path and md5sum of every input and output file,
#   * the workspace bucket, submission ID and workflow ID, taken from the
#     3rd, 5th and 7th '/'-separated fields of the first output file's path.
# NOTE(review): the bucket / submission / workflow fields assume the execution
# directory layout of the backend this was written for; on any other layout
# those lines hold whatever path components sit at those positions (or are
# empty). Confirm before relying on them elsewhere.
task RecordMetadata {
  input {
    # Version string reported on the first line of metadata.txt.
    String pipeline_version
    # Files listed under "Input Files and their md5sums:".
    Array[File] input_files
    # Files listed under "Output Files and their md5sums:"; element 0 is also
    # the path that the bucket / submission / workflow IDs are parsed from.
    Array[File] output_files
  }

  command <<<
    set -euo pipefail

    # Print one "<path> : <md5>" line per argument.
    # The checksum is captured in its own assignment so that a failing md5sum
    # aborts the task under `set -e`. When the substitution was embedded
    # directly in the echo argument, echo's own exit status hid the failure and
    # an empty checksum was recorded silently.
    record_md5sums() {
      local path checksum
      for path in "$@"; do
        checksum=$(md5sum -- "$path" | awk '{print $1}')
        printf '%s : %s\n' "$path" "$checksum"
      done
    }

    # Print field number $2 of path $1 split on '/', or an empty line when the
    # path has fewer fields.
    path_field() {
      printf '%s\n' "$1" | awk -F'/' -v n="$2" '{print $n}'
    }

    first_output="~{output_files[0]}"

    # Build the whole report in one group so metadata.txt is opened once.
    {
      echo "Pipeline Version: ~{pipeline_version}"
      echo "Date of Workflow Run: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
      echo ""

      # paths and md5sums for input files
      # (the interpolated list is whitespace-separated, so localized paths are
      # assumed to contain no whitespace)
      echo "Input Files and their md5sums:"
      record_md5sums ~{sep=" " input_files}
      echo ""

      # paths and md5sums for output files
      echo "Output Files and their md5sums:"
      record_md5sums ~{sep=" " output_files}
      echo ""

      # run identifiers parsed from the first output file's path
      echo "Workspace Bucket: $(path_field "$first_output" 3)"
      echo "Submission ID: $(path_field "$first_output" 5)"
      echo "Workflow ID: $(path_field "$first_output" 7)"

      echo ""
    } > metadata.txt
  >>>

  output {
    File metadata_file = "metadata.txt"
  }

  runtime {
    docker: "ubuntu:20.04"
    memory: "5 GiB"
    disks: "local-disk 100 HDD"
    cpu: "1"
  }
}

0 comments on commit 9add5e9

Please sign in to comment.