diff --git a/pipelines/skylab/atac/atac.changelog.md b/pipelines/skylab/atac/atac.changelog.md index c35fc26a23..53824b480c 100644 --- a/pipelines/skylab/atac/atac.changelog.md +++ b/pipelines/skylab/atac/atac.changelog.md @@ -5,6 +5,7 @@ * Updated the ATAC library CSV to be consistent in file naming convention and to have similar case for metric names to the Optimus workflow library CSV * Added a new metric to the ATAC library CSV to calculate percent_target, which is the number of estimated cells by SnapATAC2 divided by expected_cells input * Updated the ATAC workflow so that the output fragment file is bgzipped by default +* Updated memory settings for PairedTag; this does not impact the ATAC workflow # 2.3.2 diff --git a/pipelines/skylab/multiome/Multiome.changelog.md b/pipelines/skylab/multiome/Multiome.changelog.md index ce69629951..daf4ab3814 100644 --- a/pipelines/skylab/multiome/Multiome.changelog.md +++ b/pipelines/skylab/multiome/Multiome.changelog.md @@ -5,6 +5,7 @@ * Updated the ATAC library CSV and the Gene Expression library CSV to be consistent in file naming convention and to have similar case for metric names * Added a new metric to the ATAC library CSV to calculate percent_target, which is the number of estimated cells by SnapATAC2 divided by expected_cells input * Updated the ATAC workflow so that the output fragment file is bgzipped by default +* Updated memory settings for PairedTag; this does not impact the Multiome workflow # 5.7.1 2024-10-18 (Date of Last Commit) diff --git a/pipelines/skylab/optimus/Optimus.changelog.md b/pipelines/skylab/optimus/Optimus.changelog.md index a201ddc395..26ab433675 100644 --- a/pipelines/skylab/optimus/Optimus.changelog.md +++ b/pipelines/skylab/optimus/Optimus.changelog.md @@ -5,6 +5,7 @@ * Updated gex_expected_cells to a required output * Reformatted the library CSV output filename to remove an extra gex * Updated the ATAC fragment file output so that it is bgzipped; this does not impact the Optimus workflow +* 
Updated memory settings for PairedTag; this does not impact the Optimus workflow # 7.7.0 2024-09-24 (Date of Last Commit) diff --git a/pipelines/skylab/paired_tag/PairedTag.changelog.md b/pipelines/skylab/paired_tag/PairedTag.changelog.md index 930ed976fc..e973d75d0a 100644 --- a/pipelines/skylab/paired_tag/PairedTag.changelog.md +++ b/pipelines/skylab/paired_tag/PairedTag.changelog.md @@ -5,6 +5,7 @@ * Updated the ATAC library CSV and the Gene Expression library CSV to be consistent in file naming convention and to have similar case for metric names * Added a new metric to the ATAC library CSV to calculate percent_target, which is the number of estimated cells by SnapATAC2 divided by expected_cells input * Updated the ATAC fragment file output so that it is bgzipped +* Updated memory settings for PairedTag Utils # 1.7.1 2024-10-18 (Date of Last Commit) diff --git a/pipelines/skylab/slideseq/SlideSeq.changelog.md b/pipelines/skylab/slideseq/SlideSeq.changelog.md index e67f2c83a8..8f2bf98f52 100644 --- a/pipelines/skylab/slideseq/SlideSeq.changelog.md +++ b/pipelines/skylab/slideseq/SlideSeq.changelog.md @@ -3,6 +3,7 @@ * Updated the h5adUtils WDL to rename the gene expression library CSV filename; this does not impact slideseq * Updated the ATAC fragment file output so that it is bgzipped; this does not impact the slideseq workflow +* Updated memory settings for PairedTag; this does not impact the slideseq workflow # 3.4.2 2024-09-24 (Date of Last Commit) diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md index 9b2e810e79..6bb7385e8f 100644 --- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md +++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md @@ -3,6 +3,7 @@ * Updated the h5adUtils WDL to rename the gene 
expression library CSV filename; this does not impact slideseq * Updated the ATAC fragment file output so that it is bgzipped; this does not impact the Multi-snSS2 workflow +* Updated memory settings for PairedTag; this does not impact the Multi-snSS2 workflow # 2.0.1 2024-09-24 (Date of Last Commit) diff --git a/tasks/skylab/H5adUtils.wdl b/tasks/skylab/H5adUtils.wdl index f877aa462b..688ada62c2 100644 --- a/tasks/skylab/H5adUtils.wdl +++ b/tasks/skylab/H5adUtils.wdl @@ -235,8 +235,8 @@ task JoinMultiomeBarcodes { Int nthreads = 1 String cpuPlatform = "Intel Cascade Lake" - Int machine_mem_mb = ceil((size(atac_h5ad, "MiB") + size(gex_h5ad, "MiB") + size(atac_fragment, "MiB")) * 3) + 10000 - Int disk = ceil((size(atac_h5ad, "GiB") + size(gex_h5ad, "GiB") + size(atac_fragment, "GiB")) * 5) + 10 + Int machine_mem_mb = ceil((size(atac_h5ad, "MiB") + size(gex_h5ad, "MiB") + size(atac_fragment, "MiB")) * 6) + 10000 + Int disk = ceil((size(atac_h5ad, "GiB") + size(gex_h5ad, "GiB") + size(atac_fragment, "GiB")) * 8) + 10 String docker_path } String gex_base_name = basename(gex_h5ad, ".h5ad") @@ -255,8 +255,10 @@ task JoinMultiomeBarcodes { set -e pipefail # decompress the bgzipped fragment file + echo "Moving fragment file for bgzipping" + mv ~{atac_fragment} ~{atac_fragment_base}.sorted.tsv.gz echo "Decompressing fragment file" - bgzip -d ~{atac_fragment} > "~{atac_fragment_base}.sorted.tsv" + bgzip -d "~{atac_fragment_base}.sorted.tsv.gz" echo "Done decompressing" @@ -276,12 +278,14 @@ task JoinMultiomeBarcodes { print("Reading ATAC h5ad:") print("~{atac_h5ad}") print("Read ATAC fragment file:") - print("~{atac_fragment}") + print(atac_fragment) print("Reading Optimus h5ad:") print("~{gex_h5ad}") atac_data = ad.read_h5ad("~{atac_h5ad}") gex_data = ad.read_h5ad("~{gex_h5ad}") atac_tsv = pd.read_csv(atac_fragment, sep="\t", names=['chr','start', 'stop', 'barcode','n_reads']) + print("Printing ATAC fragment tsv") + print(atac_tsv) whitelist_gex = pd.read_csv("~{gex_whitelist}", 
header=None, names=["gex_barcodes"]) whitelist_atac = pd.read_csv("~{atac_whitelist}", header=None, names=["atac_barcodes"]) @@ -317,6 +321,7 @@ task JoinMultiomeBarcodes { atac_data.write_h5ad("~{atac_base_name}.h5ad") df_fragment.to_csv("~{atac_fragment_base}.tsv", sep='\t', index=False, header = False) CODE + # sorting the file echo "Sorting file" sort -k1,1V -k2,2n "~{atac_fragment_base}.tsv" > "~{atac_fragment_base}.sorted.tsv" diff --git a/tasks/skylab/PairedTagUtils.wdl b/tasks/skylab/PairedTagUtils.wdl index 5590ec980d..f96e88429e 100644 --- a/tasks/skylab/PairedTagUtils.wdl +++ b/tasks/skylab/PairedTagUtils.wdl @@ -205,13 +205,13 @@ task ParseBarcodes { Int nthreads = 1 String cpuPlatform = "Intel Cascade Lake" String docker_path + Int disk = ceil((size(atac_h5ad, "GiB") + size(atac_fragment, "GiB")) * 8) + 10 + Int machine_mem_mb = ceil((size(atac_h5ad, "MiB") + size(atac_fragment, "MiB")) * 6) + 10000 } String atac_base_name = basename(atac_h5ad, ".h5ad") String atac_fragment_base = basename(atac_fragment, ".sorted.tsv.gz") - Int machine_mem_mb = ceil((size(atac_h5ad, "MiB") + size(atac_fragment, "MiB")) * 3) + 10000 - Int disk = ceil((size(atac_h5ad, "GiB") + size(atac_fragment, "GiB")) * 5) + 10 parameter_meta { atac_h5ad: "The resulting h5ad from the ATAC workflow." @@ -222,8 +222,10 @@ task ParseBarcodes { set -e pipefail # decompress the bgzipped atac file + echo "Moving fragment tsv for decompression" + mv ~{atac_fragment} ~{atac_fragment_base}.sorted.tsv.gz echo "Decompressing fragment file" - bgzip -d ~{atac_fragment} > "~{atac_fragment_base}.sorted.tsv" + bgzip -d "~{atac_fragment_base}.sorted.tsv.gz" echo "Done decompressing" python3 <