From 78126f5e7175300aab9d58857f13db89e399965b Mon Sep 17 00:00:00 2001 From: ekiernan Date: Thu, 17 Oct 2024 13:15:57 -0400 Subject: [PATCH 1/8] adding new verification test --- verification/VerifyOptimus.wdl | 2 +- verification/VerifyTasks.wdl | 58 ++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/verification/VerifyOptimus.wdl b/verification/VerifyOptimus.wdl index 5832560633..a2c015c0ef 100644 --- a/verification/VerifyOptimus.wdl +++ b/verification/VerifyOptimus.wdl @@ -48,7 +48,7 @@ workflow VerifyOptimus { truth_h5ad = truth_h5ad } - call VerifyTasks.CompareTextFiles as CompareLibraryMetrics { + call VerifyTasks.CompareLibraryFiles as CompareLibraryMetrics { input: test_text_files = select_all([test_library_metrics]), truth_text_files = select_all([truth_library_metrics]) diff --git a/verification/VerifyTasks.wdl b/verification/VerifyTasks.wdl index d21435c039..e715002c88 100644 --- a/verification/VerifyTasks.wdl +++ b/verification/VerifyTasks.wdl @@ -613,3 +613,61 @@ task CompareSnapTextFiles { } +task CompareLibraryFiles { + input { + Array[File] test_text_files + Array[File] truth_text_files + } + + command { + exit_code=0 + + test_files_length=~{length(test_text_files)} + truth_files_length=~{length(truth_text_files)} + if [ $test_files_length -ne $truth_files_length ]; then + exit_code=1 + echo "Error: Different number of input files ($test_files_length vs. $truth_files_length). This is really not OK" + fi + + while read -r a && read -r b <&3; + do + echo "Sorting File $a and $b" + sort $a > $a.sorted + sort $b > $b.sorted + + echo "Calculating md5sums for $a and $b" + md5_a=$(md5sum $a.sorted | cut -d ' ' -f1) + md5_b=$(md5sum $b.sorted | cut -d ' ' -f1) + + if [ $md5_a = $md5_b ]; then + echo "Files $a.sorted and $b.sorted have matching md5sums and are the same." + else + echo "Files $a.sorted and $b.sorted have different md5sums." + + # Compare the files, excluding specific lines + excluded_lines="percent_doublets|keeper_cells|keeper_mean_reads_per_cell|keeper_median_genes|percent_keeper|percent_usable" + + # Store the diff result, but only check non-excluded lines + diff_output=$(diff <(grep -v -E "$excluded_lines" $a.sorted) <(grep -v -E "$excluded_lines" $b.sorted)) + + if [ -z "$diff_output" ]; then + echo "Files $a.sorted and $b.sorted are the same when excluding specified lines." + else + echo "Files $a.sorted and $b.sorted have differences in non-excluded lines." + echo "$diff_output" >&2 + exit_code=1 + fi + fi + done < ~{write_lines(test_text_files)} 3<~{write_lines(truth_text_files)} + + echo "Exiting with code $exit_code" + exit $exit_code + } + + runtime { + docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" + disks: "local-disk 100 HDD" + memory: "50 GiB" + preemptible: 3 + } +} \ No newline at end of file From d93206096ba7380a990d60b2569cef1b5e5b03f5 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Tue, 22 Oct 2024 08:33:35 -0400 Subject: [PATCH 2/8] updating diff statements --- verification/VerifyTasks.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/verification/VerifyTasks.wdl b/verification/VerifyTasks.wdl index e715002c88..ca50e2e5ae 100644 --- a/verification/VerifyTasks.wdl +++ b/verification/VerifyTasks.wdl @@ -648,13 +648,13 @@ task CompareLibraryFiles { excluded_lines="percent_doublets|keeper_cells|keeper_mean_reads_per_cell|keeper_median_genes|percent_keeper|percent_usable" # Store the diff result, but only check non-excluded lines - diff_output=$(diff <(grep -v -E "$excluded_lines" $a.sorted) <(grep -v -E "$excluded_lines" $b.sorted)) + diff_output=$(diff <(grep -v -E $excluded_lines $a.sorted) <(grep -v -E $excluded_lines $b.sorted)) if [ -z "$diff_output" ]; then echo "Files $a.sorted and $b.sorted are the same when excluding specified lines." else echo "Files $a.sorted and $b.sorted have differences in non-excluded lines." - echo "$diff_output" >&2 + echo "$diff_output" exit_code=1 fi fi From b7a8e0efa1ef1d9d6d3a30953ad76cd54a0bb26a Mon Sep 17 00:00:00 2001 From: ekiernan Date: Tue, 22 Oct 2024 13:09:55 -0400 Subject: [PATCH 3/8] reconfiguring test --- verification/VerifyTasks.wdl | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/verification/VerifyTasks.wdl b/verification/VerifyTasks.wdl index ca50e2e5ae..297c33c4fe 100644 --- a/verification/VerifyTasks.wdl +++ b/verification/VerifyTasks.wdl @@ -615,31 +615,25 @@ task CompareSnapTextFiles { task CompareLibraryFiles { input { - Array[File] test_text_files - Array[File] truth_text_files + File test_text_file + File truth_text_file } command { exit_code=0 - test_files_length=~{length(test_text_files)} - truth_files_length=~{length(truth_text_files)} - if [ $test_files_length -ne $truth_files_length ]; then - exit_code=1 - echo "Error: Different number of input files ($test_files_length vs. $truth_files_length). This is really not OK" - fi + test_file=~{test_text_files} + truth_file=~{truth_text_files} - while read -r a && read -r b <&3; - do - echo "Sorting File $a and $b" - sort $a > $a.sorted - sort $b > $b.sorted + echo "Sorting files $a and $b" + sort "$a" > "${a}.sorted" + sort "$b" > "${b}.sorted" - echo "Calculating md5sums for $a and $b" - md5_a=$(md5sum $a.sorted | cut -d ' ' -f1) - md5_b=$(md5sum $b.sorted | cut -d ' ' -f1) + echo "Calculating md5sums for $a and $b" + md5_a=$(md5sum "$a.sorted" | cut -d ' ' -f1) + md5_b=$(md5sum "$b.sorted" | cut -d ' ' -f1) - if [ $md5_a = $md5_b ]; then + if [ $md5_a = $md5_b ]; then echo "Files $a.sorted and $b.sorted have matching md5sums and are the same." else echo "Files $a.sorted and $b.sorted have different md5sums." @@ -655,11 +649,9 @@ task CompareLibraryFiles { else echo "Files $a.sorted and $b.sorted have differences in non-excluded lines." echo "$diff_output" - exit_code=1 + exit_code=2 fi fi - done < ~{write_lines(test_text_files)} 3<~{write_lines(truth_text_files)} - echo "Exiting with code $exit_code" exit $exit_code } From 744548e0601da39832a338d2c992827cdd137dca Mon Sep 17 00:00:00 2001 From: ekiernan Date: Tue, 22 Oct 2024 13:19:24 -0400 Subject: [PATCH 4/8] fix typo --- verification/VerifyTasks.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/verification/VerifyTasks.wdl b/verification/VerifyTasks.wdl index 297c33c4fe..87b28a8f90 100644 --- a/verification/VerifyTasks.wdl +++ b/verification/VerifyTasks.wdl @@ -622,8 +622,8 @@ task CompareLibraryFiles { command { exit_code=0 - test_file=~{test_text_files} - truth_file=~{truth_text_files} + test_file=~{test_text_file} + truth_file=~{truth_text_file} echo "Sorting files $a and $b" sort "$a" > "${a}.sorted" From 5dbd9c99a602d81a8c00e6adce87df5239268ee7 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Tue, 22 Oct 2024 13:50:47 -0400 Subject: [PATCH 5/8] remove a and b as wdl variables --- verification/VerifyTasks.wdl | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/verification/VerifyTasks.wdl b/verification/VerifyTasks.wdl index 87b28a8f90..0c94aa9fb7 100644 --- a/verification/VerifyTasks.wdl +++ b/verification/VerifyTasks.wdl @@ -622,16 +622,16 @@ task CompareLibraryFiles { command { exit_code=0 - test_file=~{test_text_file} - truth_file=~{truth_text_file} + a=~{test_text_file} + b=~{truth_text_file} echo "Sorting files $a and $b" - sort "$a" > "${a}.sorted" - sort "$b" > "${b}.sorted" + sort "$a" > "a.sorted" + sort "$b" > "b.sorted" echo "Calculating md5sums for $a and $b" - md5_a=$(md5sum "$a.sorted" | cut -d ' ' -f1) - md5_b=$(md5sum "$b.sorted" | cut -d ' ' -f1) + md5_a=$(md5sum "a.sorted" | cut -d ' ' -f1) + md5_b=$(md5sum "b.sorted" | cut -d ' ' -f1) if [ $md5_a = $md5_b ]; then echo "Files $a.sorted and $b.sorted have matching md5sums and are the same." @@ -642,12 +642,12 @@ task CompareLibraryFiles { excluded_lines="percent_doublets|keeper_cells|keeper_mean_reads_per_cell|keeper_median_genes|percent_keeper|percent_usable" # Store the diff result, but only check non-excluded lines - diff_output=$(diff <(grep -v -E $excluded_lines $a.sorted) <(grep -v -E $excluded_lines $b.sorted)) + diff_output=$(diff <(grep -v -E $excluded_lines a.sorted) <(grep -v -E $excluded_lines b.sorted)) if [ -z "$diff_output" ]; then - echo "Files $a.sorted and $b.sorted are the same when excluding specified lines." + echo "Files a.sorted and $b.sorted are the same when excluding specified lines." else - echo "Files $a.sorted and $b.sorted have differences in non-excluded lines." + echo "Files a.sorted and b.sorted have differences in non-excluded lines." echo "$diff_output" exit_code=2 fi From b0c49980bea13687ab898aeade4737a5bdeacf68 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Tue, 22 Oct 2024 14:26:57 -0400 Subject: [PATCH 6/8] updating inputs on verify pipeline wdls --- verification/VerifyMultiome.wdl | 4 ++-- verification/VerifyOptimus.wdl | 4 ++-- verification/VerifyPairedTag.wdl | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/verification/VerifyMultiome.wdl b/verification/VerifyMultiome.wdl index b98de16dcb..fa172f500f 100644 --- a/verification/VerifyMultiome.wdl +++ b/verification/VerifyMultiome.wdl @@ -74,7 +74,7 @@ workflow VerifyMultiome { } call VerifyTasks.CompareTextFiles as CompareLibraryMetrics { input: - test_text_files = select_all([test_library_metrics]), - truth_text_files = select_all([truth_library_metrics]) + test_text_file = test_library_metrics, + truth_text_file = truth_library_metrics } } \ No newline at end of file diff --git a/verification/VerifyOptimus.wdl b/verification/VerifyOptimus.wdl index a2c015c0ef..d52b71129c 100644 --- a/verification/VerifyOptimus.wdl +++ b/verification/VerifyOptimus.wdl @@ -50,7 +50,7 @@ workflow VerifyOptimus { call VerifyTasks.CompareLibraryFiles as CompareLibraryMetrics { input: - test_text_files = select_all([test_library_metrics]), - truth_text_files = select_all([truth_library_metrics]) + test_text_file = test_library_metrics, + truth_text_file = truth_library_metrics } } \ No newline at end of file diff --git a/verification/VerifyPairedTag.wdl b/verification/VerifyPairedTag.wdl index dda6350f05..48dc4306a0 100644 --- a/verification/VerifyPairedTag.wdl +++ b/verification/VerifyPairedTag.wdl @@ -74,7 +74,7 @@ workflow VerifyPairedTag { } call VerifyTasks.CompareTextFiles as CompareLibraryMetrics { input: - test_text_files = select_all([test_library_metrics]), - truth_text_files = select_all([truth_library_metrics]) + test_text_file = test_library_metrics, + truth_text_file = truth_library_metrics } } \ No newline at end of file From 6f1881017b68969b2ddab4894b76e43fd1b15ba3 Mon Sep 17 00:00:00 2001 From: ekiernan Date: Tue, 22 Oct 2024 14:28:54 -0400 Subject: [PATCH 7/8] more updates --- verification/VerifyMultiome.wdl | 2 +- verification/VerifyPairedTag.wdl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/verification/VerifyMultiome.wdl b/verification/VerifyMultiome.wdl index fa172f500f..d1cad11606 100644 --- a/verification/VerifyMultiome.wdl +++ b/verification/VerifyMultiome.wdl @@ -72,7 +72,7 @@ workflow VerifyMultiome { test_h5ad = test_optimus_h5ad, truth_h5ad = truth_optimus_h5ad } - call VerifyTasks.CompareTextFiles as CompareLibraryMetrics { + call VerifyTasks.CompareLibraryFiles as CompareLibraryMetrics { input: test_text_file = test_library_metrics, truth_text_file = truth_library_metrics diff --git a/verification/VerifyPairedTag.wdl b/verification/VerifyPairedTag.wdl index 48dc4306a0..a18e85a44d 100644 --- a/verification/VerifyPairedTag.wdl +++ b/verification/VerifyPairedTag.wdl @@ -72,7 +72,7 @@ workflow VerifyPairedTag { test_h5ad = test_optimus_h5ad, truth_h5ad = truth_optimus_h5ad } - call VerifyTasks.CompareTextFiles as CompareLibraryMetrics { + call VerifyTasks.CompareLibraryFiles as CompareLibraryMetrics { input: test_text_file = test_library_metrics, truth_text_file = truth_library_metrics From f6bd7855d94144b34f9334fcf07fb63f857e7aab Mon Sep 17 00:00:00 2001 From: ekiernan Date: Wed, 23 Oct 2024 13:29:20 -0400 Subject: [PATCH 8/8] fixing indents --- verification/VerifyTasks.wdl | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/verification/VerifyTasks.wdl b/verification/VerifyTasks.wdl index 0c94aa9fb7..683857e6c8 100644 --- a/verification/VerifyTasks.wdl +++ b/verification/VerifyTasks.wdl @@ -634,24 +634,24 @@ task CompareLibraryFiles { md5_b=$(md5sum "b.sorted" | cut -d ' ' -f1) if [ $md5_a = $md5_b ]; then - echo "Files $a.sorted and $b.sorted have matching md5sums and are the same." - else - echo "Files $a.sorted and $b.sorted have different md5sums." + echo "Files $a.sorted and $b.sorted have matching md5sums and are the same." + else + echo "Files $a.sorted and $b.sorted have different md5sums." - # Compare the files, excluding specific lines - excluded_lines="percent_doublets|keeper_cells|keeper_mean_reads_per_cell|keeper_median_genes|percent_keeper|percent_usable" + # Compare the files, excluding specific lines + excluded_lines="percent_doublets|keeper_cells|keeper_mean_reads_per_cell|keeper_median_genes|percent_keeper|percent_usable" - # Store the diff result, but only check non-excluded lines - diff_output=$(diff <(grep -v -E $excluded_lines a.sorted) <(grep -v -E $excluded_lines b.sorted)) + # Store the diff result, but only check non-excluded lines + diff_output=$(diff <(grep -v -E $excluded_lines a.sorted) <(grep -v -E $excluded_lines b.sorted)) - if [ -z "$diff_output" ]; then + if [ -z "$diff_output" ]; then echo "Files a.sorted and $b.sorted are the same when excluding specified lines." - else - echo "Files a.sorted and b.sorted have differences in non-excluded lines." - echo "$diff_output" - exit_code=2 - fi + else + echo "Files a.sorted and b.sorted have differences in non-excluded lines." + echo "$diff_output" + exit_code=2 fi + fi echo "Exiting with code $exit_code" exit $exit_code }