diff --git a/verification/VerifyMultiome.wdl b/verification/VerifyMultiome.wdl index 4c001dfd53..fa6d3b4676 100644 --- a/verification/VerifyMultiome.wdl +++ b/verification/VerifyMultiome.wdl @@ -75,10 +75,10 @@ workflow VerifyMultiome { test_h5ad = test_optimus_h5ad, truth_h5ad = truth_optimus_h5ad } - call VerifyTasks.CompareTextFiles as CompareLibraryMetrics { + call VerifyTasks.CompareLibraryFiles as CompareLibraryMetrics { input: - test_text_files = select_all([test_library_metrics]), - truth_text_files = select_all([truth_library_metrics]) + test_text_file = test_library_metrics, + truth_text_file = truth_library_metrics } call VerifyTasks.CompareTextFiles as CompareAtacLibraryMetrics { input: diff --git a/verification/VerifyOptimus.wdl b/verification/VerifyOptimus.wdl index 5832560633..d52b71129c 100644 --- a/verification/VerifyOptimus.wdl +++ b/verification/VerifyOptimus.wdl @@ -48,9 +48,9 @@ workflow VerifyOptimus { truth_h5ad = truth_h5ad } - call VerifyTasks.CompareTextFiles as CompareLibraryMetrics { + call VerifyTasks.CompareLibraryFiles as CompareLibraryMetrics { input: - test_text_files = select_all([test_library_metrics]), - truth_text_files = select_all([truth_library_metrics]) + test_text_file = test_library_metrics, + truth_text_file = truth_library_metrics } } \ No newline at end of file diff --git a/verification/VerifyPairedTag.wdl b/verification/VerifyPairedTag.wdl index dda6350f05..a18e85a44d 100644 --- a/verification/VerifyPairedTag.wdl +++ b/verification/VerifyPairedTag.wdl @@ -72,9 +72,9 @@ workflow VerifyPairedTag { test_h5ad = test_optimus_h5ad, truth_h5ad = truth_optimus_h5ad } - call VerifyTasks.CompareTextFiles as CompareLibraryMetrics { + call VerifyTasks.CompareLibraryFiles as CompareLibraryMetrics { input: - test_text_files = select_all([test_library_metrics]), - truth_text_files = select_all([truth_library_metrics]) + test_text_file = test_library_metrics, + truth_text_file = truth_library_metrics } } \ No newline at end of file 
# Compares a test library-metrics text file against its truth counterpart.
#
# Inputs:
#   test_text_file  - file produced by the run under test
#   truth_text_file - reference file to compare against
#
# Behaviour:
#   1. Both files are sorted so that line order does not affect the result.
#   2. If the md5sums of the sorted copies match, the task succeeds.
#   3. Otherwise every line matching one of the excluded metric names is
#      dropped from both copies and the remainder is diffed. Any remaining
#      difference makes the task exit with code 2.
#
# NOTE(review): the excluded metrics are presumably values that legitimately
# vary between runs -- confirm with the pipeline owners. Because excluded lines
# are removed before diffing, a metric that is missing entirely from one of the
# two files is not reported.
task CompareLibraryFiles {
  input {
    File test_text_file
    File truth_text_file
  }

  command {
    # Abort on the first failing command so that a failed sort/md5sum cannot
    # leave two empty files that then compare as "identical".
    set -euo pipefail

    exit_code=0

    a=~{test_text_file}
    b=~{truth_text_file}

    echo "Sorting files $a and $b"
    sort "$a" > a.sorted
    sort "$b" > b.sorted

    echo "Calculating md5sums for sorted copies of $a and $b"
    md5_a=$(md5sum a.sorted | cut -d ' ' -f1)
    md5_b=$(md5sum b.sorted | cut -d ' ' -f1)

    if [ "$md5_a" = "$md5_b" ]; then
      echo "Sorted copies of $a and $b have matching md5sums and are the same."
    else
      echo "Sorted copies of $a and $b have different md5sums."

      # Metric names whose lines are ignored in the fallback comparison.
      excluded_lines="percent_doublets|keeper_cells|keeper_mean_reads_per_cell|keeper_median_genes|percent_keeper|percent_usable"

      # grep exits 1 when it selects no lines (every line was excluded); that is
      # not an error here. Any other non-zero status is a real failure and is
      # left to abort the task.
      grep -v -E "$excluded_lines" a.sorted > a.filtered || [ $? -eq 1 ]
      grep -v -E "$excluded_lines" b.sorted > b.filtered || [ $? -eq 1 ]

      # diff exits 0 when the inputs match, 1 when they differ and >1 on
      # trouble. Capture the status explicitly so an error inside diff is not
      # mistaken for "no differences" just because it printed nothing.
      diff_status=0
      diff_output=$(diff a.filtered b.filtered) || diff_status=$?

      if [ "$diff_status" -eq 0 ]; then
        echo "Sorted copies of $a and $b are the same when excluding specified lines."
      elif [ "$diff_status" -eq 1 ]; then
        echo "Sorted copies of $a and $b have differences in non-excluded lines."
        echo "$diff_output"
        exit_code=2
      else
        echo "diff failed with status $diff_status while comparing $a and $b" >&2
        exit_code=$diff_status
      fi
    fi

    echo "Exiting with code $exit_code"
    exit $exit_code
  }

  runtime {
    docker: "gcr.io/gcp-runtimes/ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf"
    disks: "local-disk 100 HDD"
    memory: "50 GiB"
    preemptible: 3
  }
}