From c2274e3a9034f401e82af6145c6b97046a4b34fe Mon Sep 17 00:00:00 2001 From: John Huddleston Date: Tue, 3 Sep 2024 16:19:21 -0700 Subject: [PATCH] Pin deepdiff to later version Pins deepdiff to the latest version (8) which includes a fix for a PyYAML dependency issue with Cython [1]. Also, replaces the deep diff CLI command in a functional test with a custom Python script that uses the DeepDiff class, working around an issue where the deep diff CLI did not properly recognize numeric values and caused the functional test to fail spuriously. This commit should fix CI for Python versions 3.10 and 3.11. [1] https://github.com/seperman/deepdiff/pull/406 --- pyproject.toml | 2 +- scripts/diff_tsv.py | 39 +++++++++++++++++++++++++++++++++++++ tests/functional/forecast.t | 2 +- 3 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 scripts/diff_tsv.py diff --git a/pyproject.toml b/pyproject.toml index 4c22004..52cd442 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ dependencies = [ test = [ "coverage[toml] >=5.2.1, ==5.*", "cram >=0.7, ==0.*", - "deepdiff[cli] >=5.2.0, ==5.*", + "deepdiff[cli] >=8.0.0, ==8.*", "flake8 >=3.9.0, ==3.*", "pylint >=2.14.5, ==2.*", ] diff --git a/scripts/diff_tsv.py b/scripts/diff_tsv.py new file mode 100644 index 0000000..7c9ed45 --- /dev/null +++ b/scripts/diff_tsv.py @@ -0,0 +1,39 @@ +"""Compare TSV files line by line with deepdiff +""" +import argparse +import deepdiff +import pandas as pd + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Compare TSV files line by line with deepdiff", + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument("first_tsv", help="first TSV to compare") + parser.add_argument("second_tsv", help="second TSV to compare") + parser.add_argument("--significant-digits", type=int, default=6, help="number of significant digits to use when comparing numeric values") + + args = parser.parse_args() + + first_tsv = pd.read_csv( + 
args.first_tsv, + sep="\t", + header=None, + na_filter=False, + ).to_dict() + + second_tsv = pd.read_csv( + args.second_tsv, + sep="\t", + header=None, + na_filter=False, + ).to_dict() + + print( + deepdiff.DeepDiff( + first_tsv, + second_tsv, + significant_digits=args.significant_digits, + ) + ) diff --git a/tests/functional/forecast.t b/tests/functional/forecast.t index a14a918..a86cad4 100644 --- a/tests/functional/forecast.t +++ b/tests/functional/forecast.t @@ -9,7 +9,7 @@ Forecast frequencies with a model trained on simulated data. > --model data/simulated_sample_1/normalized_fitness.json \ > --delta-months 12 \ > --output-table "$TMP/forecasts.tsv" > /dev/null - $ deep diff --significant-digits 6 "data/simulated_sample_1/forecasts.tsv" "$TMP/forecasts.tsv" + $ python3 ../../scripts/diff_tsv.py "data/simulated_sample_1/forecasts.tsv" "$TMP/forecasts.tsv" {} $ rm -f "$TMP/forecasts.tsv"