Implement and test date comparison methods for simpler cases

ref #5
dh-tech · Oct 27, 2023 · 34b6688 · 34b6688
1 parent dfe1e4c
commit 34b6688
Show file tree

Hide file tree

Showing 2 changed files with 191 additions and 15 deletions.
diff --git a/src/undate/undate.py b/src/undate/undate.py
@@ -1,6 +1,6 @@
 import datetime
 from calendar import monthrange
-from enum import Enum, auto
+from enum import IntEnum
 import re
 
 # Pre 3.10 requires Union for multiple types, e.g. Union[int, None] instead of int | None
@@ -15,16 +15,19 @@
 ONE_DAY = datetime.timedelta(days=1)
 
 
-class DatePrecision(Enum):
+class DatePrecision(IntEnum):
     """date precision, to indicate date precision independent from how much
     of the date is known."""
 
-    #: year
-    YEAR = auto()
-    #: month
-    MONTH = auto()
+    # numbers should be set to allow logical greater than / less than
+    # comparison, e.g. year precision > month
+
     #: day
-    DAY = auto()
+    DAY = 1
+    #: month
+    MONTH = 2
+    #: year
+    YEAR = 3
 
     def __str__(self):
         return f"{self.name}"
@@ -177,8 +180,7 @@ def __repr__(self) -> str:
         return "<Undate %s>" % self
 
     def __eq__(self, other: Union["Undate", datetime.date]) -> bool:
-        # question: should label be taken into account when checking equality?
-        # for now, assuming label differences don't matter for comparing dates
+        # Note: assumes label differences don't matter for comparing dates
 
         # support comparison with datetime date ONLY for full day precision
         if isinstance(other, datetime.date):
@@ -190,17 +192,66 @@ def __eq__(self, other: Union["Undate", datetime.date]) -> bool:
                     % self.precision
                 )
 
-        return (
+        # check for apparent equality
+        looks_equal = (
             self.earliest == other.earliest
             and self.latest == other.latest
-            # NOTE: assumes that partially known values can only be written
-            # in one format (i.e. X for missing digits).
-            # If we support other formats, will need to normalize to common
-            # internal format for comparison
             and self.initial_values == other.initial_values
         )
+        # if everything looks the same, check for any unknowns in initial values;
+        # two dates with the same unknown values should NOT be considered equal
+
+        # NOTE: assumes that partially known values can only be written
+        # in one format (i.e. X for missing digits).
+        # If we support other formats, will need to normalize to common
+        # internal format for comparison
+        if looks_equal and any("X" in str(val) for val in self.initial_values.values()):
+            return False
+        return looks_equal
+
+    def __lt__(self, other: "Undate") -> bool:
+        # TODO: support datetime.date (?)
+
+        # if this date ends before the other date starts,
+        # return true (this date is earlier, so it is less)
+        if self.latest < other.earliest:
+            return True
+
+        # if the other one ends before this one starts,
+        # return false (this date is later, so it is not less)
+        if other.latest < self.earliest:
+            return False
+
+        # otherwise the two dates overlap; check if one is included within
+        # the other (e.g., single date within the same year);
+        # comparison for those cases is not currently supported
+        elif other in self or self in other:
+            raise NotImplementedError(
+                "Can't compare when one date falls within the other"
+            )
+
+        # for any other case (e.g., both dates cover the same range), return false
+        return False
+
+    def __le__(self, other: "Undate") -> bool:
+        return self == other or self < other
+
+    def __contains__(self, other: "Undate") -> bool:
+        # if the two dates are strictly equal, don't consider
+        # either one as containing the other
+        if self == other:
+            return False
+
+        # TODO: support datetime.date ?
 
-    # def __lt__(self, other: "")
+        return (
+            self.earliest <= other.earliest
+            and self.latest >= other.latest
+            # precision is not sufficient for comparing partially known dates
+            and self.precision > other.precision
+        )
+        # TODO: how to compare partially unknown values
+        # like 19xx and 199x or 1801-XX and 1801-1X
 
     @property
     def known_year(self) -> bool:

diff --git a/tests/test_undate.py b/tests/test_undate.py
@@ -153,6 +153,131 @@ def test_not_eq(self):
         assert Undate(2022) != Undate(2022, 10)
         assert Undate(2022, 10) != Undate(2022, 10, 1)
 
+        # partially unknown dates should NOT be considered equal
+        assert Undate("19XX") != Undate("19XX")
+        assert Undate(1980, "XX") != Undate(1980, "XX")
+
+    testdata_lt_gt = [
+        # dates to test for gt/lt comparison: earlier date, later date
+        # - simple cases: same precision where one date is clearly earlier
+        (Undate(2022), Undate(2023)),
+        (Undate(1991, 1), Undate(1991, 5)),
+        (Undate(1856, 3, 3), Undate(1856, 3, 21)),
+        # - mixed precision where one date is clearly earlier
+        (Undate(1991, 1), Undate(2000)),
+        (Undate(1856, 3, 3), Undate(1901)),
+        # partially known digits where comparison is possible
+        (Undate("19XX"), Undate("20XX")),
+        (Undate(1900, "0X"), Undate(1900, "1X")),
+    ]
+
+    @pytest.mark.parametrize("earlier,later", testdata_lt_gt)
+    def test_lt(self, earlier, later):
+        assert earlier < later
+        assert later > earlier
+
+    testdata_lte_gte = testdata_lt_gt.copy()
+    # add a few exactly equal cases
+    testdata_lte_gte.extend(
+        [
+            (Undate(1601), Undate(1601)),
+            (Undate(1991, 1), Undate(1991, 1)),
+            (Undate(1492, 5, 3), Undate(1492, 5, 3)),
+        ]
+    )
+
+    def test_lt_when_eq(self):
+        # strict less than / greater should return false when equal
+        assert not Undate(1900) > Undate(1900)
+        assert not Undate(1900) < Undate(1900)
+
+    @pytest.mark.parametrize("earlier,later", testdata_lte_gte)
+    def test_lte(self, earlier, later):
+        assert earlier <= later
+        assert later >= earlier
+
+    def test_lt_notimplemented(self):
+        # how to compare mixed precision where dates overlap?
+        # if the second date falls *within* earliest/latest,
+        # then it is not clearly less; not implemented?
+        with pytest.raises(NotImplementedError, match="date falls within the other"):
+            assert Undate(2022) < Undate(2022, 5)
+
+        # same if we attempt to compare in the other direction
+        with pytest.raises(NotImplementedError, match="date falls within the other"):
+            assert Undate(2022, 5) < Undate(2022)
+
+    testdata_contains = [
+        # first date falls within the range of the other
+        # dates within range: middle, start, end, varying precision
+        (Undate(2022, 6), Undate(2022)),
+        (Undate(2022, 1, 1), Undate(2022)),
+        (Undate(2022, 12, 31), Undate(2022)),
+        (Undate(2022, 6, 15), Undate(2022, 6)),
+        # TODO: support partially known dates that are unambiguously in range
+        # (Undate("199X"), Undate("19XX")),
+    ]
+
+    @pytest.mark.parametrize("date1,date2", testdata_contains)
+    def test_contains(self, date1, date2):
+        assert date1 in date2
+
+    testdata_not_contains = [
+        # dates not in range
+        (Undate(1980), Undate(2020)),
+        (Undate(1980), Undate(2020, 6)),
+        (Undate(1980, 6), Undate(2020, 6)),
+    ]
+
+    @pytest.mark.parametrize("date1,date2", testdata_not_contains)
+    def test_not_contains(self, date1, date2):
+        assert date1 not in date2
+
+    def test_contains_ambiguous(self):
+        # date not in range due to precision
+        # TODO: can we return an unknown instead of false?
+        # or should this raise a not implemented error?
+
+        # these are cases where dates *might* overlap,
+        #  but we don't have enough information to determine
+        # - specific month to unknown month
+        assert Undate(1980, 6) not in Undate(1980, "XX")
+        # - unknown month to unknown month
+        assert Undate(1980, "XX") not in Undate(1980, "XX")
+        assert Undate(1980, 6) not in Undate(1980, "XX")
+        assert Undate(1801, "1X") not in Undate(1801, "XX")
+
+    def test_sorting(self):
+        # sorting should be possible based on gt/lt
+        # test simple cases for sorting
+        d1980 = Undate(1980)
+        d2002_10 = Undate(2002, 10)
+        d2002_12 = Undate(2002, 12)
+        d2012_05_01 = Undate(2012, 5, 1)
+
+        assert sorted([d2012_05_01, d2002_12, d2002_10, d1980]) == [
+            d1980,
+            d2002_10,
+            d2002_12,
+            d2012_05_01,
+        ]
+
+        # what about semi-ambiguous cases?
+        d1991_XX = Undate(1991, "XX")
+        d1992_01_XX = Undate(1992, 1, "XX")
+        assert sorted([d1992_01_XX, d1991_XX, d1980]) == [d1980, d1991_XX, d1992_01_XX]
+
+        # what about things we can't compare?
+        d1991 = Undate(1991)
+        d1991_02 = Undate(1991, 2)
+        # for now, this will raise a not implemented error
+        with pytest.raises(NotImplementedError):
+            sorted([d1991_02, d1991, d1991_XX])
+
+        # TODO: partially known year?
+        # someyear = Undate("1XXX")
+        # assert sorted([d1991, someyear]) == [someyear, d1991]
+
     def test_duration(self):
         day_duration = Undate(2022, 11, 7).duration()
         assert isinstance(day_duration, timedelta)