Skip to content

Commit

Permalink
Implement and test date comparison methods for simpler cases
Browse files Browse the repository at this point in the history
ref #5
  • Loading branch information
rlskoeser committed Oct 27, 2023
1 parent dfe1e4c commit 34b6688
Show file tree
Hide file tree
Showing 2 changed files with 191 additions and 15 deletions.
81 changes: 66 additions & 15 deletions src/undate/undate.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import datetime
from calendar import monthrange
from enum import Enum, auto
from enum import IntEnum
import re

# Pre 3.10 requires Union for multiple types, e.g. Union[int, None] instead of int | None
Expand All @@ -15,16 +15,19 @@
ONE_DAY = datetime.timedelta(days=1)


class DatePrecision(Enum):
class DatePrecision(IntEnum):
"""date precision, to indicate date precision independent from how much
of the date is known."""

#: year
YEAR = auto()
#: month
MONTH = auto()
# numbers should be set to allow logical greater than / less than
# comparison, e.g. year precision > month

#: day
DAY = auto()
DAY = 1
#: month
MONTH = 2
#: year
YEAR = 3

def __str__(self):
return f"{self.name}"
Expand Down Expand Up @@ -177,8 +180,7 @@ def __repr__(self) -> str:
return "<Undate %s>" % self

def __eq__(self, other: Union["Undate", datetime.date]) -> bool:
# question: should label be taken into account when checking equality?
# for now, assuming label differences don't matter for comparing dates
# Note: assumes label differences don't matter for comparing dates

# support comparison with datetime date ONLY for full day precision
if isinstance(other, datetime.date):
Expand All @@ -190,17 +192,66 @@ def __eq__(self, other: Union["Undate", datetime.date]) -> bool:
% self.precision
)

return (
# check for apparent equality
looks_equal = (
self.earliest == other.earliest
and self.latest == other.latest
# NOTE: assumes that partially known values can only be written
# in one format (i.e. X for missing digits).
# If we support other formats, will need to normalize to common
# internal format for comparison
and self.initial_values == other.initial_values
)
# if everything looks the same, check for any unknowns in initial values
# the same unknown date should NOT be considered equal

# NOTE: assumes that partially known values can only be written
# in one format (i.e. X for missing digits).
# If we support other formats, will need to normalize to common
# internal format for comparison
if looks_equal and any("X" in str(val) for val in self.initial_values.values()):
return False
return looks_equal

def __lt__(self, other: "Undate") -> bool:
    """Report whether this date is strictly earlier than ``other``.

    Returns True when this date's ``latest`` is before the ``earliest``
    of ``other``. Returns False when ``other`` ends before this date
    starts, and also when neither date precedes nor contains the other
    (e.g. the two are equal).

    Raises NotImplementedError when one date falls within the other,
    because ordering is not currently supported for that case.
    """
    # TODO: support datetime.date (?)

    # this date ends before the other one starts: it is earlier, so less
    if self.latest < other.earliest:
        return True

    # the ranges overlap unless the other date ends before this one starts
    ranges_overlap = not (other.latest < self.earliest)

    # overlapping with one date included in the other (e.g., a single
    # date within the same year) is not currently supported
    if ranges_overlap and (other in self or self in other):
        raise NotImplementedError(
            "Can't compare when one date falls within the other"
        )

    # this date is later, or no ordering applies (i.e., self == other)
    return False

def __le__(self, other: "Undate") -> bool:
    """Report whether this date is equal to or strictly earlier than ``other``."""
    # equality is checked first, so equal dates never reach the strict
    # comparison
    if self == other:
        return True
    return self < other

def __contains__(self, other: "Undate") -> bool:
    """Report whether ``other`` falls within this date (``other in self``).

    ``other`` is contained when this date starts no later and ends no
    earlier than ``other`` and this date's precision value is greater.
    Strictly equal dates are never considered to contain each other.
    """
    # two strictly equal dates: neither one contains the other
    if self == other:
        return False

    # TODO: support datetime.date ?

    # this date must start on or before the start of the other...
    if not (self.earliest <= other.earliest):
        return False
    # ...and end on or after the end of the other
    if not (self.latest >= other.latest):
        return False

    # precision is not sufficient for comparing partially known dates
    # TODO: how to compare partially unknown values
    # like 19xx and 199x or 1801-XX and 1801-1X
    return self.precision > other.precision

@property
def known_year(self) -> bool:
Expand Down
125 changes: 125 additions & 0 deletions tests/test_undate.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,131 @@ def test_not_eq(self):
assert Undate(2022) != Undate(2022, 10)
assert Undate(2022, 10) != Undate(2022, 10, 1)

# partially unknown dates should NOT be considered equal
assert Undate("19XX") != Undate("19XX")
assert Undate(1980, "XX") != Undate(1980, "XX")

# (earlier, later) pairs used to parametrize the lt/gt test below;
# this list is also copied as the starting point for the lte/gte cases
testdata_lt_gt = [
# dates to test for gt/lt comparison: earlier date, later date
# - simple cases: same precision where one date is clearly earlier
(Undate(2022), Undate(2023)),
(Undate(1991, 1), Undate(1991, 5)),
(Undate(1856, 3, 3), Undate(1856, 3, 21)),
# - mixed precision where one date is clearly earlier
(Undate(1991, 1), Undate(2000)),
(Undate(1856, 3, 3), Undate(1901)),
# - partially known digits where comparison is possible
(Undate("19XX"), Undate("20XX")),
(Undate(1900, "0X"), Undate(1900, "1X")),
]

@pytest.mark.parametrize("earlier,later", testdata_lt_gt)
def test_lt(self, earlier, later):
# check both operators: `<` directly and `>` with the operands swapped
assert earlier < later
assert later > earlier

# lte/gte cases: every strictly ordered pair from the lt/gt data,
# followed by a few exactly equal pairs
testdata_lte_gte = [
    *testdata_lt_gt,
    (Undate(1601), Undate(1601)),
    (Undate(1991, 1), Undate(1991, 1)),
    (Undate(1492, 5, 3), Undate(1492, 5, 3)),
]

def test_lt_when_eq(self):
    """Strict less than / greater than both return false for equal dates."""
    first, second = Undate(1900), Undate(1900)
    assert not first > second
    assert not first < second

@pytest.mark.parametrize("earlier,later", testdata_lte_gte)
def test_lte(self, earlier, later):
# pairs are either clearly ordered or exactly equal, so both `<=`
# and `>=` with the operands swapped are expected to hold
assert earlier <= later
assert later >= earlier

def test_lt_notimplemented(self):
    """Comparing a date with one that falls inside it is not implemented."""
    # how to compare mixed precision where dates overlap?
    # if one date falls *within* the earliest/latest of the other,
    # then it is not clearly less; not implemented?
    whole_year, month_in_year = Undate(2022), Undate(2022, 5)

    # the same error is expected whichever direction we compare in
    for left, right in ((whole_year, month_in_year), (month_in_year, whole_year)):
        with pytest.raises(NotImplementedError, match="date falls within the other"):
            assert left < right

# (contained date, containing date) pairs used to parametrize the test below
testdata_contains = [
# first date falls within the range of the other
# dates within range: middle, start, end, varying precision
(Undate(2022, 6), Undate(2022)),
(Undate(2022, 1, 1), Undate(2022)),
(Undate(2022, 12, 31), Undate(2022)),
(Undate(2022, 6, 15), Undate(2022, 6)),
# TODO: support partially known dates that are unambiguously in range
# (Undate("199X"), Undate("19XX")),
]

@pytest.mark.parametrize("date1,date2", testdata_contains)
def test_contains(self, date1, date2):
# date1 is the contained date; date2 is the one expected to contain it
assert date1 in date2

# (date1, date2) pairs where date1 is not expected to be found in date2
testdata_not_contains = [
# dates not in range
(Undate(1980), Undate(2020)),
(Undate(1980), Undate(2020, 6)),
(Undate(1980, 6), Undate(2020, 6)),
]

@pytest.mark.parametrize("date1,date2", testdata_not_contains)
def test_not_contains(self, date1, date2):
# none of these pairs share a year, at any mix of precisions
assert date1 not in date2

def test_contains_ambiguous(self):
    """Dates that only *might* overlap are currently reported as not contained."""
    # date not in range due to precision
    # TODO: can we return an unknown instead of false?
    # or should this raise a not implemented error?

    # these are cases where dates *might* overlap,
    # but we don't have enough information to determine
    # - specific month to unknown month
    assert Undate(1980, 6) not in Undate(1980, "XX")
    # - unknown month to unknown month
    assert Undate(1980, "XX") not in Undate(1980, "XX")
    # - partially known month to unknown month
    assert Undate(1801, "1X") not in Undate(1801, "XX")

def test_sorting(self):
    """Sorting is based on gt/lt, so it works wherever those comparisons do."""
    # simple cases for sorting
    year_1980 = Undate(1980)
    oct_2002 = Undate(2002, 10)
    dec_2002 = Undate(2002, 12)
    day_2012 = Undate(2012, 5, 1)

    in_order = [year_1980, oct_2002, dec_2002, day_2012]
    assert sorted([day_2012, dec_2002, oct_2002, year_1980]) == in_order

    # what about semi-ambiguous cases?
    unknown_month_1991 = Undate(1991, "XX")
    unknown_day_jan_1992 = Undate(1992, 1, "XX")
    partly_known_in_order = [year_1980, unknown_month_1991, unknown_day_jan_1992]
    assert (
        sorted([unknown_day_jan_1992, unknown_month_1991, year_1980])
        == partly_known_in_order
    )

    # what about things we can't compare?
    year_1991 = Undate(1991)
    feb_1991 = Undate(1991, 2)
    # for now, this will raise a not implemented error
    with pytest.raises(NotImplementedError):
        sorted([feb_1991, year_1991, unknown_month_1991])

    # TODO: partially known year?
    # someyear = Undate("1XXX")
    # assert sorted([year_1991, someyear]) == [someyear, year_1991]

def test_duration(self):
day_duration = Undate(2022, 11, 7).duration()
assert isinstance(day_duration, timedelta)
Expand Down

0 comments on commit 34b6688

Please sign in to comment.