Skip to content

Commit

Permalink
fix(date_parser): fixed bug for advanced time range filter (apache#31867)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexandrusoare authored Jan 20, 2025
1 parent 8960db4 commit 63843c5
Showing 2 changed files with 226 additions and 11 deletions.
197 changes: 186 additions & 11 deletions superset/utils/date_parser.py
Original file line number Diff line number Diff line change
@@ -143,6 +143,174 @@ def parse_past_timedelta(
)


def get_relative_base(unit: str, relative_start: str | None = None) -> str:
    """
    Pick the relative base keyword (`now` or `today`) for a time unit.

    A caller-supplied ``relative_start`` always wins and is returned untouched,
    without looking at ``unit`` at all. Otherwise the base is derived from how
    fine-grained the unit is. The result is used as the base for the
    expressions built from the patterns in `time_range_lookup`.

    Args:
        unit (str): The time unit (e.g., "second", "minute", "hour", "day", etc.).
            Matching is case-insensitive.
        relative_start (str | None): Optional user-provided base expression.

    Returns:
        str: ``relative_start`` when given; otherwise "now" for second/minute/hour
        and "today" for day/week/month/quarter/year.

    Raises:
        ValueError: If no ``relative_start`` is given and the unit is not recognised.
    """
    if relative_start is not None:
        return relative_start

    # Sub-day units are anchored on the current instant, everything else on
    # the current day.
    base_by_unit = {
        "second": "now",
        "minute": "now",
        "hour": "now",
        "day": "today",
        "week": "today",
        "month": "today",
        "quarter": "today",
        "year": "today",
    }
    base = base_by_unit.get(unit.lower())
    if base is None:
        raise ValueError(f"Unknown unit: {unit}")
    return base


def handle_start_of(base_expression: str, unit: str) -> str:
    """
    Build the expression for the start of a unit (e.g., start of month).

    Serves the "start of" / "beginning of" modifiers of the first pattern in
    `time_range_lookup`.

    Args:
        base_expression (str): The datetime expression to truncate
            (e.g., "DATETIME('today')").
        unit (str): The granularity to truncate to. Must be exactly one of
            "year", "quarter", "month", "week" or "day" (the comparison is
            case-sensitive; no normalisation happens here).

    Returns:
        str: ``DATETRUNC(<base_expression>, <unit>)``.

    Raises:
        ValueError: If the unit is not one of the valid options.

    Example:
        >>> handle_start_of("DATETIME('today')", "month")
        "DATETRUNC(DATETIME('today'), month)"
    """
    truncatable_units = frozenset({"year", "quarter", "month", "week", "day"})
    if unit not in truncatable_units:
        raise ValueError(f"Invalid unit for 'start of': {unit}")
    return f"DATETRUNC({base_expression}, {unit})"


def handle_end_of(base_expression: str, unit: str) -> str:
    """
    Build the expression for the end of a unit (e.g., end of month).

    Serves the "end of" modifier of the first pattern in `time_range_lookup`.

    Args:
        base_expression (str): The datetime expression to wrap
            (e.g., "DATETIME('today')").
        unit (str): The granularity whose last day is wanted. Must be exactly
            one of "year", "quarter", "month", "week" or "day" (the comparison
            is case-sensitive; no normalisation happens here).

    Returns:
        str: ``LASTDAY(<base_expression>, <unit>)``.

    Raises:
        ValueError: If the unit is not one of the valid options.

    Example:
        >>> handle_end_of("DATEADD(DATETIME('today'), -1, month)", "month")
        "LASTDAY(DATEADD(DATETIME('today'), -1, month), month)"
    """
    closable_units = frozenset({"year", "quarter", "month", "week", "day"})
    if unit not in closable_units:
        raise ValueError(f"Invalid unit for 'end of': {unit}")
    return f"LASTDAY({base_expression}, {unit})"


def handle_modifier_and_unit(
    modifier: str, scope: str, delta: str, unit: str, relative_base: str
) -> str:
    """
    Build a datetime expression from a modifier, scope, delta, unit and base.

    Handles phrases matching the first pattern in `time_range_lookup`, such as
    "start of this month" or "end of prior 2 years". The scope/delta/unit part
    is first resolved with `handle_scope_and_unit`, and the result is then
    wrapped by `handle_start_of` or `handle_end_of` depending on the modifier.

    Args:
        modifier (str): "start of", "beginning of" or "end of"
            (case-insensitive).
        scope (str): The time scope (e.g., "this", "last", "next", "prior").
        delta (str): The numeric delta value (e.g., "1", "2"); may be empty.
        unit (str): The granularity (e.g., "day", "month", "year"). It is
            forwarded unchanged to `handle_scope_and_unit` and lower-cased
            before being handed to the start/end helper.
        relative_base (str): The base keyword or expression (e.g., "now" or
            "today"), as determined by `get_relative_base`.

    Returns:
        str: The resulting datetime expression.

    Raises:
        ValueError: If the modifier is unknown. Errors raised by the helpers
            (invalid scope, invalid unit) propagate unchanged, and the scope
            is resolved before the modifier is inspected.

    Example:
        >>> handle_modifier_and_unit("start of", "this", "", "month", "today")
        "DATETRUNC(DATETIME('today'), month)"
        >>> handle_modifier_and_unit("end of", "last", "1", "year", "today")
        "LASTDAY(DATEADD(DATETIME('today'), -1, year), year)"
    """
    shifted_expression = handle_scope_and_unit(scope, delta, unit, relative_base)

    normalized_modifier = modifier.lower()
    if normalized_modifier == "end of":
        return handle_end_of(shifted_expression, unit.lower())
    if normalized_modifier in ("start of", "beginning of"):
        return handle_start_of(shifted_expression, unit.lower())
    raise ValueError(f"Unknown modifier: {modifier}")


def handle_scope_and_unit(scope: str, delta: str, unit: str, relative_base: str) -> str:
    """
    Build a datetime expression from a scope, delta, unit and relative base.

    Handles phrases matching the second pattern in `time_range_lookup`, such
    as "last 2 weeks" or "this month".

    Args:
        scope (str): "this", "last", "prior" or "next" (case-insensitive).
        delta (str): The numeric delta as text (e.g., "1", "2"). A falsy value
            (empty string or None) means 1. Ignored when the scope is "this".
        unit (str): The granularity (e.g., "second", "minute", "hour", "day").
            It is inserted into the expression as given.
        relative_base (str): The base keyword or expression (e.g., "now" or
            "today"), as determined by `get_relative_base`.

    Returns:
        str: ``DATETIME('<base>')`` for "this"; otherwise a ``DATEADD`` around
        that expression, with a negative delta for "last"/"prior" and a
        positive one for "next".

    Raises:
        ValueError: If the scope is invalid, or if a non-empty ``delta`` cannot
            be converted by ``int`` (the conversion happens before the scope
            is checked).

    Example:
        >>> handle_scope_and_unit("last", "2", "week", "today")
        "DATEADD(DATETIME('today'), -2, week)"
    """
    amount = int(delta) if delta else 1
    anchor = f"DATETIME('{relative_base}')"
    direction = scope.lower()

    if direction == "this":
        return anchor
    if direction in ("last", "prior"):
        return f"DATEADD({anchor}, -{amount}, {unit})"
    if direction == "next":
        return f"DATEADD({anchor}, {amount}, {unit})"
    raise ValueError(f"Invalid scope: {scope}")


def get_since_until( # pylint: disable=too-many-arguments,too-many-locals,too-many-branches,too-many-statements # noqa: C901
time_range: str | None = None,
since: str | None = None,
@@ -241,21 +409,28 @@ def get_since_until( # pylint: disable=too-many-arguments,too-many-locals,too-m
if time_range and separator in time_range:
time_range_lookup = [
(
r"^last\s+(day|week|month|quarter|year)$",
lambda unit: f"DATEADD(DATETIME('{_relative_start}'), -1, {unit})",
),
(
r"^last\s+([0-9]+)\s+(second|minute|hour|day|week|month|year)s?$",
lambda delta,
unit: f"DATEADD(DATETIME('{_relative_start}'), -{int(delta)}, {unit})", # pylint: disable=line-too-long,useless-suppression
r"^(start of|beginning of|end of)\s+"
r"(this|last|next|prior)\s+"
r"([0-9]+)?\s*"
r"(day|week|month|quarter|year)s?$", # Matches phrases like "start of next month" # pylint: disable=line-too-long,useless-suppression # noqa: E501
lambda modifier, scope, delta, unit: handle_modifier_and_unit(
modifier,
scope,
delta,
unit,
get_relative_base(unit, relative_start),
),
),
(
r"^next\s+([0-9]+)\s+(second|minute|hour|day|week|month|year)s?$",
lambda delta,
unit: f"DATEADD(DATETIME('{_relative_end}'), {int(delta)}, {unit})", # pylint: disable=line-too-long,useless-suppression
r"^(this|last|next|prior)\s+"
r"([0-9]+)?\s*"
r"(second|minute|day|week|month|quarter|year)s?$", # Matches "next 5 days" or "last 2 weeks" # pylint: disable=line-too-long,useless-suppression # noqa: E501
lambda scope, delta, unit: handle_scope_and_unit(
scope, delta, unit, get_relative_base(unit, relative_start)
),
),
(
r"^(DATETIME.*|DATEADD.*|DATETRUNC.*|LASTDAY.*|HOLIDAY.*)$",
r"^(DATETIME.*|DATEADD.*|DATETRUNC.*|LASTDAY.*|HOLIDAY.*)$", # Matches date-related keywords # pylint: disable=line-too-long,useless-suppression # noqa: E501
lambda text: text,
),
]
40 changes: 40 additions & 0 deletions tests/unit_tests/utils/date_parser_tests.py
Original file line number Diff line number Diff line change
@@ -92,6 +92,46 @@ def test_get_since_until() -> None:
expected = datetime(2016, 11, 6), datetime(2016, 11, 8)
assert result == expected

result = get_since_until(" : now")
expected = None, datetime(2016, 11, 7, 9, 30, 10)
assert result == expected

result = get_since_until(" : last 2 minutes")
expected = None, datetime(2016, 11, 7, 9, 28, 10)
assert result == expected

result = get_since_until(" : prior 2 minutes")
expected = None, datetime(2016, 11, 7, 9, 28, 10)
assert result == expected

result = get_since_until(" : next 2 minutes")
expected = None, datetime(2016, 11, 7, 9, 32, 10)
assert result == expected

result = get_since_until("start of this month : ")
expected = datetime(2016, 11, 1), None
assert result == expected

result = get_since_until("start of next month : ")
expected = datetime(2016, 12, 1), None
assert result == expected

result = get_since_until("end of this month : ")
expected = datetime(2016, 11, 30), None
assert result == expected

result = get_since_until("end of next month : ")
expected = datetime(2016, 12, 31), None
assert result == expected

result = get_since_until("beginning of next year : ")
expected = datetime(2017, 1, 1), None
assert result == expected

result = get_since_until("beginning of last year : ")
expected = datetime(2015, 1, 1), None
assert result == expected

result = get_since_until("2018-01-01T00:00:00 : 2018-12-31T23:59:59")
expected = datetime(2018, 1, 1), datetime(2018, 12, 31, 23, 59, 59)
assert result == expected

0 comments on commit 63843c5

Please sign in to comment.