Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make rrule fast forwarding stable #15601

Merged
merged 16 commits into from
Nov 21, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 67 additions & 27 deletions awx/main/models/schedules.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,61 @@
UTC_TIMEZONES = {x: tzutc() for x in dateutil.parser.parserinfo().UTCZONE}


def _check_valid_tzid(rrules):
for r in rrules:
if r._dtstart and r._dtstart.tzinfo is None:
raise ValueError('A valid TZID must be provided (e.g., America/New_York)')


def _fast_forward_rrules(rrules, ref_dt=None):
for i, rule in enumerate(rrules):
rrules[i] = _fast_forward_rrule(rule, ref_dt=ref_dt)
return rrules


def _fast_forward_rrule(rrule, ref_dt=None):
'''
Utility to fast forward an rrule, maintaining consistency in the resulting
occurrences.

Uses the .replace() method to update the rrule with a newer dtstart
The operation ensures that the original occurrences (based on the original dtstart)
will match the occurrences after changing the dtstart.

Returns a new rrule with a new dtstart
'''
if rrule._freq not in {dateutil.rrule.HOURLY, dateutil.rrule.MINUTELY}:
return rrule

if ref_dt is None:
ref_dt = now()

if rrule._dtstart > ref_dt:
return rrule

interval = rrule._interval if rrule._interval else 1
if rrule._freq == dateutil.rrule.HOURLY:
interval *= 60 * 60
elif rrule._freq == dateutil.rrule.MINUTELY:
interval *= 60

# if after converting to seconds the interval is still a fraction,
# just return original rrule
if isinstance(interval, float) and not interval.is_integer():
return rrule

seconds_since_dtstart = (ref_dt - rrule._dtstart).total_seconds()

# it is important to fast forward by a number that is divisible by
# interval. For example, if interval is 7 hours, we fast forward by 7, 14, 21, etc. hours.
# Otherwise, the occurrences after the fast forward might not match the ones before.
# x // y is integer division, lopping off any remainder, so that we get the outcome we want.
interval_aligned_offset = datetime.timedelta(seconds=(seconds_since_dtstart // interval) * interval)
new_start = rrule._dtstart + interval_aligned_offset
new_rrule = rrule.replace(dtstart=new_start)
return new_rrule


class ScheduleFilterMethods(object):
def enabled(self, enabled=True):
return self.filter(enabled=enabled)
Expand Down Expand Up @@ -194,41 +249,26 @@ def coerce_naive_until(cls, rrule):
return " ".join(rules)

@classmethod
def rrulestr(cls, rrule, fast_forward=True, **kwargs):
def rrulestr(cls, rrule, ref_dt=None, **kwargs):
"""
Apply our own custom rrule parsing requirements
"""
rrule = Schedule.coerce_naive_until(rrule)
kwargs['forceset'] = True
x = dateutil.rrule.rrulestr(rrule, tzinfos=UTC_TIMEZONES, **kwargs)
rruleset = dateutil.rrule.rrulestr(rrule, tzinfos=UTC_TIMEZONES, **kwargs)

for r in x._rrule:
if r._dtstart and r._dtstart.tzinfo is None:
raise ValueError('A valid TZID must be provided (e.g., America/New_York)')
_check_valid_tzid(rruleset._rrule)
_check_valid_tzid(rruleset._exrule)

# Fast forward is a way for us to limit the number of events in the rruleset
# If we are fastforwading and we don't have a count limited rule that is minutely or hourley
# We will modify the start date of the rule to last week to prevent a large number of entries
if fast_forward:
try:
# All rules in a ruleset will have the same dtstart value
# so lets compare the first event to now to see if its > 7 days old
first_event = x[0]
if (now() - first_event).days > 7:
for rule in x._rrule:
# If any rule has a minutely or hourly rule without a count...
if rule._freq in [dateutil.rrule.MINUTELY, dateutil.rrule.HOURLY] and not rule._count:
# hourly/minutely rrules with far-past DTSTART values
# are *really* slow to precompute
# start *from* one week ago to speed things up drastically
new_start = (now() - datetime.timedelta(days=7)).strftime('%Y%m%d')
# Now we want to repalce the DTSTART:<value>T with the new date (which includes the T)
new_rrule = re.sub('(DTSTART[^:]*):[^T]+T', r'\1:{0}T'.format(new_start), rrule)
return Schedule.rrulestr(new_rrule, fast_forward=False)
except IndexError:
pass

return x
# If we are fast forwarding and we don't have a count limited rule that is minutely or hourly
# We will modify the start date of the rule to bring as close to the current date as possible
# Even though the API restricts each rrule to have the same dtstart, each rrule in the rruleset
# can fast forward to a difference dtstart. This is required in order to get stable occurrences.
rruleset._rrule = _fast_forward_rrules(rruleset._rrule, ref_dt=ref_dt)
rruleset._exrule = _fast_forward_rrules(rruleset._exrule, ref_dt=ref_dt)

return rruleset

def __str__(self):
return u'%s_t%s_%s_%s' % (self.name, self.unified_job_template.id, self.id, self.next_run)
Expand Down
5 changes: 2 additions & 3 deletions awx/main/tests/functional/models/test_schedule.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,12 +139,11 @@ def test_past_week_rrule(job_template, freq, delta):
def test_really_old_dtstart(job_template, freq, delta):
# see: https://github.com/ansible/awx/issues/8071
# If an event is per-minute/per-hour and was created a *really long*
# time ago, we should just bump forward to start counting "in the last week"
# time ago, we should just bump forward the dtstart
rrule = f'DTSTART;TZID=America/New_York:20150101T000000 RRULE:FREQ={freq};INTERVAL={delta}' # noqa
sched = Schedule.objects.create(name='example schedule', rrule=rrule, unified_job_template=job_template)
last_week = (datetime.utcnow() - timedelta(days=7)).date()
first_event = sched.rrulestr(sched.rrule)[0]
assert last_week == first_event.date()
assert now() - first_event < timedelta(days=1)

# the next few scheduled events should be the next minute/hour incremented
next_five_events = list(sched.rrulestr(sched.rrule).xafter(now(), count=5))
Expand Down
126 changes: 126 additions & 0 deletions awx/main/tests/unit/utils/test_schedule_fast_forward.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import pytest
import datetime
import dateutil

from django.utils.timezone import now

from awx.main.models.schedules import _fast_forward_rrule, Schedule
from dateutil.rrule import HOURLY, MINUTELY, MONTHLY

REF_DT = datetime.datetime(2024, 1, 1, tzinfo=datetime.timezone.utc)


@pytest.mark.parametrize(
'rrulestr',
[
'DTSTART;TZID=America/New_York:20201118T200000 RRULE:FREQ=MINUTELY;INTERVAL=5',
'DTSTART;TZID=America/New_York:20201118T200000 RRULE:FREQ=HOURLY;INTERVAL=5',
'DTSTART;TZID=America/New_York:20201118T200000 RRULE:FREQ=YEARLY;INTERVAL=5',
'DTSTART;TZID=America/New_York:20201118T200000 RRULE:FREQ=MINUTELY;INTERVAL=5;WKST=SU;BYMONTH=2,3;BYMONTHDAY=18;BYHOUR=5;BYMINUTE=35;BYSECOND=0',
'DTSTART;TZID=America/New_York:20201118T200000 RRULE:FREQ=HOURLY;INTERVAL=5;WKST=SU;BYMONTH=2,3;BYHOUR=5',
],
)
def test_fast_forwarded_rrule_matches_original_occurrence(rrulestr):
'''
Assert that the resulting fast forwarded date is included in the original rrule
occurrence list
'''
rruleset = Schedule.rrulestr(rrulestr, ref_dt=REF_DT)

gen = rruleset.xafter(REF_DT, count=200)
occurrences = [i for i in gen]

orig_rruleset = dateutil.rrule.rrulestr(rrulestr, forceset=True)
gen = orig_rruleset.xafter(REF_DT, count=200)
orig_occurrences = [i for i in gen]

assert occurrences == orig_occurrences


def test_fast_forward_rrule_hours():
'''
Generate an rrule for each hour of the day

Assert that the resulting fast forwarded date is included in the original rrule
occurrence list
'''
rrulestr_prefix = 'DTSTART;TZID=America/New_York:20201118T200000 RRULE:FREQ=HOURLY;'
for interval in range(1, 24):
rrulestr = f"{rrulestr_prefix}INTERVAL={interval}"
rruleset = Schedule.rrulestr(rrulestr, ref_dt=REF_DT)

gen = rruleset.xafter(REF_DT, count=200)
occurrences = [i for i in gen]

orig_rruleset = dateutil.rrule.rrulestr(rrulestr, forceset=True)
gen = orig_rruleset.xafter(REF_DT, count=200)
orig_occurrences = [i for i in gen]

assert occurrences == orig_occurrences


def test_multiple_rrules():
'''
Create an rruleset that contains multiple rrules and an exrule
rruleA: freq HOURLY interval 5, dtstart should be fast forwarded
rruleB: freq HOURLY interval 7, dtstart should be fast forwarded
rruleC: freq MONTHLY interval 1, dtstart should not be fast forwarded
exruleA: freq HOURLY interval 5, dtstart should be fast forwarded
'''
rrulestr = '''DTSTART;TZID=America/New_York:20201118T200000
RRULE:FREQ=HOURLY;INTERVAL=5
RRULE:FREQ=HOURLY;INTERVAL=7
RRULE:FREQ=MONTHLY
EXRULE:FREQ=HOURLY;INTERVAL=5;BYDAY=MO,TU,WE'''
rruleset = Schedule.rrulestr(rrulestr, ref_dt=REF_DT)

rruleA, rruleB, rruleC = rruleset._rrule
exruleA = rruleset._exrule[0]

# assert that each rrule has its own dtstart
assert rruleA._dtstart != rruleB._dtstart
assert rruleA._dtstart != rruleC._dtstart

assert exruleA._dtstart == rruleA._dtstart

# the new dtstart should be within INTERVAL amount of hours from REF_DT
assert (REF_DT - rruleA._dtstart) < datetime.timedelta(hours=6)
assert (REF_DT - rruleB._dtstart) < datetime.timedelta(hours=8)
assert (REF_DT - exruleA._dtstart) < datetime.timedelta(hours=6)

# the freq=monthly rrule's dtstart should not have changed
dateutil_rruleset = dateutil.rrule.rrulestr(rrulestr, forceset=True)
assert rruleC._dtstart == dateutil_rruleset._rrule[2]._dtstart

gen = rruleset.xafter(REF_DT, count=200)
occurrences = [i for i in gen]

orig_rruleset = dateutil.rrule.rrulestr(rrulestr, forceset=True)
gen = orig_rruleset.xafter(REF_DT, count=200)
orig_occurrences = [i for i in gen]

assert occurrences == orig_occurrences


def test_future_date_does_not_fast_forward():
dtstart = now() + datetime.timedelta(days=30)
rrule = dateutil.rrule.rrule(freq=HOURLY, interval=7, dtstart=dtstart)
new_rrule = _fast_forward_rrule(rrule, ref_dt=REF_DT)
assert new_rrule == rrule


@pytest.mark.parametrize(
('freq', 'interval'),
[
pytest.param(MINUTELY, 15.5555, id="freq-MINUTELY-interval-15.5555"),
pytest.param(MONTHLY, 1, id="freq-MONTHLY-interval-1"),
],
)
def test_does_not_fast_forward(freq, interval):
'''
Assert a couple of rrules that should not be fast forwarded
'''
dtstart = REF_DT - datetime.timedelta(days=30)
rrule = dateutil.rrule.rrule(freq=freq, interval=interval, dtstart=dtstart)

assert rrule == _fast_forward_rrule(rrule, ref_dt=REF_DT)
Loading