Skip to content

Commit

Permalink
Skip invalidating segments for today if flag is provided (#22546)
Browse files Browse the repository at this point in the history
* Skip invalidating segments for today if flag is provided

* Ensure invalidation skipping for yesterday is handled for each segment separately

* improve tests for --skip-segments-today

* Ensure segments created recently are still invalidated for today

* improve variable naming

* smaller code refactorings / improvements

* skip archiving segment for any period starting today if --skip-segments-today is given
  • Loading branch information
sgiehl authored Sep 2, 2024
1 parent 1733ef7 commit 7cbe3b4
Show file tree
Hide file tree
Showing 3 changed files with 542 additions and 72 deletions.
3 changes: 1 addition & 2 deletions core/Archive.php
Original file line number Diff line number Diff line change
Expand Up @@ -284,8 +284,7 @@ public static function factory(
public static function shouldSkipArchiveIfSkippingSegmentArchiveForToday(Site $site, Period $period, Segment $segment)
{
$now = Date::factory('now', $site->getTimezone());
return $period->getLabel() === 'day'
&& !$segment->isEmpty()
return !$segment->isEmpty()
&& $period->getDateStart()->toString() === $now->toString();
}

Expand Down
110 changes: 72 additions & 38 deletions core/CronArchive.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
use Piwik\DataAccess\Model;
use Piwik\Exception\UnexpectedWebsiteFoundException;
use Piwik\Metrics\Formatter;
use Piwik\Period\Day;
use Piwik\Period\Factory as PeriodFactory;
use Piwik\CronArchive\SegmentArchiving;
use Piwik\Period\Range;
Expand Down Expand Up @@ -866,19 +865,19 @@ private function invalidateArchivedReportsForSitesThatNeedToBeArchivedAgainImpl(

try {
$this->logger->debug(' Will invalidate archived reports for ' . $date . ' for following websites ids: ' . $listSiteIds);
$this->invalidateWithSegments($siteIdsToInvalidate, $date, $period = 'day');
$this->invalidateWithSegments($siteIdsToInvalidate, $date, 'day');
} catch (Exception $e) {
$message = ExceptionToTextProcessor::getMessageAndWholeBacktrace($e);
$this->logger->info(' Failed to invalidate archived reports: ' . $message);
}
}

// invalidate today if needed for all websites
$this->invalidateRecentDate('today', $idSiteToInvalidate);
$this->invalidateRecentDate('today', (int) $idSiteToInvalidate);

// invalidate yesterday archive if the time of the latest valid archive is earlier than today
// (means the day has changed and there might be more visits that weren't processed)
$this->invalidateRecentDate('yesterday', $idSiteToInvalidate);
$this->invalidateRecentDate('yesterday', (int) $idSiteToInvalidate);

// invalidate range archives
$dates = $this->getCustomDateRangeToPreProcess($idSiteToInvalidate);
Expand All @@ -893,15 +892,15 @@ private function invalidateArchivedReportsForSitesThatNeedToBeArchivedAgainImpl(

$this->logger->debug(' Invalidating custom date range ({date}) for site {idSite}', ['idSite' => $idSiteToInvalidate, 'date' => $date]);

$this->invalidateWithSegments($idSiteToInvalidate, $date, 'range', $_forceInvalidateNonexistent = true);
$this->invalidateWithSegments($idSiteToInvalidate, $date, 'range');
}

$this->setInvalidationTime();

$this->logger->debug("Done invalidating");
}

public function invalidateRecentDate($dateStr, $idSite)
public function invalidateRecentDate(string $dateStr, int $idSite): void
{
$timezone = Site::getTimezoneFor($idSite);
$date = Date::factoryInTimezone($dateStr, $timezone);
Expand All @@ -916,36 +915,34 @@ public function invalidateRecentDate($dateStr, $idSite)
}

$isYesterday = $dateStr === 'yesterday';
if ($isYesterday) {
// Skip invalidation for yesterday if archiving for yesterday was already started after midnight in site's timezone
$invalidationsInProgress = $this->model->getInvalidationsInProgress($idSite);
$today = Date::factoryInTimezone('today', $timezone);

foreach ($invalidationsInProgress as $invalidation) {
if (
$invalidation['period'] == Day::PERIOD_ID
&& $date->toString() === $invalidation['date1']
&& Date::factory($invalidation['ts_started'], $timezone)->getTimestamp() >= $today->getTimestamp()
) {
$this->logger->debug(" " . ucfirst($dateStr) . " archive already in process for idSite = $idSite, skipping invalidation...");
return;
}
}
}
$isToday = $dateStr === 'today';

$this->logger->info(" Will invalidate archived reports for $dateStr in site ID = {idSite}'s timezone ({date}).", [
'idSite' => $idSite,
'date' => $date->getDatetime(),
]);

$onlyProcessSegmentsChangedRecently = $this->archiveFilter->isSkipSegmentsForToday() && $isToday;

// if we are invalidating yesterday here, we are only interested in checking if there is no archive for yesterday, or the day has changed since
// the last archive was archived (in which there may have been more visits before midnight). so we disable the ttl check, since any archive
// will be good enough, if the date hasn't changed.
$this->invalidateWithSegments([$idSite], $date->toString(), 'day', false, $doNotIncludeTtlInExistingArchiveCheck = $isYesterday);
$this->invalidateWithSegments(
[$idSite],
$date->toString(),
'day',
$isYesterday,
$onlyProcessSegmentsChangedRecently
);
}

private function invalidateWithSegments($idSites, $date, $period, $_forceInvalidateNonexistent = false, $doNotIncludeTtlInExistingArchiveCheck = false)
{
private function invalidateWithSegments(
$idSites,
$date,
string $period,
bool $skipWhenRunningOrNewEnoughArchiveExists = false,
bool $onlyProcessSegmentsChangedRecently = false
) {
if ($date instanceof Date) {
$date = $date->toString();
}
Expand All @@ -972,36 +969,51 @@ private function invalidateWithSegments($idSites, $date, $period, $_forceInvalid
$periodObj->getDateTimeEnd()->setTimezone($site->getTimezone())
)
);
if ($this->canWeSkipInvalidatingBecauseThereIsAUsablePeriod($params, $doNotIncludeTtlInExistingArchiveCheck)) {

if ($this->canWeSkipInvalidatingBecauseThereIsAUsablePeriod($params, $skipWhenRunningOrNewEnoughArchiveExists)) {
$this->logger->debug(' Found usable archive for {archive}, skipping invalidation.', ['archive' => $params]);
} elseif ($skipWhenRunningOrNewEnoughArchiveExists && $this->canWeSkipInvalidatingBecauseInvalidationAlreadyInProgress($site->getId(), $periodObj)) {
$this->logger->debug(' Invalidation for {archive} already in progress, skipping invalidation.', ['archive' => $params]);
} else {
$this->getApiToInvalidateArchivedReport()->invalidateArchivedReports(
$idSite,
$date,
$period,
$segment = false,
$cascadeDown = false,
$_forceInvalidateNonexistent
$period === 'range'
);
}

$allSegments = $this->segmentArchiving->getAllSegments();

foreach ($this->segmentArchiving->getAllSegmentsToArchive($idSite) as $segmentDefinition) {
// check if the segment is available
// check if the segment is available
if (!$this->isSegmentAvailable($segmentDefinition, [$idSite])) {
continue;
}

if ($onlyProcessSegmentsChangedRecently && !$this->wasSegmentChangedRecently($segmentDefinition, $allSegments)) {
continue;
}

$segmentObj = new Segment(
$segmentDefinition,
[$idSite],
$periodObj->getDateTimeStart()->setTimezone($site->getTimezone()),
$periodObj->getDateTimeEnd()->setTimezone($site->getTimezone())
);

$params = new Parameters(
$site,
$periodObj,
new Segment(
$segmentDefinition,
[$idSite],
$periodObj->getDateTimeStart()->setTimezone($site->getTimezone()),
$periodObj->getDateTimeEnd()->setTimezone($site->getTimezone())
)
$segmentObj
);
if ($this->canWeSkipInvalidatingBecauseThereIsAUsablePeriod($params, $doNotIncludeTtlInExistingArchiveCheck)) {

if ($this->canWeSkipInvalidatingBecauseThereIsAUsablePeriod($params, $skipWhenRunningOrNewEnoughArchiveExists)) {
$this->logger->debug(' Found usable archive for {archive}, skipping invalidation.', ['archive' => $params]);
} elseif ($skipWhenRunningOrNewEnoughArchiveExists && $this->canWeSkipInvalidatingBecauseInvalidationAlreadyInProgress($site->getId(), $periodObj, $segmentObj)) {
$this->logger->debug(' Invalidation for {archive} already in progress, skipping invalidation.', ['archive' => $params]);
} else {
if (empty($this->segmentArchiving)) {
// might not be initialised if init is not called
Expand All @@ -1026,7 +1038,7 @@ private function invalidateWithSegments($idSites, $date, $period, $_forceInvalid
$period,
$segmentDefinition,
$cascadeDown = false,
$_forceInvalidateNonexistent
$period === 'range'
);
}
}
Expand All @@ -1040,7 +1052,7 @@ private function invalidateWithSegments($idSites, $date, $period, $_forceInvalid
* @param $idSites
* @return bool
*/
protected function isSegmentAvailable($segmentDefinition, $idSites)
protected function isSegmentAvailable($segmentDefinition, $idSites): bool
{
try {
new Segment($segmentDefinition, $idSites);
Expand All @@ -1051,14 +1063,36 @@ protected function isSegmentAvailable($segmentDefinition, $idSites)
return true;
}

/**
 * Checks whether an invalidation for the given site, period and segment is already being processed.
 *
 * An in-progress invalidation is considered a match when its done flag name, period id and start
 * date (`date1`) equal those of the requested archive, and its `ts_started` is later than the end
 * of the period's last day.
 *
 * @param int $idSite The site whose in-progress invalidations are inspected.
 * @param Period $period The period of the archive that is about to be invalidated.
 * @param Segment|null $segment The segment of that archive; an empty segment for $idSite is used when null.
 * @return bool True if a matching invalidation is already in progress, false otherwise.
 */
private function canWeSkipInvalidatingBecauseInvalidationAlreadyInProgress(int $idSite, Period $period, ?Segment $segment = null): bool
{
    $invalidationsInProgress = $this->model->getInvalidationsInProgress($idSite);
    $timezone = Site::getTimezoneFor($idSite);

    // No segment given means the "all visits" archive, represented by an empty segment definition.
    $doneFlag = Rules::getDoneFlagArchiveContainsAllPlugins($segment ?? new Segment('', [$idSite]));
    $endOfDayInTimezone = $period->getDateEnd()->getEndOfDay();

    // These values do not change per invalidation, so look them up once instead of on every iteration.
    $periodId = $period->getId();
    $periodStartDate = $period->getDateStart()->toString();

    foreach ($invalidationsInProgress as $invalidation) {
        if (
            $invalidation['name'] === $doneFlag
            // NOTE(review): loose comparison kept on purpose; presumably the stored period id is
            // returned as a string by the database layer - confirm before tightening to ===.
            && $invalidation['period'] == $periodId
            && $periodStartDate === $invalidation['date1']
            && Date::factory($invalidation['ts_started'], $timezone)->isLater($endOfDayInTimezone)
        ) {
            return true;
        }
    }

    return false;
}

/**
* Returns true if there is an existing valid period we can use, or false if there isn't and the invalidation should go through.
*
* Note: this method should only be used in the context of invalidation.
*
* @param Parameters $params The parameters for the archive we want to invalidate.
*/
public function canWeSkipInvalidatingBecauseThereIsAUsablePeriod(Parameters $params, $doNotIncludeTtlInExistingArchiveCheck = false): bool
private function canWeSkipInvalidatingBecauseThereIsAUsablePeriod(Parameters $params, $doNotIncludeTtlInExistingArchiveCheck = false): bool
{
$timezone = Site::getTimezoneFor($params->getSite()->getId());
$today = Date::factoryInTimezone('today', $timezone);
Expand Down
Loading

0 comments on commit 7cbe3b4

Please sign in to comment.