diff options
-rw-r--r-- | core/CronArchive/QueueConsumer.php | 37 | ||||
-rw-r--r-- | tests/PHPUnit/Integration/CronArchiveTest.php | 34 | ||||
-rw-r--r-- | tests/PHPUnit/Unit/CronArchive/QueueConsumerTest.php | 95 |
3 files changed, 136 insertions, 30 deletions
diff --git a/core/CronArchive/QueueConsumer.php b/core/CronArchive/QueueConsumer.php index f7a98b9498..9b8cc70033 100644 --- a/core/CronArchive/QueueConsumer.php +++ b/core/CronArchive/QueueConsumer.php @@ -215,7 +215,7 @@ class QueueConsumer continue; } - if ($this->hasIntersectingPeriod($archivesToProcess, $invalidatedArchive)) { + if (self::hasIntersectingPeriod($archivesToProcess, $invalidatedArchive)) { $this->logger->debug("Found archive with intersecting period with others in concurrent batch, skipping until next batch: $invalidationDesc"); $idinvalidation = $invalidatedArchive['idinvalidation']; @@ -260,7 +260,6 @@ class QueueConsumer continue; } - // TODO: should use descriptive string instead of just invalidation ID $reason = $this->shouldSkipArchiveBecauseLowerPeriodOrSegmentIsInProgress($invalidatedArchive); if ($reason) { $this->logger->debug("Skipping invalidated archive, $reason: $invalidationDesc"); @@ -398,6 +397,14 @@ class QueueConsumer continue; } + // we don't care about lower periods being concurrent if they are for different segments (that are not "all visits") + if (!empty($archiveBeingProcessed['segment']) + && !empty($archiveToProcess['segment']) + && $archiveBeingProcessed['segment'] != $archiveToProcess['segment'] + ) { + continue; + } + $archiveBeingProcessed['periodObj'] = PeriodFactory::build($archiveBeingProcessed['period'], $archiveBeingProcessed['date']); if ($this->isArchiveOfLowerPeriod($archiveToProcess, $archiveBeingProcessed)) { @@ -447,13 +454,32 @@ class QueueConsumer $archive['plugin'] = $this->getPluginNameForArchiveIfAny($archive); } - private function hasIntersectingPeriod(array $archivesToProcess, $invalidatedArchive) + // static so it can be unit tested + public static function hasIntersectingPeriod(array $archivesToProcess, $invalidatedArchive) { if (empty($archivesToProcess)) { return false; } foreach ($archivesToProcess as $archive) { + $isSamePeriod = $archive['period'] == $invalidatedArchive['period'] + && $archive['date1'] == $invalidatedArchive['date1'] + && $archive['date2'] == $invalidatedArchive['date2']; + + // don't do the check for $archvie, if we have the same period and segment as $invalidatedArchive + // we only want to to do the intersecting periods check if there are different periods or one of the + // invalidations is for an "all visits" archive. + // + // it's allowed to archive the same period concurrently for different segments, where neither is + // "All Visits" + if (!empty($archive['segment']) + && !empty($invalidatedArchive['segment']) + && $archive['segment'] != $invalidatedArchive['segment'] + && $isSamePeriod + ) { + continue; + } + if ($archive['periodObj']->isPeriodIntersectingWith($invalidatedArchive['periodObj'])) { return true; } @@ -517,13 +543,14 @@ class QueueConsumer private function getInvalidationDescription(array $invalidatedArchive) { - return sprintf("[idinvalidation = %s, idsite = %s, period = %s(%s - %s), name = %s]", + return sprintf("[idinvalidation = %s, idsite = %s, period = %s(%s - %s), name = %s, segment = %s]", $invalidatedArchive['idinvalidation'], $invalidatedArchive['idsite'], $this->periodIdsToLabels[$invalidatedArchive['period']], $invalidatedArchive['date1'], $invalidatedArchive['date2'], - $invalidatedArchive['name'] + $invalidatedArchive['name'], + $invalidatedArchive['segment'] ?? '' ); } diff --git a/tests/PHPUnit/Integration/CronArchiveTest.php b/tests/PHPUnit/Integration/CronArchiveTest.php index 00a8b4867e..4693092589 100644 --- a/tests/PHPUnit/Integration/CronArchiveTest.php +++ b/tests/PHPUnit/Integration/CronArchiveTest.php @@ -697,48 +697,31 @@ Checking for queued invalidations... Segment "actions>=2" was created or changed recently and will therefore archive today (for site ID = 1) Segment "actions>=4" was created or changed recently and will therefore archive today (for site ID = 1) Done invalidating -Found invalidated archive we can skip (no visits): [idinvalidation = 53, idsite = 1, period = day(2020-02-03 - 2020-02-03), name = donee0512c03f7c20af6ef96a8d792c6bb9f] -Found invalidated archive we can skip (no visits): [idinvalidation = 54, idsite = 1, period = week(2020-02-03 - 2020-02-09), name = donee0512c03f7c20af6ef96a8d792c6bb9f] -Found invalidated archive we can skip (no visits): [idinvalidation = 52, idsite = 1, period = day(2020-02-02 - 2020-02-02), name = donee0512c03f7c20af6ef96a8d792c6bb9f] -Found invalidated archive we can skip (no visits): [idinvalidation = 50, idsite = 1, period = day(2020-02-01 - 2020-02-01), name = donee0512c03f7c20af6ef96a8d792c6bb9f] -Found invalidated archive we can skip (no visits): [idinvalidation = 51, idsite = 1, period = month(2020-02-01 - 2020-02-29), name = donee0512c03f7c20af6ef96a8d792c6bb9f] -Found invalidated archive we can skip (no visits): [idinvalidation = 46, idsite = 1, period = week(2020-01-27 - 2020-02-02), name = donee0512c03f7c20af6ef96a8d792c6bb9f] -Found invalidated archive we can skip (no visits): [idinvalidation = 43, idsite = 1, period = day(2020-01-01 - 2020-01-01), name = donee0512c03f7c20af6ef96a8d792c6bb9f] -Found invalidated archive we can skip (no visits): [idinvalidation = 44, idsite = 1, period = month(2020-01-01 - 2020-01-31), name = donee0512c03f7c20af6ef96a8d792c6bb9f] -Found invalidated archive we can skip (no visits): [idinvalidation = 45, idsite = 1, period = year(2020-01-01 - 2020-12-31), name = donee0512c03f7c20af6ef96a8d792c6bb9f] -Found invalidated archive we can skip (no visits): [idinvalidation = 35, idsite = 1, period = day(2019-12-31 - 2019-12-31), name = donee0512c03f7c20af6ef96a8d792c6bb9f] -Found invalidated archive we can skip (no visits): [idinvalidation = 33, idsite = 1, period = day(2019-12-30 - 2019-12-30), name = donee0512c03f7c20af6ef96a8d792c6bb9f] -Found invalidated archive we can skip (no visits): [idinvalidation = 34, idsite = 1, period = week(2019-12-30 - 2020-01-05), name = donee0512c03f7c20af6ef96a8d792c6bb9f] -Found invalidated archive we can skip (no visits): [idinvalidation = 31, idsite = 1, period = day(2019-12-23 - 2019-12-23), name = donee0512c03f7c20af6ef96a8d792c6bb9f] -Found invalidated archive we can skip (no visits): [idinvalidation = 32, idsite = 1, period = week(2019-12-23 - 2019-12-29), name = donee0512c03f7c20af6ef96a8d792c6bb9f] -Found invalidated archive we can skip (no visits): [idinvalidation = 29, idsite = 1, period = day(2019-12-16 - 2019-12-16), name = donee0512c03f7c20af6ef96a8d792c6bb9f] -Found invalidated archive we can skip (no visits): [idinvalidation = 30, idsite = 1, period = week(2019-12-16 - 2019-12-22), name = donee0512c03f7c20af6ef96a8d792c6bb9f] -Processing invalidation: [idinvalidation = 5, idsite = 1, period = day(2019-12-12 - 2019-12-12), name = donee0512c03f7c20af6ef96a8d792c6bb9f]. -Processing invalidation: [idinvalidation = 14, idsite = 1, period = day(2019-12-11 - 2019-12-11), name = donee0512c03f7c20af6ef96a8d792c6bb9f]. -Processing invalidation: [idinvalidation = 17, idsite = 1, period = day(2019-12-10 - 2019-12-10), name = donee0512c03f7c20af6ef96a8d792c6bb9f]. +Processing invalidation: [idinvalidation = 5, idsite = 1, period = day(2019-12-12 - 2019-12-12), name = donee0512c03f7c20af6ef96a8d792c6bb9f, segment = actions>=2]. +Processing invalidation: [idinvalidation = 14, idsite = 1, period = day(2019-12-11 - 2019-12-11), name = donee0512c03f7c20af6ef96a8d792c6bb9f, segment = actions>=2]. +Processing invalidation: [idinvalidation = 17, idsite = 1, period = day(2019-12-10 - 2019-12-10), name = donee0512c03f7c20af6ef96a8d792c6bb9f, segment = actions>=2]. Starting archiving for ?module=API&method=CoreAdminHome.archiveReports&idSite=1&period=day&date=2019-12-12&format=json&segment=actions%3E%3D2&trigger=archivephp Starting archiving for ?module=API&method=CoreAdminHome.archiveReports&idSite=1&period=day&date=2019-12-11&format=json&segment=actions%3E%3D2&trigger=archivephp Starting archiving for ?module=API&method=CoreAdminHome.archiveReports&idSite=1&period=day&date=2019-12-10&format=json&segment=actions%3E%3D2&trigger=archivephp Archived website id 1, period = day, date = 2019-12-12, segment = 'actions%3E%3D2', 0 visits found. Time elapsed: %fs Archived website id 1, period = day, date = 2019-12-11, segment = 'actions%3E%3D2', 0 visits found. Time elapsed: %fs Archived website id 1, period = day, date = 2019-12-10, segment = 'actions%3E%3D2', 0 visits found. Time elapsed: %fs -Found invalidated archive we can skip (no visits): [idinvalidation = 28, idsite = 1, period = day(2019-12-09 - 2019-12-09), name = donee0512c03f7c20af6ef96a8d792c6bb9f] -Processing invalidation: [idinvalidation = 6, idsite = 1, period = week(2019-12-09 - 2019-12-15), name = donee0512c03f7c20af6ef96a8d792c6bb9f]. -Processing invalidation: [idinvalidation = 22, idsite = 1, period = day(2019-12-02 - 2019-12-02), name = donee0512c03f7c20af6ef96a8d792c6bb9f]. +Processing invalidation: [idinvalidation = 6, idsite = 1, period = week(2019-12-09 - 2019-12-15), name = donee0512c03f7c20af6ef96a8d792c6bb9f, segment = actions>=2]. +Processing invalidation: [idinvalidation = 22, idsite = 1, period = day(2019-12-02 - 2019-12-02), name = donee0512c03f7c20af6ef96a8d792c6bb9f, segment = actions>=2]. No next invalidated archive. Starting archiving for ?module=API&method=CoreAdminHome.archiveReports&idSite=1&period=week&date=2019-12-09&format=json&segment=actions%3E%3D2&trigger=archivephp Starting archiving for ?module=API&method=CoreAdminHome.archiveReports&idSite=1&period=day&date=2019-12-02&format=json&segment=actions%3E%3D2&trigger=archivephp Archived website id 1, period = week, date = 2019-12-09, segment = 'actions%3E%3D2', 0 visits found. Time elapsed: %fs Archived website id 1, period = day, date = 2019-12-02, segment = 'actions%3E%3D2', 0 visits found. Time elapsed: %fs -Processing invalidation: [idinvalidation = 23, idsite = 1, period = week(2019-12-02 - 2019-12-08), name = donee0512c03f7c20af6ef96a8d792c6bb9f]. +Processing invalidation: [idinvalidation = 23, idsite = 1, period = week(2019-12-02 - 2019-12-08), name = donee0512c03f7c20af6ef96a8d792c6bb9f, segment = actions>=2]. No next invalidated archive. Starting archiving for ?module=API&method=CoreAdminHome.archiveReports&idSite=1&period=week&date=2019-12-02&format=json&segment=actions%3E%3D2&trigger=archivephp Archived website id 1, period = week, date = 2019-12-02, segment = 'actions%3E%3D2', 0 visits found. Time elapsed: %fs -Processing invalidation: [idinvalidation = 7, idsite = 1, period = month(2019-12-01 - 2019-12-31), name = donee0512c03f7c20af6ef96a8d792c6bb9f]. +Processing invalidation: [idinvalidation = 7, idsite = 1, period = month(2019-12-01 - 2019-12-31), name = donee0512c03f7c20af6ef96a8d792c6bb9f, segment = actions>=2]. No next invalidated archive. Starting archiving for ?module=API&method=CoreAdminHome.archiveReports&idSite=1&period=month&date=2019-12-01&format=json&segment=actions%3E%3D2&trigger=archivephp Archived website id 1, period = month, date = 2019-12-01, segment = 'actions%3E%3D2', 0 visits found. Time elapsed: %fs -Processing invalidation: [idinvalidation = 8, idsite = 1, period = year(2019-01-01 - 2019-12-31), name = donee0512c03f7c20af6ef96a8d792c6bb9f]. +Processing invalidation: [idinvalidation = 8, idsite = 1, period = year(2019-01-01 - 2019-12-31), name = donee0512c03f7c20af6ef96a8d792c6bb9f, segment = actions>=2]. No next invalidated archive. Starting archiving for ?module=API&method=CoreAdminHome.archiveReports&idSite=1&period=year&date=2019-01-01&format=json&segment=actions%3E%3D2&trigger=archivephp Archived website id 1, period = year, date = 2019-01-01, segment = 'actions%3E%3D2', 0 visits found. Time elapsed: %fs @@ -767,6 +750,7 @@ LOG; $output = array_filter($output, function ($l) { return strpos($l, 'Found archive with intersecting period') === false; }); $output = array_filter($output, function ($l) { return strpos($l, 'Found duplicate invalidated archive') === false; }); $output = array_filter($output, function ($l) { return strpos($l, 'No usable archive exists') === false; }); + $output = array_filter($output, function ($l) { return strpos($l, 'Found invalidated archive we can skip (no visits)') === false; }); $output = implode("\n", $output); return $output; } diff --git a/tests/PHPUnit/Unit/CronArchive/QueueConsumerTest.php b/tests/PHPUnit/Unit/CronArchive/QueueConsumerTest.php new file mode 100644 index 0000000000..f8353ae554 --- /dev/null +++ b/tests/PHPUnit/Unit/CronArchive/QueueConsumerTest.php @@ -0,0 +1,95 @@ +<?php +/** + * Matomo - free/libre analytics platform + * + * @link https://matomo.org + * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later + * + */ + +namespace PHPUnit\Unit\CronArchive; + + +use PHPUnit\Framework\TestCase; +use Piwik\CronArchive\QueueConsumer; +use Piwik\Period\Factory; +use Piwik\Piwik; + +class QueueConsumerTest extends TestCase +{ + /** + * @dataProvider getTestDataForHasIntersectingPeriod + */ + public function test_hasIntersectingPeriod($archivesToProcess, $invalidatedArchive, $expected) + { + $periods = array_flip(Piwik::$idPeriods); + foreach ($archivesToProcess as &$archive) { + $periodLabel = $periods[$archive['period']]; + $archive['periodObj'] = Factory::build($periodLabel, $archive['date1']); + } + + $periodLabel = $periods[$invalidatedArchive['period']]; + $invalidatedArchive['periodObj'] = Factory::build($periodLabel, $invalidatedArchive['date1']); + + $actual = QueueConsumer::hasIntersectingPeriod($archivesToProcess, $invalidatedArchive); + $this->assertEquals($expected, $actual); + } + + public function getTestDataForHasIntersectingPeriod() + { + return [ + // no intersecting periods + [ + [ + ['period' => 1, 'date1' => '2020-03-04', 'date2' => '2020-03-04'], + ['period' => 3, 'date1' => '2020-04-01', 'date2' => '2020-04-30'], + ['period' => 1, 'date1' => '2020-03-15', 'date2' => '2020-03-15', 'segment' => 'pageUrl==abc'], + ], + ['period' => 1, 'date1' => '2020-03-05', 'date2' => '2020-03-05'], + false, + ], + + // intersecting periods + [ + [ + ['period' => 1, 'date1' => '2020-03-04', 'date2' => '2020-03-04'], + ['period' => 3, 'date1' => '2020-04-01', 'date2' => '2020-04-30'], + ['period' => 1, 'date1' => '2020-03-15', 'date2' => '2020-03-15', 'segment' => 'pageUrl==abc'], + ], + ['period' => 2, 'date1' => '2020-03-02', 'date2' => '2020-03-08'], + true, + ], + + // all same period, different segments + [ + [ + ['period' => 1, 'date1' => '2020-03-15', 'date2' => '2020-03-15', 'segment' => 'pageUrl==def'], + ['period' => 1, 'date1' => '2020-03-15', 'date2' => '2020-03-15', 'segment' => 'pageUrl==ghi'], + ['period' => 1, 'date1' => '2020-03-15', 'date2' => '2020-03-15', 'segment' => 'pageUrl==abc'], + ], + ['period' => 1, 'date1' => '2020-03-15', 'date2' => '2020-03-15', 'segment' => 'pageUrl==lmn'], + false, + ], + + // all same period, all visits in one + [ + [ + ['period' => 1, 'date1' => '2020-03-15', 'date2' => '2020-03-15', 'segment' => ''], + ], + ['period' => 1, 'date1' => '2020-03-15', 'date2' => '2020-03-15', 'segment' => 'pageUrl==lmn'], + true, + ], + + // all same period, different segments, all visits in next + [ + [ + ['period' => 1, 'date1' => '2020-03-15', 'date2' => '2020-03-15', 'segment' => 'pageUrl==def'], + ['period' => 1, 'date1' => '2020-03-15', 'date2' => '2020-03-15', 'segment' => 'pageUrl==ghi'], + ['period' => 1, 'date1' => '2020-03-15', 'date2' => '2020-03-15', 'segment' => 'pageUrl==abc'], + ], + ['period' => 1, 'date1' => '2020-03-15', 'date2' => '2020-03-15'], + true, + ], + ]; + } +}
\ No newline at end of file |