diff options
author | dizzy <diosmosis@users.noreply.github.com> | 2021-02-05 01:22:11 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-02-05 01:22:11 +0300 |
commit | 490dc3a1a09e4263b4893cbbbf600ba505d9add4 (patch) | |
tree | 2fca42614fec12907ecac1ae217e1305baa4144a /core | |
parent | bc53bc36ebebfe50d83d6ce381aa8bb6cd2cb15c (diff) |
Refactor segment re-archiving in past behavior to be on demand (#17005)
* Invalidate past archives on demand when adding/updating segments, rather than trying to check when running core:archive
* start on rewriting test
* rewrite SegmentArchivingTest and get to pass
* get sites as superuser
* add update to rearchive segments if they were created/updated between last archive time and update time
* remove unused parameter
* fix build
* fix tests
* sanity check
* fix bug, we should not forget archives to invalidate unless all related archives are being invalidated
* fix tests and make fix more complete
* fix test
* update counts in test
* fix test for last time hopefully
* fix another test
* remove debugging code
Diffstat (limited to 'core')
-rw-r--r-- | core/Archive/ArchiveInvalidator.php | 39 | ||||
-rw-r--r-- | core/CronArchive.php | 34 | ||||
-rw-r--r-- | core/CronArchive/SegmentArchiving.php | 173 | ||||
-rw-r--r-- | core/Updates/4.1.2-b1.php | 32 |
4 files changed, 99 insertions, 179 deletions
diff --git a/core/Archive/ArchiveInvalidator.php b/core/Archive/ArchiveInvalidator.php index 2301c3ed97..f0aa119385 100644 --- a/core/Archive/ArchiveInvalidator.php +++ b/core/Archive/ArchiveInvalidator.php @@ -12,6 +12,7 @@ namespace Piwik\Archive; use Piwik\Archive\ArchiveInvalidator\InvalidationResult; use Piwik\ArchiveProcessor\ArchivingStatus; use Piwik\ArchiveProcessor\Loader; +use Piwik\ArchiveProcessor\Rules; use Piwik\Config; use Piwik\Container\StaticContainer; use Piwik\CronArchive\ReArchiveList; @@ -332,7 +333,11 @@ class ArchiveInvalidator Loader::invalidateMinVisitTimeCache($idSite); } - if ($period != 'range') { + $isInvalidatingDays = $period == 'day' || $cascadeDown || empty($period); + $isNotInvalidatingSegment = empty($segment) || empty($segment->getString()); + if ($isInvalidatingDays + && $isNotInvalidatingSegment + ) { foreach ($idSites as $idSite) { foreach ($dates as $date) { if (is_string($date)) { @@ -468,7 +473,7 @@ class ArchiveInvalidator * @throws \Exception * @api */ - public function reArchiveReport($idSites, string $plugin, string $report = null, Date $startDate = null) + public function reArchiveReport($idSites, string $plugin = null, string $report = null, Date $startDate = null, Segment $segment = null) { $date2 = Date::yesterday(); @@ -500,23 +505,13 @@ class ArchiveInvalidator $name .= '.' . $report; } - $this->markArchivesAsInvalidated($idSites, $dates, 'day', null, $cascadeDown = false, $forceInvalidateRanges = false, $name); - foreach ($idSites as $idSite) { - $segmentDatesToInvalidate = $this->getSegmentArchiving()->getSegmentArchivesToInvalidate($idSite); - foreach ($segmentDatesToInvalidate as $info) { - $latestDate = Date::factory($info['date']); - $latestDate = $latestDate->isEarlier($startDate) ? 
$startDate : $latestDate; - - $datesToInvalidateForSegment = []; - - $date = $latestDate; - while ($date->isEarlier($date2)) { - $datesToInvalidateForSegment[] = $date; - $date = $date->addDay(1); + $this->markArchivesAsInvalidated($idSites, $dates, 'day', $segment, $cascadeDown = false, $forceInvalidateRanges = false, $name); + if (empty($segment)) { + foreach ($idSites as $idSite) { + foreach (Rules::getSegmentsToProcess([$idSite]) as $segment) { + $this->markArchivesAsInvalidated($idSites, $dates, 'day', new Segment($segment, [$idSite]), + $cascadeDown = false, $forceInvalidateRanges = false, $name); } - - $this->markArchivesAsInvalidated($idSites, $datesToInvalidateForSegment, 'day', new Segment($info['segment'], [$idSite]), - $cascadeDown = false, $forceInvalidateRanges = false, $name); } } } @@ -548,7 +543,8 @@ class ArchiveInvalidator * @param string|null $report * @param Date|null $startDate */ - public function scheduleReArchiving($idSites, string $pluginName, $report = null, Date $startDate = null) + public function scheduleReArchiving($idSites, string $pluginName = null, $report = null, Date $startDate = null, + Segment $segment = null) { if (!empty($report)) { $this->removeInvalidationsSafely($idSites, $pluginName, $report); @@ -560,6 +556,7 @@ class ArchiveInvalidator 'pluginName' => $pluginName, 'report' => $report, 'startDate' => $startDate ? $startDate->getTimestamp() : null, + 'segment' => $segment ? $segment->getString() : null, ])); } catch (\Throwable $ex) { $this->logger->info("Failed to schedule rearchiving of past reports for $pluginName plugin."); @@ -581,11 +578,13 @@ class ArchiveInvalidator continue; } + $idSites = Site::getIdSitesFromIdSitesString($entry['idSites']); $this->reArchiveReport( $entry['idSites'], $entry['pluginName'], $entry['report'], - !empty($entry['startDate']) ? Date::factory((int) $entry['startDate']) : null + !empty($entry['startDate']) ? Date::factory((int) $entry['startDate']) : null, + !empty($entry['segment']) ? 
new Segment($entry['segment'], $idSites) : null ); } catch (\Throwable $ex) { $this->logger->info("Failed to create invalidations for report re-archiving (idSites = {idSites}, pluginName = {pluginName}, report = {report}, startDate = {startDateTs}): {ex}", [ diff --git a/core/CronArchive.php b/core/CronArchive.php index 067b85e6db..8d634000ba 100644 --- a/core/CronArchive.php +++ b/core/CronArchive.php @@ -199,8 +199,6 @@ class CronArchive */ private $periodIdsToLabels; - private $processNewSegmentsFrom; - /** * @var ArchiveFilter */ @@ -219,17 +217,13 @@ class CronArchive /** * Constructor. * - * @param string|null $processNewSegmentsFrom When to archive new segments from. See [General] process_new_segments_from - * for possible values. * @param LoggerInterface|null $logger */ - public function __construct($processNewSegmentsFrom = null, LoggerInterface $logger = null) + public function __construct(LoggerInterface $logger = null) { $this->logger = $logger ?: StaticContainer::get('Psr\Log\LoggerInterface'); $this->formatter = new Formatter(); - $this->processNewSegmentsFrom = $processNewSegmentsFrom ?: StaticContainer::get('ini.General.process_new_segments_from'); - $this->invalidator = StaticContainer::get('Piwik\Archive\ArchiveInvalidator'); $this->isArchiveProfilingEnabled = Config::getInstance()->Debug['archiving_profile'] == 1; @@ -276,7 +270,7 @@ class CronArchive public function init() { - $this->segmentArchiving = new SegmentArchiving($this->processNewSegmentsFrom, $this->dateLastForced); + $this->segmentArchiving = StaticContainer::get(SegmentArchiving::class); /** * This event is triggered during initializing archiving. @@ -567,7 +561,7 @@ class CronArchive $visits = (int) $visits; $this->logger->info("Archived website id {$params['idSite']}, period = {$params['period']}, date = " - . "{$params['date']}, segment = '" . (isset($params['segment']) ? urldecode($params['segment']) : '') . "', " + . "{$params['date']}, segment = '" . 
(isset($params['segment']) ? urldecode(urldecode($params['segment'])) : '') . "', " . ($plugin ? "plugin = $plugin, " : "") . ($report ? "report = $report, " : "") . "$visits visits found. $timer"); } @@ -778,7 +772,7 @@ class CronArchive { if (empty($this->segmentArchiving)) { // might not be initialised if init is not called - $this->segmentArchiving = new SegmentArchiving($this->processNewSegmentsFrom, $this->dateLastForced); + $this->segmentArchiving = StaticContainer::get(SegmentArchiving::class); } $this->logger->debug("Checking for queued invalidations..."); @@ -838,26 +832,6 @@ class CronArchive $this->invalidateWithSegments($idSiteToInvalidate, $date, 'range', $_forceInvalidateNonexistant = true); } - // for new segments, invalidate past dates - $segmentDatesToInvalidate = $this->segmentArchiving->getSegmentArchivesToInvalidateForNewSegments($idSiteToInvalidate); - - foreach ($segmentDatesToInvalidate as $info) { - $this->logger->info(' Segment "{segment}" was created or changed recently and will therefore archive today (for site ID = {idSite})', [ - 'segment' => $info['segment'], - 'idSite' => $idSiteToInvalidate, - ]); - - $earliestDate = $info['date']; - - $allDates = PeriodFactory::build('range', $earliestDate . 
',today')->getSubperiods(); - $allDates = array_map(function (Period $p) { - return $p->getDateStart()->toString(); - }, $allDates); - $allDates = implode(',', $allDates); - - $this->getApiToInvalidateArchivedReport()->invalidateArchivedReports($idSiteToInvalidate, $allDates, $period = false, $info['segment']); - } - $this->setInvalidationTime(); $this->logger->debug("Done invalidating"); diff --git a/core/CronArchive/SegmentArchiving.php b/core/CronArchive/SegmentArchiving.php index 1e68b3639f..923f3d4004 100644 --- a/core/CronArchive/SegmentArchiving.php +++ b/core/CronArchive/SegmentArchiving.php @@ -9,6 +9,8 @@ namespace Piwik\CronArchive; use Doctrine\Common\Cache\Cache; use Matomo\Cache\Transient; +use Piwik\Access; +use Piwik\Archive\ArchiveInvalidator; use Piwik\ArchiveProcessor\Rules; use Piwik\Common; use Piwik\Container\StaticContainer; @@ -65,11 +67,11 @@ class SegmentArchiving */ private $forceArchiveAllSegments; - public function __construct($processNewSegmentsFrom, $beginningOfTimeLastNInYears = self::DEFAULT_BEGINNING_OF_TIME_LAST_N_YEARS, + public function __construct($beginningOfTimeLastNInYears = self::DEFAULT_BEGINNING_OF_TIME_LAST_N_YEARS, Model $segmentEditorModel = null, Cache $segmentListCache = null, Date $now = null, LoggerInterface $logger = null) { - $this->processNewSegmentsFrom = $processNewSegmentsFrom; + $this->processNewSegmentsFrom = StaticContainer::get('ini.General.process_new_segments_from'); $this->beginningOfTimeLastNInYears = $beginningOfTimeLastNInYears; $this->segmentEditorModel = $segmentEditorModel ?: new Model(); $this->segmentListCache = $segmentListCache ?: new Transient(); @@ -78,48 +80,6 @@ class SegmentArchiving $this->forceArchiveAllSegments = $this->getShouldForceArchiveAllSegments(); } - public function getSegmentArchivesToInvalidateForNewSegments($idSite) - { - return $this->getSegmentArchivesToInvalidate($idSite, true); - } - - public function getSegmentArchivesToInvalidate($idSite, $checkOnlyForNewSegments = 
false) - { - $result = []; - - $segmentsForSite = $this->getAllSegments(); - foreach ($segmentsForSite as $storedSegment) { - if (!$this->isAutoArchivingEnabledFor($storedSegment) - || !$this->isSegmentForSite($storedSegment, $idSite) - ) { - continue; - } - - $oldestDateToProcessForNewSegment = $this->getOldestDateToProcessForNewSegment($idSite, $storedSegment, $checkOnlyForNewSegments); - if (empty($oldestDateToProcessForNewSegment)) { - continue; - } - - $found = false; - foreach ($result as $segment) { - if ($segment['segment'] == $storedSegment['definition']) { - $segment['date'] = $segment['date']->isEarlier($oldestDateToProcessForNewSegment) ? $segment['date'] : $oldestDateToProcessForNewSegment; - - $found = true; - break; - } - } - - if (!$found) { - $result[] = [ - 'date' => $oldestDateToProcessForNewSegment, - 'segment' => $storedSegment['definition'], - ]; - } - } - return $result; - } - public function findSegmentForHash($hash, $idSite) { foreach ($this->getAllSegments() as $segment) { @@ -143,50 +103,27 @@ class SegmentArchiving return null; } - private function getOldestDateToProcessForNewSegment($idSite, $storedSegment, $checkOnlyForNewSegments) + public function getReArchiveSegmentStartDate($segmentInfo) { /** * @var Date $segmentCreatedTime * @var Date $segmentLastEditedTime */ - list($segmentCreatedTime, $segmentLastEditedTime) = $this->getCreatedTimeOfSegment($idSite, $storedSegment); + list($segmentCreatedTime, $segmentLastEditedTime) = $this->getCreatedTimeOfSegment($segmentInfo); if (empty($segmentCreatedTime)) { return null; } - $lastInvalidationTime = CronArchive::getLastInvalidationTime(); - if (!empty($lastInvalidationTime)) { - $lastInvalidationTime = Date::factory((int) $lastInvalidationTime); - } - - $segmentTimeToUse = $segmentLastEditedTime ?: $segmentCreatedTime; - if ($checkOnlyForNewSegments) { - if (!empty($lastInvalidationTime) - && !empty($segmentTimeToUse) - && $segmentTimeToUse->isEarlier($lastInvalidationTime) - ) { - return 
null; // has already have been invalidated, ignore - } - } - - if ($this->processNewSegmentsFrom == self::CREATION_TIME) { + if ($this->processNewSegmentsFrom == SegmentArchiving::CREATION_TIME) { $this->logger->debug("process_new_segments_from set to segment_creation_time, oldest date to process is {time}", array('time' => $segmentCreatedTime)); return $segmentCreatedTime; - } elseif ($this->processNewSegmentsFrom == self::LAST_EDIT_TIME) { + } else if ($this->processNewSegmentsFrom == SegmentArchiving::LAST_EDIT_TIME) { $this->logger->debug("process_new_segments_from set to segment_last_edit_time, segment last edit time is {time}", array('time' => $segmentLastEditedTime)); - if ($segmentLastEditedTime === null - || $segmentLastEditedTime->getTimestamp() < $segmentCreatedTime->getTimestamp() - ) { - $this->logger->debug("segment last edit time is older than created time, using created time instead"); - - $segmentLastEditedTime = $segmentCreatedTime; - } - return $segmentLastEditedTime; - } elseif (preg_match("/^last([0-9]+)$/", $this->processNewSegmentsFrom, $matches)) { + } else if (preg_match("/^last([0-9]+)$/", $this->processNewSegmentsFrom, $matches)) { $lastN = $matches[1]; list($lastDate, $lastPeriod) = Range::getDateXPeriodsAgo($lastN, $segmentCreatedTime, 'day'); @@ -198,11 +135,15 @@ class SegmentArchiving } else { $this->logger->debug("process_new_segments_from set to beginning_of_time or cannot recognize value"); - $siteCreationDate = Date::factory(Site::getCreationDateFor($idSite)); - $result = Date::factory('today')->subYear($this->beginningOfTimeLastNInYears); - if ($result->isEarlier($siteCreationDate)) { - $result = $siteCreationDate; + + $idSite = $segmentInfo['enable_only_idsite'] ?? 
null; + if (!empty($idSite)) { + $siteCreationDate = Date::factory(Site::getCreationDateFor($idSite)); + + if ($result->isEarlier($siteCreationDate)) { + $result = $siteCreationDate; + } } $earliestVisitTime = $this->getEarliestVisitTimeFor($idSite); @@ -216,6 +157,22 @@ class SegmentArchiving } } + private function getCreatedTimeOfSegment($storedSegment) + { + // check for an earlier ts_created timestamp + $createdTime = empty($storedSegment['ts_created']) ? null : Date::factory($storedSegment['ts_created']); + + // if there is no ts_last_edit timestamp, initialize it to ts_created + if (empty($storedSegment['ts_last_edit'])) { + $storedSegment['ts_last_edit'] = empty($storedSegment['ts_created']) ? null : $storedSegment['ts_created']; + } + + // check for a later ts_last_edit timestamp + $lastEditTime = empty($storedSegment['ts_last_edit']) ? null : Date::factory($storedSegment['ts_last_edit']); + + return array($createdTime, $lastEditTime); + } + private function getEarliestVisitTimeFor($idSite) { $earliestIdVisit = Db::fetchOne('SELECT idvisit FROM ' . 
Common::prefixTable('log_visit') @@ -232,52 +189,6 @@ class SegmentArchiving return Date::factory($earliestStartTime); } - private function getCreatedTimeOfSegment($idSite, $storedSegment) - { - /** @var Date $latestEditTime */ - $latestEditTime = null; - $earliestCreatedTime = $this->now; - if (empty($storedSegment['ts_created']) - || empty($storedSegment['definition']) - || !isset($storedSegment['enable_only_idsite']) - || !$this->isSegmentForSite($storedSegment, $idSite) - ) { - return [null, null]; - } - - // check for an earlier ts_created timestamp - $createdTime = Date::factory($storedSegment['ts_created']); - if ($createdTime->getTimestamp() < $earliestCreatedTime->getTimestamp()) { - $earliestCreatedTime = $createdTime; - } - - // if there is no ts_last_edit timestamp, initialize it to ts_created - if (empty($storedSegment['ts_last_edit'])) { - $storedSegment['ts_last_edit'] = $storedSegment['ts_created']; - } - - // check for a later ts_last_edit timestamp - $lastEditTime = Date::factory($storedSegment['ts_last_edit']); - if ($latestEditTime === null - || $latestEditTime->getTimestamp() < $lastEditTime->getTimestamp() - ) { - $latestEditTime = $lastEditTime; - } - - $this->logger->debug( - "Earliest created time of segment '{segment}' w/ idSite = {idSite} is found to be {createdTime}. Latest " . 
- "edit time is found to be {latestEditTime}.", - array( - 'segment' => $storedSegment['definition'], - 'idSite' => $idSite, - 'createdTime' => $earliestCreatedTime, - 'latestEditTime' => $latestEditTime, - ) - ); - - return array($earliestCreatedTime, $latestEditTime); - } - public function getAllSegments() { if (!$this->segmentListCache->contains('all')) { @@ -309,4 +220,22 @@ class SegmentArchiving { return !Rules::isBrowserTriggerEnabled() && !Rules::isBrowserArchivingAvailableForSegments(); } + + public function reArchiveSegment($segmentInfo) + { + if (empty($segmentInfo['definition'])) { // sanity check + return; + } + + $definition = $segmentInfo['definition']; + $idSite = $segmentInfo['enable_only_idsite'] ?? 'all'; + + $idSites = Access::doAsSuperUser(function () use ($idSite) { + return Site::getIdSitesFromIdSitesString($idSite); + }); + $startDate = $this->getReArchiveSegmentStartDate($segmentInfo); + + $invalidator = StaticContainer::get(ArchiveInvalidator::class); + $invalidator->scheduleReArchiving($idSites, null, null, $startDate, new Segment($definition, $idSites)); + } } diff --git a/core/Updates/4.1.2-b1.php b/core/Updates/4.1.2-b1.php index fbd9d52fd8..8093bb9dcd 100644 --- a/core/Updates/4.1.2-b1.php +++ b/core/Updates/4.1.2-b1.php @@ -9,10 +9,12 @@ namespace Piwik\Updates; -use Piwik\Archive\ArchiveInvalidator; -use Piwik\ArchiveProcessor\Rules; use Piwik\Container\StaticContainer; +use Piwik\CronArchive; use Piwik\Date; +use Piwik\Plugins\SegmentEditor\API; +use Piwik\Archive\ArchiveInvalidator; +use Piwik\ArchiveProcessor\Rules; use Piwik\Updater; use Piwik\Updates as PiwikUpdates; use Piwik\Updater\Migration\Factory as MigrationFactory; @@ -29,6 +31,11 @@ class Updates_4_1_2_b1 extends PiwikUpdates $this->migration = $factory; } + public function doUpdate(Updater $updater) + { + $updater->executeMigrations(__FILE__, $this->getMigrations($updater)); + } + public function getMigrations(Updater $updater) { $migrations = []; @@ -44,12 +51,23 @@ 
class Updates_4_1_2_b1 extends PiwikUpdates }, $cmdStr); } - return $migrations; - } + $migrations[] = new Updater\Migration\Custom(function () { + $segmentArchiving = StaticContainer::get(CronArchive\SegmentArchiving::class); + $timeOfLastInvalidateTime = CronArchive::getLastInvalidationTime(); - public function doUpdate(Updater $updater) - { - $updater->executeMigrations(__FILE__, $this->getMigrations($updater)); + $segments = API::getInstance()->getAll(); + foreach ($segments as $segment) { + $tsCreated = !empty($segment['ts_created']) ? Date::factory($segment['ts_created'])->getTimestamp() : 0; + $tsLastEdit = !empty($segment['ts_last_edit']) ? Date::factory($segment['ts_last_edit'])->getTimestamp() : null; + $timeToUse = max($tsCreated, $tsLastEdit); + + if ($timeToUse > $timeOfLastInvalidateTime) { + $segmentArchiving->reArchiveSegment($segment); + } + } + }, ''); + + return $migrations; } private function getInvalidateCommand(Date $dateOfMatomo4Release) |