Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/matomo-org/matomo.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/core
diff options
context:
space:
mode:
author: dizzy <diosmosis@users.noreply.github.com> 2021-02-05 01:22:11 +0300
committer: GitHub <noreply@github.com> 2021-02-05 01:22:11 +0300
commit: 490dc3a1a09e4263b4893cbbbf600ba505d9add4 (patch)
tree: 2fca42614fec12907ecac1ae217e1305baa4144a /core
parent: bc53bc36ebebfe50d83d6ce381aa8bb6cd2cb15c (diff)
Refactor segment re-archiving in past behavior to be on demand (#17005)
* Invalidate past archives on demand when adding/updating segments, rather than trying to check when running core:archive
* start on rewriting test
* rewrite SegmentArchivingTest and get to pass
* get sites as superuser
* add update to rearchive segments if they were created/updated between last archive time and update time
* remove unused parameter
* fix build
* fix tests
* sanity check
* fix bug, we should not forget archives to invalidate unless all related archives are being invalidated
* fix tests and make fix more complete
* fix test
* update counts in test
* fix test for last time hopefully
* fix another test
* remove debugging code
Diffstat (limited to 'core')
-rw-r--r-- core/Archive/ArchiveInvalidator.php 39
-rw-r--r-- core/CronArchive.php 34
-rw-r--r-- core/CronArchive/SegmentArchiving.php 173
-rw-r--r-- core/Updates/4.1.2-b1.php 32
4 files changed, 99 insertions, 179 deletions
diff --git a/core/Archive/ArchiveInvalidator.php b/core/Archive/ArchiveInvalidator.php
index 2301c3ed97..f0aa119385 100644
--- a/core/Archive/ArchiveInvalidator.php
+++ b/core/Archive/ArchiveInvalidator.php
@@ -12,6 +12,7 @@ namespace Piwik\Archive;
use Piwik\Archive\ArchiveInvalidator\InvalidationResult;
use Piwik\ArchiveProcessor\ArchivingStatus;
use Piwik\ArchiveProcessor\Loader;
+use Piwik\ArchiveProcessor\Rules;
use Piwik\Config;
use Piwik\Container\StaticContainer;
use Piwik\CronArchive\ReArchiveList;
@@ -332,7 +333,11 @@ class ArchiveInvalidator
Loader::invalidateMinVisitTimeCache($idSite);
}
- if ($period != 'range') {
+ $isInvalidatingDays = $period == 'day' || $cascadeDown || empty($period);
+ $isNotInvalidatingSegment = empty($segment) || empty($segment->getString());
+ if ($isInvalidatingDays
+ && $isNotInvalidatingSegment
+ ) {
foreach ($idSites as $idSite) {
foreach ($dates as $date) {
if (is_string($date)) {
@@ -468,7 +473,7 @@ class ArchiveInvalidator
* @throws \Exception
* @api
*/
- public function reArchiveReport($idSites, string $plugin, string $report = null, Date $startDate = null)
+ public function reArchiveReport($idSites, string $plugin = null, string $report = null, Date $startDate = null, Segment $segment = null)
{
$date2 = Date::yesterday();
@@ -500,23 +505,13 @@ class ArchiveInvalidator
$name .= '.' . $report;
}
- $this->markArchivesAsInvalidated($idSites, $dates, 'day', null, $cascadeDown = false, $forceInvalidateRanges = false, $name);
- foreach ($idSites as $idSite) {
- $segmentDatesToInvalidate = $this->getSegmentArchiving()->getSegmentArchivesToInvalidate($idSite);
- foreach ($segmentDatesToInvalidate as $info) {
- $latestDate = Date::factory($info['date']);
- $latestDate = $latestDate->isEarlier($startDate) ? $startDate : $latestDate;
-
- $datesToInvalidateForSegment = [];
-
- $date = $latestDate;
- while ($date->isEarlier($date2)) {
- $datesToInvalidateForSegment[] = $date;
- $date = $date->addDay(1);
+ $this->markArchivesAsInvalidated($idSites, $dates, 'day', $segment, $cascadeDown = false, $forceInvalidateRanges = false, $name);
+ if (empty($segment)) {
+ foreach ($idSites as $idSite) {
+ foreach (Rules::getSegmentsToProcess([$idSite]) as $segment) {
+ $this->markArchivesAsInvalidated($idSites, $dates, 'day', new Segment($segment, [$idSite]),
+ $cascadeDown = false, $forceInvalidateRanges = false, $name);
}
-
- $this->markArchivesAsInvalidated($idSites, $datesToInvalidateForSegment, 'day', new Segment($info['segment'], [$idSite]),
- $cascadeDown = false, $forceInvalidateRanges = false, $name);
}
}
}
@@ -548,7 +543,8 @@ class ArchiveInvalidator
* @param string|null $report
* @param Date|null $startDate
*/
- public function scheduleReArchiving($idSites, string $pluginName, $report = null, Date $startDate = null)
+ public function scheduleReArchiving($idSites, string $pluginName = null, $report = null, Date $startDate = null,
+ Segment $segment = null)
{
if (!empty($report)) {
$this->removeInvalidationsSafely($idSites, $pluginName, $report);
@@ -560,6 +556,7 @@ class ArchiveInvalidator
'pluginName' => $pluginName,
'report' => $report,
'startDate' => $startDate ? $startDate->getTimestamp() : null,
+ 'segment' => $segment ? $segment->getString() : null,
]));
} catch (\Throwable $ex) {
$this->logger->info("Failed to schedule rearchiving of past reports for $pluginName plugin.");
@@ -581,11 +578,13 @@ class ArchiveInvalidator
continue;
}
+ $idSites = Site::getIdSitesFromIdSitesString($entry['idSites']);
$this->reArchiveReport(
$entry['idSites'],
$entry['pluginName'],
$entry['report'],
- !empty($entry['startDate']) ? Date::factory((int) $entry['startDate']) : null
+ !empty($entry['startDate']) ? Date::factory((int) $entry['startDate']) : null,
+ !empty($entry['segment']) ? new Segment($entry['segment'], $idSites) : null
);
} catch (\Throwable $ex) {
$this->logger->info("Failed to create invalidations for report re-archiving (idSites = {idSites}, pluginName = {pluginName}, report = {report}, startDate = {startDateTs}): {ex}", [
diff --git a/core/CronArchive.php b/core/CronArchive.php
index 067b85e6db..8d634000ba 100644
--- a/core/CronArchive.php
+++ b/core/CronArchive.php
@@ -199,8 +199,6 @@ class CronArchive
*/
private $periodIdsToLabels;
- private $processNewSegmentsFrom;
-
/**
* @var ArchiveFilter
*/
@@ -219,17 +217,13 @@ class CronArchive
/**
* Constructor.
*
- * @param string|null $processNewSegmentsFrom When to archive new segments from. See [General] process_new_segments_from
- * for possible values.
* @param LoggerInterface|null $logger
*/
- public function __construct($processNewSegmentsFrom = null, LoggerInterface $logger = null)
+ public function __construct(LoggerInterface $logger = null)
{
$this->logger = $logger ?: StaticContainer::get('Psr\Log\LoggerInterface');
$this->formatter = new Formatter();
- $this->processNewSegmentsFrom = $processNewSegmentsFrom ?: StaticContainer::get('ini.General.process_new_segments_from');
-
$this->invalidator = StaticContainer::get('Piwik\Archive\ArchiveInvalidator');
$this->isArchiveProfilingEnabled = Config::getInstance()->Debug['archiving_profile'] == 1;
@@ -276,7 +270,7 @@ class CronArchive
public function init()
{
- $this->segmentArchiving = new SegmentArchiving($this->processNewSegmentsFrom, $this->dateLastForced);
+ $this->segmentArchiving = StaticContainer::get(SegmentArchiving::class);
/**
* This event is triggered during initializing archiving.
@@ -567,7 +561,7 @@ class CronArchive
$visits = (int) $visits;
$this->logger->info("Archived website id {$params['idSite']}, period = {$params['period']}, date = "
- . "{$params['date']}, segment = '" . (isset($params['segment']) ? urldecode($params['segment']) : '') . "', "
+ . "{$params['date']}, segment = '" . (isset($params['segment']) ? urldecode(urldecode($params['segment'])) : '') . "', "
. ($plugin ? "plugin = $plugin, " : "") . ($report ? "report = $report, " : "") . "$visits visits found. $timer");
}
@@ -778,7 +772,7 @@ class CronArchive
{
if (empty($this->segmentArchiving)) {
// might not be initialised if init is not called
- $this->segmentArchiving = new SegmentArchiving($this->processNewSegmentsFrom, $this->dateLastForced);
+ $this->segmentArchiving = StaticContainer::get(SegmentArchiving::class);
}
$this->logger->debug("Checking for queued invalidations...");
@@ -838,26 +832,6 @@ class CronArchive
$this->invalidateWithSegments($idSiteToInvalidate, $date, 'range', $_forceInvalidateNonexistant = true);
}
- // for new segments, invalidate past dates
- $segmentDatesToInvalidate = $this->segmentArchiving->getSegmentArchivesToInvalidateForNewSegments($idSiteToInvalidate);
-
- foreach ($segmentDatesToInvalidate as $info) {
- $this->logger->info(' Segment "{segment}" was created or changed recently and will therefore archive today (for site ID = {idSite})', [
- 'segment' => $info['segment'],
- 'idSite' => $idSiteToInvalidate,
- ]);
-
- $earliestDate = $info['date'];
-
- $allDates = PeriodFactory::build('range', $earliestDate . ',today')->getSubperiods();
- $allDates = array_map(function (Period $p) {
- return $p->getDateStart()->toString();
- }, $allDates);
- $allDates = implode(',', $allDates);
-
- $this->getApiToInvalidateArchivedReport()->invalidateArchivedReports($idSiteToInvalidate, $allDates, $period = false, $info['segment']);
- }
-
$this->setInvalidationTime();
$this->logger->debug("Done invalidating");
diff --git a/core/CronArchive/SegmentArchiving.php b/core/CronArchive/SegmentArchiving.php
index 1e68b3639f..923f3d4004 100644
--- a/core/CronArchive/SegmentArchiving.php
+++ b/core/CronArchive/SegmentArchiving.php
@@ -9,6 +9,8 @@ namespace Piwik\CronArchive;
use Doctrine\Common\Cache\Cache;
use Matomo\Cache\Transient;
+use Piwik\Access;
+use Piwik\Archive\ArchiveInvalidator;
use Piwik\ArchiveProcessor\Rules;
use Piwik\Common;
use Piwik\Container\StaticContainer;
@@ -65,11 +67,11 @@ class SegmentArchiving
*/
private $forceArchiveAllSegments;
- public function __construct($processNewSegmentsFrom, $beginningOfTimeLastNInYears = self::DEFAULT_BEGINNING_OF_TIME_LAST_N_YEARS,
+ public function __construct($beginningOfTimeLastNInYears = self::DEFAULT_BEGINNING_OF_TIME_LAST_N_YEARS,
Model $segmentEditorModel = null, Cache $segmentListCache = null, Date $now = null,
LoggerInterface $logger = null)
{
- $this->processNewSegmentsFrom = $processNewSegmentsFrom;
+ $this->processNewSegmentsFrom = StaticContainer::get('ini.General.process_new_segments_from');
$this->beginningOfTimeLastNInYears = $beginningOfTimeLastNInYears;
$this->segmentEditorModel = $segmentEditorModel ?: new Model();
$this->segmentListCache = $segmentListCache ?: new Transient();
@@ -78,48 +80,6 @@ class SegmentArchiving
$this->forceArchiveAllSegments = $this->getShouldForceArchiveAllSegments();
}
- public function getSegmentArchivesToInvalidateForNewSegments($idSite)
- {
- return $this->getSegmentArchivesToInvalidate($idSite, true);
- }
-
- public function getSegmentArchivesToInvalidate($idSite, $checkOnlyForNewSegments = false)
- {
- $result = [];
-
- $segmentsForSite = $this->getAllSegments();
- foreach ($segmentsForSite as $storedSegment) {
- if (!$this->isAutoArchivingEnabledFor($storedSegment)
- || !$this->isSegmentForSite($storedSegment, $idSite)
- ) {
- continue;
- }
-
- $oldestDateToProcessForNewSegment = $this->getOldestDateToProcessForNewSegment($idSite, $storedSegment, $checkOnlyForNewSegments);
- if (empty($oldestDateToProcessForNewSegment)) {
- continue;
- }
-
- $found = false;
- foreach ($result as $segment) {
- if ($segment['segment'] == $storedSegment['definition']) {
- $segment['date'] = $segment['date']->isEarlier($oldestDateToProcessForNewSegment) ? $segment['date'] : $oldestDateToProcessForNewSegment;
-
- $found = true;
- break;
- }
- }
-
- if (!$found) {
- $result[] = [
- 'date' => $oldestDateToProcessForNewSegment,
- 'segment' => $storedSegment['definition'],
- ];
- }
- }
- return $result;
- }
-
public function findSegmentForHash($hash, $idSite)
{
foreach ($this->getAllSegments() as $segment) {
@@ -143,50 +103,27 @@ class SegmentArchiving
return null;
}
- private function getOldestDateToProcessForNewSegment($idSite, $storedSegment, $checkOnlyForNewSegments)
+ public function getReArchiveSegmentStartDate($segmentInfo)
{
/**
* @var Date $segmentCreatedTime
* @var Date $segmentLastEditedTime
*/
- list($segmentCreatedTime, $segmentLastEditedTime) = $this->getCreatedTimeOfSegment($idSite, $storedSegment);
+ list($segmentCreatedTime, $segmentLastEditedTime) = $this->getCreatedTimeOfSegment($segmentInfo);
if (empty($segmentCreatedTime)) {
return null;
}
- $lastInvalidationTime = CronArchive::getLastInvalidationTime();
- if (!empty($lastInvalidationTime)) {
- $lastInvalidationTime = Date::factory((int) $lastInvalidationTime);
- }
-
- $segmentTimeToUse = $segmentLastEditedTime ?: $segmentCreatedTime;
- if ($checkOnlyForNewSegments) {
- if (!empty($lastInvalidationTime)
- && !empty($segmentTimeToUse)
- && $segmentTimeToUse->isEarlier($lastInvalidationTime)
- ) {
- return null; // has already have been invalidated, ignore
- }
- }
-
- if ($this->processNewSegmentsFrom == self::CREATION_TIME) {
+ if ($this->processNewSegmentsFrom == SegmentArchiving::CREATION_TIME) {
$this->logger->debug("process_new_segments_from set to segment_creation_time, oldest date to process is {time}", array('time' => $segmentCreatedTime));
return $segmentCreatedTime;
- } elseif ($this->processNewSegmentsFrom == self::LAST_EDIT_TIME) {
+ } else if ($this->processNewSegmentsFrom == SegmentArchiving::LAST_EDIT_TIME) {
$this->logger->debug("process_new_segments_from set to segment_last_edit_time, segment last edit time is {time}",
array('time' => $segmentLastEditedTime));
- if ($segmentLastEditedTime === null
- || $segmentLastEditedTime->getTimestamp() < $segmentCreatedTime->getTimestamp()
- ) {
- $this->logger->debug("segment last edit time is older than created time, using created time instead");
-
- $segmentLastEditedTime = $segmentCreatedTime;
- }
-
return $segmentLastEditedTime;
- } elseif (preg_match("/^last([0-9]+)$/", $this->processNewSegmentsFrom, $matches)) {
+ } else if (preg_match("/^last([0-9]+)$/", $this->processNewSegmentsFrom, $matches)) {
$lastN = $matches[1];
list($lastDate, $lastPeriod) = Range::getDateXPeriodsAgo($lastN, $segmentCreatedTime, 'day');
@@ -198,11 +135,15 @@ class SegmentArchiving
} else {
$this->logger->debug("process_new_segments_from set to beginning_of_time or cannot recognize value");
- $siteCreationDate = Date::factory(Site::getCreationDateFor($idSite));
-
$result = Date::factory('today')->subYear($this->beginningOfTimeLastNInYears);
- if ($result->isEarlier($siteCreationDate)) {
- $result = $siteCreationDate;
+
+ $idSite = $segmentInfo['enable_only_idsite'] ?? null;
+ if (!empty($idSite)) {
+ $siteCreationDate = Date::factory(Site::getCreationDateFor($idSite));
+
+ if ($result->isEarlier($siteCreationDate)) {
+ $result = $siteCreationDate;
+ }
}
$earliestVisitTime = $this->getEarliestVisitTimeFor($idSite);
@@ -216,6 +157,22 @@ class SegmentArchiving
}
}
+ private function getCreatedTimeOfSegment($storedSegment)
+ {
+ // check for an earlier ts_created timestamp
+ $createdTime = empty($storedSegment['ts_created']) ? null : Date::factory($storedSegment['ts_created']);
+
+ // if there is no ts_last_edit timestamp, initialize it to ts_created
+ if (empty($storedSegment['ts_last_edit'])) {
+ $storedSegment['ts_last_edit'] = empty($storedSegment['ts_created']) ? null : $storedSegment['ts_created'];
+ }
+
+ // check for a later ts_last_edit timestamp
+ $lastEditTime = empty($storedSegment['ts_last_edit']) ? null : Date::factory($storedSegment['ts_last_edit']);
+
+ return array($createdTime, $lastEditTime);
+ }
+
private function getEarliestVisitTimeFor($idSite)
{
$earliestIdVisit = Db::fetchOne('SELECT idvisit FROM ' . Common::prefixTable('log_visit')
@@ -232,52 +189,6 @@ class SegmentArchiving
return Date::factory($earliestStartTime);
}
- private function getCreatedTimeOfSegment($idSite, $storedSegment)
- {
- /** @var Date $latestEditTime */
- $latestEditTime = null;
- $earliestCreatedTime = $this->now;
- if (empty($storedSegment['ts_created'])
- || empty($storedSegment['definition'])
- || !isset($storedSegment['enable_only_idsite'])
- || !$this->isSegmentForSite($storedSegment, $idSite)
- ) {
- return [null, null];
- }
-
- // check for an earlier ts_created timestamp
- $createdTime = Date::factory($storedSegment['ts_created']);
- if ($createdTime->getTimestamp() < $earliestCreatedTime->getTimestamp()) {
- $earliestCreatedTime = $createdTime;
- }
-
- // if there is no ts_last_edit timestamp, initialize it to ts_created
- if (empty($storedSegment['ts_last_edit'])) {
- $storedSegment['ts_last_edit'] = $storedSegment['ts_created'];
- }
-
- // check for a later ts_last_edit timestamp
- $lastEditTime = Date::factory($storedSegment['ts_last_edit']);
- if ($latestEditTime === null
- || $latestEditTime->getTimestamp() < $lastEditTime->getTimestamp()
- ) {
- $latestEditTime = $lastEditTime;
- }
-
- $this->logger->debug(
- "Earliest created time of segment '{segment}' w/ idSite = {idSite} is found to be {createdTime}. Latest " .
- "edit time is found to be {latestEditTime}.",
- array(
- 'segment' => $storedSegment['definition'],
- 'idSite' => $idSite,
- 'createdTime' => $earliestCreatedTime,
- 'latestEditTime' => $latestEditTime,
- )
- );
-
- return array($earliestCreatedTime, $latestEditTime);
- }
-
public function getAllSegments()
{
if (!$this->segmentListCache->contains('all')) {
@@ -309,4 +220,22 @@ class SegmentArchiving
{
return !Rules::isBrowserTriggerEnabled() && !Rules::isBrowserArchivingAvailableForSegments();
}
+
+ public function reArchiveSegment($segmentInfo)
+ {
+ if (empty($segmentInfo['definition'])) { // sanity check
+ return;
+ }
+
+ $definition = $segmentInfo['definition'];
+ $idSite = $segmentInfo['enable_only_idsite'] ?? 'all';
+
+ $idSites = Access::doAsSuperUser(function () use ($idSite) {
+ return Site::getIdSitesFromIdSitesString($idSite);
+ });
+ $startDate = $this->getReArchiveSegmentStartDate($segmentInfo);
+
+ $invalidator = StaticContainer::get(ArchiveInvalidator::class);
+ $invalidator->scheduleReArchiving($idSites, null, null, $startDate, new Segment($definition, $idSites));
+ }
}
diff --git a/core/Updates/4.1.2-b1.php b/core/Updates/4.1.2-b1.php
index fbd9d52fd8..8093bb9dcd 100644
--- a/core/Updates/4.1.2-b1.php
+++ b/core/Updates/4.1.2-b1.php
@@ -9,10 +9,12 @@
namespace Piwik\Updates;
-use Piwik\Archive\ArchiveInvalidator;
-use Piwik\ArchiveProcessor\Rules;
use Piwik\Container\StaticContainer;
+use Piwik\CronArchive;
use Piwik\Date;
+use Piwik\Plugins\SegmentEditor\API;
+use Piwik\Archive\ArchiveInvalidator;
+use Piwik\ArchiveProcessor\Rules;
use Piwik\Updater;
use Piwik\Updates as PiwikUpdates;
use Piwik\Updater\Migration\Factory as MigrationFactory;
@@ -29,6 +31,11 @@ class Updates_4_1_2_b1 extends PiwikUpdates
$this->migration = $factory;
}
+ public function doUpdate(Updater $updater)
+ {
+ $updater->executeMigrations(__FILE__, $this->getMigrations($updater));
+ }
+
public function getMigrations(Updater $updater)
{
$migrations = [];
@@ -44,12 +51,23 @@ class Updates_4_1_2_b1 extends PiwikUpdates
}, $cmdStr);
}
- return $migrations;
- }
+ $migrations[] = new Updater\Migration\Custom(function () {
+ $segmentArchiving = StaticContainer::get(CronArchive\SegmentArchiving::class);
+ $timeOfLastInvalidateTime = CronArchive::getLastInvalidationTime();
- public function doUpdate(Updater $updater)
- {
- $updater->executeMigrations(__FILE__, $this->getMigrations($updater));
+ $segments = API::getInstance()->getAll();
+ foreach ($segments as $segment) {
+ $tsCreated = !empty($segment['ts_created']) ? Date::factory($segment['ts_created'])->getTimestamp() : 0;
+ $tsLastEdit = !empty($segment['ts_last_edit']) ? Date::factory($segment['ts_last_edit'])->getTimestamp() : null;
+ $timeToUse = max($tsCreated, $tsLastEdit);
+
+ if ($timeToUse > $timeOfLastInvalidateTime) {
+ $segmentArchiving->reArchiveSegment($segment);
+ }
+ }
+ }, '');
+
+ return $migrations;
}
private function getInvalidateCommand(Date $dateOfMatomo4Release)