Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/matomo-org/matomo.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: diosmosis <diosmosis@users.noreply.github.com> 2020-05-07 19:11:06 +0300
committer: GitHub <noreply@github.com> 2020-05-07 19:11:06 +0300
commit: 6d744db7f3f7a55fc4bb1ca573e937f0832c642b (patch)
tree: 148d845d4ed521641f66f6bb2640697b14da6b26 /plugins/CoreConsole
parent: d829c23496c9f1c53bd4144009d67f8a287d97e4 (diff)
Rewrite cron archiving process for easier maintenance and performance (#15499)
* Adding initial new code for cron archive rewrite. * first pass at removing unused CronArchive code. * unfinished commit * fill out archiveinvalidator code * getting some tests to pass * unfinished commit * fixing part of test * Another test fix. * another sql change * fix broken merge or something else that went wrong * Couple more fixes and extra logs. * Fixing enough issues to get core archive command to run completely. * Fix and log change. * Fixed more segment/test related issues for CronArchiveTest. Includes optimization for no visits for period + segment process from handling. * another optimization and possible build fix * no visit optimization * test fix * Implement archiving_custom_ranges logic w/ queue based implementation * fixes to get archivecrontest to work * add logic to invalidate today period * fix optimization and some tests * Fixing more tests. * Fixing more tests * debug travis failure * more test fixes * more test fixes, removing more unneeded code, handling some TODOs * Handle more TODOs including creating ArchiveFilter class for some cli options. * tests and todos * idarchives are specific to table + start on archivefilter tests * one test * more TODOs and tests * more tests and todo taken care of * handle more todos * fixing more tests * fix comment * make sure autoarchiving is enabled for segments when cron archive picks them up * Fixing test. * apply more pr feedback * order by date1 asc * quick refactor * use batch insert instead of createDummyArchives * apply rest of pr feedback * add removed events, add new test, fix an issue (when deleting idarchives older than do not lump all segments together). * re-add fixed/shared siteids * fix tests * incomplete commit * Insert archive entries into archive_invalidations table. * Use invalidations table in core:archive and get ArchiveCronTest to pass. * fixing some tests * debugging travis * fix more tests & remove DONE_IN_PROGRESS which is no longer used. 
* fix more tests * apply review feedback * fix tests * try fixing alltests build * try again * try again * Move archive_invalidations update to new beta since omnifixture was updated w/o it. * Another fix.
Diffstat (limited to 'plugins/CoreConsole')
-rw-r--r-- plugins/CoreConsole/Commands/CoreArchiver.php 37
-rw-r--r-- plugins/CoreConsole/tests/System/ArchiveCronTest.php 54
2 files changed, 38 insertions, 53 deletions
diff --git a/plugins/CoreConsole/Commands/CoreArchiver.php b/plugins/CoreConsole/Commands/CoreArchiver.php
index b74e573286..44719e7b64 100644
--- a/plugins/CoreConsole/Commands/CoreArchiver.php
+++ b/plugins/CoreConsole/Commands/CoreArchiver.php
@@ -34,32 +34,28 @@ class CoreArchiver extends ConsoleCommand
$archiver->disableScheduledTasks = $input->getOption('disable-scheduled-tasks');
$archiver->acceptInvalidSSLCertificate = $input->getOption("accept-invalid-ssl-certificate");
- $archiver->shouldArchiveAllSites = (bool) $input->getOption("force-all-websites");
$archiver->shouldStartProfiler = (bool) $input->getOption("xhprof");
$archiver->shouldArchiveSpecifiedSites = self::getSitesListOption($input, "force-idsites");
$archiver->shouldSkipSpecifiedSites = self::getSitesListOption($input, "skip-idsites");
- $archiver->forceTimeoutPeriod = $input->getOption("force-timeout-for-periods");
- $archiver->shouldArchiveAllPeriodsSince = $input->getOption("force-all-periods");
- $archiver->restrictToDateRange = $input->getOption("force-date-range");
$archiver->phpCliConfigurationOptions = $input->getOption("php-cli-options");
-
- $restrictToPeriods = $input->getOption("force-periods");
- $restrictToPeriods = explode(',', $restrictToPeriods);
- $archiver->restrictToPeriods = array_map('trim', $restrictToPeriods);
-
$archiver->dateLastForced = $input->getOption('force-date-last-n');
$archiver->concurrentRequestsPerWebsite = $input->getOption('concurrent-requests-per-website');
$archiver->maxConcurrentArchivers = $input->getOption('concurrent-archivers');
-
- $archiver->disableSegmentsArchiving = $input->getOption('skip-all-segments');
$archiver->skipSegmentsToday = $input->getOption('skip-segments-today');
+ $archiver->shouldArchiveAllSites = $input->getOption('force-all-websites');
+ $archiver->setUrlToPiwik($url);
+
+ $archiveFilter = new CronArchive\ArchiveFilter();
+ $archiveFilter->setDisableSegmentsArchiving($input->getOption('skip-all-segments'));
+ $archiveFilter->setRestrictToDateRange($input->getOption("force-date-range"));
+ $archiveFilter->setRestrictToPeriods($input->getOption("force-periods"));
$segmentIds = $input->getOption('force-idsegments');
$segmentIds = explode(',', $segmentIds);
$segmentIds = array_map('trim', $segmentIds);
- $archiver->setSegmentsToForceFromSegmentIds($segmentIds);
+ $archiveFilter->setSegmentsToForceFromSegmentIds($segmentIds);
- $archiver->setUrlToPiwik($url);
+ $archiver->setArchiveFilter($archiveFilter);
return $archiver;
}
@@ -84,18 +80,6 @@ class CoreArchiver extends ConsoleCommand
"Forces the value of this option to be used as the URL to Piwik. \nIf your system does not support"
. " archiving with CLI processes, you may need to set this in order for the archiving HTTP requests to use"
. " the desired URLs.");
- $command->addOption('force-all-websites', null, InputOption::VALUE_NONE,
- "If specified, the script will trigger archiving on all websites.\nUse with --force-all-periods=[seconds] "
- . "to also process those websites that had visits in the last [seconds] seconds.\nLaunching several processes"
- . " with this option will make them share the list of sites to process.");
- $command->addOption('force-all-periods', null, InputOption::VALUE_OPTIONAL,
- "Limits archiving to websites with some traffic in the last [seconds] seconds. \nFor example "
- . "--force-all-periods=86400 will archive websites that had visits in the last 24 hours. \nIf [seconds] is "
- . "not specified, all websites with visits in the last " . CronArchive::ARCHIVE_SITES_WITH_TRAFFIC_SINCE
- . " seconds (" . round(CronArchive::ARCHIVE_SITES_WITH_TRAFFIC_SINCE / 86400) . " days) will be archived.");
- $command->addOption('force-timeout-for-periods', null, InputOption::VALUE_OPTIONAL,
- "The current week/ current month/ current year will be processed at most every [seconds].\nIf not "
- . "specified, defaults to " . CronArchive::SECONDS_DELAY_BETWEEN_PERIOD_ARCHIVES . ".");
$command->addOption('skip-idsites', null, InputOption::VALUE_OPTIONAL,
'If specified, archiving will be skipped for these websites (in case these website ids would have been archived).');
$command->addOption('skip-all-segments', null, InputOption::VALUE_NONE,
@@ -107,7 +91,7 @@ class CoreArchiver extends ConsoleCommand
$command->addOption('force-periods', null, InputOption::VALUE_OPTIONAL,
"If specified, archiving will be processed only for these Periods (comma separated eg. day,week,month,year,range)");
$command->addOption('force-date-last-n', null, InputOption::VALUE_REQUIRED,
- "This script calls the API with period=lastN. You can force the N in lastN by specifying this value.");
+ "This last N number of years of data to invalidate when a recently created or updated segment is found.", 7);
$command->addOption('force-date-range', null, InputOption::VALUE_OPTIONAL,
"If specified, archiving will be processed only for periods included in this date range. Format: YYYY-MM-DD,YYYY-MM-DD");
$command->addOption('force-idsegments', null, InputOption::VALUE_REQUIRED,
@@ -124,5 +108,6 @@ class CoreArchiver extends ConsoleCommand
"It is _NOT_ recommended to use this argument. Instead, you should use a valid SSL certificate!\nIt can be "
. "useful if you specified --url=https://... or if you are using Piwik with force_ssl=1");
$command->addOption('php-cli-options', null, InputOption::VALUE_OPTIONAL, 'Forwards the PHP configuration options to the PHP CLI command. For example "-d memory_limit=8G". Note: These options are only applied if the archiver actually uses CLI and not HTTP.', $default = '');
+ $command->addOption('force-all-websites', null, InputOption::VALUE_NONE, 'Force archiving all websites.');
}
}
diff --git a/plugins/CoreConsole/tests/System/ArchiveCronTest.php b/plugins/CoreConsole/tests/System/ArchiveCronTest.php
index 412ef94304..e426778427 100644
--- a/plugins/CoreConsole/tests/System/ArchiveCronTest.php
+++ b/plugins/CoreConsole/tests/System/ArchiveCronTest.php
@@ -8,8 +8,12 @@
namespace Piwik\Plugins\CoreConsole\tests\System;
use Interop\Container\ContainerInterface;
+use Piwik\Common;
use Piwik\Config;
+use Piwik\CronArchive;
use Piwik\Date;
+use Piwik\Db;
+use Piwik\Option;
use Piwik\Plugins\SitesManager\API;
use Piwik\Tests\Framework\TestCase\SystemTestCase;
use Piwik\Tests\Fixtures\ManySitesImportedLogs;
@@ -88,9 +92,12 @@ class ArchiveCronTest extends SystemTestCase
public function testArchivePhpCron()
{
- $this->setLastRunArchiveOptions();
$output = $this->runArchivePhpCron();
+ $expectedInvalidations = [];
+ $invalidationEntries = $this->getInvalidatedArchiveTableEntries();
+ $this->assertEquals($expectedInvalidations, $invalidationEntries);
+
$this->compareArchivePhpOutputAgainstExpected($output);
foreach ($this->getApiForTesting() as $testInfo) {
@@ -113,8 +120,6 @@ class ArchiveCronTest extends SystemTestCase
public function testArchivePhpCronArchivesFullRanges()
{
- $this->setLastRunArchiveOptions();
-
self::$fixture->getTestEnvironment()->overrideConfig('General', 'enable_browser_archiving_triggering', 0);
self::$fixture->getTestEnvironment()->overrideConfig('General', 'archiving_range_force_on_browser_request', 0);
self::$fixture->getTestEnvironment()->overrideConfig('General', 'archiving_custom_ranges', ['2012-08-09,2012-08-13']);
@@ -126,6 +131,13 @@ class ArchiveCronTest extends SystemTestCase
$output = $this->runArchivePhpCron(['--force-periods' => 'range', '--force-idsites' => 1]);
+ $expectedInvalidations = [];
+ $invalidationEntries = $this->getInvalidatedArchiveTableEntries();
+ $invalidationEntries = array_filter($invalidationEntries, function ($entry) {
+ return $entry['period'] == 5;
+ });
+ $this->assertEquals($expectedInvalidations, $invalidationEntries);
+
$this->runApiTests(array(
'VisitsSummary.get', 'Actions.get', 'DevicesDetection.getType'),
array('idSite' => '1',
@@ -139,30 +151,11 @@ class ArchiveCronTest extends SystemTestCase
public function test_archivePhpScript_DoesNotFail_WhenCommandHelpRequested()
{
$output = $this->runArchivePhpCron(array('--help' => null), PIWIK_INCLUDE_PATH . '/misc/cron/archive.php');
- $output = implode("\n", $output);
$this->assertRegExp('/Usage:\s*core:archive/', $output);
self::assertStringNotContainsString("Starting Piwik reports archiving...", $output);
}
- private function setLastRunArchiveOptions()
- {
- $periodTypes = array('day', 'periods');
- $idSites = API::getInstance()->getAllSitesId();
-
- $daysAgoArchiveRanSuccessfully = 1500;
- $this->assertTrue($daysAgoArchiveRanSuccessfully > (\Piwik\CronArchive::ARCHIVE_SITES_WITH_TRAFFIC_SINCE / 86400));
- $time = Date::factory(self::$fixture->dateTime)->subDay($daysAgoArchiveRanSuccessfully)->getTimestamp();
-
- foreach ($periodTypes as $period) {
- foreach ($idSites as $idSite) {
- // lastRunKey() function inlined
- $lastRunArchiveOption = "lastRunArchive" . $period . "_" . $idSite;
- \Piwik\Option::set($lastRunArchiveOption, $time);
- }
- }
- }
-
private function runArchivePhpCron($options = array(), $archivePhpScript = false)
{
$archivePhpScript = $archivePhpScript ?: PIWIK_INCLUDE_PATH . '/tests/PHPUnit/proxy/archive.php';
@@ -180,8 +173,10 @@ class ArchiveCronTest extends SystemTestCase
// run the command
exec($cmd, $output, $result);
- if ($result !== 0 || stripos($result, "error")) {
- $this->fail("archive cron failed: " . implode("\n", $output) . "\n\ncommand used: $cmd");
+ $output = implode("\n", $output);
+
+ if ($result !== 0 || strpos($output, "ERROR") || strpos($output, "Error")) {
+ $this->fail("archive cron failed (result = $result): " . $output . "\n\ncommand used: $cmd");
}
return $output;
@@ -189,8 +184,6 @@ class ArchiveCronTest extends SystemTestCase
private function compareArchivePhpOutputAgainstExpected($output)
{
- $output = implode("\n", $output);
-
$fileName = 'test_ArchiveCronTest_archive_php_cron_output.txt';
list($pathProcessed, $pathExpected) = static::getProcessedAndExpectedDirs();
@@ -216,7 +209,9 @@ class ArchiveCronTest extends SystemTestCase
// is a translation token, and nothing else.
'Piwik\Translation\Translator' => function (ContainerInterface $c) {
return new \Piwik\Translation\Translator($c->get('Piwik\Translation\Loader\LoaderInterface'));
- }
+ },
+
+ 'Tests.log.allowAllHandlers' => true,
);
}
@@ -224,6 +219,11 @@ class ArchiveCronTest extends SystemTestCase
{
return dirname(__FILE__);
}
+
+ private function getInvalidatedArchiveTableEntries()
+ {
+ return Db::fetchAll("SELECT idinvalidation, idarchive, idsite, date1, date2, period, name, status FROM " . Common::prefixTable('archive_invalidations'));
+ }
}
ArchiveCronTest::$fixture = new ManySitesImportedLogs();