diff options
author | diosmosis <diosmosis@users.noreply.github.com> | 2020-05-07 19:11:06 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-05-07 19:11:06 +0300 |
commit | 6d744db7f3f7a55fc4bb1ca573e937f0832c642b (patch) | |
tree | 148d845d4ed521641f66f6bb2640697b14da6b26 /plugins/CoreConsole | |
parent | d829c23496c9f1c53bd4144009d67f8a287d97e4 (diff) |
Rewrite cron archiving process for easier maintenance and performance (#15499)
* Adding initial new code for cron archive rewrite.
* first pass at removing unused CronArchive code.
* unfinished commit
* fill out archiveinvalidator code
* getting some tests to pass
* unfinished commit
* fixing part of test
* Another test fix.
* another sql change
* fix broken merge or something else that went wrong
* Couple more fixes and extra logs.
* Fixing enough issues to get core archive command to run completely.
* Fix and log change.
* Fixed more segment/test related issues for CronArchiveTest. Includes an optimization that skips processing a period + segment combination when it has no visits.
* another optimization and possible build fix
* no visit optimization
* test fix
* Implement archiving_custom_ranges logic w/ queue based implementation
* fixes to get archivecrontest to work
* add logic to invalidate today period
* fix optimization and some tests
* Fixing more tests.
* Fixing more tests
* debug travis failure
* more test fixes
* more test fixes, removing more unneeded code, handling some TODOs
* Handle more TODOs including creating ArchiveFilter class for some cli options.
* tests and todos
* idarchives are specific to table + start on archivefilter tests
* one test
* more TODOs and tests
* more tests and todo taken care of
* handle more todos
* fixing more tests
* fix comment
* make sure autoarchiving is enabled for segments when cron archive picks them up
* Fixing test.
* apply more pr feedback
* order by date1 asc
* quick refactor
* use batch insert instead of createDummyArchives
* apply rest of pr feedback
* add removed events, add new test, fix an issue (when deleting older idarchives, do not lump all segments together).
* re-add fixed/shared siteids
* fix tests
* incomplete commit
* Insert archive entries into archive_invalidations table.
* Use invalidations table in core:archive and get ArchiveCronTest to pass.
* fixing some tests
* debugging travis
* fix more tests & remove DONE_IN_PROGRESS which is no longer used.
* fix more tests
* apply review feedback
* fix tests
* try fixing alltests build
* try again
* try again
* Move archive_invalidations update to new beta since omnifixture was updated w/o it.
* Another fix.
Diffstat (limited to 'plugins/CoreConsole')
-rw-r--r-- | plugins/CoreConsole/Commands/CoreArchiver.php | 37 | ||||
-rw-r--r-- | plugins/CoreConsole/tests/System/ArchiveCronTest.php | 54 |
2 files changed, 38 insertions, 53 deletions
diff --git a/plugins/CoreConsole/Commands/CoreArchiver.php b/plugins/CoreConsole/Commands/CoreArchiver.php index b74e573286..44719e7b64 100644 --- a/plugins/CoreConsole/Commands/CoreArchiver.php +++ b/plugins/CoreConsole/Commands/CoreArchiver.php @@ -34,32 +34,28 @@ class CoreArchiver extends ConsoleCommand $archiver->disableScheduledTasks = $input->getOption('disable-scheduled-tasks'); $archiver->acceptInvalidSSLCertificate = $input->getOption("accept-invalid-ssl-certificate"); - $archiver->shouldArchiveAllSites = (bool) $input->getOption("force-all-websites"); $archiver->shouldStartProfiler = (bool) $input->getOption("xhprof"); $archiver->shouldArchiveSpecifiedSites = self::getSitesListOption($input, "force-idsites"); $archiver->shouldSkipSpecifiedSites = self::getSitesListOption($input, "skip-idsites"); - $archiver->forceTimeoutPeriod = $input->getOption("force-timeout-for-periods"); - $archiver->shouldArchiveAllPeriodsSince = $input->getOption("force-all-periods"); - $archiver->restrictToDateRange = $input->getOption("force-date-range"); $archiver->phpCliConfigurationOptions = $input->getOption("php-cli-options"); - - $restrictToPeriods = $input->getOption("force-periods"); - $restrictToPeriods = explode(',', $restrictToPeriods); - $archiver->restrictToPeriods = array_map('trim', $restrictToPeriods); - $archiver->dateLastForced = $input->getOption('force-date-last-n'); $archiver->concurrentRequestsPerWebsite = $input->getOption('concurrent-requests-per-website'); $archiver->maxConcurrentArchivers = $input->getOption('concurrent-archivers'); - - $archiver->disableSegmentsArchiving = $input->getOption('skip-all-segments'); $archiver->skipSegmentsToday = $input->getOption('skip-segments-today'); + $archiver->shouldArchiveAllSites = $input->getOption('force-all-websites'); + $archiver->setUrlToPiwik($url); + + $archiveFilter = new CronArchive\ArchiveFilter(); + $archiveFilter->setDisableSegmentsArchiving($input->getOption('skip-all-segments')); + 
$archiveFilter->setRestrictToDateRange($input->getOption("force-date-range")); + $archiveFilter->setRestrictToPeriods($input->getOption("force-periods")); $segmentIds = $input->getOption('force-idsegments'); $segmentIds = explode(',', $segmentIds); $segmentIds = array_map('trim', $segmentIds); - $archiver->setSegmentsToForceFromSegmentIds($segmentIds); + $archiveFilter->setSegmentsToForceFromSegmentIds($segmentIds); - $archiver->setUrlToPiwik($url); + $archiver->setArchiveFilter($archiveFilter); return $archiver; } @@ -84,18 +80,6 @@ class CoreArchiver extends ConsoleCommand "Forces the value of this option to be used as the URL to Piwik. \nIf your system does not support" . " archiving with CLI processes, you may need to set this in order for the archiving HTTP requests to use" . " the desired URLs."); - $command->addOption('force-all-websites', null, InputOption::VALUE_NONE, - "If specified, the script will trigger archiving on all websites.\nUse with --force-all-periods=[seconds] " - . "to also process those websites that had visits in the last [seconds] seconds.\nLaunching several processes" - . " with this option will make them share the list of sites to process."); - $command->addOption('force-all-periods', null, InputOption::VALUE_OPTIONAL, - "Limits archiving to websites with some traffic in the last [seconds] seconds. \nFor example " - . "--force-all-periods=86400 will archive websites that had visits in the last 24 hours. \nIf [seconds] is " - . "not specified, all websites with visits in the last " . CronArchive::ARCHIVE_SITES_WITH_TRAFFIC_SINCE - . " seconds (" . round(CronArchive::ARCHIVE_SITES_WITH_TRAFFIC_SINCE / 86400) . " days) will be archived."); - $command->addOption('force-timeout-for-periods', null, InputOption::VALUE_OPTIONAL, - "The current week/ current month/ current year will be processed at most every [seconds].\nIf not " - . "specified, defaults to " . CronArchive::SECONDS_DELAY_BETWEEN_PERIOD_ARCHIVES . 
"."); $command->addOption('skip-idsites', null, InputOption::VALUE_OPTIONAL, 'If specified, archiving will be skipped for these websites (in case these website ids would have been archived).'); $command->addOption('skip-all-segments', null, InputOption::VALUE_NONE, @@ -107,7 +91,7 @@ class CoreArchiver extends ConsoleCommand $command->addOption('force-periods', null, InputOption::VALUE_OPTIONAL, "If specified, archiving will be processed only for these Periods (comma separated eg. day,week,month,year,range)"); $command->addOption('force-date-last-n', null, InputOption::VALUE_REQUIRED, - "This script calls the API with period=lastN. You can force the N in lastN by specifying this value."); + "This last N number of years of data to invalidate when a recently created or updated segment is found.", 7); $command->addOption('force-date-range', null, InputOption::VALUE_OPTIONAL, "If specified, archiving will be processed only for periods included in this date range. Format: YYYY-MM-DD,YYYY-MM-DD"); $command->addOption('force-idsegments', null, InputOption::VALUE_REQUIRED, @@ -124,5 +108,6 @@ class CoreArchiver extends ConsoleCommand "It is _NOT_ recommended to use this argument. Instead, you should use a valid SSL certificate!\nIt can be " . "useful if you specified --url=https://... or if you are using Piwik with force_ssl=1"); $command->addOption('php-cli-options', null, InputOption::VALUE_OPTIONAL, 'Forwards the PHP configuration options to the PHP CLI command. For example "-d memory_limit=8G". 
Note: These options are only applied if the archiver actually uses CLI and not HTTP.', $default = ''); + $command->addOption('force-all-websites', null, InputOption::VALUE_NONE, 'Force archiving all websites.'); } } diff --git a/plugins/CoreConsole/tests/System/ArchiveCronTest.php b/plugins/CoreConsole/tests/System/ArchiveCronTest.php index 412ef94304..e426778427 100644 --- a/plugins/CoreConsole/tests/System/ArchiveCronTest.php +++ b/plugins/CoreConsole/tests/System/ArchiveCronTest.php @@ -8,8 +8,12 @@ namespace Piwik\Plugins\CoreConsole\tests\System; use Interop\Container\ContainerInterface; +use Piwik\Common; use Piwik\Config; +use Piwik\CronArchive; use Piwik\Date; +use Piwik\Db; +use Piwik\Option; use Piwik\Plugins\SitesManager\API; use Piwik\Tests\Framework\TestCase\SystemTestCase; use Piwik\Tests\Fixtures\ManySitesImportedLogs; @@ -88,9 +92,12 @@ class ArchiveCronTest extends SystemTestCase public function testArchivePhpCron() { - $this->setLastRunArchiveOptions(); $output = $this->runArchivePhpCron(); + $expectedInvalidations = []; + $invalidationEntries = $this->getInvalidatedArchiveTableEntries(); + $this->assertEquals($expectedInvalidations, $invalidationEntries); + $this->compareArchivePhpOutputAgainstExpected($output); foreach ($this->getApiForTesting() as $testInfo) { @@ -113,8 +120,6 @@ class ArchiveCronTest extends SystemTestCase public function testArchivePhpCronArchivesFullRanges() { - $this->setLastRunArchiveOptions(); - self::$fixture->getTestEnvironment()->overrideConfig('General', 'enable_browser_archiving_triggering', 0); self::$fixture->getTestEnvironment()->overrideConfig('General', 'archiving_range_force_on_browser_request', 0); self::$fixture->getTestEnvironment()->overrideConfig('General', 'archiving_custom_ranges', ['2012-08-09,2012-08-13']); @@ -126,6 +131,13 @@ class ArchiveCronTest extends SystemTestCase $output = $this->runArchivePhpCron(['--force-periods' => 'range', '--force-idsites' => 1]); + $expectedInvalidations = []; + 
$invalidationEntries = $this->getInvalidatedArchiveTableEntries(); + $invalidationEntries = array_filter($invalidationEntries, function ($entry) { + return $entry['period'] == 5; + }); + $this->assertEquals($expectedInvalidations, $invalidationEntries); + $this->runApiTests(array( 'VisitsSummary.get', 'Actions.get', 'DevicesDetection.getType'), array('idSite' => '1', @@ -139,30 +151,11 @@ class ArchiveCronTest extends SystemTestCase public function test_archivePhpScript_DoesNotFail_WhenCommandHelpRequested() { $output = $this->runArchivePhpCron(array('--help' => null), PIWIK_INCLUDE_PATH . '/misc/cron/archive.php'); - $output = implode("\n", $output); $this->assertRegExp('/Usage:\s*core:archive/', $output); self::assertStringNotContainsString("Starting Piwik reports archiving...", $output); } - private function setLastRunArchiveOptions() - { - $periodTypes = array('day', 'periods'); - $idSites = API::getInstance()->getAllSitesId(); - - $daysAgoArchiveRanSuccessfully = 1500; - $this->assertTrue($daysAgoArchiveRanSuccessfully > (\Piwik\CronArchive::ARCHIVE_SITES_WITH_TRAFFIC_SINCE / 86400)); - $time = Date::factory(self::$fixture->dateTime)->subDay($daysAgoArchiveRanSuccessfully)->getTimestamp(); - - foreach ($periodTypes as $period) { - foreach ($idSites as $idSite) { - // lastRunKey() function inlined - $lastRunArchiveOption = "lastRunArchive" . $period . "_" . $idSite; - \Piwik\Option::set($lastRunArchiveOption, $time); - } - } - } - private function runArchivePhpCron($options = array(), $archivePhpScript = false) { $archivePhpScript = $archivePhpScript ?: PIWIK_INCLUDE_PATH . '/tests/PHPUnit/proxy/archive.php'; @@ -180,8 +173,10 @@ class ArchiveCronTest extends SystemTestCase // run the command exec($cmd, $output, $result); - if ($result !== 0 || stripos($result, "error")) { - $this->fail("archive cron failed: " . implode("\n", $output) . 
"\n\ncommand used: $cmd"); + $output = implode("\n", $output); + + if ($result !== 0 || strpos($output, "ERROR") || strpos($output, "Error")) { + $this->fail("archive cron failed (result = $result): " . $output . "\n\ncommand used: $cmd"); } return $output; @@ -189,8 +184,6 @@ class ArchiveCronTest extends SystemTestCase private function compareArchivePhpOutputAgainstExpected($output) { - $output = implode("\n", $output); - $fileName = 'test_ArchiveCronTest_archive_php_cron_output.txt'; list($pathProcessed, $pathExpected) = static::getProcessedAndExpectedDirs(); @@ -216,7 +209,9 @@ class ArchiveCronTest extends SystemTestCase // is a translation token, and nothing else. 'Piwik\Translation\Translator' => function (ContainerInterface $c) { return new \Piwik\Translation\Translator($c->get('Piwik\Translation\Loader\LoaderInterface')); - } + }, + + 'Tests.log.allowAllHandlers' => true, ); } @@ -224,6 +219,11 @@ class ArchiveCronTest extends SystemTestCase { return dirname(__FILE__); } + + private function getInvalidatedArchiveTableEntries() + { + return Db::fetchAll("SELECT idinvalidation, idarchive, idsite, date1, date2, period, name, status FROM " . Common::prefixTable('archive_invalidations')); + } } ArchiveCronTest::$fixture = new ManySitesImportedLogs(); |