diff options
author | Kate Butler <kate@innocraft.com> | 2019-05-03 03:33:59 +0300 |
---|---|---|
committer | Thomas Steur <tsteur@users.noreply.github.com> | 2019-05-03 03:33:59 +0300 |
commit | b00014475cd0a53c1d7bb6452b04605b7fdc34d0 (patch) | |
tree | 91c7d08a6c774dbc7d06153950de2ff383cd5a13 /plugins | |
parent | 8f1f8ec8bd9a5a4cd651fdcb0a50081d58cd38ab (diff) |
Task to purge archives for deleted websites and segments (#14317)
* Purge archives for deleted sites and segments
* Purge archives for deleted sites and segments
* Add new purgeOrphanedArchives task to expected list
* Fix build
* PR improvements
* Fix consistency of method names
* Fix typo
* Unit tests for getSegmentHashesByIdSite
* PR changes
* add note on how to test the command
* minor tweak to make sure no injections are possible
Diffstat (limited to 'plugins')
-rw-r--r-- | plugins/CoreAdminHome/Tasks.php | 51 | ||||
-rw-r--r-- | plugins/CoreAdminHome/tests/Integration/TasksTest.php | 101 |
2 files changed, 152 insertions, 0 deletions
diff --git a/plugins/CoreAdminHome/Tasks.php b/plugins/CoreAdminHome/Tasks.php index 5d8dadb3a5..136dfdf7e0 100644 --- a/plugins/CoreAdminHome/Tasks.php +++ b/plugins/CoreAdminHome/Tasks.php @@ -11,6 +11,7 @@ namespace Piwik\Plugins\CoreAdminHome; use Piwik\API\Request; use Piwik\ArchiveProcessor\Rules; use Piwik\Archive\ArchivePurger; +use Piwik\Common; use Piwik\Config; use Piwik\Container\StaticContainer; use Piwik\DataAccess\ArchiveTableCreator; @@ -24,6 +25,7 @@ use Piwik\Plugins\CoreAdminHome\Emails\TrackingFailuresEmail; use Piwik\Plugins\CoreAdminHome\Tasks\ArchivesToPurgeDistributedList; use Piwik\Plugins\SitesManager\SitesManager; use Piwik\Scheduler\Schedule\SpecificTime; +use Piwik\Segment; use Piwik\Settings\Storage\Backend\MeasurableSettingsTable; use Piwik\Tracker\Failures; use Piwik\Site; @@ -64,6 +66,8 @@ class Tasks extends \Piwik\Plugin\Tasks // general data purge on invalidated archive records, executed daily $this->daily('purgeInvalidatedArchives', null, self::LOW_PRIORITY); + $this->weekly('purgeOrphanedArchives', null, self::NORMAL_PRIORITY); + // lowest priority since tables should be optimized after they are modified $this->monthly('optimizeArchiveTable', null, self::LOWEST_PRIORITY); @@ -259,6 +263,53 @@ class Tasks extends \Piwik\Plugin\Tasks } /** + * To test execute the following command: + * `./console core:run-scheduled-tasks "Piwik\Plugins\CoreAdminHome\Tasks.purgeOrphanedArchives"` + * + * @throws \Exception + */ + public function purgeOrphanedArchives() + { + $segmentHashesByIdSite = $this->getSegmentHashesByIdSite(); + $archiveTables = ArchiveTableCreator::getTablesArchivesInstalled('numeric'); + + $datesPurged = array(); + foreach ($archiveTables as $table) { + $date = ArchiveTableCreator::getDateFromTableName($table); + list($year, $month) = explode('_', $date); + + $dateObj = Date::factory("$year-$month-15"); + + $this->archivePurger->purgeDeletedSiteArchives($dateObj); + $this->archivePurger->purgeDeletedSegmentArchives($dateObj, $segmentHashesByIdSite); + + $datesPurged[$date] = true; + } + } + + /** + * Get a list of all segment hashes that currently exist, indexed by idSite. + * @return array + */ + public function getSegmentHashesByIdSite() + { + //Get a list of hashes of all segments that exist now + $sql = "SELECT DISTINCT definition, enable_only_idsite FROM " . Common::prefixTable('segment') + . " WHERE deleted = 0"; + $rows = Db::fetchAll($sql); + $segmentHashes = array(); + foreach ($rows as $row) { + $idSite = (int)$row['enable_only_idsite']; + if (! isset($segmentHashes[$idSite])) { + $segmentHashes[$idSite] = array(); + } + $segmentHashes[$idSite][] = Segment::getSegmentHash($row['definition']); + } + + return $segmentHashes; + } + + /** * we should only purge outdated & custom range archives if we know cron archiving has just run, * or if browser triggered archiving is enabled. if cron archiving has run, then we know the latest * archives are in the database, and we can remove temporary ones. if browser triggered archiving is diff --git a/plugins/CoreAdminHome/tests/Integration/TasksTest.php b/plugins/CoreAdminHome/tests/Integration/TasksTest.php index ffab94c01d..446bf66b86 100644 --- a/plugins/CoreAdminHome/tests/Integration/TasksTest.php +++ b/plugins/CoreAdminHome/tests/Integration/TasksTest.php @@ -17,6 +17,9 @@ use Piwik\Plugins\CoreAdminHome\Emails\JsTrackingCodeMissingEmail; use Piwik\Plugins\CoreAdminHome\Emails\TrackingFailuresEmail; use Piwik\Plugins\CoreAdminHome\Tasks; use Piwik\Plugins\CoreAdminHome\Tasks\ArchivesToPurgeDistributedList; +use Piwik\Plugins\CustomDimensions\CustomDimensions; +use Piwik\Plugins\CustomDimensions\Dao\Configuration; +use Piwik\Plugins\SegmentEditor\Model; use Piwik\Scheduler\Task; use Piwik\Tests\Fixtures\RawArchiveDataWithTempAndInvalidated; use Piwik\Tests\Framework\Fixture; @@ -131,6 +134,7 @@ class TasksTest extends IntegrationTestCase $expected = [ 'purgeOutdatedArchives.', 'purgeInvalidatedArchives.', + 'purgeOrphanedArchives.', 'optimizeArchiveTable.', 'cleanupTrackingFailures.', 'notifyTrackingFailures.', @@ -215,6 +219,103 @@ class TasksTest extends IntegrationTestCase $this->assertEquals(2, $mail->getNumFailures()); } + public function test_getSegmentHashesByIdSite_emptyWhenNoSegments() + { + $segmentsByIdSite = $this->tasks->getSegmentHashesByIdSite(); + $this->assertEquals(array(), $segmentsByIdSite); + } + + public function test_getSegmentHashesByIdSite_allWebsiteAndSiteSpecificSegments() + { + $model = new Model(); + $model->createSegment(array( + 'name' => 'Test Segment 1', + 'definition' => 'continentCode==eur', + 'enable_only_idsite' => 0, + 'deleted' => 0 + )); + $model->createSegment(array( + 'name' => 'Test Segment 2', + 'definition' => 'countryCode==nz', + 'enable_only_idsite' => 0, + 'deleted' => 0 + )); + $model->createSegment(array( + 'name' => 'Test Segment 3', + 'definition' => 'countryCode==au', + 'enable_only_idsite' => 2, + 'deleted' => 0 + )); + + $segmentsByIdSite = $this->tasks->getSegmentHashesByIdSite(); + $expected = array( + 0 => array('be90051048558489e1d62f4245a6dc65', 'b92fbb3009b32cf632965802de2fb760'), + 2 => array('cffd4336c22c6782211f853495076b1a') + ); + $this->assertEquals($expected, $segmentsByIdSite); + } + + public function test_getSegmentHashesByIdSite_invalidSegment() + { + $model = new Model(); + $model->createSegment(array( + 'name' => 'Test Segment 4', + 'definition' => 'countryCode=nz', //The single "=" is invalid - we should generate a hash anyway + 'enable_only_idsite' => 0, + 'deleted' => 0 + )); + $model->createSegment(array( + 'name' => 'Test Segment 5', + 'definition' => 'countryCode==au', + 'enable_only_idsite' => 0, + 'deleted' => 0 + )); + + $expected = array( + 0 => array('5ffe7e116fae7576c047b1fb811584a5', 'cffd4336c22c6782211f853495076b1a'), + ); + + $segmentsByIdSite = $this->tasks->getSegmentHashesByIdSite(); + $this->assertEquals($expected, $segmentsByIdSite); + } + + public function test_getSegmentHashesByIdSite_siteSpecificCustomDimension() + { + // Insert a custom dimension for idsite = 1 + $configuration = new Configuration(); + $configuration->configureNewDimension( + 1, + 'mydimension', + CustomDimensions::SCOPE_VISIT, + 1, + 1, + array(), + true + ); + + $model = new Model(); + $model->createSegment(array( + 'name' => 'Test Segment 6', + 'definition' => 'mydimension==red', + 'enable_only_idsite' => 1, + 'deleted' => 0 + )); + $model->createSegment(array( + 'name' => 'Test Segment 7', + 'definition' => 'countryCode==au', + 'enable_only_idsite' => 2, + 'deleted' => 0 + )); + + $expected = array( + 1 => array('240d2a84a309debd26bdbaa8eb3d363c'), + 2 => array('cffd4336c22c6782211f853495076b1a') + ); + + $segmentsByIdSite = $this->tasks->getSegmentHashesByIdSite(); + $this->assertEquals($expected, $segmentsByIdSite); + } + /** * @param Date[] $dates */ |