diff options
author | Kate Butler <kate@innocraft.com> | 2019-05-03 03:33:59 +0300 |
---|---|---|
committer | Thomas Steur <tsteur@users.noreply.github.com> | 2019-05-03 03:33:59 +0300 |
commit | b00014475cd0a53c1d7bb6452b04605b7fdc34d0 (patch) | |
tree | 91c7d08a6c774dbc7d06153950de2ff383cd5a13 /plugins/CoreAdminHome/Tasks.php | |
parent | 8f1f8ec8bd9a5a4cd651fdcb0a50081d58cd38ab (diff) |
Task to purge archives for deleted websites and segments (#14317)
* Purge archives for deleted sites and segments
* Purge archives for deleted sites and segments
* Add new purgeOrphanedArchives task to expected list
* Fix build
* PR improvements
* Fix consistency of method names
* Fix typo
* Unit tests for getSegmentHashesByIdSite
* PR changes
* add note on how to test the command
* minor tweak to make sure no injections are possible
Diffstat (limited to 'plugins/CoreAdminHome/Tasks.php')
-rw-r--r-- | plugins/CoreAdminHome/Tasks.php | 51 |
1 file changed, 51 insertions, 0 deletions
diff --git a/plugins/CoreAdminHome/Tasks.php b/plugins/CoreAdminHome/Tasks.php index 5d8dadb3a5..136dfdf7e0 100644 --- a/plugins/CoreAdminHome/Tasks.php +++ b/plugins/CoreAdminHome/Tasks.php @@ -11,6 +11,7 @@ namespace Piwik\Plugins\CoreAdminHome; use Piwik\API\Request; use Piwik\ArchiveProcessor\Rules; use Piwik\Archive\ArchivePurger; +use Piwik\Common; use Piwik\Config; use Piwik\Container\StaticContainer; use Piwik\DataAccess\ArchiveTableCreator; @@ -24,6 +25,7 @@ use Piwik\Plugins\CoreAdminHome\Emails\TrackingFailuresEmail; use Piwik\Plugins\CoreAdminHome\Tasks\ArchivesToPurgeDistributedList; use Piwik\Plugins\SitesManager\SitesManager; use Piwik\Scheduler\Schedule\SpecificTime; +use Piwik\Segment; use Piwik\Settings\Storage\Backend\MeasurableSettingsTable; use Piwik\Tracker\Failures; use Piwik\Site; @@ -64,6 +66,8 @@ class Tasks extends \Piwik\Plugin\Tasks // general data purge on invalidated archive records, executed daily $this->daily('purgeInvalidatedArchives', null, self::LOW_PRIORITY); + $this->weekly('purgeOrphanedArchives', null, self::NORMAL_PRIORITY); + // lowest priority since tables should be optimized after they are modified $this->monthly('optimizeArchiveTable', null, self::LOWEST_PRIORITY); @@ -259,6 +263,53 @@ class Tasks extends \Piwik\Plugin\Tasks } /** + * To test execute the following command: + * `./console core:run-scheduled-tasks "Piwik\Plugins\CoreAdminHome\Tasks.purgeOrphanedArchives"` + * + * @throws \Exception + */ + public function purgeOrphanedArchives() + { + $segmentHashesByIdSite = $this->getSegmentHashesByIdSite(); + $archiveTables = ArchiveTableCreator::getTablesArchivesInstalled('numeric'); + + $datesPurged = array(); + foreach ($archiveTables as $table) { + $date = ArchiveTableCreator::getDateFromTableName($table); + list($year, $month) = explode('_', $date); + + $dateObj = Date::factory("$year-$month-15"); + + $this->archivePurger->purgeDeletedSiteArchives($dateObj); + 
$this->archivePurger->purgeDeletedSegmentArchives($dateObj, $segmentHashesByIdSite); + + $datesPurged[$date] = true; + } + } + + /** + * Get a list of all segment hashes that currently exist, indexed by idSite. + * @return array + */ + public function getSegmentHashesByIdSite() + { + //Get a list of hashes of all segments that exist now + $sql = "SELECT DISTINCT definition, enable_only_idsite FROM " . Common::prefixTable('segment') + . " WHERE deleted = 0"; + $rows = Db::fetchAll($sql); + $segmentHashes = array(); + foreach ($rows as $row) { + $idSite = (int)$row['enable_only_idsite']; + if (! isset($segmentHashes[$idSite])) { + $segmentHashes[$idSite] = array(); + } + $segmentHashes[$idSite][] = Segment::getSegmentHash($row['definition']); + } + + return $segmentHashes; + } + + /** * we should only purge outdated & custom range archives if we know cron archiving has just run, * or if browser triggered archiving is enabled. if cron archiving has run, then we know the latest * archives are in the database, and we can remove temporary ones. if browser triggered archiving is |