Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/matomo-org/matomo.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKate Butler <kate@innocraft.com>2019-05-03 03:33:59 +0300
committerThomas Steur <tsteur@users.noreply.github.com>2019-05-03 03:33:59 +0300
commitb00014475cd0a53c1d7bb6452b04605b7fdc34d0 (patch)
tree91c7d08a6c774dbc7d06153950de2ff383cd5a13 /plugins
parent8f1f8ec8bd9a5a4cd651fdcb0a50081d58cd38ab (diff)
Task to purge archives for deleted websites and segments (#14317)
* Purge archives for deleted sites and segments * Purge archives for deleted sites and segments * Add new purgeOrphanedArchives task to expected list * Fix build * PR improvements * Fix consistency of method names * Fix typo * Unit tests for getSegmentHashesByIdSite * PR changes * add note on how to test the command * minor tweak to make sure no injections are possible
Diffstat (limited to 'plugins')
-rw-r--r--plugins/CoreAdminHome/Tasks.php51
-rw-r--r--plugins/CoreAdminHome/tests/Integration/TasksTest.php101
2 files changed, 152 insertions, 0 deletions
diff --git a/plugins/CoreAdminHome/Tasks.php b/plugins/CoreAdminHome/Tasks.php
index 5d8dadb3a5..136dfdf7e0 100644
--- a/plugins/CoreAdminHome/Tasks.php
+++ b/plugins/CoreAdminHome/Tasks.php
@@ -11,6 +11,7 @@ namespace Piwik\Plugins\CoreAdminHome;
use Piwik\API\Request;
use Piwik\ArchiveProcessor\Rules;
use Piwik\Archive\ArchivePurger;
+use Piwik\Common;
use Piwik\Config;
use Piwik\Container\StaticContainer;
use Piwik\DataAccess\ArchiveTableCreator;
@@ -24,6 +25,7 @@ use Piwik\Plugins\CoreAdminHome\Emails\TrackingFailuresEmail;
use Piwik\Plugins\CoreAdminHome\Tasks\ArchivesToPurgeDistributedList;
use Piwik\Plugins\SitesManager\SitesManager;
use Piwik\Scheduler\Schedule\SpecificTime;
+use Piwik\Segment;
use Piwik\Settings\Storage\Backend\MeasurableSettingsTable;
use Piwik\Tracker\Failures;
use Piwik\Site;
@@ -64,6 +66,8 @@ class Tasks extends \Piwik\Plugin\Tasks
// general data purge on invalidated archive records, executed daily
$this->daily('purgeInvalidatedArchives', null, self::LOW_PRIORITY);
+ $this->weekly('purgeOrphanedArchives', null, self::NORMAL_PRIORITY);
+
// lowest priority since tables should be optimized after they are modified
$this->monthly('optimizeArchiveTable', null, self::LOWEST_PRIORITY);
@@ -259,6 +263,53 @@ class Tasks extends \Piwik\Plugin\Tasks
}
/**
+ * Scheduled task that purges archives left behind by deleted websites and deleted segments.
+ *
+ * Iterates over every installed numeric archive table, derives the year and month from the
+ * table name and asks the archive purger to remove the orphaned archives for that month.
+ *
+ * To test, execute the following command:
+ * `./console core:run-scheduled-tasks "Piwik\Plugins\CoreAdminHome\Tasks.purgeOrphanedArchives"`
+ *
+ * @throws \Exception
+ */
+ public function purgeOrphanedArchives()
+ {
+     // Computed once up front: the same list of existing segment hashes is reused for every table.
+     $segmentHashesByIdSite = $this->getSegmentHashesByIdSite();
+     $archiveTables = ArchiveTableCreator::getTablesArchivesInstalled('numeric');
+
+     foreach ($archiveTables as $table) {
+         $date = ArchiveTableCreator::getDateFromTableName($table);
+         list($year, $month) = explode('_', $date);
+
+         // NOTE(review): the day (15) looks arbitrary - presumably only the year and month
+         // matter to the purger; confirm against ArchivePurger.
+         $dateObj = Date::factory("$year-$month-15");
+
+         $this->archivePurger->purgeDeletedSiteArchives($dateObj);
+         $this->archivePurger->purgeDeletedSegmentArchives($dateObj, $segmentHashesByIdSite);
+     }
+ }
+
+ /**
+ * Collect the hashes of every segment that is not flagged as deleted, grouped by the
+ * site ID stored in the segment's `enable_only_idsite` column.
+ *
+ * @return array segment hashes, indexed by idSite (cast to int)
+ */
+ public function getSegmentHashesByIdSite()
+ {
+     // Only segments that still exist (deleted = 0) are of interest
+     $segmentTable = Common::prefixTable('segment');
+     $query = "SELECT DISTINCT definition, enable_only_idsite FROM " . $segmentTable
+         . " WHERE deleted = 0";
+
+     $hashesBySite = array();
+     foreach (Db::fetchAll($query) as $segment) {
+         $siteId = (int)$segment['enable_only_idsite'];
+         // Appending with [] creates the per-site list automatically on first use
+         $hashesBySite[$siteId][] = Segment::getSegmentHash($segment['definition']);
+     }
+
+     return $hashesBySite;
+ }
+
+ /**
* we should only purge outdated & custom range archives if we know cron archiving has just run,
* or if browser triggered archiving is enabled. if cron archiving has run, then we know the latest
* archives are in the database, and we can remove temporary ones. if browser triggered archiving is
diff --git a/plugins/CoreAdminHome/tests/Integration/TasksTest.php b/plugins/CoreAdminHome/tests/Integration/TasksTest.php
index ffab94c01d..446bf66b86 100644
--- a/plugins/CoreAdminHome/tests/Integration/TasksTest.php
+++ b/plugins/CoreAdminHome/tests/Integration/TasksTest.php
@@ -17,6 +17,9 @@ use Piwik\Plugins\CoreAdminHome\Emails\JsTrackingCodeMissingEmail;
use Piwik\Plugins\CoreAdminHome\Emails\TrackingFailuresEmail;
use Piwik\Plugins\CoreAdminHome\Tasks;
use Piwik\Plugins\CoreAdminHome\Tasks\ArchivesToPurgeDistributedList;
+use Piwik\Plugins\CustomDimensions\CustomDimensions;
+use Piwik\Plugins\CustomDimensions\Dao\Configuration;
+use Piwik\Plugins\SegmentEditor\Model;
use Piwik\Scheduler\Task;
use Piwik\Tests\Fixtures\RawArchiveDataWithTempAndInvalidated;
use Piwik\Tests\Framework\Fixture;
@@ -131,6 +134,7 @@ class TasksTest extends IntegrationTestCase
$expected = [
'purgeOutdatedArchives.',
'purgeInvalidatedArchives.',
+ 'purgeOrphanedArchives.',
'optimizeArchiveTable.',
'cleanupTrackingFailures.',
'notifyTrackingFailures.',
@@ -215,6 +219,103 @@ class TasksTest extends IntegrationTestCase
$this->assertEquals(2, $mail->getNumFailures());
}
+ public function test_getSegmentHashesByIdSite_emptyWhenNoSegments()
+ {
+     // This test creates no segments, so an empty map is expected back
+     $this->assertEquals([], $this->tasks->getSegmentHashesByIdSite());
+ }
+
+ public function test_getSegmentHashesByIdSite_allWebsiteAndSiteSpecificSegments()
+ {
+     // Two segments with enable_only_idsite = 0 plus one restricted to site 2
+     $segmentsToCreate = [
+         [
+             'name' => 'Test Segment 1',
+             'definition' => 'continentCode==eur',
+             'enable_only_idsite' => 0,
+             'deleted' => 0,
+         ],
+         [
+             'name' => 'Test Segment 2',
+             'definition' => 'countryCode==nz',
+             'enable_only_idsite' => 0,
+             'deleted' => 0,
+         ],
+         [
+             'name' => 'Test Segment 3',
+             'definition' => 'countryCode==au',
+             'enable_only_idsite' => 2,
+             'deleted' => 0,
+         ],
+     ];
+
+     $segmentModel = new Model();
+     foreach ($segmentsToCreate as $segment) {
+         $segmentModel->createSegment($segment);
+     }
+
+     // Hashes are grouped under the idSite each segment was created for
+     $expectedHashes = [
+         0 => ['be90051048558489e1d62f4245a6dc65', 'b92fbb3009b32cf632965802de2fb760'],
+         2 => ['cffd4336c22c6782211f853495076b1a'],
+     ];
+     $this->assertEquals($expectedHashes, $this->tasks->getSegmentHashesByIdSite());
+ }
+
+ public function test_getSegmentHashesByIdSite_invalidSegment()
+ {
+     $segmentsToCreate = [
+         [
+             'name' => 'Test Segment 4',
+             // The single "=" makes this definition invalid - a hash is expected anyway
+             'definition' => 'countryCode=nz',
+             'enable_only_idsite' => 0,
+             'deleted' => 0,
+         ],
+         [
+             'name' => 'Test Segment 5',
+             'definition' => 'countryCode==au',
+             'enable_only_idsite' => 0,
+             'deleted' => 0,
+         ],
+     ];
+
+     $segmentModel = new Model();
+     foreach ($segmentsToCreate as $segment) {
+         $segmentModel->createSegment($segment);
+     }
+
+     $expectedHashes = [
+         0 => ['5ffe7e116fae7576c047b1fb811584a5', 'cffd4336c22c6782211f853495076b1a'],
+     ];
+
+     $this->assertEquals($expectedHashes, $this->tasks->getSegmentHashesByIdSite());
+ }
+
+ public function test_getSegmentHashesByIdSite_siteSpecificCustomDimension()
+ {
+     // Register a custom dimension for idsite = 1 so a segment can refer to it
+     $dimensionConfig = new Configuration();
+     $dimensionConfig->configureNewDimension(1, 'mydimension', CustomDimensions::SCOPE_VISIT, 1, 1, [], true);
+
+     $segmentsToCreate = [
+         [
+             'name' => 'Test Segment 6',
+             'definition' => 'mydimension==red',
+             'enable_only_idsite' => 1,
+             'deleted' => 0,
+         ],
+         [
+             'name' => 'Test Segment 7',
+             'definition' => 'countryCode==au',
+             'enable_only_idsite' => 2,
+             'deleted' => 0,
+         ],
+     ];
+
+     $segmentModel = new Model();
+     foreach ($segmentsToCreate as $segment) {
+         $segmentModel->createSegment($segment);
+     }
+
+     // One hash per site: the custom-dimension segment under 1, the country segment under 2
+     $expectedHashes = [
+         1 => ['240d2a84a309debd26bdbaa8eb3d363c'],
+         2 => ['cffd4336c22c6782211f853495076b1a'],
+     ];
+
+     $this->assertEquals($expectedHashes, $this->tasks->getSegmentHashesByIdSite());
+ }
+
+
/**
* @param Date[] $dates
*/