Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/matomo-org/matomo.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZoltan Flamis <zoltan@innocraft.com>2021-04-19 01:18:49 +0300
committerGitHub <noreply@github.com>2021-04-19 01:18:49 +0300
commit7b1b36c46559ee71c144152a0d95cd41e162e1dc (patch)
tree3223186fdfd371f5f21d16b7e5acbc03e5a206c6
parent84b9f9c33ce6402556008c8764a79747f24b5b0f (diff)
store segment hash in DB (#17408)
* get segment hash * convert tab indentations to spaces * Create 4.3.0-b2.php * update tests and update file * bump version * Update 4.3.0-b3.php * Update ApiTest.php * fixing urlencode bugs * cache segment hashes * update segment caching * update segment caching * add segment cache test * add testdox to phpunit.xml * revert phunit.xml * test investigation * Update phpunit.xml.dist * update blobreportlimitingt test * Revert "update blobreportlimitingt test" This reverts commit 90fe7355cd1d227193f0b73ddf1d715c8b016616. * Update phpunit.xml.dist * Update BlobReportLimitingTest.php * Update BlobReportLimitingTest.php * Update SystemTestCase.php * Update BlobReportLimitingTest.php * Update SystemTestCase.php * Update phpunit.xml.dist * modify mem limit for travis * Update .travis.yml * revert travis.yml * Update SystemTestCase.php * try test without cache * test witch cache and gc_disabled * Revert "test witch cache and gc_disabled" This reverts commit 7e1d37093c7be648a84b57335edf397c89913758. * test witch cache and gc_disabled * use other model method * refactor test * Workaround error in Overlay when site has no URLs (#17457) * Set setting value even if set to NULL so it will still be validated. * Make sure when creating a site that the urls options is set. * workaround in Overlay for instances that have an invalid site URL set for some reason * Add integration tests for changes to SitesManager API. * revert non-overlay changes * Add warning if site has no URLs when viewing Overlay. * add more tests for segment caches * get segment hash * convert tab indentations to spaces * Create 4.3.0-b2.php * update tests and update file * bump version * Update 4.3.0-b3.php * Update ApiTest.php * fixing urlencode bugs * cache segment hashes * update segment caching * update segment caching * add segment cache test * add testdox to phpunit.xml * revert phunit.xml * test investigation * Update phpunit.xml.dist * update blobreportlimitingt test * Revert "update blobreportlimitingt test" This reverts commit 90fe7355cd1d227193f0b73ddf1d715c8b016616. * Update phpunit.xml.dist * Update BlobReportLimitingTest.php * Update BlobReportLimitingTest.php * Update SystemTestCase.php * Update BlobReportLimitingTest.php * Update SystemTestCase.php * Update phpunit.xml.dist * modify mem limit for travis * Update .travis.yml * revert travis.yml * Update SystemTestCase.php * try test without cache * test witch cache and gc_disabled * Revert "test witch cache and gc_disabled" This reverts commit 7e1d37093c7be648a84b57335edf397c89913758. * test witch cache and gc_disabled * use other model method * refactor test * add more tests for segment caches * revert phpunit.xml * revert phpunit.xml * add more tests Co-authored-by: dizzy <diosmosis@users.noreply.github.com>
-rw-r--r--core/Archive.php4
-rw-r--r--core/CronArchive/QueueConsumer.php4
-rw-r--r--core/Segment.php49
-rw-r--r--core/Updates/4.3.0-b3.php53
-rw-r--r--core/Version.php2
-rw-r--r--plugins/SegmentEditor/API.php7
-rw-r--r--plugins/SegmentEditor/Model.php31
-rw-r--r--plugins/SegmentEditor/tests/Integration/SegmentEditorTest.php5
-rw-r--r--plugins/SegmentEditor/tests/System/ApiTest.php33
-rw-r--r--tests/PHPUnit/Integration/Segment/SegmentsCacheTest.php108
-rw-r--r--tests/PHPUnit/System/BlobReportLimitingTest.php4
11 files changed, 278 insertions, 22 deletions
diff --git a/core/Archive.php b/core/Archive.php
index 0c0651a029..8338d19ec3 100644
--- a/core/Archive.php
+++ b/core/Archive.php
@@ -756,7 +756,7 @@ class Archive implements ArchiveQuery
{
$periods = $this->params->getPeriods();
$periodLabel = reset($periods)->getLabel();
-
+
if (Rules::shouldProcessReportsAllPlugins($this->params->getIdSites(), $this->params->getSegment(), $periodLabel)) {
return self::ARCHIVE_ALL_PLUGINS_FLAG;
}
@@ -822,7 +822,7 @@ class Archive implements ArchiveQuery
$this->initializeArchiveIdCache($doneFlag);
$prepareResult = $coreAdminHomeApi->archiveReports(
- $site->getId(), $period->getLabel(), $periodDateStr, $this->params->getSegment()->getString(),
+ $site->getId(), $period->getLabel(), $periodDateStr, urlencode($this->params->getSegment()->getString()),
$plugin, $requestedReport);
if (!empty($prepareResult)
diff --git a/core/CronArchive/QueueConsumer.php b/core/CronArchive/QueueConsumer.php
index ff751abd0d..966e9ea6ca 100644
--- a/core/CronArchive/QueueConsumer.php
+++ b/core/CronArchive/QueueConsumer.php
@@ -575,7 +575,7 @@ class QueueConsumer
$dateStr = $periodLabel == 'range' ? ($invalidatedArchive['date1'] . ',' . $invalidatedArchive['date2']) : $invalidatedArchive['date1'];
$period = PeriodFactory::build($periodLabel, $dateStr);
- $segment = new Segment($invalidatedArchive['segment'], [$invalidatedArchive['idsite']]);
+ $segment = new Segment(urlencode($invalidatedArchive['segment']), [$invalidatedArchive['idsite']]);
$params = new Parameters($site, $period, $segment);
@@ -604,4 +604,4 @@ class QueueConsumer
{
return $this->idSite;
}
-} \ No newline at end of file
+}
diff --git a/core/Segment.php b/core/Segment.php
index 20761ffa17..2d06cb5199 100644
--- a/core/Segment.php
+++ b/core/Segment.php
@@ -15,6 +15,8 @@ use Piwik\Container\StaticContainer;
use Piwik\DataAccess\LogQueryBuilder;
use Piwik\Plugins\SegmentEditor\SegmentEditor;
use Piwik\Segment\SegmentExpression;
+use Piwik\Plugins\SegmentEditor\Model as SegmentEditorModel;
+use Piwik\Cache;
/**
* Limits the set of visits Piwik uses when aggregating analytics data.
@@ -100,6 +102,9 @@ class Segment
*/
const SEGMENT_TRUNCATE_LIMIT = 8192;
+ const CACHE_KEY = 'segmenthashes';
+ const SEGMENT_HAS_BUILT_CACHE_KEY ='segmenthashbuilt';
+
/**
* Constructor.
*
@@ -460,8 +465,46 @@ class Segment
public static function getSegmentHash($definition)
{
- // urldecode to normalize the string, as browsers may send slightly different payloads for the same archive
- return md5(urldecode($definition));
+ $cache = Cache::getEagerCache();
+ $cacheKey = self::CACHE_KEY . md5($definition);
+
+ if ($cache->contains($cacheKey)) {
+ return $cache->fetch($cacheKey);
+ }
+
+ $defaultHash = md5(urldecode($definition));
+
+ // if the cache for segments already built, but this segment was not found,
+ // we return the default segment, this can be a segment from url or
+ // something like "visitorType==new"
+ if ($cache->contains(self::SEGMENT_HAS_BUILT_CACHE_KEY)) {
+ return $defaultHash;
+ }
+
+ // the segment hash is not built yet, let's do it
+ $model = new SegmentEditorModel();
+ $segments = $model->getAllSegmentsAndIgnoreVisibility();
+
+ foreach ($segments as $segment) {
+ $cacheKeyTemp = self::CACHE_KEY . md5($segment['definition']);
+ $cache->save($cacheKeyTemp, $segment['hash']);
+
+ $cacheKeyTemp = self::CACHE_KEY . md5(urldecode($segment['definition']));
+ $cache->save($cacheKeyTemp, $segment['hash']);
+
+ $cacheKeyTemp = self::CACHE_KEY . md5(urlencode($segment['definition']));
+ $cache->save($cacheKeyTemp, $segment['hash']);
+ }
+
+ $cache->save(self::SEGMENT_HAS_BUILT_CACHE_KEY, true);
+
+ // if we found the segment, return it's hash, but maybe this
+ // segment is not stored in the db, return the default
+ if ($cache->contains($cacheKey)) {
+ return $cache->fetch($cacheKey);
+ }
+
+ return $defaultHash;
}
/**
@@ -477,7 +520,7 @@ class Segment
* @param int $limit Limit number of result to $limit
* @param int $offset Specified the offset of the first row to return
* @param bool $forceGroupBy Force the group by and not using a subquery. Note: This may make the query slower see https://github.com/matomo-org/matomo/issues/9200#issuecomment-183641293
- * A $groupBy value needs to be set for this to work.
+ * A $groupBy value needs to be set for this to work.
* @param int If set to value >= 1 then the Select query (and All inner queries) will be LIMIT'ed by this value.
* Use only when you're not aggregating or it will sample the data.
* @return string The entire select query.
diff --git a/core/Updates/4.3.0-b3.php b/core/Updates/4.3.0-b3.php
new file mode 100644
index 0000000000..92237e25ca
--- /dev/null
+++ b/core/Updates/4.3.0-b3.php
@@ -0,0 +1,53 @@
+<?php
+/**
+ * Matomo - free/libre analytics platform
+ *
+ * @link https://matomo.org
+ * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
+ *
+ */
+
+namespace Piwik\Updates;
+
+use Piwik\Updater;
+use Piwik\Updates as PiwikUpdates;
+use Piwik\Updater\Migration;
+use Piwik\Updater\Migration\Factory as MigrationFactory;
+use Piwik\Db;
+use Piwik\Common;
+
+/**
+ * Update for version 4.3.0-b3.
+ */
+class Updates_4_3_0_b3 extends PiwikUpdates
+{
+ /**
+ * @var MigrationFactory
+ */
+ private $migration;
+
+ public function __construct(MigrationFactory $factory)
+ {
+ $this->migration = $factory;
+ }
+
+ public function getMigrations(Updater $updater)
+ {
+ $migrations = [];
+
+ $migrations[] = $this->migration->db->addColumn('segment', 'hash', 'CHAR(32) NULL', 'definition');
+
+ $segmentTable = Common::prefixTable('segment');
+ $segments = Db::fetchAll("SELECT idsegment, definition FROM $segmentTable WHERE deleted = ?", [0]);
+ foreach ($segments as $segment) {
+ $hash = md5(urldecode($segment['definition']));
+ $migrations[] = $this->migration->db->sql("UPDATE `$segmentTable` SET `hash` = '$hash' WHERE `idsegment` = '{$segment['idsegment']}'");
+ }
+ return $migrations;
+ }
+
+ public function doUpdate(Updater $updater)
+ {
+ $updater->executeMigrations(__FILE__, $this->getMigrations($updater));
+ }
+}
diff --git a/core/Version.php b/core/Version.php
index dd5d8de953..e38899c779 100644
--- a/core/Version.php
+++ b/core/Version.php
@@ -20,7 +20,7 @@ final class Version
* The current Matomo version.
* @var string
*/
- const VERSION = '4.3.0-b2';
+ const VERSION = '4.3.0-b3';
const MAJOR_VERSION = 4;
public function isStableVersion($version)
diff --git a/plugins/SegmentEditor/API.php b/plugins/SegmentEditor/API.php
index 65758b3c64..49b436c3d2 100644
--- a/plugins/SegmentEditor/API.php
+++ b/plugins/SegmentEditor/API.php
@@ -23,6 +23,7 @@ use Piwik\Config;
use Piwik\Segment;
use Piwik\Site;
use Psr\Log\LoggerInterface;
+use Piwik\Cache;
/**
* The SegmentEditor API lets you add, update, delete custom Segments, and list saved segments.
@@ -222,6 +223,8 @@ class API extends \Piwik\Plugin\API
$this->getModel()->deleteSegment($idSegment);
+ Cache::getEagerCache()->flushAll();
+
return true;
}
@@ -280,6 +283,8 @@ class API extends \Piwik\Plugin\API
$this->segmentArchiving->reArchiveSegment($bind);
}
+ Cache::getEagerCache()->flushAll();
+
return true;
}
@@ -316,6 +321,8 @@ class API extends \Piwik\Plugin\API
$id = $this->getModel()->createSegment($bind);
+ Cache::getEagerCache()->flushAll();
+
if ($autoArchive
&& !Rules::isBrowserTriggerEnabled()
&& $this->processNewSegmentsFrom != SegmentArchiving::CREATION_TIME
diff --git a/plugins/SegmentEditor/Model.php b/plugins/SegmentEditor/Model.php
index 5dccf40b14..f876d2a46c 100644
--- a/plugins/SegmentEditor/Model.php
+++ b/plugins/SegmentEditor/Model.php
@@ -225,6 +225,9 @@ class Model
{
$idSegment = (int) $idSegment;
+ if (isset($segment['definition'])) {
+ $segment['hash'] = $this->createHash($segment['definition']);
+ }
$db = $this->getDb();
$db->update($this->getTable(), $segment, "idsegment = $idSegment");
@@ -234,6 +237,8 @@ class Model
public function createSegment($segment)
{
$db = $this->getDb();
+
+ $segment['hash'] = $this->createHash($segment['definition']);
$db->insert($this->getTable(), $segment);
$id = $db->lastInsertId();
@@ -258,19 +263,25 @@ class Model
return "SELECT * FROM " . $this->getTable() . " WHERE $where ORDER BY name ASC";
}
+ private function createHash($definition)
+ {
+ return md5(urldecode($definition));
+ }
+
public static function install()
{
$segmentTable = "`idsegment` INT(11) NOT NULL AUTO_INCREMENT,
- `name` VARCHAR(255) NOT NULL,
- `definition` TEXT NOT NULL,
- `login` VARCHAR(100) NOT NULL,
- `enable_all_users` tinyint(4) NOT NULL default 0,
- `enable_only_idsite` INTEGER(11) NULL,
- `auto_archive` tinyint(4) NOT NULL default 0,
- `ts_created` TIMESTAMP NULL,
- `ts_last_edit` TIMESTAMP NULL,
- `deleted` tinyint(4) NOT NULL default 0,
- PRIMARY KEY (`idsegment`)";
+ `name` VARCHAR(255) NOT NULL,
+ `definition` TEXT NOT NULL,
+ `hash` CHAR(32) NULL,
+ `login` VARCHAR(100) NOT NULL,
+ `enable_all_users` tinyint(4) NOT NULL default 0,
+ `enable_only_idsite` INTEGER(11) NULL,
+ `auto_archive` tinyint(4) NOT NULL default 0,
+ `ts_created` TIMESTAMP NULL,
+ `ts_last_edit` TIMESTAMP NULL,
+ `deleted` tinyint(4) NOT NULL default 0,
+ PRIMARY KEY (`idsegment`)";
DbHelper::createTable(self::$rawPrefix, $segmentTable);
}
diff --git a/plugins/SegmentEditor/tests/Integration/SegmentEditorTest.php b/plugins/SegmentEditor/tests/Integration/SegmentEditorTest.php
index ec594117a9..09430ca9be 100644
--- a/plugins/SegmentEditor/tests/Integration/SegmentEditorTest.php
+++ b/plugins/SegmentEditor/tests/Integration/SegmentEditorTest.php
@@ -72,9 +72,10 @@ class SegmentEditorTest extends IntegrationTestCase
$segment = API::getInstance()->get($idSegment);
unset($segment['ts_created']);
$expected = array(
- 'idsegment' => 1,
+ 'idsegment' => '1',
'name' => $name,
'definition' => $definition,
+ 'hash' => md5($definition),
'login' => 'superUserLogin',
'enable_all_users' => '0',
'enable_only_idsite' => '0',
@@ -104,6 +105,7 @@ class SegmentEditorTest extends IntegrationTestCase
'idsegment' => '1',
'name' => $name,
'definition' => $definition,
+ 'hash' => md5($definition),
'login' => 'superUserLogin',
'enable_all_users' => '1',
'enable_only_idsite' => '1',
@@ -145,6 +147,7 @@ class SegmentEditorTest extends IntegrationTestCase
'idsegment' => $idSegment2,
'name' => 'NEW name',
'definition' => 'searches==0',
+ 'hash' => md5('searches==0'),
'enable_only_idsite' => '0',
'enable_all_users' => '0',
'auto_archive' => '1',
diff --git a/plugins/SegmentEditor/tests/System/ApiTest.php b/plugins/SegmentEditor/tests/System/ApiTest.php
index 0ae9dea78c..2b8d881490 100644
--- a/plugins/SegmentEditor/tests/System/ApiTest.php
+++ b/plugins/SegmentEditor/tests/System/ApiTest.php
@@ -62,4 +62,35 @@ class ApiTest extends SystemTestCase
$this->assertEquals($segmentApiHash, $segmentDefinitionHash);
}
-} \ No newline at end of file
+
+ /**
+ * @dataProvider definitionsDataProvider
+ */
+ public function test_generatedSegmentHash($definition)
+ {
+ Fixture::createWebsite('2020-03-03 00:00:00');
+
+ Config::getInstance()->General['enable_browser_archiving_triggering'] = 0;
+ self::$fixture->getTestEnvironment()->overrideConfig('General', 'enable_browser_archiving_triggering', 0);
+ self::$fixture->getTestEnvironment()->save();
+
+ $idSegment = SegmentEditorApi::getInstance()->add('test segment', $definition, 1, 1, 1);
+ $segment = SegmentEditorApi::getInstance()->get($idSegment);
+
+ $hash = $segment['hash'];
+ $generatedHash = md5(urldecode($segment['definition']));
+
+ $this->assertEquals($generatedHash, $hash);
+ }
+
+ public function definitionsDataProvider()
+ {
+ return [
+ ['pageUrl=@%252F1'],
+ ['actions>=1'],
+ ['operatingSystemName==Ubuntu;browserName==Firefox'],
+ ['pageUrl==https%253A%252F%252Fmatomo.org%252Fpricing%252F'],
+ ['visitIp>=80.229.0.0;visitIp<=80.229.255.255'],
+ ];
+ }
+}
diff --git a/tests/PHPUnit/Integration/Segment/SegmentsCacheTest.php b/tests/PHPUnit/Integration/Segment/SegmentsCacheTest.php
new file mode 100644
index 0000000000..c715788e7f
--- /dev/null
+++ b/tests/PHPUnit/Integration/Segment/SegmentsCacheTest.php
@@ -0,0 +1,108 @@
+<?php
+/**
+ * Matomo - free/libre analytics platform
+ *
+ * @link https://matomo.org
+ * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
+ */
+
+namespace Piwik\Tests\Integration;
+
+use Piwik\Cache;
+use Piwik\Segment;
+use Piwik\Tests\Framework\TestCase\IntegrationTestCase;
+use Piwik\Plugins\SegmentEditor\API as SegmentEditorApi;
+use Piwik\Plugins\SegmentEditor\Model as SegmentEditorModel;
+
+/**
+ * @group Segment
+ * @group SegmentsCacheTest
+ */
+class SegmentsCacheTest extends IntegrationTestCase
+{
+ public function test_segment_hash_cached()
+ {
+ $cache = Cache::getEagerCache();
+ $model = new SegmentEditorModel();
+
+ $idSegment = SegmentEditorApi::getInstance()->add('test segment 1', 'browserCode==ff');
+ $segment = $model->getSegment($idSegment);
+ $key1 = Segment::CACHE_KEY . md5($segment['definition']);
+
+ $this->assertFalse($cache->contains($key1));
+
+ Segment::getSegmentHash('dummy');
+
+ $this->assertTrue($cache->contains($key1));
+ }
+
+ public function test_if_segment_not_found_default_hash_returned()
+ {
+ $definition = 'browserCode==ch';
+ $defaultHash = md5(urldecode($definition));
+
+ $this->assertEquals($defaultHash, Segment::getSegmentHash($definition));
+ }
+
+ public function test_cashed_hashes_for_similar_segments()
+ {
+ $cache = Cache::getEagerCache();
+ $model = new SegmentEditorModel();
+
+ // pageUrl==http://abc.com/d+f
+ // pageUrl==http://abc.com/d f
+ // pageUrl==http://abc.com/d%20f
+ $definitions = [
+ 'pageUrl==http%253A%252F%252Fabc.com%252Fd%252Bf',
+ 'pageUrl==http%253A%252F%252Fabc.com%252Fd%2520f',
+ 'pageUrl==http%253A%252F%252Fabc.com%252Fd%252520f',
+ ];
+
+ $idSegment1 = SegmentEditorApi::getInstance()->add('test segment 1', $definitions[0]);
+ $idSegment2 = SegmentEditorApi::getInstance()->add('test segment 2', $definitions[1]);
+ $idSegment3 = SegmentEditorApi::getInstance()->add('test segment 3', $definitions[2]);
+ $segment1 = $model->getSegment($idSegment1);
+ $segment2 = $model->getSegment($idSegment2);
+ $segment3 = $model->getSegment($idSegment3);
+ $tests = [];
+
+ $tests[$segment1['hash']] = [
+ Segment::CACHE_KEY . md5($segment1['definition']),
+ Segment::CACHE_KEY . md5(urldecode($segment1['definition'])),
+ Segment::CACHE_KEY . md5(urlencode($segment1['definition'])),
+ ];
+ $tests[$segment2['hash']] = [
+ Segment::CACHE_KEY . md5($segment2['definition']),
+ Segment::CACHE_KEY . md5(urldecode($segment2['definition'])),
+ Segment::CACHE_KEY . md5(urlencode($segment2['definition'])),
+ ];
+ $tests[$segment3['hash']] = [
+ Segment::CACHE_KEY . md5($segment3['definition']),
+ Segment::CACHE_KEY . md5(urldecode($segment3['definition'])),
+ Segment::CACHE_KEY . md5(urlencode($segment3['definition'])),
+ ];
+
+ Segment::getSegmentHash('dummy');
+
+ foreach ($tests as $hash => $keys) {
+ foreach ($keys as $key) {
+ $this->assertEquals($hash, $cache->fetch($key));
+ }
+ }
+ }
+
+ public function test_segment_cache_with_operator_characters()
+ {
+ $cache = Cache::getEagerCache();
+ $model = new SegmentEditorModel();
+
+ // pageUrl==http://abc.com/=/@/!/,/;
+ $idSegment = SegmentEditorApi::getInstance()->add('test segment 1', 'pageUrl==http%253A%252F%252Fabc.com%252F%253D%252F%2540%252F!%252F%252C%252F%253B');
+ $segment = $model->getSegment($idSegment);
+ $key1 = Segment::CACHE_KEY . md5($segment['definition']);
+
+ Segment::getSegmentHash('dummy');
+
+ $this->assertTrue($cache->contains($key1));
+ }
+}
diff --git a/tests/PHPUnit/System/BlobReportLimitingTest.php b/tests/PHPUnit/System/BlobReportLimitingTest.php
index 3a092baf6e..9f1f5fa717 100644
--- a/tests/PHPUnit/System/BlobReportLimitingTest.php
+++ b/tests/PHPUnit/System/BlobReportLimitingTest.php
@@ -222,9 +222,9 @@ class BlobReportLimitingTest extends SystemTestCase
$generalConfig['datatable_archiving_maximum_rows_events'] = 3;
$generalConfig['datatable_archiving_maximum_rows_subtable_events'] = 2;
$generalConfig['archiving_ranking_query_row_limit'] = 50000;
- // Should be more than the datatable_archiving_maximum_rows_actions as code will take the max of these two
+ // Should be more than the datatable_archiving_maximum_rows_actions as code will take the max of these two
$generalConfig['datatable_archiving_maximum_rows_site_search'] = 5;
}
}
-BlobReportLimitingTest::$fixture = new ManyVisitsWithMockLocationProvider(); \ No newline at end of file
+BlobReportLimitingTest::$fixture = new ManyVisitsWithMockLocationProvider();