diff options
author | Zoltan Flamis <zoltan@innocraft.com> | 2021-04-19 01:18:49 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-04-19 01:18:49 +0300 |
commit | 7b1b36c46559ee71c144152a0d95cd41e162e1dc (patch) | |
tree | 3223186fdfd371f5f21d16b7e5acbc03e5a206c6 | |
parent | 84b9f9c33ce6402556008c8764a79747f24b5b0f (diff) |
store segment hash in DB (#17408)
* get segment hash
* convert tab indentations to spaces
* Create 4.3.0-b2.php
* update tests and update file
* bump version
* Update 4.3.0-b3.php
* Update ApiTest.php
* fixing urlencode bugs
* cache segment hashes
* update segment caching
* update segment caching
* add segment cache test
* add testdox to phpunit.xml
* revert phunit.xml
* test investigation
* Update phpunit.xml.dist
* update blobreportlimitingt test
* Revert "update blobreportlimitingt test"
This reverts commit 90fe7355cd1d227193f0b73ddf1d715c8b016616.
* Update phpunit.xml.dist
* Update BlobReportLimitingTest.php
* Update BlobReportLimitingTest.php
* Update SystemTestCase.php
* Update BlobReportLimitingTest.php
* Update SystemTestCase.php
* Update phpunit.xml.dist
* modify mem limit for travis
* Update .travis.yml
* revert travis.yml
* Update SystemTestCase.php
* try test without cache
* test witch cache and gc_disabled
* Revert "test witch cache and gc_disabled"
This reverts commit 7e1d37093c7be648a84b57335edf397c89913758.
* test witch cache and gc_disabled
* use other model method
* refactor test
* Workaround error in Overlay when site has no URLs (#17457)
* Set setting value even if set to NULL so it will still be validated.
* Make sure when creating a site that the urls options is set.
* workaround in Overlay for instances that have an invalid site URL set for some reason
* Add integration tests for changes to SitesManager API.
* revert non-overlay changes
* Add warning if site has no URLs when viewing Overlay.
* add more tests for segment caches
* get segment hash
* convert tab indentations to spaces
* Create 4.3.0-b2.php
* update tests and update file
* bump version
* Update 4.3.0-b3.php
* Update ApiTest.php
* fixing urlencode bugs
* cache segment hashes
* update segment caching
* update segment caching
* add segment cache test
* add testdox to phpunit.xml
* revert phunit.xml
* test investigation
* Update phpunit.xml.dist
* update blobreportlimitingt test
* Revert "update blobreportlimitingt test"
This reverts commit 90fe7355cd1d227193f0b73ddf1d715c8b016616.
* Update phpunit.xml.dist
* Update BlobReportLimitingTest.php
* Update BlobReportLimitingTest.php
* Update SystemTestCase.php
* Update BlobReportLimitingTest.php
* Update SystemTestCase.php
* Update phpunit.xml.dist
* modify mem limit for travis
* Update .travis.yml
* revert travis.yml
* Update SystemTestCase.php
* try test without cache
* test witch cache and gc_disabled
* Revert "test witch cache and gc_disabled"
This reverts commit 7e1d37093c7be648a84b57335edf397c89913758.
* test witch cache and gc_disabled
* use other model method
* refactor test
* add more tests for segment caches
* revert phpunit.xml
* revert phpunit.xml
* add more tests
Co-authored-by: dizzy <diosmosis@users.noreply.github.com>
-rw-r--r-- | core/Archive.php | 4 | ||||
-rw-r--r-- | core/CronArchive/QueueConsumer.php | 4 | ||||
-rw-r--r-- | core/Segment.php | 49 | ||||
-rw-r--r-- | core/Updates/4.3.0-b3.php | 53 | ||||
-rw-r--r-- | core/Version.php | 2 | ||||
-rw-r--r-- | plugins/SegmentEditor/API.php | 7 | ||||
-rw-r--r-- | plugins/SegmentEditor/Model.php | 31 | ||||
-rw-r--r-- | plugins/SegmentEditor/tests/Integration/SegmentEditorTest.php | 5 | ||||
-rw-r--r-- | plugins/SegmentEditor/tests/System/ApiTest.php | 33 | ||||
-rw-r--r-- | tests/PHPUnit/Integration/Segment/SegmentsCacheTest.php | 108 | ||||
-rw-r--r-- | tests/PHPUnit/System/BlobReportLimitingTest.php | 4 |
11 files changed, 278 insertions, 22 deletions
diff --git a/core/Archive.php b/core/Archive.php index 0c0651a029..8338d19ec3 100644 --- a/core/Archive.php +++ b/core/Archive.php @@ -756,7 +756,7 @@ class Archive implements ArchiveQuery { $periods = $this->params->getPeriods(); $periodLabel = reset($periods)->getLabel(); - + if (Rules::shouldProcessReportsAllPlugins($this->params->getIdSites(), $this->params->getSegment(), $periodLabel)) { return self::ARCHIVE_ALL_PLUGINS_FLAG; } @@ -822,7 +822,7 @@ class Archive implements ArchiveQuery $this->initializeArchiveIdCache($doneFlag); $prepareResult = $coreAdminHomeApi->archiveReports( - $site->getId(), $period->getLabel(), $periodDateStr, $this->params->getSegment()->getString(), + $site->getId(), $period->getLabel(), $periodDateStr, urlencode($this->params->getSegment()->getString()), $plugin, $requestedReport); if (!empty($prepareResult) diff --git a/core/CronArchive/QueueConsumer.php b/core/CronArchive/QueueConsumer.php index ff751abd0d..966e9ea6ca 100644 --- a/core/CronArchive/QueueConsumer.php +++ b/core/CronArchive/QueueConsumer.php @@ -575,7 +575,7 @@ class QueueConsumer $dateStr = $periodLabel == 'range' ? ($invalidatedArchive['date1'] . ',' . $invalidatedArchive['date2']) : $invalidatedArchive['date1']; $period = PeriodFactory::build($periodLabel, $dateStr); - $segment = new Segment($invalidatedArchive['segment'], [$invalidatedArchive['idsite']]); + $segment = new Segment(urlencode($invalidatedArchive['segment']), [$invalidatedArchive['idsite']]); $params = new Parameters($site, $period, $segment); @@ -604,4 +604,4 @@ class QueueConsumer { return $this->idSite; } -}
\ No newline at end of file +} diff --git a/core/Segment.php b/core/Segment.php index 20761ffa17..2d06cb5199 100644 --- a/core/Segment.php +++ b/core/Segment.php @@ -15,6 +15,8 @@ use Piwik\Container\StaticContainer; use Piwik\DataAccess\LogQueryBuilder; use Piwik\Plugins\SegmentEditor\SegmentEditor; use Piwik\Segment\SegmentExpression; +use Piwik\Plugins\SegmentEditor\Model as SegmentEditorModel; +use Piwik\Cache; /** * Limits the set of visits Piwik uses when aggregating analytics data. @@ -100,6 +102,9 @@ class Segment */ const SEGMENT_TRUNCATE_LIMIT = 8192; + const CACHE_KEY = 'segmenthashes'; + const SEGMENT_HAS_BUILT_CACHE_KEY ='segmenthashbuilt'; + /** * Constructor. * @@ -460,8 +465,46 @@ class Segment public static function getSegmentHash($definition) { - // urldecode to normalize the string, as browsers may send slightly different payloads for the same archive - return md5(urldecode($definition)); + $cache = Cache::getEagerCache(); + $cacheKey = self::CACHE_KEY . md5($definition); + + if ($cache->contains($cacheKey)) { + return $cache->fetch($cacheKey); + } + + $defaultHash = md5(urldecode($definition)); + + // if the cache for segments already built, but this segment was not found, + // we return the default segment, this can be a segment from url or + // something like "visitorType==new" + if ($cache->contains(self::SEGMENT_HAS_BUILT_CACHE_KEY)) { + return $defaultHash; + } + + // the segment hash is not built yet, let's do it + $model = new SegmentEditorModel(); + $segments = $model->getAllSegmentsAndIgnoreVisibility(); + + foreach ($segments as $segment) { + $cacheKeyTemp = self::CACHE_KEY . md5($segment['definition']); + $cache->save($cacheKeyTemp, $segment['hash']); + + $cacheKeyTemp = self::CACHE_KEY . md5(urldecode($segment['definition'])); + $cache->save($cacheKeyTemp, $segment['hash']); + + $cacheKeyTemp = self::CACHE_KEY . md5(urlencode($segment['definition'])); + $cache->save($cacheKeyTemp, $segment['hash']); + } + + $cache->save(self::SEGMENT_HAS_BUILT_CACHE_KEY, true); + + // if we found the segment, return it's hash, but maybe this + // segment is not stored in the db, return the default + if ($cache->contains($cacheKey)) { + return $cache->fetch($cacheKey); + } + + return $defaultHash; } /** @@ -477,7 +520,7 @@ class Segment * @param int $limit Limit number of result to $limit * @param int $offset Specified the offset of the first row to return * @param bool $forceGroupBy Force the group by and not using a subquery. Note: This may make the query slower see https://github.com/matomo-org/matomo/issues/9200#issuecomment-183641293 - * A $groupBy value needs to be set for this to work. + * A $groupBy value needs to be set for this to work. * @param int If set to value >= 1 then the Select query (and All inner queries) will be LIMIT'ed by this value. * Use only when you're not aggregating or it will sample the data. * @return string The entire select query. diff --git a/core/Updates/4.3.0-b3.php b/core/Updates/4.3.0-b3.php new file mode 100644 index 0000000000..92237e25ca --- /dev/null +++ b/core/Updates/4.3.0-b3.php @@ -0,0 +1,53 @@ +<?php +/** + * Matomo - free/libre analytics platform + * + * @link https://matomo.org + * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later + * + */ + +namespace Piwik\Updates; + +use Piwik\Updater; +use Piwik\Updates as PiwikUpdates; +use Piwik\Updater\Migration; +use Piwik\Updater\Migration\Factory as MigrationFactory; +use Piwik\Db; +use Piwik\Common; + +/** + * Update for version 4.3.0-b3. + */ +class Updates_4_3_0_b3 extends PiwikUpdates +{ + /** + * @var MigrationFactory + */ + private $migration; + + public function __construct(MigrationFactory $factory) + { + $this->migration = $factory; + } + + public function getMigrations(Updater $updater) + { + $migrations = []; + + $migrations[] = $this->migration->db->addColumn('segment', 'hash', 'CHAR(32) NULL', 'definition'); + + $segmentTable = Common::prefixTable('segment'); + $segments = Db::fetchAll("SELECT idsegment, definition FROM $segmentTable WHERE deleted = ?", [0]); + foreach ($segments as $segment) { + $hash = md5(urldecode($segment['definition'])); + $migrations[] = $this->migration->db->sql("UPDATE `$segmentTable` SET `hash` = '$hash' WHERE `idsegment` = '{$segment['idsegment']}'"); + } + return $migrations; + } + + public function doUpdate(Updater $updater) + { + $updater->executeMigrations(__FILE__, $this->getMigrations($updater)); + } +} diff --git a/core/Version.php b/core/Version.php index dd5d8de953..e38899c779 100644 --- a/core/Version.php +++ b/core/Version.php @@ -20,7 +20,7 @@ final class Version * The current Matomo version. * @var string */ - const VERSION = '4.3.0-b2'; + const VERSION = '4.3.0-b3'; const MAJOR_VERSION = 4; public function isStableVersion($version) diff --git a/plugins/SegmentEditor/API.php b/plugins/SegmentEditor/API.php index 65758b3c64..49b436c3d2 100644 --- a/plugins/SegmentEditor/API.php +++ b/plugins/SegmentEditor/API.php @@ -23,6 +23,7 @@ use Piwik\Config; use Piwik\Segment; use Piwik\Site; use Psr\Log\LoggerInterface; +use Piwik\Cache; /** * The SegmentEditor API lets you add, update, delete custom Segments, and list saved segments. @@ -222,6 +223,8 @@ class API extends \Piwik\Plugin\API $this->getModel()->deleteSegment($idSegment); + Cache::getEagerCache()->flushAll(); + return true; } @@ -280,6 +283,8 @@ class API extends \Piwik\Plugin\API $this->segmentArchiving->reArchiveSegment($bind); } + Cache::getEagerCache()->flushAll(); + return true; } @@ -316,6 +321,8 @@ class API extends \Piwik\Plugin\API $id = $this->getModel()->createSegment($bind); + Cache::getEagerCache()->flushAll(); + if ($autoArchive && !Rules::isBrowserTriggerEnabled() && $this->processNewSegmentsFrom != SegmentArchiving::CREATION_TIME diff --git a/plugins/SegmentEditor/Model.php b/plugins/SegmentEditor/Model.php index 5dccf40b14..f876d2a46c 100644 --- a/plugins/SegmentEditor/Model.php +++ b/plugins/SegmentEditor/Model.php @@ -225,6 +225,9 @@ class Model { $idSegment = (int) $idSegment; + if (isset($segment['definition'])) { + $segment['hash'] = $this->createHash($segment['definition']); + } $db = $this->getDb(); $db->update($this->getTable(), $segment, "idsegment = $idSegment"); @@ -234,6 +237,8 @@ class Model public function createSegment($segment) { $db = $this->getDb(); + + $segment['hash'] = $this->createHash($segment['definition']); $db->insert($this->getTable(), $segment); $id = $db->lastInsertId(); @@ -258,19 +263,25 @@ class Model return "SELECT * FROM " . $this->getTable() . " WHERE $where ORDER BY name ASC"; } + private function createHash($definition) + { + return md5(urldecode($definition)); + } + public static function install() { $segmentTable = "`idsegment` INT(11) NOT NULL AUTO_INCREMENT, - `name` VARCHAR(255) NOT NULL, - `definition` TEXT NOT NULL, - `login` VARCHAR(100) NOT NULL, - `enable_all_users` tinyint(4) NOT NULL default 0, - `enable_only_idsite` INTEGER(11) NULL, - `auto_archive` tinyint(4) NOT NULL default 0, - `ts_created` TIMESTAMP NULL, - `ts_last_edit` TIMESTAMP NULL, - `deleted` tinyint(4) NOT NULL default 0, - PRIMARY KEY (`idsegment`)"; + `name` VARCHAR(255) NOT NULL, + `definition` TEXT NOT NULL, + `hash` CHAR(32) NULL, + `login` VARCHAR(100) NOT NULL, + `enable_all_users` tinyint(4) NOT NULL default 0, + `enable_only_idsite` INTEGER(11) NULL, + `auto_archive` tinyint(4) NOT NULL default 0, + `ts_created` TIMESTAMP NULL, + `ts_last_edit` TIMESTAMP NULL, + `deleted` tinyint(4) NOT NULL default 0, + PRIMARY KEY (`idsegment`)"; DbHelper::createTable(self::$rawPrefix, $segmentTable); } diff --git a/plugins/SegmentEditor/tests/Integration/SegmentEditorTest.php b/plugins/SegmentEditor/tests/Integration/SegmentEditorTest.php index ec594117a9..09430ca9be 100644 --- a/plugins/SegmentEditor/tests/Integration/SegmentEditorTest.php +++ b/plugins/SegmentEditor/tests/Integration/SegmentEditorTest.php @@ -72,9 +72,10 @@ class SegmentEditorTest extends IntegrationTestCase $segment = API::getInstance()->get($idSegment); unset($segment['ts_created']); $expected = array( - 'idsegment' => 1, + 'idsegment' => '1', 'name' => $name, 'definition' => $definition, + 'hash' => md5($definition), 'login' => 'superUserLogin', 'enable_all_users' => '0', 'enable_only_idsite' => '0', @@ -104,6 +105,7 @@ class SegmentEditorTest extends IntegrationTestCase 'idsegment' => '1', 'name' => $name, 'definition' => $definition, + 'hash' => md5($definition), 'login' => 'superUserLogin', 'enable_all_users' => '1', 'enable_only_idsite' => '1', @@ -145,6 +147,7 @@ class SegmentEditorTest extends IntegrationTestCase 'idsegment' => $idSegment2, 'name' => 'NEW name', 'definition' => 'searches==0', + 'hash' => md5('searches==0'), 'enable_only_idsite' => '0', 'enable_all_users' => '0', 'auto_archive' => '1', diff --git a/plugins/SegmentEditor/tests/System/ApiTest.php b/plugins/SegmentEditor/tests/System/ApiTest.php index 0ae9dea78c..2b8d881490 100644 --- a/plugins/SegmentEditor/tests/System/ApiTest.php +++ b/plugins/SegmentEditor/tests/System/ApiTest.php @@ -62,4 +62,35 @@ class ApiTest extends SystemTestCase $this->assertEquals($segmentApiHash, $segmentDefinitionHash); } -}
\ No newline at end of file + + /** + * @dataProvider definitionsDataProvider + */ + public function test_generatedSegmentHash($definition) + { + Fixture::createWebsite('2020-03-03 00:00:00'); + + Config::getInstance()->General['enable_browser_archiving_triggering'] = 0; + self::$fixture->getTestEnvironment()->overrideConfig('General', 'enable_browser_archiving_triggering', 0); + self::$fixture->getTestEnvironment()->save(); + + $idSegment = SegmentEditorApi::getInstance()->add('test segment', $definition, 1, 1, 1); + $segment = SegmentEditorApi::getInstance()->get($idSegment); + + $hash = $segment['hash']; + $generatedHash = md5(urldecode($segment['definition'])); + + $this->assertEquals($generatedHash, $hash); + } + + public function definitionsDataProvider() + { + return [ + ['pageUrl=@%252F1'], + ['actions>=1'], + ['operatingSystemName==Ubuntu;browserName==Firefox'], + ['pageUrl==https%253A%252F%252Fmatomo.org%252Fpricing%252F'], + ['visitIp>=80.229.0.0;visitIp<=80.229.255.255'], + ]; + } +} diff --git a/tests/PHPUnit/Integration/Segment/SegmentsCacheTest.php b/tests/PHPUnit/Integration/Segment/SegmentsCacheTest.php new file mode 100644 index 0000000000..c715788e7f --- /dev/null +++ b/tests/PHPUnit/Integration/Segment/SegmentsCacheTest.php @@ -0,0 +1,108 @@ +<?php +/** + * Matomo - free/libre analytics platform + * + * @link https://matomo.org + * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later + */ + +namespace Piwik\Tests\Integration; + +use Piwik\Cache; +use Piwik\Segment; +use Piwik\Tests\Framework\TestCase\IntegrationTestCase; +use Piwik\Plugins\SegmentEditor\API as SegmentEditorApi; +use Piwik\Plugins\SegmentEditor\Model as SegmentEditorModel; + +/** + * @group Segment + * @group SegmentsCacheTest + */ +class SegmentsCacheTest extends IntegrationTestCase +{ + public function test_segment_hash_cached() + { + $cache = Cache::getEagerCache(); + $model = new SegmentEditorModel(); + + $idSegment = SegmentEditorApi::getInstance()->add('test segment 1', 'browserCode==ff'); + $segment = $model->getSegment($idSegment); + $key1 = Segment::CACHE_KEY . md5($segment['definition']); + + $this->assertFalse($cache->contains($key1)); + + Segment::getSegmentHash('dummy'); + + $this->assertTrue($cache->contains($key1)); + } + + public function test_if_segment_not_found_default_hash_returned() + { + $definition = 'browserCode==ch'; + $defaultHash = md5(urldecode($definition)); + + $this->assertEquals($defaultHash, Segment::getSegmentHash($definition)); + } + + public function test_cashed_hashes_for_similar_segments() + { + $cache = Cache::getEagerCache(); + $model = new SegmentEditorModel(); + + // pageUrl==http://abc.com/d+f + // pageUrl==http://abc.com/d f + // pageUrl==http://abc.com/d%20f + $definitions = [ + 'pageUrl==http%253A%252F%252Fabc.com%252Fd%252Bf', + 'pageUrl==http%253A%252F%252Fabc.com%252Fd%2520f', + 'pageUrl==http%253A%252F%252Fabc.com%252Fd%252520f', + ]; + + $idSegment1 = SegmentEditorApi::getInstance()->add('test segment 1', $definitions[0]); + $idSegment2 = SegmentEditorApi::getInstance()->add('test segment 2', $definitions[1]); + $idSegment3 = SegmentEditorApi::getInstance()->add('test segment 3', $definitions[2]); + $segment1 = $model->getSegment($idSegment1); + $segment2 = $model->getSegment($idSegment2); + $segment3 = $model->getSegment($idSegment3); + $tests = []; + + $tests[$segment1['hash']] = [ + Segment::CACHE_KEY . md5($segment1['definition']), + Segment::CACHE_KEY . md5(urldecode($segment1['definition'])), + Segment::CACHE_KEY . md5(urlencode($segment1['definition'])), + ]; + $tests[$segment2['hash']] = [ + Segment::CACHE_KEY . md5($segment2['definition']), + Segment::CACHE_KEY . md5(urldecode($segment2['definition'])), + Segment::CACHE_KEY . md5(urlencode($segment2['definition'])), + ]; + $tests[$segment3['hash']] = [ + Segment::CACHE_KEY . md5($segment3['definition']), + Segment::CACHE_KEY . md5(urldecode($segment3['definition'])), + Segment::CACHE_KEY . md5(urlencode($segment3['definition'])), + ]; + + Segment::getSegmentHash('dummy'); + + foreach ($tests as $hash => $keys) { + foreach ($keys as $key) { + $this->assertEquals($hash, $cache->fetch($key)); + } + } + } + + public function test_segment_cache_with_operator_characters() + { + $cache = Cache::getEagerCache(); + $model = new SegmentEditorModel(); + + // pageUrl==http://abc.com/=/@/!/,/; + $idSegment = SegmentEditorApi::getInstance()->add('test segment 1', 'pageUrl==http%253A%252F%252Fabc.com%252F%253D%252F%2540%252F!%252F%252C%252F%253B'); + $segment = $model->getSegment($idSegment); + $key1 = Segment::CACHE_KEY . md5($segment['definition']); + + Segment::getSegmentHash('dummy'); + + $this->assertTrue($cache->contains($key1)); + } +} diff --git a/tests/PHPUnit/System/BlobReportLimitingTest.php b/tests/PHPUnit/System/BlobReportLimitingTest.php index 3a092baf6e..9f1f5fa717 100644 --- a/tests/PHPUnit/System/BlobReportLimitingTest.php +++ b/tests/PHPUnit/System/BlobReportLimitingTest.php @@ -222,9 +222,9 @@ class BlobReportLimitingTest extends SystemTestCase $generalConfig['datatable_archiving_maximum_rows_events'] = 3; $generalConfig['datatable_archiving_maximum_rows_subtable_events'] = 2; $generalConfig['archiving_ranking_query_row_limit'] = 50000; - // Should be more than the datatable_archiving_maximum_rows_actions as code will take the max of these two + // Should be more than the datatable_archiving_maximum_rows_actions as code will take the max of these two $generalConfig['datatable_archiving_maximum_rows_site_search'] = 5; } } -BlobReportLimitingTest::$fixture = new ManyVisitsWithMockLocationProvider();
\ No newline at end of file +BlobReportLimitingTest::$fixture = new ManyVisitsWithMockLocationProvider(); |