diff options
author | Matthieu Aubry <matt@piwik.org> | 2015-09-29 13:32:37 +0300 |
---|---|---|
committer | Matthieu Aubry <matt@piwik.org> | 2015-09-29 13:32:37 +0300 |
commit | 5ce6703c239817eaca67259586e4cadd3f66974e (patch) | |
tree | bdb0a465811945cca28e5b8f6ba094120614b9f1 | |
parent | a581017150bbc11470d476ab2e8818ba96e944fe (diff) | |
parent | 3b71fe2782cb8540c42761a0651e8776a5c34da2 (diff) |
Merge pull request #8861 from piwik/8850
Caching id actions in general cache for faster segmented archive SQL queries
-rw-r--r-- | config/global.ini.php | 8 | ||||
-rw-r--r-- | core/Segment.php | 37 | ||||
-rw-r--r-- | core/Segment/SegmentExpression.php | 13 | ||||
-rw-r--r-- | core/Tracker/TableLogAction.php | 8 | ||||
-rw-r--r-- | core/Tracker/TableLogAction/Cache.php | 172 | ||||
-rw-r--r-- | tests/PHPUnit/Integration/SegmentTest.php | 222 | ||||
-rw-r--r-- | tests/PHPUnit/Unit/Segment/SegmentExpressionTest.php | 4 |
7 files changed, 430 insertions, 34 deletions
diff --git a/config/global.ini.php b/config/global.ini.php index c3afdb93ef..2b86a1c131 100644 --- a/config/global.ini.php +++ b/config/global.ini.php @@ -157,6 +157,14 @@ enable_processing_unique_visitors_multiple_sites = 0 enabled_periods_UI = "day,week,month,year,range" enabled_periods_API = "day,week,month,year,range" +; whether to enable subquery cache for Custom Segment archiving queries +enable_segments_subquery_cache = 0 +; Any segment subquery that matches more than segments_subquery_cache_limit IDs will not be cached, +; and the original subquery executed instead. +segments_subquery_cache_limit = 100000 +; TTL: Time to live for cache files, in seconds. Default to 60 minutes +segments_subquery_cache_ttl = 3600 + ; when set to 1, all requests to Piwik will return a maintenance message without connecting to the DB ; this is useful when upgrading using the shell command, to prevent other users from accessing the UI while Upgrade is in progress maintenance_mode = 0 diff --git a/core/Segment.php b/core/Segment.php index 2a64f705af..451afdb393 100644 --- a/core/Segment.php +++ b/core/Segment.php @@ -177,30 +177,31 @@ class Segment throw new NoAccessException("You do not have enough permission to access the segment " . $name); } - if ($matchType != SegmentExpression::MATCH_IS_NOT_NULL_NOR_EMPTY - && $matchType != SegmentExpression::MATCH_IS_NULL_OR_EMPTY) { - if (isset($segment['sqlFilterValue'])) { - $value = call_user_func($segment['sqlFilterValue'], $value); - } - - // apply presentation filter - if (isset($segment['sqlFilter'])) { - $value = call_user_func($segment['sqlFilter'], $value, $segment['sqlSegment'], $matchType, $name); + if ($matchType == SegmentExpression::MATCH_IS_NOT_NULL_NOR_EMPTY + || $matchType == SegmentExpression::MATCH_IS_NULL_OR_EMPTY) { + break; + } - if(is_null($value)) { // null is returned in TableLogAction::getIdActionFromSegment() - return array(null, $matchType, null); - } + if (isset($segment['sqlFilterValue'])) { + $value = call_user_func($segment['sqlFilterValue'], $value); + } - // sqlFilter-callbacks might return arrays for more complex cases - // e.g. see TableLogAction::getIdActionFromSegment() - if (is_array($value) && isset($value['SQL'])) { - // Special case: returned value is a sub sql expression! - $matchType = SegmentExpression::MATCH_ACTIONS_CONTAINS; - } + // apply presentation filter + if (isset($segment['sqlFilter'])) { + $value = call_user_func($segment['sqlFilter'], $value, $segment['sqlSegment'], $matchType, $name); + if(is_null($value)) { // null is returned in TableLogAction::getIdActionFromSegment() + return array(null, $matchType, null); + } + // sqlFilter-callbacks might return arrays for more complex cases + // e.g. see TableLogAction::getIdActionFromSegment() + if (is_array($value) && isset($value['SQL'])) { + // Special case: returned value is a sub sql expression! + $matchType = SegmentExpression::MATCH_ACTIONS_CONTAINS; } } + break; } diff --git a/core/Segment/SegmentExpression.php b/core/Segment/SegmentExpression.php index a6aecc6c29..7eac1bcdb0 100644 --- a/core/Segment/SegmentExpression.php +++ b/core/Segment/SegmentExpression.php @@ -192,6 +192,12 @@ class SegmentExpression // eg. pageUrl!=DoesNotExist // Not equal to NULL means it matches all rows $sqlExpression = self::SQL_WHERE_MATCHES_ALL_ROWS; + } elseif($matchType == self::MATCH_CONTAINS + || $matchType == self::MATCH_DOES_NOT_CONTAIN) { + // no action was found for CONTAINS / DOES NOT CONTAIN + // eg. pageUrl=@DoesNotExist -> matches no row + // eg. pageUrl!@DoesNotExist -> matches no rows + $sqlExpression = self::SQL_WHERE_DO_NOT_MATCH_ANY_ROW; } else { // it is not expected to reach this code path throw new Exception("Unexpected match type $matchType for your segment. " . @@ -280,8 +286,13 @@ class SegmentExpression } $sqlExpressions[] = $sqlExpression; + if ($value !== null) { - $values[] = $value; + if(is_array($value)) { + $values = array_merge($values, $value); + } else { + $values[] = $value; + } } $this->checkFieldIsAvailable($field, $availableTables); diff --git a/core/Tracker/TableLogAction.php b/core/Tracker/TableLogAction.php index d78240cb25..e54303df6d 100644 --- a/core/Tracker/TableLogAction.php +++ b/core/Tracker/TableLogAction.php @@ -10,6 +10,7 @@ namespace Piwik\Tracker; use Piwik\Common; +use Piwik\Config; use Piwik\Segment\SegmentExpression; /** @@ -188,11 +189,8 @@ class TableLogAction // special case $sql = TableLogAction::getSelectQueryWhereNameContains($matchType, $actionType); - return array( - // mark that the returned value is an sql-expression instead of a literal value - 'SQL' => $sql, - 'bind' => $valueToMatch, - ); + $cache = new TableLogAction\Cache(); + return $cache->getIdActionFromSegment($valueToMatch, $sql); } /** diff --git a/core/Tracker/TableLogAction/Cache.php b/core/Tracker/TableLogAction/Cache.php new file mode 100644 index 0000000000..6d803deb85 --- /dev/null +++ b/core/Tracker/TableLogAction/Cache.php @@ -0,0 +1,172 @@ +<?php +/** + * Piwik - free/libre analytics platform + * + * @link http://piwik.org + * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later + * + */ + +namespace Piwik\Tracker\TableLogAction; + +use Piwik\Common; +use Piwik\Config; +use Piwik\Container\StaticContainer; +use Psr\Log\LoggerInterface; + +class Cache +{ + /** + * @var bool + */ + public $isEnabled; + + /** + * @var int cache lifetime in seconds + */ + protected $lifetime; + + /** + * @var LoggerInterface + */ + private $logger; + + /** + * for tests + * + * @var int + */ + static public $hits = 0; + + public function __construct() + { + $this->isEnabled = (bool)Config::getInstance()->General['enable_segments_subquery_cache']; + $this->limitActionIds = Config::getInstance()->General['segments_subquery_cache_limit']; + $this->lifetime = Config::getInstance()->General['segments_subquery_cache_ttl']; + $this->logger = StaticContainer::get('Psr\Log\LoggerInterface'); + } + + /** + * @param $valueToMatch + * @param $sql + * @return array|null + * @throws \Exception + */ + public function getIdActionFromSegment($valueToMatch, $sql) + { + if (!$this->isEnabled) { + return array( + // mark that the returned value is an sql-expression instead of a literal value + 'SQL' => $sql, + 'bind' => $valueToMatch, + ); + } + + $ids = self::getIdsFromCache($valueToMatch, $sql); + + if(is_null($ids)) { + // Too Big To Cache, issue SQL as subquery instead + return array( + 'SQL' => $sql, + 'bind' => $valueToMatch, + ); + } + + if(count($ids) == 0) { + return null; + } + + + $sql = Common::getSqlStringFieldsArray($ids); + $bind = $ids; + + return array( + // mark that the returned value is an sql-expression instead of a literal value + 'SQL' => $sql, + 'bind' => $bind, + ); + } + + + /** + * @param $valueToMatch + * @param $sql + * @return array of IDs, or null if the returnset is too big to cache + */ + private function getIdsFromCache($valueToMatch, $sql) + { + $cache = $this->buildCache(); + $cacheKey = $this->getCacheKey($valueToMatch, $sql); + + if ($cache->contains($cacheKey) === true) { + self::$hits++; + $this->logger->debug("Segment subquery cache HIT (for '$valueToMatch' and SQL '$sql)"); + return $cache->fetch($cacheKey); + } + + $ids = $this->fetchActionIdsFromDb($valueToMatch, $sql); + + if($this->isTooBigToCache($ids)) { + $this->logger->debug("Segment subquery cache SKIPPED SAVE (too many IDs returned by subquery: %s ids)'", array(count($ids))); + $cache->save($cacheKey, $ids = null, $this->lifetime); + return null; + } + + $cache->save($cacheKey, $ids, $this->lifetime); + $this->logger->debug("Segment subquery cache SAVE (for '$valueToMatch' and SQL '$sql')'"); + + return $ids; + } + + /** + * @param $valueToMatch + * @param $sql + * @return string + * @throws + */ + private function getCacheKey($valueToMatch, $sql) + { + if(is_array($valueToMatch)) { + throw new \Exception("value to match is an array: this is not expected"); + } + + $uniqueKey = md5($sql . $valueToMatch); + $cacheKey = 'TableLogAction.getIdActionFromSegment.' . $uniqueKey; + return $cacheKey; + } + + /** + * @param $valueToMatch + * @param $sql + * @return array|null + * @throws \Exception + */ + private function fetchActionIdsFromDb($valueToMatch, $sql) + { + $idActions = \Piwik\Db::fetchAll($sql, $valueToMatch); + + $ids = array(); + foreach ($idActions as $idAction) { + $ids[] = $idAction['idaction']; + } + + return $ids; + } + + /** + * @return \Piwik\Cache\Lazy + */ + private function buildCache() + { + return \Piwik\Cache::getLazyCache(); + } + + /** + * @param $ids + * @return bool + */ + private function isTooBigToCache($ids) + { + return count($ids) > $this->limitActionIds; + } +}
\ No newline at end of file diff --git a/tests/PHPUnit/Integration/SegmentTest.php b/tests/PHPUnit/Integration/SegmentTest.php index 3245f32bfd..8dab54e193 100644 --- a/tests/PHPUnit/Integration/SegmentTest.php +++ b/tests/PHPUnit/Integration/SegmentTest.php @@ -9,7 +9,9 @@ namespace Piwik\Tests\Integration; use Exception; +use Piwik\Cache; use Piwik\Common; +use Piwik\Config; use Piwik\Db; use Piwik\Segment; use Piwik\Tests\Framework\Mock\FakeAccess; @@ -19,7 +21,7 @@ use Piwik\Tracker\TableLogAction; /** * @group Core - * @group SegmentTest + * @group Segment */ class SegmentTest extends IntegrationTestCase { @@ -34,6 +36,9 @@ class SegmentTest extends IntegrationTestCase public function tearDown() { parent::tearDown(); + + Cache::getLazyCache()->flushAll(); + TableLogAction\Cache::$hits = 0; } static public function removeExtraWhiteSpaces($valueToFilter) @@ -636,10 +641,12 @@ class SegmentTest extends IntegrationTestCase $this->assertEquals($this->removeExtraWhiteSpaces($expected), $this->removeExtraWhiteSpaces($query)); } - public function test_getSelectQuery_whenPageUrlDoesNotExist_asBothStatements_OR_AND() + public function test_getSelectQuery_whenPageUrlDoesNotExist_asBothStatements_OR_AND_withoutCache() { - $pageUrlFoundInDb = 'example.com/found-in-db'; - $actionIdFoundInDb = $this->insertPageUrlAsAction($pageUrlFoundInDb); + $this->disableSubqueryCache(); + $this->assertCacheWasHit($hit = 0); + + list($pageUrlFoundInDb, $actionIdFoundInDb) = $this->insertActions(); $select = 'log_visit.*'; $from = 'log_visit'; @@ -650,9 +657,12 @@ class SegmentTest extends IntegrationTestCase * pageUrl==xyz -- Matches none * pageUrl!=abcdefg -- Matches all * pageUrl=@does-not-exist -- Matches none + * pageUrl=@found-in-db -- Matches all * pageUrl=='.urlencode($pageUrlFoundInDb) -- Matches one + * pageUrl!@not-found -- matches all + * pageUrl!@found -- Matches none */ - $segment = 'visitServerHour==12,pageUrl==xyz;pageUrl!=abcdefg,pageUrl=@does-not-exist,pageUrl=='.urlencode($pageUrlFoundInDb); + $segment = 'visitServerHour==12,pageUrl==xyz;pageUrl!=abcdefg,pageUrl=@does-not-exist,pageUrl=@found-in-db,pageUrl=='.urlencode($pageUrlFoundInDb).',pageUrl!@not-found,pageUrl!@found'; $segment = new Segment($segment, $idSites = array()); $query = $segment->getSelectQuery($select, $from, $where, $bind); @@ -669,22 +679,184 @@ class SegmentTest extends IntegrationTestCase " . Common::prefixTable('log_visit') . " AS log_visit LEFT JOIN " . Common::prefixTable('log_link_visit_action') . " AS log_link_visit_action ON log_link_visit_action.idvisit = log_visit.idvisit WHERE (HOUR(log_visit.visit_last_action_time) = ? - OR (1 = 0)) - AND ((1 = 1) - OR ( log_link_visit_action.idaction_url IN (SELECT idaction FROM log_action WHERE ( name LIKE CONCAT('%', ?, '%') AND type = 1 )) ) - OR log_link_visit_action.idaction_url = ? ) + OR (1 = 0)) " . // pageUrl==xyz + "AND ((1 = 1) " . // pageUrl!=abcdefg + " OR ( log_link_visit_action.idaction_url IN (SELECT idaction FROM log_action WHERE ( name LIKE CONCAT('%', ?, '%') AND type = 1 )) ) " . // pageUrl=@does-not-exist + " OR ( log_link_visit_action.idaction_url IN (SELECT idaction FROM log_action WHERE ( name LIKE CONCAT('%', ?, '%') AND type = 1 )) )" . // pageUrl=@found-in-db + " OR log_link_visit_action.idaction_url = ?" . // pageUrl=='.urlencode($pageUrlFoundInDb) + " OR ( log_link_visit_action.idaction_url IN (SELECT idaction FROM log_action WHERE ( name NOT LIKE CONCAT('%', ?, '%') AND type = 1 )) )" . // pageUrl!@not-found + " OR ( log_link_visit_action.idaction_url IN (SELECT idaction FROM log_action WHERE ( name NOT LIKE CONCAT('%', ?, '%') AND type = 1 )) )" . // pageUrl!@found + " ) GROUP BY log_visit.idvisit ORDER BY NULL ) AS log_inner", "bind" => array( 12, "does-not-exist", - $actionIdFoundInDb + "found-in-db", + $actionIdFoundInDb, + "not-found", + "found", + )); + + $cache = new TableLogAction\Cache(); + $this->assertTrue( empty($cache->isEnabled) ); + $this->assertCacheWasHit($hit = 0); + $this->assertEquals($this->removeExtraWhiteSpaces($expected), $this->removeExtraWhiteSpaces($query)); + } + + public function test_getSelectQuery_whenPageUrlDoesNotExist_asBothStatements_OR_AND_withCacheSave() + { + $this->enableSubqueryCache(); + + list($pageUrlFoundInDb, $actionIdFoundInDb) = $this->insertActions(); + $select = 'log_visit.*'; + $from = 'log_visit'; + $where = false; + $bind = array(); + + /** + * pageUrl==xyz -- Matches none + * pageUrl!=abcdefg -- Matches all + * pageUrl=@does-not-exist -- Matches none + * pageUrl=@found-in-db -- Matches all + * pageUrl=='.urlencode($pageUrlFoundInDb) -- Matches one + * pageUrl!@not-found -- matches all + * pageUrl!@found -- Matches none + */ + $segment = 'visitServerHour==12,pageUrl==xyz;pageUrl!=abcdefg,pageUrl=@does-not-exist,pageUrl=@found-in-db,pageUrl=='.urlencode($pageUrlFoundInDb).',pageUrl!@not-found,pageUrl!@found'; + $segment = new Segment($segment, $idSites = array()); + + $query = $segment->getSelectQuery($select, $from, $where, $bind); + + $expected = array( + "sql" => " + SELECT + log_inner.* + FROM + ( + SELECT + log_visit.* + FROM + " . Common::prefixTable('log_visit') . " AS log_visit + LEFT JOIN " . Common::prefixTable('log_link_visit_action') . " AS log_link_visit_action ON log_link_visit_action.idvisit = log_visit.idvisit + WHERE (HOUR(log_visit.visit_last_action_time) = ? + OR (1 = 0))" . // pageUrl==xyz + " + AND ((1 = 1) " . // pageUrl!=abcdefg + " + OR (1 = 0) " . // pageUrl=@does-not-exist + " + OR ( log_link_visit_action.idaction_url IN (?,?,?) )" . // pageUrl=@found-in-db + " + OR log_link_visit_action.idaction_url = ?" . // pageUrl=='.urlencode($pageUrlFoundInDb) + " + OR ( log_link_visit_action.idaction_url IN (?,?,?) )" . // pageUrl!@not-found + " + OR (1 = 0) " . // pageUrl!@found + ") + GROUP BY log_visit.idvisit + ORDER BY NULL + ) AS log_inner", + "bind" => array( + 12, + 1, // pageUrl=@found-in-db + 2, // pageUrl=@found-in-db + 3, // pageUrl=@found-in-db + $actionIdFoundInDb, // pageUrl=='.urlencode($pageUrlFoundInDb) + 1, // pageUrl!@not-found + 2, // pageUrl!@not-found + 3, // pageUrl!@not-found )); + $cache = new TableLogAction\Cache(); + $this->assertTrue( !empty($cache->isEnabled) ); + $this->assertEquals($this->removeExtraWhiteSpaces($expected), $this->removeExtraWhiteSpaces($query)); } + public function test_getSelectQuery_whenPageUrlDoesNotExist_asBothStatements_OR_AND_withCacheisHit() + { + $this->enableSubqueryCache(); + $this->assertCacheWasHit($hits = 0); + + $this->test_getSelectQuery_whenPageUrlDoesNotExist_asBothStatements_OR_AND_withCacheSave(); + $this->assertCacheWasHit($hits = 0); + + $this->test_getSelectQuery_whenPageUrlDoesNotExist_asBothStatements_OR_AND_withCacheSave(); + $this->assertCacheWasHit($hits = 4); + + $this->test_getSelectQuery_whenPageUrlDoesNotExist_asBothStatements_OR_AND_withCacheSave(); + $this->assertCacheWasHit($hits = 4 + 4); + + } + + + public function test_getSelectQuery_withTwoSegments_subqueryNotCached_whenResultsetTooLarge() + { + $this->enableSubqueryCache(); + + // do not cache when more than 3 idactions returned by subquery + Config::getInstance()->General['segments_subquery_cache_limit'] = 2; + + list($pageUrlFoundInDb, $actionIdFoundInDb) = $this->insertActions(); + $select = 'log_visit.*'; + $from = 'log_visit'; + $where = false; + $bind = array(); + + /** + * pageUrl=@found-in-db-bis -- Will be cached + * pageUrl!@not-found -- Too big to cache + */ + $segment = 'pageUrl=@found-in-db-bis;pageUrl!@not-found'; + $segment = new Segment($segment, $idSites = array()); + + $query = $segment->getSelectQuery($select, $from, $where, $bind); + + $expected = array( + "sql" => " + SELECT + log_inner.* + FROM + ( + SELECT + log_visit.* + FROM + " . Common::prefixTable('log_visit') . " AS log_visit + LEFT JOIN " . Common::prefixTable('log_link_visit_action') . " AS log_link_visit_action ON log_link_visit_action.idvisit = log_visit.idvisit + WHERE + ( log_link_visit_action.idaction_url IN (?) )" . // pageUrl=@found-in-db-bis + " + AND ( log_link_visit_action.idaction_url IN (SELECT idaction FROM log_action WHERE ( name NOT LIKE CONCAT('%', ?, '%') AND type = 1 )) ) " . // pageUrl!@not-found + "GROUP BY log_visit.idvisit + ORDER BY NULL + ) AS log_inner", + "bind" => array( + 2, // pageUrl=@found-in-db-bis + "not-found", // pageUrl!@not-found + )); + + $cache = new TableLogAction\Cache(); + $this->assertTrue( !empty($cache->isEnabled) ); + + $this->assertEquals($this->removeExtraWhiteSpaces($expected), $this->removeExtraWhiteSpaces($query)); + } + + + public function test_getSelectQuery_withTwoSegments_partiallyCached() + { + $this->assertCacheWasHit($hits = 0); + + // this will create the caches for both segments + $this->test_getSelectQuery_withTwoSegments_subqueryNotCached_whenResultsetTooLarge(); + $this->assertCacheWasHit($hits = 0); + + // this will hit caches for both segments + $this->test_getSelectQuery_withTwoSegments_subqueryNotCached_whenResultsetTooLarge(); + $this->assertCacheWasHit($hits = 2); + } + public function provideContainerConfig() { return array( @@ -707,4 +879,34 @@ class SegmentTest extends IntegrationTestCase $this->assertNotEmpty($actionIdFoundInDb, "Action $pageUrlFoundInDb was not found in the " . Common::prefixTable('log_action') . " table."); return $actionIdFoundInDb; } + + /** + * @return array + */ + private function insertActions() + { + $pageUrlFoundInDb = 'example.com/found-in-db'; + $actionIdFoundInDb = $this->insertPageUrlAsAction($pageUrlFoundInDb); + + // Adding some other actions to make test case more realistic + $this->insertPageUrlAsAction('example.net/found-in-db-bis'); + $this->insertPageUrlAsAction('example.net/found-in-db-ter'); + + return array($pageUrlFoundInDb, $actionIdFoundInDb); + } + + private function assertCacheWasHit($expectedHits) + { + $this->assertTrue(TableLogAction\Cache::$hits == $expectedHits, "expected cache was hit $expectedHits time(s), but got " . TableLogAction\Cache::$hits . " cache hits instead."); + } + + private function disableSubqueryCache() + { + Config::getInstance()->General['enable_segments_subquery_cache'] = 0; + } + + private function enableSubqueryCache() + { + Config::getInstance()->General['enable_segments_subquery_cache'] = 1; + } } diff --git a/tests/PHPUnit/Unit/Segment/SegmentExpressionTest.php b/tests/PHPUnit/Unit/Segment/SegmentExpressionTest.php index ede256707b..6d9dcc67b1 100644 --- a/tests/PHPUnit/Unit/Segment/SegmentExpressionTest.php +++ b/tests/PHPUnit/Unit/Segment/SegmentExpressionTest.php @@ -10,6 +10,10 @@ namespace Piwik\Tests\Unit; use Piwik\Segment\SegmentExpression; +/** + * @group SegmentExpressionTest + * @group Segment + */ class SegmentExpressionTest extends \PHPUnit_Framework_TestCase { /** |