diff options
author | Matthieu Aubry <matt@piwik.org> | 2014-12-06 04:10:51 +0300 |
---|---|---|
committer | Matthieu Aubry <matt@piwik.org> | 2014-12-06 04:10:51 +0300 |
commit | 0bb5558f8a97e1bace18e87741b37837d5954d47 (patch) | |
tree | f1a2c5f4ea5ece208456d0d7878b03fe01019ec3 /core | |
parent | 5c51746f627f2cffa5bb15a339f8083a34b5ba69 (diff) | |
parent | 8f079b21b1f635a6bb9bbbdfd8396001fc2bdfa3 (diff) |
Merge pull request #6817 from piwik/6786_refactor2.10.0-b4
Live API refactoring and hopefully Fixing the performance
Diffstat (limited to 'core')
-rw-r--r-- | core/DataAccess/LogQueryBuilder.php | 284 | ||||
-rw-r--r-- | core/RankingQuery.php | 4 | ||||
-rw-r--r-- | core/Segment.php | 258 | ||||
-rw-r--r-- | core/Segment/SegmentExpression.php (renamed from core/SegmentExpression.php) | 9 | ||||
-rw-r--r-- | core/Tracker/TableLogAction.php | 2 |
5 files changed, 319 insertions, 238 deletions
diff --git a/core/DataAccess/LogQueryBuilder.php b/core/DataAccess/LogQueryBuilder.php new file mode 100644 index 0000000000..a5b2fcf986 --- /dev/null +++ b/core/DataAccess/LogQueryBuilder.php @@ -0,0 +1,284 @@ +<?php +/** + * Piwik - free/libre analytics platform + * + * @link http://piwik.org + * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later + * + */ + +namespace Piwik\DataAccess; + + +use Exception; +use Piwik\Common; +use Piwik\Segment\SegmentExpression; + +class LogQueryBuilder +{ + public function __construct(SegmentExpression $segmentExpression) + { + $this->segmentExpression = $segmentExpression; + } + + public function getSelectQueryString($select, $from, $where, $bind, $groupBy, $orderBy, $limit) + { + if (!is_array($from)) { + $from = array($from); + } + + if(!$this->segmentExpression->isEmpty()) { + $this->segmentExpression->parseSubExpressionsIntoSqlExpressions($from); + $segmentSql = $this->segmentExpression->getSql(); + $where = $this->getWhereMatchBoth($where, $segmentSql['where']); + $bind = array_merge($bind, $segmentSql['bind']); + } + + $joins = $this->generateJoinsString($from); + $joinWithSubSelect = $joins['joinWithSubSelect']; + $from = $joins['sql']; + + if ($joinWithSubSelect) { + $sql = $this->buildWrappedSelectQuery($select, $from, $where, $groupBy, $orderBy, $limit); + } else { + $sql = $this->buildSelectQuery($select, $from, $where, $groupBy, $orderBy, $limit); + } + return array( + 'sql' => $sql, + 'bind' => $bind + ); + } + + + /** + * Generate the join sql based on the needed tables + * @param array $tables tables to join + * @throws Exception if tables can't be joined + * @return array + */ + private function generateJoinsString($tables) + { + $knownTables = array("log_visit", "log_link_visit_action", "log_conversion", "log_conversion_item"); + $visitsAvailable = $actionsAvailable = $conversionsAvailable = $conversionItemAvailable = false; + $joinWithSubSelect = false; + $sql = ''; + + // make sure the tables are joined in the right order + // base table first, then action before conversion + // this way, conversions can be joined on idlink_va + $actionIndex = array_search("log_link_visit_action", $tables); + $conversionIndex = array_search("log_conversion", $tables); + if ($actionIndex > 0 && $conversionIndex > 0 && $actionIndex > $conversionIndex) { + $tables[$actionIndex] = "log_conversion"; + $tables[$conversionIndex] = "log_link_visit_action"; + } + + // same as above: action before visit + $actionIndex = array_search("log_link_visit_action", $tables); + $visitIndex = array_search("log_visit", $tables); + if ($actionIndex > 0 && $visitIndex > 0 && $actionIndex > $visitIndex) { + $tables[$actionIndex] = "log_visit"; + $tables[$visitIndex] = "log_link_visit_action"; + } + + foreach ($tables as $i => $table) { + if (is_array($table)) { + // join condition provided + $alias = isset($table['tableAlias']) ? $table['tableAlias'] : $table['table']; + $sql .= " + LEFT JOIN " . Common::prefixTable($table['table']) . " AS " . $alias + . " ON " . $table['joinOn']; + continue; + } + + if (!in_array($table, $knownTables)) { + throw new Exception("Table '$table' can't be used for segmentation"); + } + + $tableSql = Common::prefixTable($table) . " AS $table"; + + if ($i == 0) { + // first table + $sql .= $tableSql; + } else { + if ($actionsAvailable && $table == "log_conversion") { + // have actions, need conversions => join on idlink_va + $join = "log_conversion.idlink_va = log_link_visit_action.idlink_va " + . "AND log_conversion.idsite = log_link_visit_action.idsite"; + } else if ($actionsAvailable && $table == "log_visit") { + // have actions, need visits => join on idvisit + $join = "log_visit.idvisit = log_link_visit_action.idvisit"; + } else if ($visitsAvailable && $table == "log_link_visit_action") { + // have visits, need actions => we have to use a more complex join + // we don't hande this here, we just return joinWithSubSelect=true in this case + $joinWithSubSelect = true; + $join = "log_link_visit_action.idvisit = log_visit.idvisit"; + } else if ($conversionsAvailable && $table == "log_link_visit_action") { + // have conversions, need actions => join on idlink_va + $join = "log_conversion.idlink_va = log_link_visit_action.idlink_va"; + } else if (($visitsAvailable && $table == "log_conversion") + || ($conversionsAvailable && $table == "log_visit") + ) { + // have visits, need conversion (or vice versa) => join on idvisit + // notice that joining conversions on visits has lower priority than joining it on actions + $join = "log_conversion.idvisit = log_visit.idvisit"; + + // if conversions are joined on visits, we need a complex join + if ($table == "log_conversion") { + $joinWithSubSelect = true; + } + } elseif ($conversionItemAvailable && $table === 'log_visit') { + $join = "log_conversion_item.idvisit = log_visit.idvisit"; + } elseif ($conversionItemAvailable && $table === 'log_link_visit_action') { + $join = "log_conversion_item.idvisit = log_link_visit_action.idvisit"; + } elseif ($conversionItemAvailable && $table === 'log_conversion') { + $join = "log_conversion_item.idvisit = log_conversion.idvisit"; + } else { + throw new Exception("Table '$table' can't be joined for segmentation"); + } + + // the join sql the default way + $sql .= " + LEFT JOIN $tableSql ON $join"; + } + + // remember which tables are available + $visitsAvailable = ($visitsAvailable || $table == "log_visit"); + $actionsAvailable = ($actionsAvailable || $table == "log_link_visit_action"); + $conversionsAvailable = ($conversionsAvailable || $table == "log_conversion"); + $conversionItemAvailable = ($conversionItemAvailable || $table == "log_conversion_item"); + } + + $return = array( + 'sql' => $sql, + 'joinWithSubSelect' => $joinWithSubSelect + ); + return $return; + + } + + + /** + * Build a select query where actions have to be joined on visits (or conversions) + * In this case, the query gets wrapped in another query so that grouping by visit is possible + * @param string $select + * @param string $from + * @param string $where + * @param string $groupBy + * @param string $orderBy + * @param string $limit + * @throws Exception + * @return string + */ + private function buildWrappedSelectQuery($select, $from, $where, $groupBy, $orderBy, $limit) + { + $matchTables = "(log_visit|log_conversion_item|log_conversion|log_action)"; + preg_match_all("/". $matchTables ."\.[a-z0-9_\*]+/", $select, $matches); + $neededFields = array_unique($matches[0]); + + if (count($neededFields) == 0) { + throw new Exception("No needed fields found in select expression. " + . "Please use a table prefix."); + } + + $innerSelect = implode(", \n", $neededFields); + $innerFrom = $from; + $innerWhere = $where; + + $innerLimit = $limit; + $innerGroupBy = "log_visit.idvisit"; + $innerOrderBy = "NULL"; + if($innerLimit && $orderBy) { + // only When LIMITing we can apply to the inner query the same ORDER BY as the parent query + $innerOrderBy = $orderBy; + } + if($innerLimit) { + // When LIMITing, no need to GROUP BY (GROUPing by is done before the LIMIT which is super slow when large amount of rows is matched) + $innerGroupBy = false; + } + + $innerQuery = $this->buildSelectQuery($innerSelect, $innerFrom, $innerWhere, $innerGroupBy, $innerOrderBy, $innerLimit); + + $select = preg_replace('/'.$matchTables.'\./', 'log_inner.', $select); + $from = " + ( + $innerQuery + ) AS log_inner"; + $where = false; + $orderBy = preg_replace('/'.$matchTables.'\./', 'log_inner.', $orderBy); + $groupBy = preg_replace('/'.$matchTables.'\./', 'log_inner.', $groupBy); + $query = $this->buildSelectQuery($select, $from, $where, $groupBy, $orderBy, $limit); + return $query; + } + + + /** + * Build select query the normal way + * + * @param string $select fieldlist to be selected + * @param string $from tablelist to select from + * @param string $where where clause + * @param string $groupBy group by clause + * @param string $orderBy order by clause + * @param string $limit limit by clause + * @return string + */ + private function buildSelectQuery($select, $from, $where, $groupBy, $orderBy, $limit) + { + $sql = " + SELECT + $select + FROM + $from"; + + if ($where) { + $sql .= " + WHERE + $where"; + } + + if ($groupBy) { + $sql .= " + GROUP BY + $groupBy"; + } + + if ($orderBy) { + $sql .= " + ORDER BY + $orderBy"; + } + + $limit = (int)$limit; + if ($limit >= 1) { + $sql .= " + LIMIT + $limit"; + } + + return $sql; + } + + /** + * @param $where + * @param $segmentWhere + * @return string + * @throws + */ + protected function getWhereMatchBoth($where, $segmentWhere) + { + if (empty($segmentWhere) && empty($where)) { + throw new \Exception("Segment where clause should be non empty."); + } + if (empty($segmentWhere)) { + return $where; + } + if (empty($where)) { + return $segmentWhere; + } + return "( $where ) + AND + ($segmentWhere)"; + } + +}
\ No newline at end of file diff --git a/core/RankingQuery.php b/core/RankingQuery.php index f44845f075..cd4f830669 100644 --- a/core/RankingQuery.php +++ b/core/RankingQuery.php @@ -214,7 +214,7 @@ class RankingQuery */ public function execute($innerQuery, $bind = array()) { - $query = $this->generateQuery($innerQuery); + $query = $this->generateRankingQuery($innerQuery); $data = Db::fetchAll($query, $bind); if ($this->columnToMarkExcludedRows !== false) { @@ -268,7 +268,7 @@ class RankingQuery * itself. * @return string The entire ranking query SQL. */ - public function generateQuery($innerQuery) + public function generateRankingQuery($innerQuery) { // +1 to include "Others" $limit = $this->limit + 1; diff --git a/core/Segment.php b/core/Segment.php index 420946794d..7fe4c061fd 100644 --- a/core/Segment.php +++ b/core/Segment.php @@ -9,7 +9,9 @@ namespace Piwik; use Exception; +use Piwik\DataAccess\LogQueryBuilder; use Piwik\Plugins\API\API; +use Piwik\Segment\SegmentExpression; /** * Limits the set of visits Piwik uses when aggregating analytics data. @@ -57,7 +59,17 @@ class Segment /** * @var SegmentExpression */ - protected $segment = null; + protected $segmentExpression = null; + + /** + * @var string + */ + protected $string = null; + + /** + * @var array + */ + protected $idSites = null; /** * Truncate the Segments to 8k @@ -68,9 +80,8 @@ class Segment * Constructor. * * @param string $segmentCondition The segment condition, eg, `'browserCode=ff;countryCode=CA'`. - * @param array $idSites The list of sites the segment will be used with. Some segments are - * dependent on the site, such as goal segments. - * @throws Exception + * @param array $idSites The list of sites the st + * @throws */ public function __construct($segmentCondition, $idSites) { @@ -103,7 +114,7 @@ class Segment $this->string = $string; $this->idSites = $idSites; $segment = new SegmentExpression($string); - $this->segment = $segment; + $this->segmentExpression = $segment; // parse segments $expressions = $segment->parseSubExpressions(); @@ -127,7 +138,7 @@ class Segment */ public function isEmpty() { - return empty($this->string); + return $this->segmentExpression->isEmpty(); } protected $availableSegments = array(); @@ -222,236 +233,16 @@ class Segment * @param array|string $bind (optional) Bind parameters, eg, `array($col1Value, $col2Value)`. * @param false|string $orderBy (optional) Order by clause, eg, `"t1.col1 ASC"`. * @param false|string $groupBy (optional) Group by clause, eg, `"t2.col2"`. + * @param int $limit Limit by clause + * @param int If set to value >= 1 then the Select query (and All inner queries) will be LIMIT'ed by this value. + * Use only when you're not aggregating or it will sample the data. * @return string The entire select query. */ - public function getSelectQuery($select, $from, $where = false, $bind = array(), $orderBy = false, $groupBy = false) - { - if (!is_array($from)) { - $from = array($from); - } - - if (!$this->isEmpty()) { - $this->segment->parseSubExpressionsIntoSqlExpressions($from); - - $joins = $this->generateJoins($from); - $from = $joins['sql']; - $joinWithSubSelect = $joins['joinWithSubSelect']; - - $segmentSql = $this->segment->getSql(); - $segmentWhere = $segmentSql['where']; - if (!empty($segmentWhere)) { - if (!empty($where)) { - $where = "( $where ) - AND - ($segmentWhere)"; - } else { - $where = $segmentWhere; - } - } - - $bind = array_merge($bind, $segmentSql['bind']); - } else { - $joins = $this->generateJoins($from); - $from = $joins['sql']; - $joinWithSubSelect = $joins['joinWithSubSelect']; - } - - if ($joinWithSubSelect) { - $sql = $this->buildWrappedSelectQuery($select, $from, $where, $orderBy, $groupBy); - } else { - $sql = $this->buildSelectQuery($select, $from, $where, $orderBy, $groupBy); - } - return array( - 'sql' => $sql, - 'bind' => $bind - ); - } - - /** - * Generate the join sql based on the needed tables - * @param array $tables tables to join - * @throws Exception if tables can't be joined - * @return array - */ - private function generateJoins($tables) - { - $knownTables = array("log_visit", "log_link_visit_action", "log_conversion", "log_conversion_item"); - $visitsAvailable = $actionsAvailable = $conversionsAvailable = $conversionItemAvailable = false; - $joinWithSubSelect = false; - $sql = ''; - - // make sure the tables are joined in the right order - // base table first, then action before conversion - // this way, conversions can be joined on idlink_va - $actionIndex = array_search("log_link_visit_action", $tables); - $conversionIndex = array_search("log_conversion", $tables); - if ($actionIndex > 0 && $conversionIndex > 0 && $actionIndex > $conversionIndex) { - $tables[$actionIndex] = "log_conversion"; - $tables[$conversionIndex] = "log_link_visit_action"; - } - - // same as above: action before visit - $actionIndex = array_search("log_link_visit_action", $tables); - $visitIndex = array_search("log_visit", $tables); - if ($actionIndex > 0 && $visitIndex > 0 && $actionIndex > $visitIndex) { - $tables[$actionIndex] = "log_visit"; - $tables[$visitIndex] = "log_link_visit_action"; - } - - foreach ($tables as $i => $table) { - if (is_array($table)) { - // join condition provided - $alias = isset($table['tableAlias']) ? $table['tableAlias'] : $table['table']; - $sql .= " - LEFT JOIN " . Common::prefixTable($table['table']) . " AS " . $alias - . " ON " . $table['joinOn']; - continue; - } - - if (!in_array($table, $knownTables)) { - throw new Exception("Table '$table' can't be used for segmentation"); - } - - $tableSql = Common::prefixTable($table) . " AS $table"; - - if ($i == 0) { - // first table - $sql .= $tableSql; - } else { - if ($actionsAvailable && $table == "log_conversion") { - // have actions, need conversions => join on idlink_va - $join = "log_conversion.idlink_va = log_link_visit_action.idlink_va " - . "AND log_conversion.idsite = log_link_visit_action.idsite"; - } else if ($actionsAvailable && $table == "log_visit") { - // have actions, need visits => join on idvisit - $join = "log_visit.idvisit = log_link_visit_action.idvisit"; - } else if ($visitsAvailable && $table == "log_link_visit_action") { - // have visits, need actions => we have to use a more complex join - // we don't hande this here, we just return joinWithSubSelect=true in this case - $joinWithSubSelect = true; - $join = "log_link_visit_action.idvisit = log_visit.idvisit"; - } else if ($conversionsAvailable && $table == "log_link_visit_action") { - // have conversions, need actions => join on idlink_va - $join = "log_conversion.idlink_va = log_link_visit_action.idlink_va"; - } else if (($visitsAvailable && $table == "log_conversion") - || ($conversionsAvailable && $table == "log_visit") - ) { - // have visits, need conversion (or vice versa) => join on idvisit - // notice that joining conversions on visits has lower priority than joining it on actions - $join = "log_conversion.idvisit = log_visit.idvisit"; - - // if conversions are joined on visits, we need a complex join - if ($table == "log_conversion") { - $joinWithSubSelect = true; - } - } elseif ($conversionItemAvailable && $table === 'log_visit') { - $join = "log_conversion_item.idvisit = log_visit.idvisit"; - } elseif ($conversionItemAvailable && $table === 'log_link_visit_action') { - $join = "log_conversion_item.idvisit = log_link_visit_action.idvisit"; - } elseif ($conversionItemAvailable && $table === 'log_conversion') { - $join = "log_conversion_item.idvisit = log_conversion.idvisit"; - } else { - throw new Exception("Table '$table' can't be joined for segmentation"); - } - - // the join sql the default way - $sql .= " - LEFT JOIN $tableSql ON $join"; - } - - // remember which tables are available - $visitsAvailable = ($visitsAvailable || $table == "log_visit"); - $actionsAvailable = ($actionsAvailable || $table == "log_link_visit_action"); - $conversionsAvailable = ($conversionsAvailable || $table == "log_conversion"); - $conversionItemAvailable = ($conversionItemAvailable || $table == "log_conversion_item"); - } - - $return = array( - 'sql' => $sql, - 'joinWithSubSelect' => $joinWithSubSelect - ); - return $return; - - } - - /** - * Build select query the normal way - * @param string $select fieldlist to be selected - * @param string $from tablelist to select from - * @param string $where where clause - * @param string $orderBy order by clause - * @param string $groupBy group by clause - * @return string - */ - private function buildSelectQuery($select, $from, $where, $orderBy, $groupBy) - { - $sql = " - SELECT - $select - FROM - $from"; - - if ($where) { - $sql .= " - WHERE - $where"; - } - - if ($groupBy) { - $sql .= " - GROUP BY - $groupBy"; - } - - if ($orderBy) { - $sql .= " - ORDER BY - $orderBy"; - } - - return $sql; - } - - /** - * Build a select query where actions have to be joined on visits (or conversions) - * In this case, the query gets wrapped in another query so that grouping by visit is possible - * @param string $select - * @param string $from - * @param string $where - * @param string $orderBy - * @param string $groupBy - * @throws Exception - * @return string - */ - private function buildWrappedSelectQuery($select, $from, $where, $orderBy, $groupBy) + public function getSelectQuery($select, $from, $where = false, $bind = array(), $orderBy = false, $groupBy = false, $limit = 0) { - $matchTables = "(log_visit|log_conversion_item|log_conversion|log_action)"; - preg_match_all("/". $matchTables ."\.[a-z0-9_\*]+/", $select, $matches); - $neededFields = array_unique($matches[0]); - - if (count($neededFields) == 0) { - throw new Exception("No needed fields found in select expression. " - . "Please use a table prefix."); - } - - $select = preg_replace('/'.$matchTables.'\./', 'log_inner.', $select); - $orderBy = preg_replace('/'.$matchTables.'\./', 'log_inner.', $orderBy); - $groupBy = preg_replace('/'.$matchTables.'\./', 'log_inner.', $groupBy); - - $from = "( - SELECT - " . implode(", - ", $neededFields) . " - FROM - $from - WHERE - $where - GROUP BY log_visit.idvisit - ) AS log_inner"; - - $where = false; - $query = $this->buildSelectQuery($select, $from, $where, $orderBy, $groupBy); - return $query; + $segmentExpression = $this->segmentExpression; + $segmentQuery = new LogQueryBuilder($segmentExpression); + return $segmentQuery->getSelectQueryString($select, $from, $where, $bind, $groupBy, $orderBy, $limit); } /** @@ -463,4 +254,5 @@ class Segment { return (string) $this->getString(); } + }
\ No newline at end of file diff --git a/core/SegmentExpression.php b/core/Segment/SegmentExpression.php index cbd2a3eb9e..10cf0294e3 100644 --- a/core/SegmentExpression.php +++ b/core/Segment/SegmentExpression.php @@ -7,7 +7,7 @@ * */ -namespace Piwik; +namespace Piwik\Segment; use Exception; @@ -46,6 +46,11 @@ class SegmentExpression $this->tree = $this->parseTree(); } + public function isEmpty() + { + return count($this->tree) == 0; + } + protected $joins = array(); protected $valuesBind = array(); protected $parsedTree = array(); @@ -337,7 +342,7 @@ class SegmentExpression */ public function getSql() { - if (count($this->tree) == 0) { + if ($this->isEmpty()) { throw new Exception("Invalid segment, please specify a valid segment."); } $sql = ''; diff --git a/core/Tracker/TableLogAction.php b/core/Tracker/TableLogAction.php index 709936f2a5..fe620035d7 100644 --- a/core/Tracker/TableLogAction.php +++ b/core/Tracker/TableLogAction.php @@ -10,7 +10,7 @@ namespace Piwik\Tracker; use Piwik\Common; -use Piwik\SegmentExpression; +use Piwik\Segment\SegmentExpression; use Piwik\Tracker; /** |