Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/matomo-org/matomo.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/core
diff options
context:
space:
mode:
authorMatthieu Aubry <matt@piwik.org>2014-12-06 04:10:51 +0300
committerMatthieu Aubry <matt@piwik.org>2014-12-06 04:10:51 +0300
commit0bb5558f8a97e1bace18e87741b37837d5954d47 (patch)
treef1a2c5f4ea5ece208456d0d7878b03fe01019ec3 /core
parent5c51746f627f2cffa5bb15a339f8083a34b5ba69 (diff)
parent8f079b21b1f635a6bb9bbbdfd8396001fc2bdfa3 (diff)
Merge pull request #6817 from piwik/6786_refactor2.10.0-b4
Live API refactoring and hopefully Fixing the performance
Diffstat (limited to 'core')
-rw-r--r--core/DataAccess/LogQueryBuilder.php284
-rw-r--r--core/RankingQuery.php4
-rw-r--r--core/Segment.php258
-rw-r--r--core/Segment/SegmentExpression.php (renamed from core/SegmentExpression.php)9
-rw-r--r--core/Tracker/TableLogAction.php2
5 files changed, 319 insertions, 238 deletions
diff --git a/core/DataAccess/LogQueryBuilder.php b/core/DataAccess/LogQueryBuilder.php
new file mode 100644
index 0000000000..a5b2fcf986
--- /dev/null
+++ b/core/DataAccess/LogQueryBuilder.php
@@ -0,0 +1,284 @@
+<?php
+/**
+ * Piwik - free/libre analytics platform
+ *
+ * @link http://piwik.org
+ * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
+ *
+ */
+
+namespace Piwik\DataAccess;
+
+
+use Exception;
+use Piwik\Common;
+use Piwik\Segment\SegmentExpression;
+
+class LogQueryBuilder
+{
+ public function __construct(SegmentExpression $segmentExpression)
+ {
+ $this->segmentExpression = $segmentExpression;
+ }
+
+ public function getSelectQueryString($select, $from, $where, $bind, $groupBy, $orderBy, $limit)
+ {
+ if (!is_array($from)) {
+ $from = array($from);
+ }
+
+ if(!$this->segmentExpression->isEmpty()) {
+ $this->segmentExpression->parseSubExpressionsIntoSqlExpressions($from);
+ $segmentSql = $this->segmentExpression->getSql();
+ $where = $this->getWhereMatchBoth($where, $segmentSql['where']);
+ $bind = array_merge($bind, $segmentSql['bind']);
+ }
+
+ $joins = $this->generateJoinsString($from);
+ $joinWithSubSelect = $joins['joinWithSubSelect'];
+ $from = $joins['sql'];
+
+ if ($joinWithSubSelect) {
+ $sql = $this->buildWrappedSelectQuery($select, $from, $where, $groupBy, $orderBy, $limit);
+ } else {
+ $sql = $this->buildSelectQuery($select, $from, $where, $groupBy, $orderBy, $limit);
+ }
+ return array(
+ 'sql' => $sql,
+ 'bind' => $bind
+ );
+ }
+
+
+ /**
+ * Generate the join sql based on the needed tables
+ * @param array $tables tables to join
+ * @throws Exception if tables can't be joined
+ * @return array
+ */
+ private function generateJoinsString($tables)
+ {
+ $knownTables = array("log_visit", "log_link_visit_action", "log_conversion", "log_conversion_item");
+ $visitsAvailable = $actionsAvailable = $conversionsAvailable = $conversionItemAvailable = false;
+ $joinWithSubSelect = false;
+ $sql = '';
+
+ // make sure the tables are joined in the right order
+ // base table first, then action before conversion
+ // this way, conversions can be joined on idlink_va
+ $actionIndex = array_search("log_link_visit_action", $tables);
+ $conversionIndex = array_search("log_conversion", $tables);
+ if ($actionIndex > 0 && $conversionIndex > 0 && $actionIndex > $conversionIndex) {
+ $tables[$actionIndex] = "log_conversion";
+ $tables[$conversionIndex] = "log_link_visit_action";
+ }
+
+ // same as above: action before visit
+ $actionIndex = array_search("log_link_visit_action", $tables);
+ $visitIndex = array_search("log_visit", $tables);
+ if ($actionIndex > 0 && $visitIndex > 0 && $actionIndex > $visitIndex) {
+ $tables[$actionIndex] = "log_visit";
+ $tables[$visitIndex] = "log_link_visit_action";
+ }
+
+ foreach ($tables as $i => $table) {
+ if (is_array($table)) {
+ // join condition provided
+ $alias = isset($table['tableAlias']) ? $table['tableAlias'] : $table['table'];
+ $sql .= "
+ LEFT JOIN " . Common::prefixTable($table['table']) . " AS " . $alias
+ . " ON " . $table['joinOn'];
+ continue;
+ }
+
+ if (!in_array($table, $knownTables)) {
+ throw new Exception("Table '$table' can't be used for segmentation");
+ }
+
+ $tableSql = Common::prefixTable($table) . " AS $table";
+
+ if ($i == 0) {
+ // first table
+ $sql .= $tableSql;
+ } else {
+ if ($actionsAvailable && $table == "log_conversion") {
+ // have actions, need conversions => join on idlink_va
+ $join = "log_conversion.idlink_va = log_link_visit_action.idlink_va "
+ . "AND log_conversion.idsite = log_link_visit_action.idsite";
+ } else if ($actionsAvailable && $table == "log_visit") {
+ // have actions, need visits => join on idvisit
+ $join = "log_visit.idvisit = log_link_visit_action.idvisit";
+ } else if ($visitsAvailable && $table == "log_link_visit_action") {
+ // have visits, need actions => we have to use a more complex join
+ // we don't hande this here, we just return joinWithSubSelect=true in this case
+ $joinWithSubSelect = true;
+ $join = "log_link_visit_action.idvisit = log_visit.idvisit";
+ } else if ($conversionsAvailable && $table == "log_link_visit_action") {
+ // have conversions, need actions => join on idlink_va
+ $join = "log_conversion.idlink_va = log_link_visit_action.idlink_va";
+ } else if (($visitsAvailable && $table == "log_conversion")
+ || ($conversionsAvailable && $table == "log_visit")
+ ) {
+ // have visits, need conversion (or vice versa) => join on idvisit
+ // notice that joining conversions on visits has lower priority than joining it on actions
+ $join = "log_conversion.idvisit = log_visit.idvisit";
+
+ // if conversions are joined on visits, we need a complex join
+ if ($table == "log_conversion") {
+ $joinWithSubSelect = true;
+ }
+ } elseif ($conversionItemAvailable && $table === 'log_visit') {
+ $join = "log_conversion_item.idvisit = log_visit.idvisit";
+ } elseif ($conversionItemAvailable && $table === 'log_link_visit_action') {
+ $join = "log_conversion_item.idvisit = log_link_visit_action.idvisit";
+ } elseif ($conversionItemAvailable && $table === 'log_conversion') {
+ $join = "log_conversion_item.idvisit = log_conversion.idvisit";
+ } else {
+ throw new Exception("Table '$table' can't be joined for segmentation");
+ }
+
+ // the join sql the default way
+ $sql .= "
+ LEFT JOIN $tableSql ON $join";
+ }
+
+ // remember which tables are available
+ $visitsAvailable = ($visitsAvailable || $table == "log_visit");
+ $actionsAvailable = ($actionsAvailable || $table == "log_link_visit_action");
+ $conversionsAvailable = ($conversionsAvailable || $table == "log_conversion");
+ $conversionItemAvailable = ($conversionItemAvailable || $table == "log_conversion_item");
+ }
+
+ $return = array(
+ 'sql' => $sql,
+ 'joinWithSubSelect' => $joinWithSubSelect
+ );
+ return $return;
+
+ }
+
+
+ /**
+ * Build a select query where actions have to be joined on visits (or conversions)
+ * In this case, the query gets wrapped in another query so that grouping by visit is possible
+ * @param string $select
+ * @param string $from
+ * @param string $where
+ * @param string $groupBy
+ * @param string $orderBy
+ * @param string $limit
+ * @throws Exception
+ * @return string
+ */
+ private function buildWrappedSelectQuery($select, $from, $where, $groupBy, $orderBy, $limit)
+ {
+ $matchTables = "(log_visit|log_conversion_item|log_conversion|log_action)";
+ preg_match_all("/". $matchTables ."\.[a-z0-9_\*]+/", $select, $matches);
+ $neededFields = array_unique($matches[0]);
+
+ if (count($neededFields) == 0) {
+ throw new Exception("No needed fields found in select expression. "
+ . "Please use a table prefix.");
+ }
+
+ $innerSelect = implode(", \n", $neededFields);
+ $innerFrom = $from;
+ $innerWhere = $where;
+
+ $innerLimit = $limit;
+ $innerGroupBy = "log_visit.idvisit";
+ $innerOrderBy = "NULL";
+ if($innerLimit && $orderBy) {
+ // only When LIMITing we can apply to the inner query the same ORDER BY as the parent query
+ $innerOrderBy = $orderBy;
+ }
+ if($innerLimit) {
+ // When LIMITing, no need to GROUP BY (GROUPing by is done before the LIMIT which is super slow when large amount of rows is matched)
+ $innerGroupBy = false;
+ }
+
+ $innerQuery = $this->buildSelectQuery($innerSelect, $innerFrom, $innerWhere, $innerGroupBy, $innerOrderBy, $innerLimit);
+
+ $select = preg_replace('/'.$matchTables.'\./', 'log_inner.', $select);
+ $from = "
+ (
+ $innerQuery
+ ) AS log_inner";
+ $where = false;
+ $orderBy = preg_replace('/'.$matchTables.'\./', 'log_inner.', $orderBy);
+ $groupBy = preg_replace('/'.$matchTables.'\./', 'log_inner.', $groupBy);
+ $query = $this->buildSelectQuery($select, $from, $where, $groupBy, $orderBy, $limit);
+ return $query;
+ }
+
+
+ /**
+ * Build select query the normal way
+ *
+ * @param string $select fieldlist to be selected
+ * @param string $from tablelist to select from
+ * @param string $where where clause
+ * @param string $groupBy group by clause
+ * @param string $orderBy order by clause
+ * @param string $limit limit by clause
+ * @return string
+ */
+ private function buildSelectQuery($select, $from, $where, $groupBy, $orderBy, $limit)
+ {
+ $sql = "
+ SELECT
+ $select
+ FROM
+ $from";
+
+ if ($where) {
+ $sql .= "
+ WHERE
+ $where";
+ }
+
+ if ($groupBy) {
+ $sql .= "
+ GROUP BY
+ $groupBy";
+ }
+
+ if ($orderBy) {
+ $sql .= "
+ ORDER BY
+ $orderBy";
+ }
+
+ $limit = (int)$limit;
+ if ($limit >= 1) {
+ $sql .= "
+ LIMIT
+ $limit";
+ }
+
+ return $sql;
+ }
+
+ /**
+ * @param $where
+ * @param $segmentWhere
+ * @return string
+ * @throws
+ */
+ protected function getWhereMatchBoth($where, $segmentWhere)
+ {
+ if (empty($segmentWhere) && empty($where)) {
+ throw new \Exception("Segment where clause should be non empty.");
+ }
+ if (empty($segmentWhere)) {
+ return $where;
+ }
+ if (empty($where)) {
+ return $segmentWhere;
+ }
+ return "( $where )
+ AND
+ ($segmentWhere)";
+ }
+
+} \ No newline at end of file
diff --git a/core/RankingQuery.php b/core/RankingQuery.php
index f44845f075..cd4f830669 100644
--- a/core/RankingQuery.php
+++ b/core/RankingQuery.php
@@ -214,7 +214,7 @@ class RankingQuery
*/
public function execute($innerQuery, $bind = array())
{
- $query = $this->generateQuery($innerQuery);
+ $query = $this->generateRankingQuery($innerQuery);
$data = Db::fetchAll($query, $bind);
if ($this->columnToMarkExcludedRows !== false) {
@@ -268,7 +268,7 @@ class RankingQuery
* itself.
* @return string The entire ranking query SQL.
*/
- public function generateQuery($innerQuery)
+ public function generateRankingQuery($innerQuery)
{
// +1 to include "Others"
$limit = $this->limit + 1;
diff --git a/core/Segment.php b/core/Segment.php
index 420946794d..7fe4c061fd 100644
--- a/core/Segment.php
+++ b/core/Segment.php
@@ -9,7 +9,9 @@
namespace Piwik;
use Exception;
+use Piwik\DataAccess\LogQueryBuilder;
use Piwik\Plugins\API\API;
+use Piwik\Segment\SegmentExpression;
/**
* Limits the set of visits Piwik uses when aggregating analytics data.
@@ -57,7 +59,17 @@ class Segment
/**
* @var SegmentExpression
*/
- protected $segment = null;
+ protected $segmentExpression = null;
+
+ /**
+ * @var string
+ */
+ protected $string = null;
+
+ /**
+ * @var array
+ */
+ protected $idSites = null;
/**
* Truncate the Segments to 8k
@@ -68,9 +80,8 @@ class Segment
* Constructor.
*
* @param string $segmentCondition The segment condition, eg, `'browserCode=ff;countryCode=CA'`.
- * @param array $idSites The list of sites the segment will be used with. Some segments are
- * dependent on the site, such as goal segments.
- * @throws Exception
+ * @param array $idSites The list of sites the st
+ * @throws
*/
public function __construct($segmentCondition, $idSites)
{
@@ -103,7 +114,7 @@ class Segment
$this->string = $string;
$this->idSites = $idSites;
$segment = new SegmentExpression($string);
- $this->segment = $segment;
+ $this->segmentExpression = $segment;
// parse segments
$expressions = $segment->parseSubExpressions();
@@ -127,7 +138,7 @@ class Segment
*/
public function isEmpty()
{
- return empty($this->string);
+ return $this->segmentExpression->isEmpty();
}
protected $availableSegments = array();
@@ -222,236 +233,16 @@ class Segment
* @param array|string $bind (optional) Bind parameters, eg, `array($col1Value, $col2Value)`.
* @param false|string $orderBy (optional) Order by clause, eg, `"t1.col1 ASC"`.
* @param false|string $groupBy (optional) Group by clause, eg, `"t2.col2"`.
+ * @param int $limit Limit by clause
+ * @param int If set to value >= 1 then the Select query (and All inner queries) will be LIMIT'ed by this value.
+ * Use only when you're not aggregating or it will sample the data.
* @return string The entire select query.
*/
- public function getSelectQuery($select, $from, $where = false, $bind = array(), $orderBy = false, $groupBy = false)
- {
- if (!is_array($from)) {
- $from = array($from);
- }
-
- if (!$this->isEmpty()) {
- $this->segment->parseSubExpressionsIntoSqlExpressions($from);
-
- $joins = $this->generateJoins($from);
- $from = $joins['sql'];
- $joinWithSubSelect = $joins['joinWithSubSelect'];
-
- $segmentSql = $this->segment->getSql();
- $segmentWhere = $segmentSql['where'];
- if (!empty($segmentWhere)) {
- if (!empty($where)) {
- $where = "( $where )
- AND
- ($segmentWhere)";
- } else {
- $where = $segmentWhere;
- }
- }
-
- $bind = array_merge($bind, $segmentSql['bind']);
- } else {
- $joins = $this->generateJoins($from);
- $from = $joins['sql'];
- $joinWithSubSelect = $joins['joinWithSubSelect'];
- }
-
- if ($joinWithSubSelect) {
- $sql = $this->buildWrappedSelectQuery($select, $from, $where, $orderBy, $groupBy);
- } else {
- $sql = $this->buildSelectQuery($select, $from, $where, $orderBy, $groupBy);
- }
- return array(
- 'sql' => $sql,
- 'bind' => $bind
- );
- }
-
- /**
- * Generate the join sql based on the needed tables
- * @param array $tables tables to join
- * @throws Exception if tables can't be joined
- * @return array
- */
- private function generateJoins($tables)
- {
- $knownTables = array("log_visit", "log_link_visit_action", "log_conversion", "log_conversion_item");
- $visitsAvailable = $actionsAvailable = $conversionsAvailable = $conversionItemAvailable = false;
- $joinWithSubSelect = false;
- $sql = '';
-
- // make sure the tables are joined in the right order
- // base table first, then action before conversion
- // this way, conversions can be joined on idlink_va
- $actionIndex = array_search("log_link_visit_action", $tables);
- $conversionIndex = array_search("log_conversion", $tables);
- if ($actionIndex > 0 && $conversionIndex > 0 && $actionIndex > $conversionIndex) {
- $tables[$actionIndex] = "log_conversion";
- $tables[$conversionIndex] = "log_link_visit_action";
- }
-
- // same as above: action before visit
- $actionIndex = array_search("log_link_visit_action", $tables);
- $visitIndex = array_search("log_visit", $tables);
- if ($actionIndex > 0 && $visitIndex > 0 && $actionIndex > $visitIndex) {
- $tables[$actionIndex] = "log_visit";
- $tables[$visitIndex] = "log_link_visit_action";
- }
-
- foreach ($tables as $i => $table) {
- if (is_array($table)) {
- // join condition provided
- $alias = isset($table['tableAlias']) ? $table['tableAlias'] : $table['table'];
- $sql .= "
- LEFT JOIN " . Common::prefixTable($table['table']) . " AS " . $alias
- . " ON " . $table['joinOn'];
- continue;
- }
-
- if (!in_array($table, $knownTables)) {
- throw new Exception("Table '$table' can't be used for segmentation");
- }
-
- $tableSql = Common::prefixTable($table) . " AS $table";
-
- if ($i == 0) {
- // first table
- $sql .= $tableSql;
- } else {
- if ($actionsAvailable && $table == "log_conversion") {
- // have actions, need conversions => join on idlink_va
- $join = "log_conversion.idlink_va = log_link_visit_action.idlink_va "
- . "AND log_conversion.idsite = log_link_visit_action.idsite";
- } else if ($actionsAvailable && $table == "log_visit") {
- // have actions, need visits => join on idvisit
- $join = "log_visit.idvisit = log_link_visit_action.idvisit";
- } else if ($visitsAvailable && $table == "log_link_visit_action") {
- // have visits, need actions => we have to use a more complex join
- // we don't hande this here, we just return joinWithSubSelect=true in this case
- $joinWithSubSelect = true;
- $join = "log_link_visit_action.idvisit = log_visit.idvisit";
- } else if ($conversionsAvailable && $table == "log_link_visit_action") {
- // have conversions, need actions => join on idlink_va
- $join = "log_conversion.idlink_va = log_link_visit_action.idlink_va";
- } else if (($visitsAvailable && $table == "log_conversion")
- || ($conversionsAvailable && $table == "log_visit")
- ) {
- // have visits, need conversion (or vice versa) => join on idvisit
- // notice that joining conversions on visits has lower priority than joining it on actions
- $join = "log_conversion.idvisit = log_visit.idvisit";
-
- // if conversions are joined on visits, we need a complex join
- if ($table == "log_conversion") {
- $joinWithSubSelect = true;
- }
- } elseif ($conversionItemAvailable && $table === 'log_visit') {
- $join = "log_conversion_item.idvisit = log_visit.idvisit";
- } elseif ($conversionItemAvailable && $table === 'log_link_visit_action') {
- $join = "log_conversion_item.idvisit = log_link_visit_action.idvisit";
- } elseif ($conversionItemAvailable && $table === 'log_conversion') {
- $join = "log_conversion_item.idvisit = log_conversion.idvisit";
- } else {
- throw new Exception("Table '$table' can't be joined for segmentation");
- }
-
- // the join sql the default way
- $sql .= "
- LEFT JOIN $tableSql ON $join";
- }
-
- // remember which tables are available
- $visitsAvailable = ($visitsAvailable || $table == "log_visit");
- $actionsAvailable = ($actionsAvailable || $table == "log_link_visit_action");
- $conversionsAvailable = ($conversionsAvailable || $table == "log_conversion");
- $conversionItemAvailable = ($conversionItemAvailable || $table == "log_conversion_item");
- }
-
- $return = array(
- 'sql' => $sql,
- 'joinWithSubSelect' => $joinWithSubSelect
- );
- return $return;
-
- }
-
- /**
- * Build select query the normal way
- * @param string $select fieldlist to be selected
- * @param string $from tablelist to select from
- * @param string $where where clause
- * @param string $orderBy order by clause
- * @param string $groupBy group by clause
- * @return string
- */
- private function buildSelectQuery($select, $from, $where, $orderBy, $groupBy)
- {
- $sql = "
- SELECT
- $select
- FROM
- $from";
-
- if ($where) {
- $sql .= "
- WHERE
- $where";
- }
-
- if ($groupBy) {
- $sql .= "
- GROUP BY
- $groupBy";
- }
-
- if ($orderBy) {
- $sql .= "
- ORDER BY
- $orderBy";
- }
-
- return $sql;
- }
-
- /**
- * Build a select query where actions have to be joined on visits (or conversions)
- * In this case, the query gets wrapped in another query so that grouping by visit is possible
- * @param string $select
- * @param string $from
- * @param string $where
- * @param string $orderBy
- * @param string $groupBy
- * @throws Exception
- * @return string
- */
- private function buildWrappedSelectQuery($select, $from, $where, $orderBy, $groupBy)
+ public function getSelectQuery($select, $from, $where = false, $bind = array(), $orderBy = false, $groupBy = false, $limit = 0)
{
- $matchTables = "(log_visit|log_conversion_item|log_conversion|log_action)";
- preg_match_all("/". $matchTables ."\.[a-z0-9_\*]+/", $select, $matches);
- $neededFields = array_unique($matches[0]);
-
- if (count($neededFields) == 0) {
- throw new Exception("No needed fields found in select expression. "
- . "Please use a table prefix.");
- }
-
- $select = preg_replace('/'.$matchTables.'\./', 'log_inner.', $select);
- $orderBy = preg_replace('/'.$matchTables.'\./', 'log_inner.', $orderBy);
- $groupBy = preg_replace('/'.$matchTables.'\./', 'log_inner.', $groupBy);
-
- $from = "(
- SELECT
- " . implode(",
- ", $neededFields) . "
- FROM
- $from
- WHERE
- $where
- GROUP BY log_visit.idvisit
- ) AS log_inner";
-
- $where = false;
- $query = $this->buildSelectQuery($select, $from, $where, $orderBy, $groupBy);
- return $query;
+ $segmentExpression = $this->segmentExpression;
+ $segmentQuery = new LogQueryBuilder($segmentExpression);
+ return $segmentQuery->getSelectQueryString($select, $from, $where, $bind, $groupBy, $orderBy, $limit);
}
/**
@@ -463,4 +254,5 @@ class Segment
{
return (string) $this->getString();
}
+
} \ No newline at end of file
diff --git a/core/SegmentExpression.php b/core/Segment/SegmentExpression.php
index cbd2a3eb9e..10cf0294e3 100644
--- a/core/SegmentExpression.php
+++ b/core/Segment/SegmentExpression.php
@@ -7,7 +7,7 @@
*
*/
-namespace Piwik;
+namespace Piwik\Segment;
use Exception;
@@ -46,6 +46,11 @@ class SegmentExpression
$this->tree = $this->parseTree();
}
+ public function isEmpty()
+ {
+ return count($this->tree) == 0;
+ }
+
protected $joins = array();
protected $valuesBind = array();
protected $parsedTree = array();
@@ -337,7 +342,7 @@ class SegmentExpression
*/
public function getSql()
{
- if (count($this->tree) == 0) {
+ if ($this->isEmpty()) {
throw new Exception("Invalid segment, please specify a valid segment.");
}
$sql = '';
diff --git a/core/Tracker/TableLogAction.php b/core/Tracker/TableLogAction.php
index 709936f2a5..fe620035d7 100644
--- a/core/Tracker/TableLogAction.php
+++ b/core/Tracker/TableLogAction.php
@@ -10,7 +10,7 @@
namespace Piwik\Tracker;
use Piwik\Common;
-use Piwik\SegmentExpression;
+use Piwik\Segment\SegmentExpression;
use Piwik\Tracker;
/**