diff options
author | BeezyT <timo@ezdesign.de> | 2012-08-17 13:29:57 +0400 |
---|---|---|
committer | BeezyT <timo@ezdesign.de> | 2012-08-17 13:29:57 +0400 |
commit | c828df60e352ebac697e444b49b979bf1cc1f12f (patch) | |
tree | 4ba527cf2283e6355f26d0a30617c876d66cda63 /core/RankingQuery.php | |
parent | ad60d64fb4d2967c3686bdcfd613f6c349f6663a (diff) |
refs #3330 ranking query
* Piwik_RankingQuery encapsulates the logic to apply the limit + grouping of others to an arbitrary select query
* queryActionsByDimension() and queryVisitsByDimension() in Piwik_ArchiveProcessing_Day get new parameters to use the ranking query
git-svn-id: http://dev.piwik.org/svn/trunk@6803 59fd770c-687e-43c8-a1e3-f5a4ff64c105
Diffstat (limited to 'core/RankingQuery.php')
-rw-r--r-- | core/RankingQuery.php | 380 |
1 files changed, 380 insertions, 0 deletions
diff --git a/core/RankingQuery.php b/core/RankingQuery.php new file mode 100644 index 0000000000..a8575aed36 --- /dev/null +++ b/core/RankingQuery.php @@ -0,0 +1,380 @@ +<?php +/** + * Piwik - Open source web analytics + * + * @link http://piwik.org + * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later + * @version $Id$ + * + * @category Piwik + * @package Piwik + */ + +/** + * The ranking query class wraps an arbitrary SQL query with more SQL that limits + * the number of results while grouping the rest to "Others" and allows for some + * more fancy things that can be configured via method calls of this class. The + * advanced use cases are explained in the doc comments of the methods. + * + * The general use case looks like this: + * + * // limit to 500 rows + "Others" + * $rankingQuery = new Piwik_RankingQuery(500); + * + * // idaction_url will be "Others" in the row that contains the aggregated rest + * $rankingQuery->addLabelColumn('idaction_url'); + * + * // the actual query. it's important to sort it before the limit is applied + * $sql = 'SELECT idaction_url, COUNT(*) AS nb_hits + * FROM log_link_visit_action + * GROUP BY idaction_url + * ORDER BY nb_hits DESC'; + * + * // execute the query + * $rankingQuery->execute($sql); + * + * + * @package Piwik + */ +class Piwik_RankingQuery +{ + + /** + * Contains the labels of the inner query. + * Format: "label" => true (to make sure labels don't appear twice) + * @var array + */ + private $labelColumns = array(); + + /** + * The columns of the inner query that are not labels + * Format: "label" => "aggregation function" or false for no aggregation + * @var array + */ + private $additionalColumns = array(); + + /** + * The limit for each group + * @var int + */ + private $limit = 5; + + /** + * The name of the columns that marks rows to be excluded from the limit + * @var string + */ + private $columnToMarkExcludedRows = false; + + /** + * The column that is used to partition the result + * @var bool|string + */ + private $partitionColumn = false; + + /** + * The possible values for the column $this->partitionColumn + * @var array + */ + private $partitionColumnValues = array(); + + + /** + * The constructor. + * Can be used as a shortcut for setLimit() + */ + public function __construct($limit=false) + { + if ($limit !== false) + { + $this->setLimit($limit); + } + } + + /** + * Set the limit after which everything is grouped to "Others" + * + * @param $limit int + */ + public function setLimit($limit) + { + $this->limit = $limit; + } + + /** + * Add a label column. + * Labels are the columns that are replaced with "Others" after the limit. + * + * @param $labelColumn string|array + */ + public function addLabelColumn($labelColumn) + { + if (is_array($labelColumn)) + { + foreach ($labelColumn as $label) + { + $this->addLabelColumn($label); + } + return; + } + $this->labelColumns[$labelColumn] = true; + } + + /** + * Add a column that has be added to the outer queries. + * + * @param $column + * @param string|bool $aggregationFunction string + * If set, this function is used to aggregate the values of "Others" + */ + public function addColumn($column, $aggregationFunction=false) + { + if (is_array($column)) + { + foreach ($column as $c) + { + $this->addColumn($c, $aggregationFunction); + } + return; + } + $this->additionalColumns[$column] = $aggregationFunction; + } + + /** + * The inner query can have a column that marks the rows that shall be excluded from limiting. + * If the column contains 0, rows are handled as usual. For values greater than 0, separate + * groups are made. If this method is used, generate() returns both the regular result and + * the excluded columns separately. + * + * @param $column string name of the column + * @throws Exception when method is used more than once + */ + public function setColumnToMarkExcludedRows($column) + { + if ($this->columnToMarkExcludedRows !== false) + { + throw new Exception("setColumnToMarkExcludedRows can only be used once"); + } + + $this->columnToMarkExcludedRows = $column; + $this->addColumn($this->columnToMarkExcludedRows); + } + + /** + * This method can be used to get multiple groups in one go. For example, one might query + * the top following pages, outlinks and downloads in one go by using log_action.type as + * the partition column and [TYPE_ACTION_URL, TYPE_OUTLINK, TYPE_DOWNLOAD] as the possible + * values. + * When this method has been used, generate() returns as array that contains one array + * per group of data. + * + * @param $partitionColumn string + * @param $possibleValues array of integers + * @throws Exception when method is used more than once + */ + public function partitionResultIntoMultipleGroups($partitionColumn, $possibleValues) + { + if ($this->partitionColumn !== false) + { + throw new Exception("partitionResultIntoMultipleGroups can only be used once"); + } + + $this->partitionColumn = $partitionColumn; + $this->partitionColumnValues = $possibleValues; + $this->addColumn($partitionColumn); + } + + /** + * Execute the query. + * The object has to be configured first using the other methods. + * + * @param $innerQuery string The "payload" query. The result has be sorted as desired. + * @param $bind array Bindings for the inner query. + * @return array The format depends on which methods have been used + * to configure the ranking query + */ + public function execute($innerQuery, $bind=array()) + { + $query = $this->generateQuery($innerQuery); + $data = Piwik_FetchAll($query, $bind); + + if ($this->columnToMarkExcludedRows !== false) + { + // split the result into the regular result and the rows with special treatment + $excludedFromLimit = array(); + $result = array(); + foreach ($data as &$row) + { + if ($row[$this->columnToMarkExcludedRows] != 0) + { + $excludedFromLimit[] = $row; + } + else + { + $result[] = $row; + } + } + $data = array( + 'result' => &$result, + 'excludedFromLimit' => &$excludedFromLimit + ); + } + + if ($this->partitionColumn !== false) + { + if ($this->columnToMarkExcludedRows !== false) + { + $data['result'] = $this->splitPartitions($data['result']); + } + else + { + $data = $this->splitPartitions($data); + } + } + + return $data; + } + + private function splitPartitions(&$data) + { + $result = array(); + foreach ($data as &$row) + { + $partition = $row[$this->partitionColumn]; + if (!isset($result[$partition])) + { + $result[$partition] = array(); + } + $result[$partition][] = &$row; + } + return $result; + } + + /** + * Generate the SQL code that does the magic. + * If you want to get the result, use execute() instead. If you're interested in + * the generated SQL code (e.g. for debugging), use this method. + * + * @param $innerQuery string SQL of the actual query + * @return string entire ranking query SQL + */ + public function generateQuery($innerQuery) + { + // +1 to include "Others" + $limit = $this->limit + 1; + $counterExpression = $this->getCounterExpression($limit); + + // generate select clauses for label columns + $labelColumnsString = '`'.implode('`, `', array_keys($this->labelColumns)).'`'; + $labelColumnsOthersSwitch = array(); + foreach ($this->labelColumns as $column => $true) + { + $labelColumnsOthersSwitch[] = " + CASE + WHEN counter = $limit THEN \"Others\" + ELSE `$column` + END AS `$column` + "; + } + $labelColumnsOthersSwitch = implode(', ', $labelColumnsOthersSwitch); + + // generate select clauses for additional columns + $additionalColumnsString = ''; + $additionalColumnsAggregatedString = ''; + foreach ($this->additionalColumns as $additionalColumn => $aggregation) + { + $additionalColumnsString .= ', `'.$additionalColumn.'`'; + if ($aggregation !== false) + { + $additionalColumnsAggregatedString .= ', '.$aggregation.'(`'.$additionalColumn.'`) AS `'.$additionalColumn.'`'; + } + else + { + $additionalColumnsAggregatedString .= ', `'.$additionalColumn.'`'; + } + + } + + // initialize the counters + if ($this->partitionColumn !== false) + { + $initCounter = ''; + foreach ($this->partitionColumnValues as $value) + { + $initCounter .= '( SELECT @counter'.intval($value).':=0 ) initCounter'.intval($value).', '; + } + } + else + { + $initCounter = '( SELECT @counter:=0 ) initCounter,'; + } + + // add a counter to the query + // we rely on the sorting of the inner query + $withCounter = " + SELECT + $labelColumnsString, + $counterExpression AS counter + $additionalColumnsString + FROM + $initCounter + ( $innerQuery ) actualQuery + "; + + // group by the counter - this groups "Others" because the counter stops at $limit + $groupBy = 'counter'; + if ($this->partitionColumn !== false) + { + $groupBy .= ', `'.$this->partitionColumn.'`'; + } + $groupOthers = " + SELECT + $labelColumnsOthersSwitch + $additionalColumnsAggregatedString + FROM ( $withCounter ) AS withCounter + GROUP BY $groupBy + "; + + return $groupOthers; + } + + private function getCounterExpression($limit) + { + $whens = array(); + + if ($this->columnToMarkExcludedRows !== false) + { + // when a row has been specified that marks which records should be excluded + // from limiting, we don't give those rows the normal counter but -1 times the + // value they had before. this way, they have a separate number space (i.e. negative + // integers). + $whens[] = "WHEN {$this->columnToMarkExcludedRows} != 0 THEN -1 * {$this->columnToMarkExcludedRows}"; + } + + if ($this->partitionColumn !== false) + { + // partition: one counter per possible value + foreach ($this->partitionColumnValues as $value) + { + $isValue = '`'.$this->partitionColumn.'` = '.intval($value); + $counter = '@counter'.intval($value); + $whens[] = "WHEN $isValue AND $counter = $limit THEN $limit"; + $whens[] = "WHEN $isValue THEN $counter:=$counter+1"; + } + $whens[] = "ELSE 0"; + } + else + { + // no partitioning: add a single counter + $whens[] = "WHEN @counter = $limit THEN $limit"; + $whens[] = "ELSE @counter:=@counter+1"; + } + + return " + CASE + ".implode(" + ", $whens)." + END + "; + } + +}
\ No newline at end of file |