Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/matomo-org/matomo.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/Actions/ArchivingHelper.php')
-rw-r--r--plugins/Actions/ArchivingHelper.php939
1 files changed, 448 insertions, 491 deletions
diff --git a/plugins/Actions/ArchivingHelper.php b/plugins/Actions/ArchivingHelper.php
index 30fb58b555..739e8a2fef 100644
--- a/plugins/Actions/ArchivingHelper.php
+++ b/plugins/Actions/ArchivingHelper.php
@@ -19,495 +19,452 @@
class Piwik_Actions_ArchivingHelper
{
- const OTHERS_ROW_KEY = '';
-
- /**
- * @param Zend_Db_Statement|PDOStatement $query
- * @param string|bool $fieldQueried
- * @param array $actionsTablesByType
- * @return int
- */
- static public function updateActionsTableWithRowQuery($query, $fieldQueried, & $actionsTablesByType)
- {
- $rowsProcessed = 0;
- while( $row = $query->fetch() )
- {
- if(empty($row['idaction']))
- {
- $row['type'] = ($fieldQueried == 'idaction_url' ? Piwik_Tracker_Action::TYPE_ACTION_URL : Piwik_Tracker_Action::TYPE_ACTION_NAME);
- // This will be replaced with 'X not defined' later
- $row['name'] = '';
- // Yes, this is kind of a hack, so we don't mix 'page url not defined' with 'page title not defined' etc.
- $row['idaction'] = -$row['type'];
- }
-
- if($row['type'] != Piwik_Tracker_Action::TYPE_SITE_SEARCH)
- {
- unset($row[Piwik_Archive::INDEX_SITE_SEARCH_HAS_NO_RESULT]);
- }
-
- // This will appear as <url /> in the API, which is actually very important to keep
- // eg. When there's at least one row in a report that does not have a URL, not having this <url/> would break HTML/PDF reports.
- $url = '';
- if($row['type'] == Piwik_Tracker_Action::TYPE_SITE_SEARCH
- || $row['type'] == Piwik_Tracker_Action::TYPE_ACTION_NAME)
- {
- $url = null;
- }
- elseif(!empty($row['name'])
- && $row['name'] != Piwik_DataTable::LABEL_SUMMARY_ROW)
- {
- $url = Piwik_Tracker_Action::reconstructNormalizedUrl((string)$row['name'], $row['url_prefix']);
- }
-
- if(isset($row['name'])
- && isset($row['type']))
- {
- $actionName = $row['name'];
- $actionType = $row['type'];
- $urlPrefix = $row['url_prefix'];
- $idaction = $row['idaction'];
-
- // in some unknown case, the type field is NULL, as reported in #1082 - we ignore this page view
- if(empty($actionType))
- {
- if ($idaction != Piwik_DataTable::LABEL_SUMMARY_ROW)
- {
- self::setCachedActionRow($idaction, $actionType, false);
- }
- continue;
- }
-
- $actionRow = self::getActionRow($actionName, $actionType, $urlPrefix, $actionsTablesByType);
-
- self::setCachedActionRow($idaction, $actionType, $actionRow);
- }
- else
- {
- $actionRow = self::getCachedActionRow($row['idaction'], $row['type']);
-
- // Action processed as "to skip" for some reasons
- if($actionRow === false)
- {
- continue;
- }
- }
-
-
- if (is_null($actionRow))
- {
- continue;
- }
-
- // Here we do ensure that, the Metadata URL set for a given row, is the one from the Pageview with the most hits.
- // This is to ensure that when, different URLs are loaded with the same page name.
- // For example http://piwik.org and http://id.piwik.org are reported in Piwik > Actions > Pages with /index
- // But, we must make sure http://piwik.org is used to link & for transitions
- // Note: this code is partly duplicated from Piwik_DataTable_Row->sumRowMetadata()
- if( !is_null($url)
- && !$actionRow->isSummaryRow())
- {
- if(($existingUrl = $actionRow->getMetadata('url')) !== false)
- {
- if( !empty($row[Piwik_Archive::INDEX_PAGE_NB_HITS])
- && $row[Piwik_Archive::INDEX_PAGE_NB_HITS] > $actionRow->maxVisitsSummed)
- {
- $actionRow->setMetadata('url', $url);
- $actionRow->maxVisitsSummed = $row[Piwik_Archive::INDEX_PAGE_NB_HITS];
- }
- }
- else
- {
- $actionRow->setMetadata('url', $url);
- $actionRow->maxVisitsSummed = !empty($row[Piwik_Archive::INDEX_PAGE_NB_HITS]) ? $row[Piwik_Archive::INDEX_PAGE_NB_HITS] : 0;
- }
- }
-
- if ($row['type'] != Piwik_Tracker_Action::TYPE_ACTION_URL
- && $row['type'] != Piwik_Tracker_Action::TYPE_ACTION_NAME) {
- // only keep performance metrics when they're used (i.e. for URLs and page titles)
- if (array_key_exists(Piwik_Archive::INDEX_PAGE_SUM_TIME_GENERATION, $row)) {
- unset($row[Piwik_Archive::INDEX_PAGE_SUM_TIME_GENERATION]);
- }
- if (array_key_exists(Piwik_Archive::INDEX_PAGE_NB_HITS_WITH_TIME_GENERATION, $row)) {
- unset($row[Piwik_Archive::INDEX_PAGE_NB_HITS_WITH_TIME_GENERATION]);
- }
- }
-
- unset($row['name']);
- unset($row['type']);
- unset($row['idaction']);
- unset($row['url_prefix']);
-
- foreach($row as $name => $value)
- {
- // in some edge cases, we have twice the same action name with 2 different idaction
- // - this happens when 2 visitors visit the same new page at the same time, and 2 actions get recorded for the same name
- // - this could also happen when 2 URLs end up having the same label (eg. 2 subdomains get aggregated to the "/index" page name)
- if(($alreadyValue = $actionRow->getColumn($name)) !== false)
- {
- $actionRow->setColumn($name, $alreadyValue+$value);
- }
- else
- {
- $actionRow->addColumn($name, $value);
- }
- }
-
- // if the exit_action was not recorded properly in the log_link_visit_action
- // there would be an error message when getting the nb_hits column
- // we must fake the record and add the columns
- if($actionRow->getColumn(Piwik_Archive::INDEX_PAGE_NB_HITS) === false)
- {
- // to test this code: delete the entries in log_link_action_visit for
- // a given exit_idaction_url
- foreach(self::getDefaultRow()->getColumns() as $name => $value)
- {
- $actionRow->addColumn($name, $value);
- }
- }
- $rowsProcessed++;
- }
-
- // just to make sure php copies the last $actionRow in the $parentTable array
- $actionRow =& $actionsTablesByType;
- return $rowsProcessed;
- }
-
- static public $maximumRowsInDataTableLevelZero;
- static public $maximumRowsInSubDataTable;
- static public $columnToSortByBeforeTruncation;
-
- static protected $actionUrlCategoryDelimiter = null;
- static protected $actionTitleCategoryDelimiter = null;
- static protected $defaultActionName = null;
- static protected $defaultActionNameWhenNotDefined = null;
- static protected $defaultActionUrlWhenNotDefined = null;
-
- static public function reloadConfig()
- {
- // for BC, we read the old style delimiter first (see #1067)Row
- $actionDelimiter = @Piwik_Config::getInstance()->General['action_category_delimiter'];
- if(empty($actionDelimiter))
- {
- self::$actionUrlCategoryDelimiter = Piwik_Config::getInstance()->General['action_url_category_delimiter'];
- self::$actionTitleCategoryDelimiter = Piwik_Config::getInstance()->General['action_title_category_delimiter'];
- }
- else
- {
- self::$actionUrlCategoryDelimiter = self::$actionTitleCategoryDelimiter = $actionDelimiter;
- }
-
- self::$defaultActionName = Piwik_Config::getInstance()->General['action_default_name'];
- self::$columnToSortByBeforeTruncation = Piwik_Archive::INDEX_NB_VISITS;
- self::$maximumRowsInDataTableLevelZero = Piwik_Config::getInstance()->General['datatable_archiving_maximum_rows_actions'];
- self::$maximumRowsInSubDataTable = Piwik_Config::getInstance()->General['datatable_archiving_maximum_rows_subtable_actions'];
-
- Piwik_DataTable::setMaximumDepthLevelAllowedAtLeast(self::getSubCategoryLevelLimit() + 1);
- }
-
-
- /**
- * The default row is used when archiving, if data is inconsistent in the DB,
- * there could be pages that have exit/entry hits, but don't yet
- * have a record in the table (or the record was truncated).
- *
- * @return Piwik_DataTable_Row
- */
- static private function getDefaultRow()
- {
- static $row = false;
- if($row === false) {
- // This row is used in the case where an action is know as an exit_action
- // but this action was not properly recorded when it was hit in the first place
- // so we add this fake row information to make sure there is a nb_hits, etc. column for every action
- $row = new Piwik_DataTable_Row(array(
- Piwik_DataTable_Row::COLUMNS => array(
- Piwik_Archive::INDEX_NB_VISITS => 1,
- Piwik_Archive::INDEX_NB_UNIQ_VISITORS => 1,
- Piwik_Archive::INDEX_PAGE_NB_HITS => 1,
- )));
- }
- return $row;
- }
-
- /**
- * Given a page name and type, builds a recursive datatable where
- * each level of the tree is a category, based on the page name split by a delimiter (slash / by default)
- *
- * @param string $actionName
- * @param int $actionType
- * @param int $urlPrefix
- * @param array $actionsTablesByType
- * @return Piwik_DataTable
- */
- protected static function getActionRow( $actionName, $actionType, $urlPrefix=null, &$actionsTablesByType )
- {
- // we work on the root table of the given TYPE (either ACTION_URL or DOWNLOAD or OUTLINK etc.)
- /* @var Piwik_DataTable $currentTable */
- $currentTable =& $actionsTablesByType[$actionType];
-
- // check for ranking query cut-off
- if ($actionName == Piwik_DataTable::LABEL_SUMMARY_ROW)
- {
- $summaryRow = $currentTable->getRowFromId(Piwik_DataTable::ID_SUMMARY_ROW);
- if ($summaryRow === false)
- {
- $summaryRow = $currentTable->addSummaryRow(self::createSummaryRow());
- }
- return $summaryRow;
- }
-
- // go to the level of the subcategory
- $actionExplodedNames = self::getActionExplodedNames($actionName, $actionType, $urlPrefix);
- list($row, $level) = $currentTable->walkPath(
- $actionExplodedNames, self::getDefaultRowColumns(), self::$maximumRowsInSubDataTable);
-
- return $row;
- }
-
- /**
- * Explodes action name into an array of elements.
- *
- * NOTE: before calling this function make sure Piwik_Actions_ArchivingHelper::reloadConfig(); is called
- *
- * for downloads:
- * we explode link http://piwik.org/some/path/piwik.zip into an array( 'piwik.org', '/some/path/piwik.zip' );
- *
- * for outlinks:
- * we explode link http://dev.piwik.org/some/path into an array( 'dev.piwik.org', '/some/path' );
- *
- * for action urls:
- * we explode link http://piwik.org/some/path into an array( 'some', 'path' );
- *
- * for action names:
- * we explode name 'Piwik / Category 1 / Category 2' into an array('Piwik', 'Category 1', 'Category 2');
- *
- * @param string action name
- * @param int action type
- * @param int url prefix (only used for TYPE_ACTION_URL)
- * @return array of exploded elements from $name
- */
- static public function getActionExplodedNames($name, $type, $urlPrefix=null)
- {
- // Site Search does not split Search keywords
- if($type == Piwik_Tracker_Action::TYPE_SITE_SEARCH)
- {
- return array($name);
- }
-
- $matches = array();
- $isUrl = false;
- $name = str_replace("\n", "", $name);
-
- $urlRegexAfterDomain = '([^/]+)[/]?([^#]*)[#]?(.*)';
- if ($urlPrefix === null)
- {
- // match url with protocol (used for outlinks / downloads)
- $urlRegex = '@^http[s]?://'.$urlRegexAfterDomain.'$@i';
- }
- else
- {
- // the name is a url that does not contain protocol and www anymore
- // we know that normalization has been done on db level because $urlPrefix is set
- $urlRegex = '@^'.$urlRegexAfterDomain.'$@i';
- }
-
- preg_match($urlRegex, $name, $matches);
- if( count($matches) )
- {
- $isUrl = true;
- $urlHost = $matches[1];
- $urlPath = $matches[2];
- $urlFragment = $matches[3];
- }
-
- if($type == Piwik_Tracker_Action::TYPE_DOWNLOAD
- || $type == Piwik_Tracker_Action::TYPE_OUTLINK)
- {
- if( $isUrl )
- {
- return array(trim($urlHost), '/' . trim($urlPath));
- }
- }
-
- if( $isUrl )
- {
- $name = $urlPath;
-
- if( $name === '' || substr($name, -1) == '/' )
- {
- $name .= self::$defaultActionName;
- }
- }
-
- if($type == Piwik_Tracker_Action::TYPE_ACTION_NAME)
- {
- $categoryDelimiter = self::$actionTitleCategoryDelimiter;
- }
- else
- {
- $categoryDelimiter = self::$actionUrlCategoryDelimiter;
- }
-
-
- if( $isUrl )
- {
- $urlFragment = Piwik_Tracker_Action::processUrlFragment($urlFragment);
- if(!empty($urlFragment)) {
- $name .= '#' . $urlFragment;
- }
- }
-
- if(empty($categoryDelimiter))
- {
- return array( trim($name) );
- }
-
- $split = explode($categoryDelimiter, $name, self::getSubCategoryLevelLimit());
-
- // trim every category and remove empty categories
- $split = array_map('trim', $split);
- $split = array_filter($split, 'strlen');
-
- // forces array key to start at 0
- $split = array_values($split);
-
- if( empty($split) )
- {
- $defaultName = self::getUnknownActionName($type);
- return array( trim($defaultName) );
- }
-
- $lastPageName = end($split);
- // we are careful to prefix the page URL / name with some value
- // so that if a page has the same name as a category
- // we don't merge both entries
- if($type != Piwik_Tracker_Action::TYPE_ACTION_NAME )
- {
- $lastPageName = '/' . $lastPageName;
- }
- else
- {
- $lastPageName = ' ' . $lastPageName;
- }
- $split[count($split)-1] = $lastPageName;
- return array_values( $split );
- }
-
- /**
- * Gets the key for the cache of action rows from an action ID and type.
- *
- * @param int $idAction
- * @param int $actionType
- * @return string|int
- */
- private static function getCachedActionRowKey( $idAction, $actionType )
- {
- return $idAction == Piwik_DataTable::LABEL_SUMMARY_ROW
- ? $actionType.'_others'
- : $idAction;
- }
-
- /**
- * Returns the configured sub-category level limit.
- *
- * @return int
- */
- public static function getSubCategoryLevelLimit()
- {
- return Piwik_Config::getInstance()->General['action_category_level_limit'];
- }
-
- /**
- * Returns default label for the action type
- *
- * @param $type
- * @return string
- */
- static public function getUnknownActionName($type)
- {
- if(empty(self::$defaultActionNameWhenNotDefined))
- {
- self::$defaultActionNameWhenNotDefined = Piwik_Translate('General_NotDefined', Piwik_Translate('Actions_ColumnPageName'));
- self::$defaultActionUrlWhenNotDefined = Piwik_Translate('General_NotDefined', Piwik_Translate('Actions_ColumnPageURL'));
- }
- if($type == Piwik_Tracker_Action::TYPE_ACTION_NAME)
- {
- return self::$defaultActionNameWhenNotDefined;
- }
- return self::$defaultActionUrlWhenNotDefined;
- }
-
- /**
- * Static cache to store Rows during processing
- */
- static protected $cacheParsedAction = array();
-
- public static function clearActionsCache()
- {
- self::$cacheParsedAction = array();
- }
-
- /**
- * Get cached action row by id & type. If $idAction is set to -1, the 'Others' row
- * for the specific action type will be returned.
- *
- * @param int $idAction
- * @param int $actionType
- * @return Piwik_DataTable_Row|false
- */
- private static function getCachedActionRow( $idAction, $actionType )
- {
- $cacheLabel = self::getCachedActionRowKey($idAction, $actionType);
-
- if (!isset(self::$cacheParsedAction[$cacheLabel]))
- {
- // This can happen when
- // - We select an entry page ID that was only seen yesterday, so wasn't selected in the first query
- // - We count time spent on a page, when this page was only seen yesterday
- return false;
- }
-
- return self::$cacheParsedAction[$cacheLabel];
- }
-
- /**
- * Set cached action row for an id & type.
- *
- * @param int $idAction
- * @param int $actionType
- * @param Piwik_DataTable_Row
- */
- private static function setCachedActionRow( $idAction, $actionType, $actionRow )
- {
- $cacheLabel = self::getCachedActionRowKey($idAction, $actionType);
- self::$cacheParsedAction[$cacheLabel] = $actionRow;
- }
-
- /**
- * Returns the default columns for a row in an Actions DataTable.
- *
- * @return array
- */
- private static function getDefaultRowColumns()
- {
- return array(Piwik_Archive::INDEX_NB_VISITS => 0,
- Piwik_Archive::INDEX_NB_UNIQ_VISITORS => 0,
- Piwik_Archive::INDEX_PAGE_NB_HITS => 0,
- Piwik_Archive::INDEX_PAGE_SUM_TIME_SPENT => 0);
- }
-
- /**
- * Creates a summary row for an Actions DataTable.
- *
- * @return Piwik_DataTable_Row
- */
- private static function createSummaryRow()
- {
- return new Piwik_DataTable_Row(array(
- Piwik_DataTable_Row::COLUMNS =>
- array('label' => Piwik_DataTable::LABEL_SUMMARY_ROW) + self::getDefaultRowColumns()
- ));
- }
+ const OTHERS_ROW_KEY = '';
+
+ /**
+ * @param Zend_Db_Statement|PDOStatement $query
+ * @param string|bool $fieldQueried
+ * @param array $actionsTablesByType
+ * @return int
+ */
+ static public function updateActionsTableWithRowQuery($query, $fieldQueried, & $actionsTablesByType)
+ {
+ $rowsProcessed = 0;
+ while ($row = $query->fetch()) {
+ if (empty($row['idaction'])) {
+ $row['type'] = ($fieldQueried == 'idaction_url' ? Piwik_Tracker_Action::TYPE_ACTION_URL : Piwik_Tracker_Action::TYPE_ACTION_NAME);
+ // This will be replaced with 'X not defined' later
+ $row['name'] = '';
+ // Yes, this is kind of a hack, so we don't mix 'page url not defined' with 'page title not defined' etc.
+ $row['idaction'] = -$row['type'];
+ }
+
+ if ($row['type'] != Piwik_Tracker_Action::TYPE_SITE_SEARCH) {
+ unset($row[Piwik_Archive::INDEX_SITE_SEARCH_HAS_NO_RESULT]);
+ }
+
+ // This will appear as <url /> in the API, which is actually very important to keep
+ // eg. When there's at least one row in a report that does not have a URL, not having this <url/> would break HTML/PDF reports.
+ $url = '';
+ if ($row['type'] == Piwik_Tracker_Action::TYPE_SITE_SEARCH
+ || $row['type'] == Piwik_Tracker_Action::TYPE_ACTION_NAME
+ ) {
+ $url = null;
+ } elseif (!empty($row['name'])
+ && $row['name'] != Piwik_DataTable::LABEL_SUMMARY_ROW
+ ) {
+ $url = Piwik_Tracker_Action::reconstructNormalizedUrl((string)$row['name'], $row['url_prefix']);
+ }
+
+ if (isset($row['name'])
+ && isset($row['type'])
+ ) {
+ $actionName = $row['name'];
+ $actionType = $row['type'];
+ $urlPrefix = $row['url_prefix'];
+ $idaction = $row['idaction'];
+
+ // in some unknown case, the type field is NULL, as reported in #1082 - we ignore this page view
+ if (empty($actionType)) {
+ if ($idaction != Piwik_DataTable::LABEL_SUMMARY_ROW) {
+ self::setCachedActionRow($idaction, $actionType, false);
+ }
+ continue;
+ }
+
+ $actionRow = self::getActionRow($actionName, $actionType, $urlPrefix, $actionsTablesByType);
+
+ self::setCachedActionRow($idaction, $actionType, $actionRow);
+ } else {
+ $actionRow = self::getCachedActionRow($row['idaction'], $row['type']);
+
+ // Action processed as "to skip" for some reasons
+ if ($actionRow === false) {
+ continue;
+ }
+ }
+
+
+ if (is_null($actionRow)) {
+ continue;
+ }
+
+ // Here we do ensure that, the Metadata URL set for a given row, is the one from the Pageview with the most hits.
+ // This is to ensure that when, different URLs are loaded with the same page name.
+ // For example http://piwik.org and http://id.piwik.org are reported in Piwik > Actions > Pages with /index
+ // But, we must make sure http://piwik.org is used to link & for transitions
+ // Note: this code is partly duplicated from Piwik_DataTable_Row->sumRowMetadata()
+ if (!is_null($url)
+ && !$actionRow->isSummaryRow()
+ ) {
+ if (($existingUrl = $actionRow->getMetadata('url')) !== false) {
+ if (!empty($row[Piwik_Archive::INDEX_PAGE_NB_HITS])
+ && $row[Piwik_Archive::INDEX_PAGE_NB_HITS] > $actionRow->maxVisitsSummed
+ ) {
+ $actionRow->setMetadata('url', $url);
+ $actionRow->maxVisitsSummed = $row[Piwik_Archive::INDEX_PAGE_NB_HITS];
+ }
+ } else {
+ $actionRow->setMetadata('url', $url);
+ $actionRow->maxVisitsSummed = !empty($row[Piwik_Archive::INDEX_PAGE_NB_HITS]) ? $row[Piwik_Archive::INDEX_PAGE_NB_HITS] : 0;
+ }
+ }
+
+ if ($row['type'] != Piwik_Tracker_Action::TYPE_ACTION_URL
+ && $row['type'] != Piwik_Tracker_Action::TYPE_ACTION_NAME
+ ) {
+ // only keep performance metrics when they're used (i.e. for URLs and page titles)
+ if (array_key_exists(Piwik_Archive::INDEX_PAGE_SUM_TIME_GENERATION, $row)) {
+ unset($row[Piwik_Archive::INDEX_PAGE_SUM_TIME_GENERATION]);
+ }
+ if (array_key_exists(Piwik_Archive::INDEX_PAGE_NB_HITS_WITH_TIME_GENERATION, $row)) {
+ unset($row[Piwik_Archive::INDEX_PAGE_NB_HITS_WITH_TIME_GENERATION]);
+ }
+ }
+
+ unset($row['name']);
+ unset($row['type']);
+ unset($row['idaction']);
+ unset($row['url_prefix']);
+
+ foreach ($row as $name => $value) {
+ // in some edge cases, we have twice the same action name with 2 different idaction
+ // - this happens when 2 visitors visit the same new page at the same time, and 2 actions get recorded for the same name
+ // - this could also happen when 2 URLs end up having the same label (eg. 2 subdomains get aggregated to the "/index" page name)
+ if (($alreadyValue = $actionRow->getColumn($name)) !== false) {
+ $actionRow->setColumn($name, $alreadyValue + $value);
+ } else {
+ $actionRow->addColumn($name, $value);
+ }
+ }
+
+ // if the exit_action was not recorded properly in the log_link_visit_action
+ // there would be an error message when getting the nb_hits column
+ // we must fake the record and add the columns
+ if ($actionRow->getColumn(Piwik_Archive::INDEX_PAGE_NB_HITS) === false) {
+ // to test this code: delete the entries in log_link_action_visit for
+ // a given exit_idaction_url
+ foreach (self::getDefaultRow()->getColumns() as $name => $value) {
+ $actionRow->addColumn($name, $value);
+ }
+ }
+ $rowsProcessed++;
+ }
+
+ // just to make sure php copies the last $actionRow in the $parentTable array
+ $actionRow =& $actionsTablesByType;
+ return $rowsProcessed;
+ }
+
+ static public $maximumRowsInDataTableLevelZero;
+ static public $maximumRowsInSubDataTable;
+ static public $columnToSortByBeforeTruncation;
+
+ static protected $actionUrlCategoryDelimiter = null;
+ static protected $actionTitleCategoryDelimiter = null;
+ static protected $defaultActionName = null;
+ static protected $defaultActionNameWhenNotDefined = null;
+ static protected $defaultActionUrlWhenNotDefined = null;
+
+ static public function reloadConfig()
+ {
+ // for BC, we read the old style delimiter first (see #1067)Row
+ $actionDelimiter = @Piwik_Config::getInstance()->General['action_category_delimiter'];
+ if (empty($actionDelimiter)) {
+ self::$actionUrlCategoryDelimiter = Piwik_Config::getInstance()->General['action_url_category_delimiter'];
+ self::$actionTitleCategoryDelimiter = Piwik_Config::getInstance()->General['action_title_category_delimiter'];
+ } else {
+ self::$actionUrlCategoryDelimiter = self::$actionTitleCategoryDelimiter = $actionDelimiter;
+ }
+
+ self::$defaultActionName = Piwik_Config::getInstance()->General['action_default_name'];
+ self::$columnToSortByBeforeTruncation = Piwik_Archive::INDEX_NB_VISITS;
+ self::$maximumRowsInDataTableLevelZero = Piwik_Config::getInstance()->General['datatable_archiving_maximum_rows_actions'];
+ self::$maximumRowsInSubDataTable = Piwik_Config::getInstance()->General['datatable_archiving_maximum_rows_subtable_actions'];
+
+ Piwik_DataTable::setMaximumDepthLevelAllowedAtLeast(self::getSubCategoryLevelLimit() + 1);
+ }
+
+
+ /**
+ * The default row is used when archiving, if data is inconsistent in the DB,
+ * there could be pages that have exit/entry hits, but don't yet
+ * have a record in the table (or the record was truncated).
+ *
+ * @return Piwik_DataTable_Row
+ */
+ static private function getDefaultRow()
+ {
+ static $row = false;
+ if ($row === false) {
+ // This row is used in the case where an action is know as an exit_action
+ // but this action was not properly recorded when it was hit in the first place
+ // so we add this fake row information to make sure there is a nb_hits, etc. column for every action
+ $row = new Piwik_DataTable_Row(array(
+ Piwik_DataTable_Row::COLUMNS => array(
+ Piwik_Archive::INDEX_NB_VISITS => 1,
+ Piwik_Archive::INDEX_NB_UNIQ_VISITORS => 1,
+ Piwik_Archive::INDEX_PAGE_NB_HITS => 1,
+ )));
+ }
+ return $row;
+ }
+
+ /**
+ * Given a page name and type, builds a recursive datatable where
+ * each level of the tree is a category, based on the page name split by a delimiter (slash / by default)
+ *
+ * @param string $actionName
+ * @param int $actionType
+ * @param int $urlPrefix
+ * @param array $actionsTablesByType
+ * @return Piwik_DataTable
+ */
+ protected static function getActionRow($actionName, $actionType, $urlPrefix = null, &$actionsTablesByType)
+ {
+ // we work on the root table of the given TYPE (either ACTION_URL or DOWNLOAD or OUTLINK etc.)
+ /* @var Piwik_DataTable $currentTable */
+ $currentTable =& $actionsTablesByType[$actionType];
+
+ // check for ranking query cut-off
+ if ($actionName == Piwik_DataTable::LABEL_SUMMARY_ROW) {
+ $summaryRow = $currentTable->getRowFromId(Piwik_DataTable::ID_SUMMARY_ROW);
+ if ($summaryRow === false) {
+ $summaryRow = $currentTable->addSummaryRow(self::createSummaryRow());
+ }
+ return $summaryRow;
+ }
+
+ // go to the level of the subcategory
+ $actionExplodedNames = self::getActionExplodedNames($actionName, $actionType, $urlPrefix);
+ list($row, $level) = $currentTable->walkPath(
+ $actionExplodedNames, self::getDefaultRowColumns(), self::$maximumRowsInSubDataTable);
+
+ return $row;
+ }
+
+ /**
+ * Explodes action name into an array of elements.
+ *
+ * NOTE: before calling this function make sure Piwik_Actions_ArchivingHelper::reloadConfig(); is called
+ *
+ * for downloads:
+ * we explode link http://piwik.org/some/path/piwik.zip into an array( 'piwik.org', '/some/path/piwik.zip' );
+ *
+ * for outlinks:
+ * we explode link http://dev.piwik.org/some/path into an array( 'dev.piwik.org', '/some/path' );
+ *
+ * for action urls:
+ * we explode link http://piwik.org/some/path into an array( 'some', 'path' );
+ *
+ * for action names:
+ * we explode name 'Piwik / Category 1 / Category 2' into an array('Piwik', 'Category 1', 'Category 2');
+ *
+ * @param string action name
+ * @param int action type
+ * @param int url prefix (only used for TYPE_ACTION_URL)
+ * @return array of exploded elements from $name
+ */
+ static public function getActionExplodedNames($name, $type, $urlPrefix = null)
+ {
+ // Site Search does not split Search keywords
+ if ($type == Piwik_Tracker_Action::TYPE_SITE_SEARCH) {
+ return array($name);
+ }
+
+ $matches = array();
+ $isUrl = false;
+ $name = str_replace("\n", "", $name);
+
+ $urlRegexAfterDomain = '([^/]+)[/]?([^#]*)[#]?(.*)';
+ if ($urlPrefix === null) {
+ // match url with protocol (used for outlinks / downloads)
+ $urlRegex = '@^http[s]?://' . $urlRegexAfterDomain . '$@i';
+ } else {
+ // the name is a url that does not contain protocol and www anymore
+ // we know that normalization has been done on db level because $urlPrefix is set
+ $urlRegex = '@^' . $urlRegexAfterDomain . '$@i';
+ }
+
+ preg_match($urlRegex, $name, $matches);
+ if (count($matches)) {
+ $isUrl = true;
+ $urlHost = $matches[1];
+ $urlPath = $matches[2];
+ $urlFragment = $matches[3];
+ }
+
+ if ($type == Piwik_Tracker_Action::TYPE_DOWNLOAD
+ || $type == Piwik_Tracker_Action::TYPE_OUTLINK
+ ) {
+ if ($isUrl) {
+ return array(trim($urlHost), '/' . trim($urlPath));
+ }
+ }
+
+ if ($isUrl) {
+ $name = $urlPath;
+
+ if ($name === '' || substr($name, -1) == '/') {
+ $name .= self::$defaultActionName;
+ }
+ }
+
+ if ($type == Piwik_Tracker_Action::TYPE_ACTION_NAME) {
+ $categoryDelimiter = self::$actionTitleCategoryDelimiter;
+ } else {
+ $categoryDelimiter = self::$actionUrlCategoryDelimiter;
+ }
+
+
+ if ($isUrl) {
+ $urlFragment = Piwik_Tracker_Action::processUrlFragment($urlFragment);
+ if (!empty($urlFragment)) {
+ $name .= '#' . $urlFragment;
+ }
+ }
+
+ if (empty($categoryDelimiter)) {
+ return array(trim($name));
+ }
+
+ $split = explode($categoryDelimiter, $name, self::getSubCategoryLevelLimit());
+
+ // trim every category and remove empty categories
+ $split = array_map('trim', $split);
+ $split = array_filter($split, 'strlen');
+
+ // forces array key to start at 0
+ $split = array_values($split);
+
+ if (empty($split)) {
+ $defaultName = self::getUnknownActionName($type);
+ return array(trim($defaultName));
+ }
+
+ $lastPageName = end($split);
+ // we are careful to prefix the page URL / name with some value
+ // so that if a page has the same name as a category
+ // we don't merge both entries
+ if ($type != Piwik_Tracker_Action::TYPE_ACTION_NAME) {
+ $lastPageName = '/' . $lastPageName;
+ } else {
+ $lastPageName = ' ' . $lastPageName;
+ }
+ $split[count($split) - 1] = $lastPageName;
+ return array_values($split);
+ }
+
+ /**
+ * Gets the key for the cache of action rows from an action ID and type.
+ *
+ * @param int $idAction
+ * @param int $actionType
+ * @return string|int
+ */
+ private static function getCachedActionRowKey($idAction, $actionType)
+ {
+ return $idAction == Piwik_DataTable::LABEL_SUMMARY_ROW
+ ? $actionType . '_others'
+ : $idAction;
+ }
+
+ /**
+ * Returns the configured sub-category level limit.
+ *
+ * @return int
+ */
+ public static function getSubCategoryLevelLimit()
+ {
+ return Piwik_Config::getInstance()->General['action_category_level_limit'];
+ }
+
+ /**
+ * Returns default label for the action type
+ *
+ * @param $type
+ * @return string
+ */
+ static public function getUnknownActionName($type)
+ {
+ if (empty(self::$defaultActionNameWhenNotDefined)) {
+ self::$defaultActionNameWhenNotDefined = Piwik_Translate('General_NotDefined', Piwik_Translate('Actions_ColumnPageName'));
+ self::$defaultActionUrlWhenNotDefined = Piwik_Translate('General_NotDefined', Piwik_Translate('Actions_ColumnPageURL'));
+ }
+ if ($type == Piwik_Tracker_Action::TYPE_ACTION_NAME) {
+ return self::$defaultActionNameWhenNotDefined;
+ }
+ return self::$defaultActionUrlWhenNotDefined;
+ }
+
+ /**
+ * Static cache to store Rows during processing
+ */
+ static protected $cacheParsedAction = array();
+
+ public static function clearActionsCache()
+ {
+ self::$cacheParsedAction = array();
+ }
+
+ /**
+ * Get cached action row by id & type. If $idAction is set to -1, the 'Others' row
+ * for the specific action type will be returned.
+ *
+ * @param int $idAction
+ * @param int $actionType
+ * @return Piwik_DataTable_Row|false
+ */
+ private static function getCachedActionRow($idAction, $actionType)
+ {
+ $cacheLabel = self::getCachedActionRowKey($idAction, $actionType);
+
+ if (!isset(self::$cacheParsedAction[$cacheLabel])) {
+ // This can happen when
+ // - We select an entry page ID that was only seen yesterday, so wasn't selected in the first query
+ // - We count time spent on a page, when this page was only seen yesterday
+ return false;
+ }
+
+ return self::$cacheParsedAction[$cacheLabel];
+ }
+
+ /**
+ * Set cached action row for an id & type.
+ *
+ * @param int $idAction
+ * @param int $actionType
+ * @param Piwik_DataTable_Row
+ */
+ private static function setCachedActionRow($idAction, $actionType, $actionRow)
+ {
+ $cacheLabel = self::getCachedActionRowKey($idAction, $actionType);
+ self::$cacheParsedAction[$cacheLabel] = $actionRow;
+ }
+
+ /**
+ * Returns the default columns for a row in an Actions DataTable.
+ *
+ * @return array
+ */
+ private static function getDefaultRowColumns()
+ {
+ return array(Piwik_Archive::INDEX_NB_VISITS => 0,
+ Piwik_Archive::INDEX_NB_UNIQ_VISITORS => 0,
+ Piwik_Archive::INDEX_PAGE_NB_HITS => 0,
+ Piwik_Archive::INDEX_PAGE_SUM_TIME_SPENT => 0);
+ }
+
+ /**
+ * Creates a summary row for an Actions DataTable.
+ *
+ * @return Piwik_DataTable_Row
+ */
+ private static function createSummaryRow()
+ {
+ return new Piwik_DataTable_Row(array(
+ Piwik_DataTable_Row::COLUMNS =>
+ array('label' => Piwik_DataTable::LABEL_SUMMARY_ROW) + self::getDefaultRowColumns()
+ ));
+ }
}