fetch()) {
if (empty($row['idaction'])) {
$row['type'] = ($fieldQueried == 'idaction_url' ? Action::TYPE_PAGE_URL : Action::TYPE_PAGE_TITLE);
// This will be replaced with 'X not defined' later
$row['name'] = '';
// Yes, this is kind of a hack, so we don't mix 'page url not defined' with 'page title not defined' etc.
$row['idaction'] = -$row['type'];
}
if ($row['type'] != Action::TYPE_SITE_SEARCH) {
unset($row[Metrics::INDEX_SITE_SEARCH_HAS_NO_RESULT]);
}
if ($row['type'] == Action::TYPE_CONTENT) {
continue;
}
// This will appear as in the API, which is actually very important to keep
// eg. When there's at least one row in a report that does not have a URL, not having this would break HTML/PDF reports.
$url = '';
if ($row['type'] == Action::TYPE_SITE_SEARCH
|| $row['type'] == Action::TYPE_PAGE_TITLE
) {
$url = null;
} elseif (!empty($row['name'])
&& $row['name'] != DataTable::LABEL_SUMMARY_ROW) {
$url = PageUrl::reconstructNormalizedUrl((string)$row['name'], $row['url_prefix']);
}
if (isset($row['name'])
&& isset($row['type'])
) {
$actionName = $row['name'];
$actionType = $row['type'];
$urlPrefix = $row['url_prefix'];
$idaction = $row['idaction'];
// in some unknown case, the type field is NULL, as reported in #1082 - we ignore this page view
if (empty($actionType)) {
if ($idaction != DataTable::LABEL_SUMMARY_ROW) {
self::setCachedActionRow($idaction, $actionType, false);
}
continue;
}
$actionRow = self::getActionRow($actionName, $actionType, $urlPrefix, $actionsTablesByType);
self::setCachedActionRow($idaction, $actionType, $actionRow);
} else {
$actionRow = self::getCachedActionRow($row['idaction'], $row['type']);
// Action processed as "to skip" for some reasons
if ($actionRow === false) {
continue;
}
}
if (is_null($actionRow)) {
continue;
}
// Here we do ensure that, the Metadata URL set for a given row, is the one from the Pageview with the most hits.
// This is to ensure that when, different URLs are loaded with the same page name.
// For example http://piwik.org and http://id.piwik.org are reported in Piwik > Actions > Pages with /index
// But, we must make sure http://piwik.org is used to link & for transitions
// Note: this code is partly duplicated from Row->sumRowMetadata()
if (!is_null($url)
&& !$actionRow->isSummaryRow()
) {
if (($existingUrl = $actionRow->getMetadata('url')) !== false) {
if (!empty($row[Metrics::INDEX_PAGE_NB_HITS])
&& $row[Metrics::INDEX_PAGE_NB_HITS] > $actionRow->maxVisitsSummed
) {
$actionRow->setMetadata('url', $url);
$actionRow->maxVisitsSummed = $row[Metrics::INDEX_PAGE_NB_HITS];
}
} else {
$actionRow->setMetadata('url', $url);
$actionRow->maxVisitsSummed = !empty($row[Metrics::INDEX_PAGE_NB_HITS]) ? $row[Metrics::INDEX_PAGE_NB_HITS] : 0;
}
}
if ($row['type'] != Action::TYPE_PAGE_URL
&& $row['type'] != Action::TYPE_PAGE_TITLE
) {
// only keep performance metrics when they're used (i.e. for URLs and page titles)
if (array_key_exists(Metrics::INDEX_PAGE_SUM_TIME_GENERATION, $row)) {
unset($row[Metrics::INDEX_PAGE_SUM_TIME_GENERATION]);
}
if (array_key_exists(Metrics::INDEX_PAGE_NB_HITS_WITH_TIME_GENERATION, $row)) {
unset($row[Metrics::INDEX_PAGE_NB_HITS_WITH_TIME_GENERATION]);
}
if (array_key_exists(Metrics::INDEX_PAGE_MIN_TIME_GENERATION, $row)) {
unset($row[Metrics::INDEX_PAGE_MIN_TIME_GENERATION]);
}
if (array_key_exists(Metrics::INDEX_PAGE_MAX_TIME_GENERATION, $row)) {
unset($row[Metrics::INDEX_PAGE_MAX_TIME_GENERATION]);
}
}
unset($row['name']);
unset($row['type']);
unset($row['idaction']);
unset($row['url_prefix']);
foreach ($row as $name => $value) {
// in some edge cases, we have twice the same action name with 2 different idaction
// - this happens when 2 visitors visit the same new page at the same time, and 2 actions get recorded for the same name
// - this could also happen when 2 URLs end up having the same label (eg. 2 subdomains get aggregated to the "/index" page name)
if (($alreadyValue = $actionRow->getColumn($name)) !== false) {
$newValue = self::getColumnValuesMerged($name, $alreadyValue, $value);
$actionRow->setColumn($name, $newValue);
} else {
$actionRow->addColumn($name, $value);
}
}
// if the exit_action was not recorded properly in the log_link_visit_action
// there would be an error message when getting the nb_hits column
// we must fake the record and add the columns
if ($actionRow->getColumn(Metrics::INDEX_PAGE_NB_HITS) === false) {
// to test this code: delete the entries in log_link_action_visit for
// a given exit_idaction_url
foreach (self::getDefaultRow()->getColumns() as $name => $value) {
$actionRow->addColumn($name, $value);
}
}
$rowsProcessed++;
}
// just to make sure php copies the last $actionRow in the $parentTable array
$actionRow =& $actionsTablesByType;
return $rowsProcessed;
}
public static function removeEmptyColumns($dataTable)
{
// Delete all columns that have a value of zero
$dataTable->filter('ColumnDelete', array(
$columnsToRemove = array(Metrics::INDEX_PAGE_IS_FOLLOWING_SITE_SEARCH_NB_HITS),
$columnsToKeep = array(),
$deleteIfZeroOnly = true
));
}
/**
* For rows which have subtables (eg. directories with sub pages),
* deletes columns which don't make sense when all values of sub pages are summed.
*
* @param $dataTable DataTable
*/
public static function deleteInvalidSummedColumnsFromDataTable($dataTable)
{
foreach ($dataTable->getRows() as $id => $row) {
if (($idSubtable = $row->getIdSubDataTable()) !== null
|| $id === DataTable::ID_SUMMARY_ROW
) {
if ($idSubtable !== null) {
$subtable = Manager::getInstance()->getTable($idSubtable);
self::deleteInvalidSummedColumnsFromDataTable($subtable);
}
if ($row instanceof DataTableSummaryRow) {
$row->recalculate();
}
foreach (Archiver::$columnsToDeleteAfterAggregation as $name) {
$row->deleteColumn($name);
}
}
}
// And this as well
ArchivingHelper::removeEmptyColumns($dataTable);
}
/**
* Returns the limit to use with RankingQuery for this plugin.
*
* @return int
*/
public static function getRankingQueryLimit()
{
$configGeneral = Config::getInstance()->General;
$configLimit = $configGeneral['archiving_ranking_query_row_limit'];
$limit = $configLimit == 0 ? 0 : max(
$configLimit,
$configGeneral['datatable_archiving_maximum_rows_actions'],
$configGeneral['datatable_archiving_maximum_rows_subtable_actions']
);
// FIXME: This is a quick fix for #3482. The actual cause of the bug is that
// the site search & performance metrics additions to
// ArchivingHelper::updateActionsTableWithRowQuery expect every
// row to have 'type' data, but not all of the SQL queries that are run w/o
// ranking query join on the log_action table and thus do not select the
// log_action.type column.
//
// NOTES: Archiving logic can be generalized as follows:
// 0) Do SQL query over log_link_visit_action & join on log_action to select
// some metrics (like visits, hits, etc.)
// 1) For each row, cache the action row & metrics. (This is done by
// updateActionsTableWithRowQuery for result set rows that have
// name & type columns.)
// 2) Do other SQL queries for metrics we can't put in the first query (like
// entry visits, exit vists, etc.) w/o joining log_action.
// 3) For each row, find the cached row by idaction & add the new metrics to
// it. (This is done by updateActionsTableWithRowQuery for result set rows
// that DO NOT have name & type columns.)
//
// The site search & performance metrics additions expect a 'type' all the time
// which breaks the original pre-rankingquery logic. Ranking query requires a
// join, so the bug is only seen when ranking query is disabled.
if ($limit === 0) {
$limit = 100000;
}
return $limit;
}
/**
* @param $columnName
* @param $alreadyValue
* @param $value
* @return mixed
*/
private static function getColumnValuesMerged($columnName, $alreadyValue, $value)
{
if ($columnName == Metrics::INDEX_PAGE_MIN_TIME_GENERATION) {
if (empty($alreadyValue)) {
$newValue = $value;
} else if (empty($value)) {
$newValue = $alreadyValue;
} else {
$newValue = min($alreadyValue, $value);
}
return $newValue;
}
if ($columnName == Metrics::INDEX_PAGE_MAX_TIME_GENERATION) {
$newValue = max($alreadyValue, $value);
return $newValue;
}
$newValue = $alreadyValue + $value;
return $newValue;
}
public static $maximumRowsInDataTableLevelZero;
public static $maximumRowsInSubDataTable;
public static $columnToSortByBeforeTruncation;
protected static $actionUrlCategoryDelimiter = null;
protected static $actionTitleCategoryDelimiter = null;
protected static $defaultActionName = null;
protected static $defaultActionNameWhenNotDefined = null;
protected static $defaultActionUrlWhenNotDefined = null;
public static function reloadConfig()
{
// for BC, we read the old style delimiter first (see #1067)Row
$actionDelimiter = @Config::getInstance()->General['action_category_delimiter'];
if (empty($actionDelimiter)) {
self::$actionUrlCategoryDelimiter = Config::getInstance()->General['action_url_category_delimiter'];
self::$actionTitleCategoryDelimiter = Config::getInstance()->General['action_title_category_delimiter'];
} else {
self::$actionUrlCategoryDelimiter = self::$actionTitleCategoryDelimiter = $actionDelimiter;
}
self::$defaultActionName = Config::getInstance()->General['action_default_name'];
self::$columnToSortByBeforeTruncation = Metrics::INDEX_NB_VISITS;
self::$maximumRowsInDataTableLevelZero = Config::getInstance()->General['datatable_archiving_maximum_rows_actions'];
self::$maximumRowsInSubDataTable = Config::getInstance()->General['datatable_archiving_maximum_rows_subtable_actions'];
DataTable::setMaximumDepthLevelAllowedAtLeast(self::getSubCategoryLevelLimit() + 1);
}
/**
* The default row is used when archiving, if data is inconsistent in the DB,
* there could be pages that have exit/entry hits, but don't yet
* have a record in the table (or the record was truncated).
*
* @return Row
*/
private static function getDefaultRow()
{
static $row = false;
if ($row === false) {
// This row is used in the case where an action is know as an exit_action
// but this action was not properly recorded when it was hit in the first place
// so we add this fake row information to make sure there is a nb_hits, etc. column for every action
$row = new Row(array(
Row::COLUMNS => array(
Metrics::INDEX_NB_VISITS => 1,
Metrics::INDEX_NB_UNIQ_VISITORS => 1,
Metrics::INDEX_PAGE_NB_HITS => 1,
)));
}
return $row;
}
/**
* Given a page name and type, builds a recursive datatable where
* each level of the tree is a category, based on the page name split by a delimiter (slash / by default)
*
* @param string $actionName
* @param int $actionType
* @param int $urlPrefix
* @param array $actionsTablesByType
* @return DataTable
*/
private static function getActionRow($actionName, $actionType, $urlPrefix = null, &$actionsTablesByType)
{
// we work on the root table of the given TYPE (either ACTION_URL or DOWNLOAD or OUTLINK etc.)
/* @var DataTable $currentTable */
$currentTable =& $actionsTablesByType[$actionType];
if(is_null($currentTable)) {
throw new \Exception("Action table for type '$actionType' was not found during Actions archiving.");
}
// check for ranking query cut-off
if ($actionName == DataTable::LABEL_SUMMARY_ROW) {
$summaryRow = $currentTable->getRowFromId(DataTable::ID_SUMMARY_ROW);
if ($summaryRow === false) {
$summaryRow = $currentTable->addSummaryRow(self::createSummaryRow());
}
return $summaryRow;
}
// go to the level of the subcategory
$actionExplodedNames = self::getActionExplodedNames($actionName, $actionType, $urlPrefix);
list($row, $level) = $currentTable->walkPath(
$actionExplodedNames, self::getDefaultRowColumns(), self::$maximumRowsInSubDataTable);
return $row;
}
/**
* Returns the configured sub-category level limit.
*
* @return int
*/
public static function getSubCategoryLevelLimit()
{
return Config::getInstance()->General['action_category_level_limit'];
}
/**
* Returns default label for the action type
*
* @param $type
* @return string
*/
public static function getUnknownActionName($type)
{
if (empty(self::$defaultActionNameWhenNotDefined)) {
self::$defaultActionNameWhenNotDefined = Piwik::translate('General_NotDefined', Piwik::translate('Actions_ColumnPageName'));
self::$defaultActionUrlWhenNotDefined = Piwik::translate('General_NotDefined', Piwik::translate('Actions_ColumnPageURL'));
}
if ($type == Action::TYPE_PAGE_TITLE) {
return self::$defaultActionNameWhenNotDefined;
}
return self::$defaultActionUrlWhenNotDefined;
}
/**
* Explodes action name into an array of elements.
*
* NOTE: before calling this function make sure ArchivingHelper::reloadConfig(); is called
*
* for downloads:
* we explode link http://piwik.org/some/path/piwik.zip into an array( 'piwik.org', '/some/path/piwik.zip' );
*
* for outlinks:
* we explode link http://dev.piwik.org/some/path into an array( 'dev.piwik.org', '/some/path' );
*
* for action urls:
* we explode link http://piwik.org/some/path into an array( 'some', 'path' );
*
* for action names:
* we explode name 'Piwik / Category 1 / Category 2' into an array('Piwik', 'Category 1', 'Category 2');
*
* @param string $name action name
* @param int $type action type
* @param int $urlPrefix url prefix (only used for TYPE_PAGE_URL)
* @return array of exploded elements from $name
*/
public static function getActionExplodedNames($name, $type, $urlPrefix = null)
{
// Site Search does not split Search keywords
if ($type == Action::TYPE_SITE_SEARCH) {
return array($name);
}
$name = str_replace("\n", "", $name);
$name = self::parseNameFromPageUrl($name, $type, $urlPrefix);
// outlinks and downloads
if(is_array($name)) {
return $name;
}
$split = self::splitNameByDelimiter($name, $type);
if (empty($split)) {
$defaultName = self::getUnknownActionName($type);
return array(trim($defaultName));
}
$lastPageName = end($split);
// we are careful to prefix the page URL / name with some value
// so that if a page has the same name as a category
// we don't merge both entries
if ($type != Action::TYPE_PAGE_TITLE) {
$lastPageName = '/' . $lastPageName;
} else {
$lastPageName = ' ' . $lastPageName;
}
$split[count($split) - 1] = $lastPageName;
return array_values($split);
}
/**
* Gets the key for the cache of action rows from an action ID and type.
*
* @param int $idAction
* @param int $actionType
* @return string|int
*/
private static function getCachedActionRowKey($idAction, $actionType)
{
return $idAction == DataTable::LABEL_SUMMARY_ROW
? $actionType . '_others'
: $idAction;
}
/**
* Static cache to store Rows during processing
*/
protected static $cacheParsedAction = array();
public static function clearActionsCache()
{
self::$cacheParsedAction = array();
}
/**
* Get cached action row by id & type. If $idAction is set to -1, the 'Others' row
* for the specific action type will be returned.
*
* @param int $idAction
* @param int $actionType
* @return Row|false
*/
private static function getCachedActionRow($idAction, $actionType)
{
$cacheLabel = self::getCachedActionRowKey($idAction, $actionType);
if (!isset(self::$cacheParsedAction[$cacheLabel])) {
// This can happen when
// - We select an entry page ID that was only seen yesterday, so wasn't selected in the first query
// - We count time spent on a page, when this page was only seen yesterday
return false;
}
return self::$cacheParsedAction[$cacheLabel];
}
/**
* Set cached action row for an id & type.
*
* @param int $idAction
* @param int $actionType
* @param \DataTable\Row
*/
private static function setCachedActionRow($idAction, $actionType, $actionRow)
{
$cacheLabel = self::getCachedActionRowKey($idAction, $actionType);
self::$cacheParsedAction[$cacheLabel] = $actionRow;
}
/**
* Returns the default columns for a row in an Actions DataTable.
*
* @return array
*/
private static function getDefaultRowColumns()
{
return array(Metrics::INDEX_NB_VISITS => 0,
Metrics::INDEX_NB_UNIQ_VISITORS => 0,
Metrics::INDEX_PAGE_NB_HITS => 0,
Metrics::INDEX_PAGE_SUM_TIME_SPENT => 0);
}
/**
* Creates a summary row for an Actions DataTable.
*
* @return Row
*/
private static function createSummaryRow()
{
return new Row(array(
Row::COLUMNS =>
array('label' => DataTable::LABEL_SUMMARY_ROW) + self::getDefaultRowColumns()
));
}
private static function splitNameByDelimiter($name, $type)
{
if(is_array($name)) {
return $name;
}
if ($type == Action::TYPE_PAGE_TITLE) {
$categoryDelimiter = self::$actionTitleCategoryDelimiter;
} else {
$categoryDelimiter = self::$actionUrlCategoryDelimiter;
}
if (empty($categoryDelimiter)) {
return array(trim($name));
}
$split = explode($categoryDelimiter, $name, self::getSubCategoryLevelLimit());
// trim every category and remove empty categories
$split = array_map('trim', $split);
$split = array_filter($split, 'strlen');
// forces array key to start at 0
$split = array_values($split);
return $split;
}
private static function parseNameFromPageUrl($name, $type, $urlPrefix)
{
$urlRegexAfterDomain = '([^/]+)[/]?([^#]*)[#]?(.*)';
if ($urlPrefix === null) {
// match url with protocol (used for outlinks / downloads)
$urlRegex = '@^http[s]?://' . $urlRegexAfterDomain . '$@i';
} else {
// the name is a url that does not contain protocol and www anymore
// we know that normalization has been done on db level because $urlPrefix is set
$urlRegex = '@^' . $urlRegexAfterDomain . '$@i';
}
$matches = array();
preg_match($urlRegex, $name, $matches);
if (!count($matches)) {
return $name;
}
$urlHost = $matches[1];
$urlPath = $matches[2];
$urlFragment = $matches[3];
if (in_array($type, array(Action::TYPE_DOWNLOAD, Action::TYPE_OUTLINK))) {
return array(trim($urlHost), '/' . trim($urlPath));
}
$name = $urlPath;
if ($name === '' || substr($name, -1) == '/') {
$name .= self::$defaultActionName;
}
$urlFragment = PageUrl::processUrlFragment($urlFragment);
if (!empty($urlFragment)) {
$name .= '#' . $urlFragment;
}
return $name;
}
}