diff options
author | BeezyT <timo@ezdesign.de> | 2012-08-16 17:59:58 +0400 |
---|---|---|
committer | BeezyT <timo@ezdesign.de> | 2012-08-16 17:59:58 +0400 |
commit | 9c53e8d56240c8ad7dc953664b216cd117b73ead (patch) | |
tree | 186ad2cfd843d08d0293d2774e4749809a76c6a1 /plugins | |
parent | 54f0489e51c1ec149f2f02880560711f85b0233a (diff) |
refs #2976 URL normalization: store the protocol and www in the url_prefix column of log_action. Treat pages that differ only by protocol or by the presence of www as the same action. Includes a major DB transformation and tests.
git-svn-id: http://dev.piwik.org/svn/trunk@6792 59fd770c-687e-43c8-a1e3-f5a4ff64c105
Diffstat (limited to 'plugins')
-rw-r--r-- | plugins/Actions/Actions.php | 40 | ||||
-rw-r--r-- | plugins/Actions/tests/Actions.test.php | 28 | ||||
-rw-r--r-- | plugins/Live/API.php | 7 |
3 files changed, 60 insertions, 15 deletions
diff --git a/plugins/Actions/Actions.php b/plugins/Actions/Actions.php index a6e7b50806..9672070d6e 100644 --- a/plugins/Actions/Actions.php +++ b/plugins/Actions/Actions.php @@ -134,6 +134,12 @@ class Piwik_Actions extends Piwik_Plugin ? Piwik_Tracker_Action::TYPE_ACTION_URL : Piwik_Tracker_Action::TYPE_ACTION_NAME; + if ($actionType == Piwik_Tracker_Action::TYPE_ACTION_URL) + { + // for urls trim protocol and www because it is not recorded in the db + $string = preg_replace('@^http[s]?://(www\.)?@i', '', $string); + } + // exact matches work by returning the id directly if ($matchType == Piwik_SegmentExpression::MATCH_EQUAL || $matchType == Piwik_SegmentExpression::MATCH_NOT_EQUAL) @@ -514,6 +520,7 @@ class Piwik_Actions extends Piwik_Plugin $select = "log_action.name, log_action.type, log_action.idaction, + log_action.url_prefix, count(distinct log_link_visit_action.idvisit) as `". Piwik_Archive::INDEX_NB_VISITS ."`, count(distinct log_link_visit_action.idvisitor) as `". Piwik_Archive::INDEX_NB_UNIQ_VISITORS ."`, count(*) as `". 
Piwik_Archive::INDEX_PAGE_NB_HITS ."`"; @@ -721,15 +728,29 @@ class Piwik_Actions extends Piwik_Plugin * * @param string action name * @param int action type + * @param int url prefix (only used for TYPE_ACTION_URL) * @return array of exploded elements from $name */ - static public function getActionExplodedNames($name, $type) + static public function getActionExplodedNames($name, $type, $urlPrefix=null) { $matches = array(); $isUrl = false; $name = str_replace("\n", "", $name); - preg_match('@^http[s]?://([^/]+)[/]?([^#]*)[#]?(.*)$@i', $name, $matches); - + + $urlRegexAfterDomain = '([^/]+)[/]?([^#]*)[#]?(.*)'; + if ($urlPrefix === null) + { + // match url with protocol (used for outlinks / downloads) + $urlRegex = '@^http[s]?://'.$urlRegexAfterDomain.'$@i'; + } + else + { + // the name is a url that does not contain protocol and www anymore + // we know that normalization has been done on db level because $urlPrefix is set + $urlRegex = '@^'.$urlRegexAfterDomain.'$@i'; + } + + preg_match($urlRegex, $name, $matches); if( count($matches) ) { $isUrl = true; @@ -843,6 +864,8 @@ class Piwik_Actions extends Piwik_Plugin { $actionName = $row['name']; $actionType = $row['type']; + $urlPrefix = $row['url_prefix']; + // in some unknown case, the type field is NULL, as reported in #1082 - we ignore this page view if(empty($actionType)) { @@ -850,7 +873,7 @@ class Piwik_Actions extends Piwik_Plugin continue; } - $currentTable = $this->parseActionNameCategoriesInDataTable($actionName, $actionType); + $currentTable = $this->parseActionNameCategoriesInDataTable($actionName, $actionType, $urlPrefix); self::$cacheParsedAction[$row['idaction']] = $currentTable; } @@ -874,6 +897,7 @@ class Piwik_Actions extends Piwik_Plugin unset($row['name']); unset($row['type']); unset($row['idaction']); + unset($row['url_prefix']); foreach($row as $name => $value) { // in some edge cases, we have twice the same action name with 2 different idaction @@ -916,15 +940,16 @@ class Piwik_Actions 
extends Piwik_Plugin * * @param string $actionName * @param int $actionType + * @param int $urlPrefix * @return Piwik_DataTable */ - protected function parseActionNameCategoriesInDataTable($actionName, $actionType) + protected function parseActionNameCategoriesInDataTable($actionName, $actionType, $urlPrefix=null) { // we work on the root table of the given TYPE (either ACTION_URL or DOWNLOAD or OUTLINK etc.) $currentTable =& $this->actionsTablesByType[$actionType]; // go to the level of the subcategory - $actionExplodedNames = $this->getActionExplodedNames($actionName, $actionType); + $actionExplodedNames = $this->getActionExplodedNames($actionName, $actionType, $urlPrefix); $end = count($actionExplodedNames)-1; for($level = 0 ; $level < $end; $level++) { @@ -957,7 +982,8 @@ class Piwik_Actions extends Piwik_Plugin { $currentTable = new Piwik_DataTable_Row(array( Piwik_DataTable_Row::COLUMNS => $defaultColumnsNewRow, - Piwik_DataTable_Row::METADATA => array('url' => (string)$actionName), + Piwik_DataTable_Row::METADATA => array('url' => + Piwik_Tracker_Action::reconstructNormalizedUrl((string)$actionName, $urlPrefix)), )); } } diff --git a/plugins/Actions/tests/Actions.test.php b/plugins/Actions/tests/Actions.test.php index 8deefaadeb..43919f4a69 100644 --- a/plugins/Actions/tests/Actions.test.php +++ b/plugins/Actions/tests/Actions.test.php @@ -30,15 +30,31 @@ class Test_Piwik_Actions extends UnitTestCase $tests = array( array( - 'params' => array( 'name' => 'http://example.org/', 'type' => Piwik_Tracker_Action::TYPE_ACTION_URL), + 'params' => array( 'name' => 'http://example.org/', 'type' => Piwik_Tracker_Action::TYPE_ACTION_URL, 'urlPrefix' => null ), 'expected' => array('/index' ), ), array( - 'params' => array( 'name' => 'http://example.org/path/', 'type' => Piwik_Tracker_Action::TYPE_ACTION_URL), + 'params' => array( 'name' => 'example.org/', 'type' => Piwik_Tracker_Action::TYPE_ACTION_URL, 'urlPrefix' => 1 ), + 'expected' => array('/index' ), + ), + array( 
+ 'params' => array( 'name' => 'example.org/', 'type' => Piwik_Tracker_Action::TYPE_ACTION_URL, 'urlPrefix' => 2 ), + 'expected' => array('/index' ), + ), + array( + 'params' => array( 'name' => 'example.org/', 'type' => Piwik_Tracker_Action::TYPE_ACTION_URL, 'urlPrefix' => 3 ), + 'expected' => array('/index' ), + ), + array( + 'params' => array( 'name' => 'example.org/', 'type' => Piwik_Tracker_Action::TYPE_ACTION_URL, 'urlPrefix' => 4 ), + 'expected' => array('/index' ), + ), + array( + 'params' => array( 'name' => 'example.org/path/', 'type' => Piwik_Tracker_Action::TYPE_ACTION_URL, 'urlPrefix' => 4 ), 'expected' => array( 'path', '/index' ), ), array( - 'params' => array( 'name' => 'http://example.org/test/path', 'type' => Piwik_Tracker_Action::TYPE_ACTION_URL), + 'params' => array( 'name' => 'example.org/test/path', 'type' => Piwik_Tracker_Action::TYPE_ACTION_URL, 'urlPrefix' => 1 ), 'expected' => array( 'test', '/path' ), ), array( @@ -90,15 +106,15 @@ class Test_Piwik_Actions extends UnitTestCase foreach($tests as $test) { $params = $test['params']; $expected = $test['expected']; - $processed = $action->public_getActionExplodedNames($params['name'],$params['type']); + $processed = $action->public_getActionExplodedNames($params['name'],$params['type'],isset($params['urlPrefix'])?$params['urlPrefix']:null); $this->assertEqual($processed, $expected, "Processed: ".var_export($processed, true) . " | Expected: ". 
var_export($expected, true)); } } } class Test_Piwik_Actions_getActionExplodedNames extends Piwik_Actions { - public function public_getActionExplodedNames($name, $type) + public function public_getActionExplodedNames($name, $type, $urlPrefix) { - return self::getActionExplodedNames($name, $type); + return self::getActionExplodedNames($name, $type, $urlPrefix); } } diff --git a/plugins/Live/API.php b/plugins/Live/API.php index 800ae963aa..753b4d9aff 100644 --- a/plugins/Live/API.php +++ b/plugins/Live/API.php @@ -184,8 +184,9 @@ class Piwik_Live_API // eg. Downloads, Outlinks. For these, idaction_name is set to 0 $sql = " SELECT - log_action.type as type, + log_action.type AS type, log_action.name AS url, + log_action.url_prefix, log_action_title.name AS pageTitle, log_action.idaction AS pageIdAction, log_link_visit_action.idlink_va AS pageId, @@ -221,7 +222,9 @@ class Piwik_Live_API { $actionDetail['customVariables'] = $customVariablesPage; } - + // reconstruct url from prefix + $actionDetail['url'] = Piwik_Tracker_Action::reconstructNormalizedUrl($actionDetail['url'], $actionDetail['url_prefix']); + unset($actionDetail['url_prefix']); // set the time spent for this action (which is the timeSpentRef of the next action) if (isset($actionDetails[$actionIdx + 1])) { |