Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/matomo-org/matomo.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: BeezyT <timo@ezdesign.de> 2012-08-16 17:59:58 +0400
committer: BeezyT <timo@ezdesign.de> 2012-08-16 17:59:58 +0400
commit: 9c53e8d56240c8ad7dc953664b216cd117b73ead (patch)
tree: 186ad2cfd843d08d0293d2774e4749809a76c6a1 /plugins
parent: 54f0489e51c1ec149f2f02880560711f85b0233a (diff)
refs #2976 URL normalization: store the protocol and "www" in the url_prefix column of log_action. Treat pages that differ only in protocol or in the presence of "www" as the same action. Includes a major DB transformation and tests.
git-svn-id: http://dev.piwik.org/svn/trunk@6792 59fd770c-687e-43c8-a1e3-f5a4ff64c105
Diffstat (limited to 'plugins')
-rw-r--r-- plugins/Actions/Actions.php 40
-rw-r--r-- plugins/Actions/tests/Actions.test.php 28
-rw-r--r-- plugins/Live/API.php 7
3 files changed, 60 insertions, 15 deletions
diff --git a/plugins/Actions/Actions.php b/plugins/Actions/Actions.php
index a6e7b50806..9672070d6e 100644
--- a/plugins/Actions/Actions.php
+++ b/plugins/Actions/Actions.php
@@ -134,6 +134,12 @@ class Piwik_Actions extends Piwik_Plugin
? Piwik_Tracker_Action::TYPE_ACTION_URL
: Piwik_Tracker_Action::TYPE_ACTION_NAME;
+ if ($actionType == Piwik_Tracker_Action::TYPE_ACTION_URL)
+ {
+ // for urls trim protocol and www because it is not recorded in the db
+ $string = preg_replace('@^http[s]?://(www\.)?@i', '', $string);
+ }
+
// exact matches work by returning the id directly
if ($matchType == Piwik_SegmentExpression::MATCH_EQUAL
|| $matchType == Piwik_SegmentExpression::MATCH_NOT_EQUAL)
@@ -514,6 +520,7 @@ class Piwik_Actions extends Piwik_Plugin
$select = "log_action.name,
log_action.type,
log_action.idaction,
+ log_action.url_prefix,
count(distinct log_link_visit_action.idvisit) as `". Piwik_Archive::INDEX_NB_VISITS ."`,
count(distinct log_link_visit_action.idvisitor) as `". Piwik_Archive::INDEX_NB_UNIQ_VISITORS ."`,
count(*) as `". Piwik_Archive::INDEX_PAGE_NB_HITS ."`";
@@ -721,15 +728,29 @@ class Piwik_Actions extends Piwik_Plugin
*
* @param string action name
* @param int action type
+ * @param int url prefix (only used for TYPE_ACTION_URL)
* @return array of exploded elements from $name
*/
- static public function getActionExplodedNames($name, $type)
+ static public function getActionExplodedNames($name, $type, $urlPrefix=null)
{
$matches = array();
$isUrl = false;
$name = str_replace("\n", "", $name);
- preg_match('@^http[s]?://([^/]+)[/]?([^#]*)[#]?(.*)$@i', $name, $matches);
-
+
+ $urlRegexAfterDomain = '([^/]+)[/]?([^#]*)[#]?(.*)';
+ if ($urlPrefix === null)
+ {
+ // match url with protocol (used for outlinks / downloads)
+ $urlRegex = '@^http[s]?://'.$urlRegexAfterDomain.'$@i';
+ }
+ else
+ {
+ // the name is a url that does not contain protocol and www anymore
+ // we know that normalization has been done on db level because $urlPrefix is set
+ $urlRegex = '@^'.$urlRegexAfterDomain.'$@i';
+ }
+
+ preg_match($urlRegex, $name, $matches);
if( count($matches) )
{
$isUrl = true;
@@ -843,6 +864,8 @@ class Piwik_Actions extends Piwik_Plugin
{
$actionName = $row['name'];
$actionType = $row['type'];
+ $urlPrefix = $row['url_prefix'];
+
// in some unknown case, the type field is NULL, as reported in #1082 - we ignore this page view
if(empty($actionType))
{
@@ -850,7 +873,7 @@ class Piwik_Actions extends Piwik_Plugin
continue;
}
- $currentTable = $this->parseActionNameCategoriesInDataTable($actionName, $actionType);
+ $currentTable = $this->parseActionNameCategoriesInDataTable($actionName, $actionType, $urlPrefix);
self::$cacheParsedAction[$row['idaction']] = $currentTable;
}
@@ -874,6 +897,7 @@ class Piwik_Actions extends Piwik_Plugin
unset($row['name']);
unset($row['type']);
unset($row['idaction']);
+ unset($row['url_prefix']);
foreach($row as $name => $value)
{
// in some edge cases, we have twice the same action name with 2 different idaction
@@ -916,15 +940,16 @@ class Piwik_Actions extends Piwik_Plugin
*
* @param string $actionName
* @param int $actionType
+ * @param int $urlPrefix
* @return Piwik_DataTable
*/
- protected function parseActionNameCategoriesInDataTable($actionName, $actionType)
+ protected function parseActionNameCategoriesInDataTable($actionName, $actionType, $urlPrefix=null)
{
// we work on the root table of the given TYPE (either ACTION_URL or DOWNLOAD or OUTLINK etc.)
$currentTable =& $this->actionsTablesByType[$actionType];
// go to the level of the subcategory
- $actionExplodedNames = $this->getActionExplodedNames($actionName, $actionType);
+ $actionExplodedNames = $this->getActionExplodedNames($actionName, $actionType, $urlPrefix);
$end = count($actionExplodedNames)-1;
for($level = 0 ; $level < $end; $level++)
{
@@ -957,7 +982,8 @@ class Piwik_Actions extends Piwik_Plugin
{
$currentTable = new Piwik_DataTable_Row(array(
Piwik_DataTable_Row::COLUMNS => $defaultColumnsNewRow,
- Piwik_DataTable_Row::METADATA => array('url' => (string)$actionName),
+ Piwik_DataTable_Row::METADATA => array('url' =>
+ Piwik_Tracker_Action::reconstructNormalizedUrl((string)$actionName, $urlPrefix)),
));
}
}
diff --git a/plugins/Actions/tests/Actions.test.php b/plugins/Actions/tests/Actions.test.php
index 8deefaadeb..43919f4a69 100644
--- a/plugins/Actions/tests/Actions.test.php
+++ b/plugins/Actions/tests/Actions.test.php
@@ -30,15 +30,31 @@ class Test_Piwik_Actions extends UnitTestCase
$tests = array(
array(
- 'params' => array( 'name' => 'http://example.org/', 'type' => Piwik_Tracker_Action::TYPE_ACTION_URL),
+ 'params' => array( 'name' => 'http://example.org/', 'type' => Piwik_Tracker_Action::TYPE_ACTION_URL, 'urlPrefix' => null ),
'expected' => array('/index' ),
),
array(
- 'params' => array( 'name' => 'http://example.org/path/', 'type' => Piwik_Tracker_Action::TYPE_ACTION_URL),
+ 'params' => array( 'name' => 'example.org/', 'type' => Piwik_Tracker_Action::TYPE_ACTION_URL, 'urlPrefix' => 1 ),
+ 'expected' => array('/index' ),
+ ),
+ array(
+ 'params' => array( 'name' => 'example.org/', 'type' => Piwik_Tracker_Action::TYPE_ACTION_URL, 'urlPrefix' => 2 ),
+ 'expected' => array('/index' ),
+ ),
+ array(
+ 'params' => array( 'name' => 'example.org/', 'type' => Piwik_Tracker_Action::TYPE_ACTION_URL, 'urlPrefix' => 3 ),
+ 'expected' => array('/index' ),
+ ),
+ array(
+ 'params' => array( 'name' => 'example.org/', 'type' => Piwik_Tracker_Action::TYPE_ACTION_URL, 'urlPrefix' => 4 ),
+ 'expected' => array('/index' ),
+ ),
+ array(
+ 'params' => array( 'name' => 'example.org/path/', 'type' => Piwik_Tracker_Action::TYPE_ACTION_URL, 'urlPrefix' => 4 ),
'expected' => array( 'path', '/index' ),
),
array(
- 'params' => array( 'name' => 'http://example.org/test/path', 'type' => Piwik_Tracker_Action::TYPE_ACTION_URL),
+ 'params' => array( 'name' => 'example.org/test/path', 'type' => Piwik_Tracker_Action::TYPE_ACTION_URL, 'urlPrefix' => 1 ),
'expected' => array( 'test', '/path' ),
),
array(
@@ -90,15 +106,15 @@ class Test_Piwik_Actions extends UnitTestCase
foreach($tests as $test) {
$params = $test['params'];
$expected = $test['expected'];
- $processed = $action->public_getActionExplodedNames($params['name'],$params['type']);
+ $processed = $action->public_getActionExplodedNames($params['name'],$params['type'],isset($params['urlPrefix'])?$params['urlPrefix']:null);
$this->assertEqual($processed, $expected, "Processed: ".var_export($processed, true) . " | Expected: ". var_export($expected, true));
}
}
}
class Test_Piwik_Actions_getActionExplodedNames extends Piwik_Actions {
- public function public_getActionExplodedNames($name, $type)
+ public function public_getActionExplodedNames($name, $type, $urlPrefix)
{
- return self::getActionExplodedNames($name, $type);
+ return self::getActionExplodedNames($name, $type, $urlPrefix);
}
}
diff --git a/plugins/Live/API.php b/plugins/Live/API.php
index 800ae963aa..753b4d9aff 100644
--- a/plugins/Live/API.php
+++ b/plugins/Live/API.php
@@ -184,8 +184,9 @@ class Piwik_Live_API
// eg. Downloads, Outlinks. For these, idaction_name is set to 0
$sql = "
SELECT
- log_action.type as type,
+ log_action.type AS type,
log_action.name AS url,
+ log_action.url_prefix,
log_action_title.name AS pageTitle,
log_action.idaction AS pageIdAction,
log_link_visit_action.idlink_va AS pageId,
@@ -221,7 +222,9 @@ class Piwik_Live_API
{
$actionDetail['customVariables'] = $customVariablesPage;
}
-
+ // reconstruct url from prefix
+ $actionDetail['url'] = Piwik_Tracker_Action::reconstructNormalizedUrl($actionDetail['url'], $actionDetail['url_prefix']);
+ unset($actionDetail['url_prefix']);
// set the time spent for this action (which is the timeSpentRef of the next action)
if (isset($actionDetails[$actionIdx + 1]))
{