Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/matomo-org/matomo.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Matthieu Aubry <matt@piwik.org> 2014-11-13 07:28:30 +0300
committer Matthieu Aubry <matt@piwik.org> 2014-11-13 07:28:30 +0300
commit dba0659953f563481f6792ba9ea41f79f2d7ff8b (patch)
tree f86ad7071a558dbdcf0b472ba663bca7de00a4f6
parent 52443868c14140b0cd85742590a3a6d1701ccfd7 (diff)
parent 6c9573cf716a446a8b3a61d5d4c352633bb83c5c (diff)
Merge pull request #6657 from piwik/bugfix/5365
Do not store Accept-language full string, only store one detected language
-rw-r--r-- core/Common.php 90
-rw-r--r-- plugins/UserSettings/Archiver.php 3
-rw-r--r-- plugins/UserSettings/Columns/Language.php 19
-rw-r--r-- plugins/UserSettings/tests/System/expected/test___UserSettings.getLanguageCode_day.xml 6
-rw-r--r-- plugins/UserSettings/tests/System/expected/test___UserSettings.getLanguage_day.xml 2
-rw-r--r-- tests/PHPUnit/Unit/CommonTest.php 53
6 files changed, 137 insertions, 36 deletions
diff --git a/core/Common.php b/core/Common.php
index 6dfa2d22fa..823e884af1 100644
--- a/core/Common.php
+++ b/core/Common.php
@@ -34,6 +34,7 @@ class Common
/*
* Database
*/
+ const LANGUAGE_CODE_INVALID = 'xx';
/**
* Hashes a string into an integer which should be very low collision risks
@@ -937,8 +938,8 @@ class Common
*/
public static function getCountry($lang, $enableLanguageToCountryGuess, $ip)
{
- if (empty($lang) || strlen($lang) < 2 || $lang == 'xx') {
- return 'xx';
+ if (empty($lang) || strlen($lang) < 2 || $lang == self::LANGUAGE_CODE_INVALID) {
+ return self::LANGUAGE_CODE_INVALID;
}
$validCountries = self::getCountriesList();
@@ -974,35 +975,73 @@ class Common
}
}
}
- return 'xx';
+ return self::LANGUAGE_CODE_INVALID;
}
/**
- * Returns the visitor language based only on the Browser 'accepted language' information
+ * Returns the language code only (without any region), based only on the Browser 'accepted language' information.
+ * * The language tag is defined by ISO 639-1
*
* @param string $browserLanguage Browser's accepted language header
* @param array $validLanguages array of valid language codes
- * @return string 2 letter ISO 639 code
+ * @return string 2 letter ISO 639 code, eg. 'es' (Spanish), or 'xx' if no valid language could be detected
*/
- public static function extractLanguageCodeFromBrowserLanguage($browserLanguage, $validLanguages)
+ public static function extractLanguageCodeFromBrowserLanguage($browserLanguage, $validLanguages = array())
{
- // assumes language preference is sorted;
- // does not handle language-script-region tags or language range (*)
- if (!empty($validLanguages) && preg_match_all('/(?:^|,)([a-z]{2,3})([-][a-z]{2})?/', $browserLanguage, $matches, PREG_SET_ORDER)) {
- foreach ($matches as $parts) {
- if (count($parts) == 3) {
- // match locale (language and location)
- if (in_array($parts[1] . $parts[2], $validLanguages)) {
- return $parts[1] . $parts[2];
- }
+ $validLanguages = self::checkValidLanguagesIsSet($validLanguages);
+ $languageRegionCode = self::extractLanguageAndRegionCodeFromBrowserLanguage($browserLanguage, $validLanguages);
+
+ if(strlen($languageRegionCode) == 2) {
+ $languageCode = $languageRegionCode;
+ } else {
+ $languageCode = substr($languageRegionCode, 0, 2);
+ }
+ if(in_array($languageCode, $validLanguages)) {
+ return $languageCode;
+ }
+ return self::LANGUAGE_CODE_INVALID;
+ }
+
+ /**
+ * Returns the language and region string, based only on the Browser 'accepted language' information.
+ * * The language tag is defined by ISO 639-1
+ * * The region tag is defined by ISO 3166-1
+ *
+ * @param string $browserLanguage Browser's accepted language header
+ * @param array $validLanguages array of valid language codes. Note that if the array includes "fr" then it will consider all regional variants of this language valid, such as "fr-ca" etc.
+ * @return string 2 letter ISO 639 code 'es' (Spanish) or if found, includes the region as well: 'es-ar'
+ */
+ public static function extractLanguageAndRegionCodeFromBrowserLanguage($browserLanguage, $validLanguages = array() )
+ {
+ $validLanguages = self::checkValidLanguagesIsSet($validLanguages);
+
+ if(!preg_match_all('/(?:^|,)([a-z]{2,3})([-][a-z]{2})?/', $browserLanguage, $matches, PREG_SET_ORDER)) {
+ return self::LANGUAGE_CODE_INVALID;
+ }
+ foreach ($matches as $parts) {
+ $langIso639 = $parts[1];
+ if(empty($langIso639)) {
+ continue;
+ }
+
+ // If a region tag is found eg. "fr-ca"
+ if (count($parts) == 3) {
+ $regionIso3166 = $parts[2]; // eg. "-ca"
+
+ if (in_array($langIso639 . $regionIso3166, $validLanguages)) {
+ return $langIso639 . $regionIso3166;
}
- // match language only (where no region provided)
- if (in_array($parts[1], $validLanguages)) {
- return $parts[1];
+
+ if (in_array($langIso639, $validLanguages)) {
+ return $langIso639 . $regionIso3166;
}
}
+ // eg. "fr" or "es"
+ if (in_array($langIso639, $validLanguages)) {
+ return $langIso639;
+ }
}
- return 'xx';
+ return self::LANGUAGE_CODE_INVALID;
}
/**
@@ -1161,4 +1200,17 @@ class Common
}
}
}
+
+ /**
+ * @param $validLanguages
+ * @return array
+ */
+ protected static function checkValidLanguagesIsSet($validLanguages)
+ {
+ if (empty($validLanguages)) {
+ $validLanguages = array_keys(Common::getLanguagesList());
+ return $validLanguages;
+ }
+ return $validLanguages;
+ }
}
diff --git a/plugins/UserSettings/Archiver.php b/plugins/UserSettings/Archiver.php
index 777f536dd9..ea4496b16a 100644
--- a/plugins/UserSettings/Archiver.php
+++ b/plugins/UserSettings/Archiver.php
@@ -140,12 +140,11 @@ class Archiver extends \Piwik\Plugin\Archiver
protected function aggregateByLanguage()
{
$query = $this->getLogAggregator()->queryVisitsByDimension(array("label" => self::LANGUAGE_DIMENSION));
- $languageCodes = array_keys(Common::getLanguagesList());
$countryCodes = Common::getCountriesList($includeInternalCodes = true);
$metricsByLanguage = new DataArray();
while ($row = $query->fetch()) {
- $langCode = Common::extractLanguageCodeFromBrowserLanguage($row['label'], $languageCodes);
+ $langCode = Common::extractLanguageCodeFromBrowserLanguage($row['label']);
$countryCode = Common::extractCountryCodeFromBrowserLanguage($row['label'], $countryCodes, $enableLanguageToCountryGuess = true);
if ($countryCode == 'xx' || $countryCode == $langCode) {
diff --git a/plugins/UserSettings/Columns/Language.php b/plugins/UserSettings/Columns/Language.php
index f61154c7c4..4f31778e2d 100644
--- a/plugins/UserSettings/Columns/Language.php
+++ b/plugins/UserSettings/Columns/Language.php
@@ -8,6 +8,7 @@
*/
namespace Piwik\Plugins\UserSettings\Columns;
+use Piwik\Common;
use Piwik\Piwik;
use Piwik\Plugin\Dimension\VisitDimension;
use Piwik\Tracker\Action;
@@ -32,12 +33,22 @@ class Language extends VisitDimension
*/
public function onNewVisit(Request $request, Visitor $visitor, $action)
{
- $language = $request->getBrowserLanguage();
+ return $this->getSingleLanguageFromAcceptedLanguages($request->getBrowserLanguage());
+ }
- if (empty($language)) {
+ /**
+ * For better privacy we store only the main language code, instead of the whole browser language string.
+ *
+ * @param $acceptLanguagesString
+ * @return string
+ */
+ protected function getSingleLanguageFromAcceptedLanguages($acceptLanguagesString)
+ {
+ if (empty($acceptLanguagesString)) {
return '';
}
- return substr($language, 0, 20);
+ $languageCode = Common::extractLanguageAndRegionCodeFromBrowserLanguage($acceptLanguagesString);
+ return $languageCode;
}
-} \ No newline at end of file
+}
diff --git a/plugins/UserSettings/tests/System/expected/test___UserSettings.getLanguageCode_day.xml b/plugins/UserSettings/tests/System/expected/test___UserSettings.getLanguageCode_day.xml
index 689e3d7d37..02c15ad520 100644
--- a/plugins/UserSettings/tests/System/expected/test___UserSettings.getLanguageCode_day.xml
+++ b/plugins/UserSettings/tests/System/expected/test___UserSettings.getLanguageCode_day.xml
@@ -2,7 +2,7 @@
<result>
<row>
<label>Polish (pl)</label>
- <nb_uniq_visitors>2</nb_uniq_visitors>
+ <nb_uniq_visitors>1</nb_uniq_visitors>
<nb_visits>3</nb_visits>
<nb_actions>3</nb_actions>
<nb_users>0</nb_users>
@@ -12,7 +12,7 @@
<nb_visits_converted>0</nb_visits_converted>
</row>
<row>
- <label>English - United States (en-us)</label>
+ <label>English (en)</label>
<nb_uniq_visitors>1</nb_uniq_visitors>
<nb_visits>2</nb_visits>
<nb_actions>2</nb_actions>
@@ -133,7 +133,7 @@
<nb_visits_converted>0</nb_visits_converted>
</row>
<row>
- <label>Unknown - Liberia (xx-lr)</label>
+ <label>Unknown (xx)</label>
<nb_uniq_visitors>1</nb_uniq_visitors>
<nb_visits>1</nb_visits>
<nb_actions>1</nb_actions>
diff --git a/plugins/UserSettings/tests/System/expected/test___UserSettings.getLanguage_day.xml b/plugins/UserSettings/tests/System/expected/test___UserSettings.getLanguage_day.xml
index 6bb328c660..18d4468a2e 100644
--- a/plugins/UserSettings/tests/System/expected/test___UserSettings.getLanguage_day.xml
+++ b/plugins/UserSettings/tests/System/expected/test___UserSettings.getLanguage_day.xml
@@ -2,7 +2,7 @@
<result>
<row>
<label>Polish</label>
- <nb_uniq_visitors>2</nb_uniq_visitors>
+ <nb_uniq_visitors>1</nb_uniq_visitors>
<nb_visits>3</nb_visits>
<nb_actions>3</nb_actions>
<nb_users>0</nb_users>
diff --git a/tests/PHPUnit/Unit/CommonTest.php b/tests/PHPUnit/Unit/CommonTest.php
index 2994cf3abc..22ee2c52da 100644
--- a/tests/PHPUnit/Unit/CommonTest.php
+++ b/tests/PHPUnit/Unit/CommonTest.php
@@ -395,25 +395,29 @@ class Core_CommonTest extends PHPUnit_Framework_TestCase
}
/**
- * Dataprovider for testExtractLanguageCodeFromBrowserLanguage
+ * Dataprovider for testExtractLanguageAndRegionCodeFromBrowserLanguage
*/
- public function getLanguageDataToExtract()
+ public function getLanguageDataToExtractLanguageRegionCode()
{
return array(
- // browser language, valid languages, expected result
- array("fr-ca", array("fr"), "fr"),
+ // browser language, valid languages (with optional region), expected result
+ array("fr-ca", array("fr"), "fr-ca"),
+ array("fr-ca", array("ca"), "xx"),
array("", array(), "xx"),
array("", array("en"), "xx"),
array("fr", array("en"), "xx"),
array("en", array("en"), "en"),
+ array("en", array("en-ca"), "xx"),
array("en-ca", array("en-ca"), "en-ca"),
- array("en-ca", array("en"), "en"),
+ array("en-ca", array("en"), "en-ca"),
array("fr,en-us", array("fr", "en"), "fr"),
array("fr,en-us", array("en", "fr"), "fr"),
- array("fr-fr,fr-ca", array("fr"), "fr"),
+ array("fr-fr,fr-ca", array("fr"), "fr-fr"),
array("fr-fr,fr-ca", array("fr-ca"), "fr-ca"),
+ array("-ca", array("fr","ca"), "xx"),
array("fr-fr;q=1.0,fr-ca;q=0.9", array("fr-ca"), "fr-ca"),
array("es,en,fr;q=0.7,de;q=0.3", array("fr", "es", "de", "en"), "es"),
+ array("zh-sg,de;q=0.3", array("zh", "es", "de"), "zh-sg"),
array("fr-ca,fr;q=0.1", array("fr-ca"), "fr-ca"),
array("r5,fr;q=1,de", array("fr", "de"), "fr"),
array("Zen§gq1", array("en"), "xx"),
@@ -421,7 +425,42 @@ class Core_CommonTest extends PHPUnit_Framework_TestCase
}
/**
- * @dataProvider getLanguageDataToExtract
+ * @dataProvider getLanguageDataToExtractLanguageRegionCode
+ * @group Core
+ */
+ public function testExtractLanguageAndRegionCodeFromBrowserLanguage($browserLanguage, $validLanguages, $expected)
+ {
+ $this->assertEquals($expected, Common::extractLanguageAndRegionCodeFromBrowserLanguage($browserLanguage, $validLanguages), "test with {$browserLanguage} failed, expected {$expected}");
+ }
+
+
+ /**
+ * Dataprovider for testExtractLanguageCodeFromBrowserLanguage
+ */
+ public function getLanguageDataToExtractLanguageCode()
+ {
+ return array(
+ // browser language, valid languages, expected result
+ array("fr-ca", array("fr"), "fr"),
+ array("fr-ca", array("ca"), "xx"),
+ array("", array("en"), "xx"),
+ array("fr", array("en"), "xx"),
+ array("en", array("en"), "en"),
+ array("en", array("en-ca"), "xx"),
+ array("en-ca", array("en"), "en"),
+ array("fr,en-us", array("fr", "en"), "fr"),
+ array("fr,en-us", array("en", "fr"), "fr"),
+ array("fr-fr,fr-ca", array("fr"), "fr"),
+ array("-ca", array("fr","ca"), "xx"),
+ array("es,en,fr;q=0.7,de;q=0.3", array("fr", "es", "de", "en"), "es"),
+ array("zh-sg,de;q=0.3", array("zh", "es", "de"), "zh"),
+ array("r5,fr;q=1,de", array("fr", "de"), "fr"),
+ array("Zen§gq1", array("en"), "xx"),
+ );
+ }
+
+ /**
+ * @dataProvider getLanguageDataToExtractLanguageCode
* @group Core
*/
public function testExtractLanguageCodeFromBrowserLanguage($browserLanguage, $validLanguages, $expected)