Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/matomo-org/matomo.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--composer.json19
-rw-r--r--composer.lock12
-rw-r--r--core/Common.php54
-rw-r--r--core/UrlHelper.php236
-rw-r--r--plugins/CoreAdminHome/Tasks.php1
-rw-r--r--plugins/Referrers/Columns/Base.php5
-rw-r--r--plugins/Referrers/SearchEngine.php397
-rw-r--r--plugins/Referrers/Tasks.php35
-rw-r--r--plugins/Referrers/functions.php6
-rw-r--r--plugins/Referrers/tests/Unit/ReferrersTest.php33
-rw-r--r--plugins/Referrers/tests/Unit/SearchEngineTest.php81
-rw-r--r--tests/PHPUnit/Framework/Fixture.php1
-rw-r--r--tests/PHPUnit/Unit/CommonTest.php35
-rw-r--r--tests/PHPUnit/Unit/UrlHelperTest.php31
14 files changed, 569 insertions, 377 deletions
diff --git a/composer.json b/composer.json
index 42e741f8fd..28b407ed5a 100644
--- a/composer.json
+++ b/composer.json
@@ -54,7 +54,8 @@
"symfony/event-dispatcher": "~2.6.0",
"pear/pear_exception": "~1.0.0",
"piwik/referrer-spam-blacklist": "~1.0",
- "tecnickcom/tcpdf": "~6.0"
+ "tecnickcom/tcpdf": "~6.0",
+ "piwik/searchengine-and-social-definitions": "dev-master"
},
"require-dev": {
"aws/aws-sdk-php": "2.7.1",
@@ -90,8 +91,20 @@
"reference": "master"
}
}
- }
- ],
+ },
+ {
+ "type": "package",
+ "package": {
+ "name": "piwik/searchengine-and-social-definitions",
+ "type": "library",
+ "version": "master",
+ "source": {
+ "type": "git",
+ "url": "https://github.com/sgiehl/searchengine-and-social-definitions",
+ "reference": "master"
+ }
+ }
+ } ],
"scripts": {
"pre-update-cmd": [
"Piwik\\Composer\\ScriptHandler::cleanXhprof"
diff --git a/composer.lock b/composer.lock
index a6ee66b134..1a5ba52b51 100644
--- a/composer.lock
+++ b/composer.lock
@@ -959,6 +959,17 @@
"time": "2015-10-07 10:17:59"
},
{
+ "name": "piwik/searchengine-and-social-definitions",
+ "version": "master",
+ "source": {
+ "type": "git",
+ "url": "https://github.com/sgiehl/searchengine-and-social-definitions",
+ "reference": "master"
+ },
+ "type": "library",
+ "time": "2015-10-31 15:36:36"
+ },
+ {
"name": "psr/log",
"version": "1.0.0",
"source": {
@@ -2645,6 +2656,7 @@
"minimum-stability": "stable",
"stability-flags": {
"php-di/php-di": 10,
+ "piwik/searchengine-and-social-definitions": 20,
"facebook/xhprof": 20
},
"prefer-stable": false,
diff --git a/core/Common.php b/core/Common.php
index 7e3296bee1..6bb4298e93 100644
--- a/core/Common.php
+++ b/core/Common.php
@@ -816,60 +816,6 @@ class Common
}
/**
- * Returns list of search engines by URL
- *
- * @see core/DataFiles/SearchEngines.php
- *
- * @return array Array of ( URL => array( searchEngineName, keywordParameter, path, charset ) )
- */
- public static function getSearchEngineUrls()
- {
- $cacheId = 'Common.getSearchEngineUrls';
- $cache = Cache::getTransientCache();
- $searchEngines = $cache->fetch($cacheId);
-
- if (empty($searchEngines)) {
- require_once PIWIK_INCLUDE_PATH . '/core/DataFiles/SearchEngines.php';
-
- $searchEngines = $GLOBALS['Piwik_SearchEngines'];
-
- Piwik::postEvent('Referrer.addSearchEngineUrls', array(&$searchEngines));
-
- $cache->save($cacheId, $searchEngines);
- }
-
- return $searchEngines;
- }
-
- /**
- * Returns list of search engines by name
- *
- * @see core/DataFiles/SearchEngines.php
- *
- * @return array Array of ( searchEngineName => URL )
- */
- public static function getSearchEngineNames()
- {
- $cacheId = 'Common.getSearchEngineNames';
- $cache = Cache::getTransientCache();
- $nameToUrl = $cache->fetch($cacheId);
-
- if (empty($nameToUrl)) {
- $searchEngines = self::getSearchEngineUrls();
-
- $nameToUrl = array();
- foreach ($searchEngines as $url => $info) {
- if (!isset($nameToUrl[$info[0]])) {
- $nameToUrl[$info[0]] = $url;
- }
- }
- $cache->save($cacheId, $nameToUrl);
- }
-
- return $nameToUrl;
- }
-
- /**
* Returns list of social networks by URL
*
* @see core/DataFiles/Socials.php
diff --git a/core/UrlHelper.php b/core/UrlHelper.php
index 4a0ac0fa0a..66a0e64e25 100644
--- a/core/UrlHelper.php
+++ b/core/UrlHelper.php
@@ -259,242 +259,6 @@ class UrlHelper
}
/**
- * Extracts a keyword from a raw not encoded URL.
- * Will only extract keyword if a known search engine has been detected.
- * Returns the keyword:
- * - in UTF8: automatically converted from other charsets when applicable
- * - strtolowered: "QUErY test!" will return "query test!"
- * - trimmed: extra spaces before and after are removed
- *
- * Lists of supported search engines can be found in /core/DataFiles/SearchEngines.php
- * The function returns false when a keyword couldn't be found.
- * eg. if the url is "http://www.google.com/partners.html" this will return false,
- * as the google keyword parameter couldn't be found.
- *
- * @see unit tests in /tests/core/Common.test.php
- * @param string $referrerUrl URL referrer URL, eg. $_SERVER['HTTP_REFERER']
- * @return array|bool false if a keyword couldn't be extracted,
- * or array(
- * 'name' => 'Google',
- * 'keywords' => 'my searched keywords')
- */
- public static function extractSearchEngineInformationFromUrl($referrerUrl)
- {
- $referrerParsed = @parse_url($referrerUrl);
- $referrerHost = '';
- if (isset($referrerParsed['host'])) {
- $referrerHost = $referrerParsed['host'];
- }
- if (empty($referrerHost)) {
- return false;
- }
- // some search engines (eg. Bing Images) use the same domain
- // as an existing search engine (eg. Bing), we must also use the url path
- $referrerPath = '';
- if (isset($referrerParsed['path'])) {
- $referrerPath = $referrerParsed['path'];
- }
-
- // no search query
- if (!isset($referrerParsed['query'])) {
- $referrerParsed['query'] = '';
- }
- $query = $referrerParsed['query'];
-
- // Google Referrers URLs sometimes have the fragment which contains the keyword
- if (!empty($referrerParsed['fragment'])) {
- $query .= '&' . $referrerParsed['fragment'];
- }
-
- $searchEngines = Common::getSearchEngineUrls();
-
- $hostPattern = self::getLossyUrl($referrerHost);
- /*
- * Try to get the best matching 'host' in definitions
- * 1. check if host + path matches an definition
- * 2. check if host only matches
- * 3. check if host pattern + path matches
- * 4. check if host pattern matches
- * 5. special handling
- */
- if (array_key_exists($referrerHost . $referrerPath, $searchEngines)) {
- $referrerHost = $referrerHost . $referrerPath;
- } elseif (array_key_exists($referrerHost, $searchEngines)) {
- // no need to change host
- } elseif (array_key_exists($hostPattern . $referrerPath, $searchEngines)) {
- $referrerHost = $hostPattern . $referrerPath;
- } elseif (array_key_exists($hostPattern, $searchEngines)) {
- $referrerHost = $hostPattern;
- } elseif (!array_key_exists($referrerHost, $searchEngines)) {
- if (!strncmp($query, 'cx=partner-pub-', 15)) {
- // Google custom search engine
- $referrerHost = 'google.com/cse';
- } elseif (!strncmp($referrerPath, '/pemonitorhosted/ws/results/', 28)) {
- // private-label search powered by InfoSpace Metasearch
- $referrerHost = 'wsdsold.infospace.com';
- } elseif (strpos($referrerHost, '.images.search.yahoo.com') != false) {
- // Yahoo! Images
- $referrerHost = 'images.search.yahoo.com';
- } elseif (strpos($referrerHost, '.search.yahoo.com') != false) {
- // Yahoo!
- $referrerHost = 'search.yahoo.com';
- } else {
- return false;
- }
- }
- $searchEngineName = $searchEngines[$referrerHost][0];
- $variableNames = null;
- if (isset($searchEngines[$referrerHost][1])) {
- $variableNames = $searchEngines[$referrerHost][1];
- }
- if (!$variableNames) {
- $searchEngineNames = Common::getSearchEngineNames();
- $url = $searchEngineNames[$searchEngineName];
- $variableNames = $searchEngines[$url][1];
- }
- if (!is_array($variableNames)) {
- $variableNames = array($variableNames);
- }
-
- $key = null;
- if ($searchEngineName === 'Google Images'
- || ($searchEngineName === 'Google' && strpos($referrerUrl, '/imgres') !== false)
- ) {
- if (strpos($query, '&prev') !== false) {
- $query = urldecode(trim(self::getParameterFromQueryString($query, 'prev')));
- $query = str_replace('&', '&', strstr($query, '?'));
- }
- $searchEngineName = 'Google Images';
- } elseif ($searchEngineName === 'Google'
- && (strpos($query, '&as_') !== false || strpos($query, 'as_') === 0)
- ) {
- $keys = array();
- $key = self::getParameterFromQueryString($query, 'as_q');
- if (!empty($key)) {
- array_push($keys, $key);
- }
- $key = self::getParameterFromQueryString($query, 'as_oq');
- if (!empty($key)) {
- array_push($keys, str_replace('+', ' OR ', $key));
- }
- $key = self::getParameterFromQueryString($query, 'as_epq');
- if (!empty($key)) {
- array_push($keys, "\"$key\"");
- }
- $key = self::getParameterFromQueryString($query, 'as_eq');
- if (!empty($key)) {
- array_push($keys, "-$key");
- }
- $key = trim(urldecode(implode(' ', $keys)));
- }
-
- if ($searchEngineName === 'Google') {
- // top bar menu
- $tbm = self::getParameterFromQueryString($query, 'tbm');
- switch ($tbm) {
- case 'isch':
- $searchEngineName = 'Google Images';
- break;
- case 'vid':
- $searchEngineName = 'Google Video';
- break;
- case 'shop':
- $searchEngineName = 'Google Shopping';
- break;
- }
- }
-
- if (empty($key)) {
- foreach ($variableNames as $variableName) {
- if ($variableName[0] == '/') {
- // regular expression match
- if (preg_match($variableName, $referrerUrl, $matches)) {
- $key = trim(urldecode($matches[1]));
- break;
- }
- } else {
- // search for keywords now &vname=keyword
- $key = self::getParameterFromQueryString($query, $variableName);
- $key = trim(urldecode($key));
-
- // Special cases: empty or no keywords
- if (empty($key)
- && (
- // Google search with no keyword
- ($searchEngineName == 'Google'
- && (empty($query) && (empty($referrerPath) || $referrerPath == '/') && empty($referrerParsed['fragment']))
- )
-
- // Yahoo search with no keyword
- || ($searchEngineName == 'Yahoo!'
- && ($referrerParsed['host'] == 'r.search.yahoo.com')
- )
-
- // empty keyword parameter
- || strpos($query, sprintf('&%s=', $variableName)) !== false
- || strpos($query, sprintf('?%s=', $variableName)) !== false
-
- // search engines with no keyword
- || $searchEngineName == 'Ixquick'
- || $searchEngineName == 'Google Images'
- || $searchEngineName == 'DuckDuckGo')
- ) {
- $key = false;
- }
- if (!empty($key)
- || $key === false
- ) {
- break;
- }
- }
- }
- }
-
- // $key === false is the special case "No keyword provided" which is a Search engine match
- if ($key === null
- || $key === ''
- ) {
- return false;
- }
-
- if (!empty($key)) {
- if (function_exists('iconv')
- && isset($searchEngines[$referrerHost][3])
- ) {
- // accepts string, array, or comma-separated list string in preferred order
- $charsets = $searchEngines[$referrerHost][3];
- if (!is_array($charsets)) {
- $charsets = explode(',', $charsets);
- }
-
- if (!empty($charsets)) {
- $charset = $charsets[0];
- if (count($charsets) > 1
- && function_exists('mb_detect_encoding')
- ) {
- $charset = mb_detect_encoding($key, $charsets);
- if ($charset === false) {
- $charset = $charsets[0];
- }
- }
-
- $newkey = @iconv($charset, 'UTF-8//IGNORE', $key);
- if (!empty($newkey)) {
- $key = $newkey;
- }
- }
- }
-
- $key = Common::mb_strtolower($key);
- }
-
- return array(
- 'name' => $searchEngineName,
- 'keywords' => $key,
- );
- }
-
- /**
* Returns the query part from any valid url and adds additional parameters to the query part if needed.
*
* @param string $url Any url eg `"http://example.com/piwik/?foo=bar"`
diff --git a/plugins/CoreAdminHome/Tasks.php b/plugins/CoreAdminHome/Tasks.php
index 01290f0cff..30f66a995d 100644
--- a/plugins/CoreAdminHome/Tasks.php
+++ b/plugins/CoreAdminHome/Tasks.php
@@ -49,6 +49,7 @@ class Tasks extends \Piwik\Plugin\Tasks
$this->daily('optimizeArchiveTable', null, self::LOWEST_PRIORITY);
$this->weekly('updateSpammerBlacklist');
+ $this->weekly('updateSearchEnginesAndSocials');
}
/**
diff --git a/plugins/Referrers/Columns/Base.php b/plugins/Referrers/Columns/Base.php
index 78fe27516c..1f4a0c7210 100644
--- a/plugins/Referrers/Columns/Base.php
+++ b/plugins/Referrers/Columns/Base.php
@@ -11,6 +11,7 @@ namespace Piwik\Plugins\Referrers\Columns;
use Piwik\Common;
use Piwik\Piwik;
use Piwik\Plugin\Dimension\VisitDimension;
+use Piwik\Plugins\Referrers\SearchEngine;
use Piwik\Tracker\PageUrl;
use Piwik\Tracker\Request;
use Piwik\Tracker\Visit;
@@ -139,7 +140,7 @@ abstract class Base extends VisitDimension
*/
protected function detectReferrerSearchEngine()
{
- $searchEngineInformation = UrlHelper::extractSearchEngineInformationFromUrl($this->referrerUrl);
+ $searchEngineInformation = SearchEngine::getInstance()->extractInformationFromUrl($this->referrerUrl);
/**
* Triggered when detecting the search engine of a referrer URL.
@@ -277,7 +278,7 @@ abstract class Base extends VisitDimension
// Set the Campaign keyword to the keyword found in the Referrer URL if any
if (!empty($this->nameReferrerAnalyzed)) {
- $referrerUrlInfo = UrlHelper::extractSearchEngineInformationFromUrl($this->referrerUrl);
+ $referrerUrlInfo = SearchEngine::getInstance()->extractInformationFromUrl($this->referrerUrl);
if (!empty($referrerUrlInfo['keywords'])) {
$this->keywordReferrerAnalyzed = $referrerUrlInfo['keywords'];
}
diff --git a/plugins/Referrers/SearchEngine.php b/plugins/Referrers/SearchEngine.php
new file mode 100644
index 0000000000..f2f12fe63c
--- /dev/null
+++ b/plugins/Referrers/SearchEngine.php
@@ -0,0 +1,397 @@
+<?php
+/**
+ * Piwik - free/libre analytics platform
+ *
+ * @link http://piwik.org
+ * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
+ *
+ */
+namespace Piwik\Plugins\Referrers;
+use Piwik\Cache;
+use Piwik\Common;
+use Piwik\Option;
+use Piwik\Piwik;
+use Piwik\Singleton;
+use Piwik\UrlHelper;
+
+/**
+ * Contains methods to access search engine definition data.
+ */
+class SearchEngine extends Singleton
+{
+ const OPTION_STORAGE_NAME = 'SearchEngineDefinitions';
+
+ /** @var string location of definition file (relative to PIWIK_INCLUDE_PATH) */
+ const DEFINITION_FILE = '/vendor/piwik/searchengine-and-social-definitions/SearchEngines.yml';
+
+ protected $definitionList = null;
+
+ /**
+ * Returns list of search engines by URL
+ *
+ * @return array Array of ( URL => array( searchEngineName, keywordParameter, path, charset ) )
+ */
+ public function getSearchEngineDefinitions()
+ {
+ $cache = Cache::getEagerCache();
+ $cacheId = 'SearchEngine-' . self::OPTION_STORAGE_NAME;
+
+ if ($cache->contains($cacheId)) {
+ $list = $cache->fetch($cacheId);
+ } else {
+ $list = $this->loadSearchEngineDefinitions();
+ $cache->save($cacheId, $list);
+ }
+
+ return $list;
+ }
+
+    /**
+     * Fills $this->definitionList if it has not been set yet and lets plugins extend
+     * it through the 'Referrer.addSearchEngineUrls' event.
+     *
+     * The list stored in the option table is preferred. When no list is stored, or the
+     * stored value cannot be unserialized into an array, the bundled YAML file is
+     * parsed instead and the result is written back to the option table.
+     *
+     * @return array
+     */
+    private function loadSearchEngineDefinitions()
+    {
+        if ($this->definitionList === null) {
+            // Read first from the auto-updated list in database
+            $stored = Option::get(self::OPTION_STORAGE_NAME);
+            $list   = $stored ? @unserialize($stored) : false;
+
+            if (is_array($list)) {
+                $this->definitionList = $list;
+            } else {
+                // Fallback to reading the bundled list; this also replaces a stored
+                // value that could not be unserialized, which would otherwise leave
+                // the list set to false and break every array lookup on it
+                $yml = file_get_contents(PIWIK_INCLUDE_PATH . self::DEFINITION_FILE);
+                $this->definitionList = $this->loadYmlData($yml);
+                Option::set(self::OPTION_STORAGE_NAME, serialize($this->definitionList));
+            }
+        }
+
+        // posted on every call, after the list is available, so listeners can add entries
+        Piwik::postEvent('Referrer.addSearchEngineUrls', array(&$this->definitionList));
+
+        return $this->definitionList;
+    }
+
+ /**
+ * Parses the given YML string and caches the resulting definitions
+ *
+ * @param string $yml
+ * @return array
+ */
+ public function loadYmlData($yml)
+ {
+ $searchEngines = \Spyc::YAMLLoadString($yml);
+
+ $this->definitionList = $this->transformData($searchEngines);
+
+ return $this->definitionList;
+ }
+
+    /**
+     * Flattens definitions grouped by search engine name into a map keyed by URL.
+     *
+     * Every resulting entry is the URL's definition without its 'urls' key and with the
+     * engine name stored under 'name'. A URL that occurs more than once keeps the
+     * definition seen last.
+     *
+     * @param array $searchEngines Array of ( name => list of definitions, each holding a 'urls' list )
+     * @return array Array of ( URL => definition )
+     */
+    protected function transformData($searchEngines)
+    {
+        $definitionsByUrl = array();
+
+        foreach ($searchEngines as $engineName => $definitions) {
+            foreach ($definitions as $definition) {
+                foreach ($definition['urls'] as $engineUrl) {
+                    $entry = $definition;
+                    unset($entry['urls']);
+                    $entry['name'] = $engineName;
+
+                    $definitionsByUrl[$engineUrl] = $entry;
+                }
+            }
+        }
+
+        return $definitionsByUrl;
+    }
+
+ /**
+ * Returns list of search engines by name
+ *
+ * @see core/DataFiles/SearchEngines.php
+ *
+ * @return array Array of ( searchEngineName => URL )
+ */
+ public function getSearchEngineNames()
+ {
+ $cacheId = 'SearchEngine.getSearchEngineNames';
+ $cache = Cache::getTransientCache();
+ $nameToUrl = $cache->fetch($cacheId);
+
+ if (empty($nameToUrl)) {
+ $searchEngines = $this->getSearchEngineDefinitions();
+
+ $nameToUrl = array();
+ foreach ($searchEngines as $url => $info) {
+ if (!isset($nameToUrl[$info['name']])) {
+ $nameToUrl[$info['name']] = $url;
+ }
+ }
+ $cache->save($cacheId, $nameToUrl);
+ }
+
+ return $nameToUrl;
+ }
+
+ /**
+ * Returns definitions for the given search engine host
+ *
+ * @param string $host
+ * @return array
+ */
+ public function getDefinitionByHost($host)
+ {
+ $searchEngines = $this->getSearchEngineDefinitions();
+
+ if (!array_key_exists($host, $searchEngines)) {
+ return array();
+ }
+
+ return $searchEngines[$host];
+ }
+
+ /**
+ * Returns defined parameters for the given search engine host
+ * @param string $host
+ * @return array
+ */
+ public function getParameterNamesByHost($host)
+ {
+ $definition = $this->getDefinitionByHost($host);
+
+ if (empty($definition['params'])) {
+ return array();
+ }
+
+ return $definition['params'];
+ }
+
+ /**
+ * Returns defined charsets for given search engine host
+ *
+ * @param string $host
+ * @return array
+ */
+ public function getCharsetsByHost($host)
+ {
+ $definition = $this->getDefinitionByHost($host);
+
+ if (empty($definition['charsets'])) {
+ return array();
+ }
+
+ return $definition['charsets'];
+ }
+
+ /**
+ * Extracts a keyword from a raw not encoded URL.
+ * Will only extract keyword if a known search engine has been detected.
+ * Returns the keyword:
+ * - in UTF8: automatically converted from other charsets when applicable
+ * - strtolowered: "QUErY test!" will return "query test!"
+ * - trimmed: extra spaces before and after are removed
+ *
+ * Lists of supported search engines can be found in /core/DataFiles/SearchEngines.php
+ * The function returns false when a keyword couldn't be found.
+ * eg. if the url is "http://www.google.com/partners.html" this will return false,
+ * as the google keyword parameter couldn't be found.
+ *
+ * @see unit tests in /tests/core/Common.test.php
+ * @param string $referrerUrl URL referrer URL, eg. $_SERVER['HTTP_REFERER']
+ * @return array|bool false if a keyword couldn't be extracted,
+ * or array(
+ * 'name' => 'Google',
+ * 'keywords' => 'my searched keywords')
+ */
+ public function extractInformationFromUrl($referrerUrl)
+ {
+ $referrerParsed = @parse_url($referrerUrl);
+ $referrerHost = '';
+ if (isset($referrerParsed['host'])) {
+ $referrerHost = $referrerParsed['host'];
+ }
+ if (empty($referrerHost)) {
+ return false;
+ }
+ // some search engines (eg. Bing Images) use the same domain
+ // as an existing search engine (eg. Bing), we must also use the url path
+ $referrerPath = '';
+ if (isset($referrerParsed['path'])) {
+ $referrerPath = $referrerParsed['path'];
+ }
+
+ // no search query
+ if (!isset($referrerParsed['query'])) {
+ $referrerParsed['query'] = '';
+ }
+ $query = $referrerParsed['query'];
+
+ // Google Referrers URLs sometimes have the fragment which contains the keyword
+ if (!empty($referrerParsed['fragment'])) {
+ $query .= '&' . $referrerParsed['fragment'];
+ }
+
+ $searchEngines = $this->getSearchEngineDefinitions();
+
+ $hostPattern = UrlHelper::getLossyUrl($referrerHost);
+ /*
+ * Try to get the best matching 'host' in definitions
+ * 1. check if host + path matches an definition
+ * 2. check if host only matches
+ * 3. check if host pattern + path matches
+ * 4. check if host pattern matches
+ * 5. special handling
+ */
+ if (array_key_exists($referrerHost . $referrerPath, $searchEngines)) {
+ $referrerHost = $referrerHost . $referrerPath;
+ } elseif (array_key_exists($referrerHost, $searchEngines)) {
+ // no need to change host
+ } elseif (array_key_exists($hostPattern . $referrerPath, $searchEngines)) {
+ $referrerHost = $hostPattern . $referrerPath;
+ } elseif (array_key_exists($hostPattern, $searchEngines)) {
+ $referrerHost = $hostPattern;
+ } elseif (!array_key_exists($referrerHost, $searchEngines)) {
+ if (!strncmp($query, 'cx=partner-pub-', 15)) {
+ // Google custom search engine
+ $referrerHost = 'google.com/cse';
+ } elseif (!strncmp($referrerPath, '/pemonitorhosted/ws/results/', 28)) {
+ // private-label search powered by InfoSpace Metasearch
+ $referrerHost = 'wsdsold.infospace.com';
+ } elseif (strpos($referrerHost, '.images.search.yahoo.com') != false) {
+ // Yahoo! Images
+ $referrerHost = 'images.search.yahoo.com';
+ } elseif (strpos($referrerHost, '.search.yahoo.com') != false) {
+ // Yahoo!
+ $referrerHost = 'search.yahoo.com';
+ } else {
+ return false;
+ }
+ }
+ $searchEngineName = $searchEngines[$referrerHost]['name'];
+ $variableNames = $this->getParameterNamesByHost($referrerHost);
+
+ $key = null;
+ if ($searchEngineName === 'Google Images'
+ || ($searchEngineName === 'Google' && strpos($referrerUrl, '/imgres') !== false)
+ ) {
+ if (strpos($query, '&prev') !== false) {
+ $query = urldecode(trim(UrlHelper::getParameterFromQueryString($query, 'prev')));
+ $query = str_replace('&', '&amp;', strstr($query, '?'));
+ }
+ $searchEngineName = 'Google Images';
+ } elseif ($searchEngineName === 'Google'
+ && (strpos($query, '&as_') !== false || strpos($query, 'as_') === 0)
+ ) {
+ $keys = array();
+ $key = UrlHelper::getParameterFromQueryString($query, 'as_q');
+ if (!empty($key)) {
+ array_push($keys, $key);
+ }
+ $key = UrlHelper::getParameterFromQueryString($query, 'as_oq');
+ if (!empty($key)) {
+ array_push($keys, str_replace('+', ' OR ', $key));
+ }
+ $key = UrlHelper::getParameterFromQueryString($query, 'as_epq');
+ if (!empty($key)) {
+ array_push($keys, "\"$key\"");
+ }
+ $key = UrlHelper::getParameterFromQueryString($query, 'as_eq');
+ if (!empty($key)) {
+ array_push($keys, "-$key");
+ }
+ $key = trim(urldecode(implode(' ', $keys)));
+ }
+
+ if ($searchEngineName === 'Google') {
+ // top bar menu
+ $tbm = UrlHelper::getParameterFromQueryString($query, 'tbm');
+ switch ($tbm) {
+ case 'isch':
+ $searchEngineName = 'Google Images';
+ break;
+ case 'vid':
+ $searchEngineName = 'Google Video';
+ break;
+ case 'shop':
+ $searchEngineName = 'Google Shopping';
+ break;
+ }
+ }
+
+ if (empty($key)) {
+ foreach ($variableNames as $variableName) {
+ if ($variableName[0] == '/') {
+ // regular expression match
+ if (preg_match($variableName, $referrerUrl, $matches)) {
+ $key = trim(urldecode($matches[1]));
+ break;
+ }
+ } else {
+ // search for keywords now &vname=keyword
+ $key = UrlHelper::getParameterFromQueryString($query, $variableName);
+ $key = trim(urldecode($key));
+
+ // Special cases: empty or no keywords
+ if (empty($key)
+ && (
+ // Google search with no keyword
+ ($searchEngineName == 'Google'
+ && (empty($query) && (empty($referrerPath) || $referrerPath == '/') && empty($referrerParsed['fragment']))
+ )
+
+ // Yahoo search with no keyword
+ || ($searchEngineName == 'Yahoo!'
+ && ($referrerParsed['host'] == 'r.search.yahoo.com')
+ )
+
+ // empty keyword parameter
+ || strpos($query, sprintf('&%s=', $variableName)) !== false
+ || strpos($query, sprintf('?%s=', $variableName)) !== false
+
+ // search engines with no keyword
+ || $searchEngineName == 'Ixquick'
+ || $searchEngineName == 'Google Images'
+ || $searchEngineName == 'DuckDuckGo')
+ ) {
+ $key = false;
+ }
+ if (!empty($key)
+ || $key === false
+ ) {
+ break;
+ }
+ }
+ }
+ }
+
+ // $key === false is the special case "No keyword provided" which is a Search engine match
+ if ($key === null || $key === '') {
+ return false;
+ }
+
+ if (!empty($key)) {
+ $charsets = $this->getCharsetsByHost($referrerHost);
+
+ if (function_exists('iconv')
+ && !empty($charsets)
+ ) {
+ $charset = $charsets[0];
+ if (count($charsets) > 1
+ && function_exists('mb_detect_encoding')
+ ) {
+ $charset = mb_detect_encoding($key, $charsets);
+ if ($charset === false) {
+ $charset = $charsets[0];
+ }
+ }
+
+ $newkey = @iconv($charset, 'UTF-8//IGNORE', $key);
+ if (!empty($newkey)) {
+ $key = $newkey;
+ }
+ }
+
+ $key = Common::mb_strtolower($key);
+ }
+
+ return array(
+ 'name' => $searchEngineName,
+ 'keywords' => $key,
+ );
+ }
+
+}
diff --git a/plugins/Referrers/Tasks.php b/plugins/Referrers/Tasks.php
new file mode 100644
index 0000000000..7481dbdca3
--- /dev/null
+++ b/plugins/Referrers/Tasks.php
@@ -0,0 +1,35 @@
+<?php
+/**
+ * Piwik - free/libre analytics platform
+ *
+ * @link http://piwik.org
+ * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
+ *
+ */
+namespace Piwik\Plugins\Referrers;
+
+
+use Piwik\Http;
+use Piwik\Option;
+
+/**
+ * Scheduled tasks of the Referrers plugin.
+ */
+class Tasks extends \Piwik\Plugin\Tasks
+{
+    /**
+     * Registers the weekly refresh of the search engine definitions.
+     */
+    public function schedule()
+    {
+        $this->weekly('updateSearchEngines');
+        #$this->weekly('updateSocials');
+    }
+
+    /**
+     * Downloads the current search engine definitions and stores them in the option table.
+     *
+     * The stored list is only replaced when the download returned a non-empty string
+     * that yields at least one definition; a failed or empty response must not wipe
+     * the definitions that are already stored.
+     *
+     * NOTE(review): composer.json in the same change pulls the definitions package from
+     * github.com/sgiehl/..., while this task downloads from the piwik/... repository -
+     * confirm both locations provide the same file.
+     *
+     * @see https://github.com/piwik/searchengine-and-social-definitions
+     */
+    public function updateSearchEngines()
+    {
+        $url  = 'https://raw.githubusercontent.com/piwik/searchengine-and-social-definitions/master/SearchEngines.yml';
+        $list = Http::sendHttpRequest($url, 30);
+
+        // checked before parsing, because loadYmlData() also replaces the list held
+        // in memory by the SearchEngine instance
+        if (!is_string($list) || trim($list) === '') {
+            return;
+        }
+
+        $searchEngines = SearchEngine::getInstance()->loadYmlData($list);
+
+        if (empty($searchEngines)) {
+            return;
+        }
+
+        Option::set(SearchEngine::OPTION_STORAGE_NAME, serialize($searchEngines));
+    }
+} \ No newline at end of file
diff --git a/plugins/Referrers/functions.php b/plugins/Referrers/functions.php
index e0fee30833..2a39f8d1d4 100644
--- a/plugins/Referrers/functions.php
+++ b/plugins/Referrers/functions.php
@@ -120,7 +120,7 @@ function getSocialsLogoFromUrl($domain)
*/
function getSearchEngineUrlFromName($name)
{
- $searchEngineNames = Common::getSearchEngineNames();
+ $searchEngineNames = SearchEngine::getInstance()->getSearchEngineNames();
if (isset($searchEngineNames[$name])) {
$url = 'http://' . $searchEngineNames[$name];
} else {
@@ -190,10 +190,10 @@ function getSearchEngineUrlFromUrlAndKeyword($url, $keyword)
if ($keyword === API::LABEL_KEYWORD_NOT_DEFINED) {
return 'http://piwik.org/faq/general/#faq_144';
}
- $searchEngineUrls = Common::getSearchEngineUrls();
+ $searchEngineUrls = SearchEngine::getInstance()->getSearchEngineDefinitions();
$keyword = urlencode($keyword);
$keyword = str_replace(urlencode('+'), urlencode(' '), $keyword);
- $path = @$searchEngineUrls[getSearchEngineHostPathFromUrl($url)][2];
+ $path = @$searchEngineUrls[getSearchEngineHostPathFromUrl($url)]['backlink'];
if (empty($path)) {
return false;
}
diff --git a/plugins/Referrers/tests/Unit/ReferrersTest.php b/plugins/Referrers/tests/Unit/ReferrersTest.php
index 909e6bf65e..22e7dbe216 100644
--- a/plugins/Referrers/tests/Unit/ReferrersTest.php
+++ b/plugins/Referrers/tests/Unit/ReferrersTest.php
@@ -11,20 +11,32 @@ namespace Piwik\Plugins\Referrers\tests;
use Piwik\DataTable;
use Piwik\DataTable\Row;
use Piwik\Period;
+use Piwik\Plugins\Referrers\SearchEngine;
require_once PIWIK_INCLUDE_PATH . '/plugins/Referrers/Referrers.php';
+/**
+ * @group Referererer
+ */
class ReferrersTest extends \PHPUnit_Framework_TestCase
{
+
+    /**
+     * Loads the bundled definition file into the SearchEngine singleton before any
+     * test runs, so the tests do not need the database.
+     */
+    public static function setUpBeforeClass()
+    {
+        $definitionFile = PIWIK_INCLUDE_PATH . SearchEngine::DEFINITION_FILE;
+        SearchEngine::getInstance()->loadYmlData(file_get_contents($definitionFile));
+
+        parent::setUpBeforeClass();
+    }
+
/**
* Dataprovider serving all search engine data
*/
public function getSearchEngines()
{
- include PIWIK_PATH_TEST_TO_ROOT . '/core/DataFiles/SearchEngines.php';
-
$searchEngines = array();
- foreach ($GLOBALS['Piwik_SearchEngines'] as $url => $searchEngine) {
+ foreach (SearchEngine::getInstance()->getSearchEngineDefinitions() as $url => $searchEngine) {
$searchEngines[] = array($url, $searchEngine);
}
return $searchEngines;
@@ -43,10 +55,10 @@ class ReferrersTest extends \PHPUnit_Framework_TestCase
static $searchEngines = array();
$name = parse_url('http://' . $url);
- if (!array_key_exists($searchEngine[0], $searchEngines)) {
- $searchEngines[$searchEngine[0]] = $url;
+ if (!array_key_exists($searchEngine['name'], $searchEngines)) {
+ $searchEngines[$searchEngine['name']] = $url;
- $this->assertTrue(!empty($searchEngine[1]), $name['host']);
+ $this->assertTrue(!empty($searchEngine['params']), $name['host']);
}
}
@@ -66,8 +78,8 @@ class ReferrersTest extends \PHPUnit_Framework_TestCase
static $searchEngines = array();
$name = parse_url('http://' . $url);
- if (!array_key_exists($searchEngine[0], $searchEngines)) {
- $searchEngines[$searchEngine[0]] = $url;
+ if (!array_key_exists($searchEngine['name'], $searchEngines)) {
+ $searchEngines[$searchEngine['name']] = $url;
$this->assertTrue(in_array($name['host'] . '.png', $favicons), $name['host']);
}
@@ -80,11 +92,9 @@ class ReferrersTest extends \PHPUnit_Framework_TestCase
*/
public function testObsoleteSearchEngineIcons()
{
- include PIWIK_PATH_TEST_TO_ROOT . '/core/DataFiles/SearchEngines.php';
-
// Get list of search engines and first appearing URL
$searchEngines = array();
- foreach ($GLOBALS['Piwik_SearchEngines'] as $url => $searchEngine) {
+ foreach (SearchEngine::getInstance()->getSearchEngineDefinitions() as $url => $searchEngine) {
$name = parse_url('http://' . $url);
if (!array_key_exists($name['host'], $searchEngines)) {
$searchEngines[$name['host']] = true;
@@ -142,7 +152,6 @@ class ReferrersTest extends \PHPUnit_Framework_TestCase
*/
public function testGetSearchEngineUrlFromUrlAndKeyword($url, $keyword, $expected)
{
- include PIWIK_PATH_TEST_TO_ROOT . '/core/DataFiles/SearchEngines.php';
$this->assertEquals($expected, \Piwik\Plugins\Referrers\getSearchEngineUrlFromUrlAndKeyword($url, $keyword));
}
diff --git a/plugins/Referrers/tests/Unit/SearchEngineTest.php b/plugins/Referrers/tests/Unit/SearchEngineTest.php
new file mode 100644
index 0000000000..508068feda
--- /dev/null
+++ b/plugins/Referrers/tests/Unit/SearchEngineTest.php
@@ -0,0 +1,81 @@
+<?php
+/**
+ * Piwik - free/libre analytics platform
+ *
+ * @link http://piwik.org
+ * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
+ */
+
+namespace Piwik\Plugins\Referrers\tests;
+
+use Piwik\Plugins\Referrers\SearchEngine;
+use Spyc;
+
+/**
+ * Unit tests for the search engine definitions and the keyword extraction of SearchEngine.
+ *
+ * @group SearchEngine
+ */
+class SearchEngineTest extends \PHPUnit_Framework_TestCase
+{
+    /**
+     * Data provider: test cases read from the YAML fixture; each case supplies the
+     * arguments of testExtractInformationFromUrl().
+     */
+    public function getSearchEngineUrls()
+    {
+        $fixture = PIWIK_PATH_TEST_TO_ROOT .'/tests/resources/extractSearchEngineInformationFromUrlTests.yml';
+
+        return Spyc::YAMLLoad($fixture);
+    }
+
+    /**
+     * Loads the bundled definition file into the SearchEngine singleton, so the tests
+     * do not need the database.
+     */
+    public static function setUpBeforeClass()
+    {
+        $definitionFile = PIWIK_INCLUDE_PATH . SearchEngine::DEFINITION_FILE;
+        SearchEngine::getInstance()->loadYmlData(file_get_contents($definitionFile));
+
+        parent::setUpBeforeClass();
+    }
+
+    /**
+     * @dataProvider getSearchEngineUrls
+     * @group Core
+     */
+    public function testExtractInformationFromUrl($url, $engine, $keywords)
+    {
+        $actual = SearchEngine::getInstance()->extractInformationFromUrl($url);
+
+        // an empty engine in the fixture means the URL must not be detected at all
+        $expected = empty($engine)
+            ? false
+            : array('name' => $engine, 'keywords' => $keywords);
+
+        $this->assertEquals($expected, $actual);
+    }
+
+    /**
+     * Checks every definition: backlinks contain the {k} macro, the first URL of each
+     * engine contains no '{}' and charsets are well formed.
+     */
+    public function testSearchEnginesDefinedCorrectly()
+    {
+        $seenNames   = array();
+        $definitions = SearchEngine::getInstance()->getSearchEngineDefinitions();
+
+        foreach ($definitions as $host => $info) {
+            if (isset($info['backlink']) && $info['backlink'] !== false) {
+                $hasMacro = strrpos($info['backlink'], "{k}") !== false;
+                $this->assertTrue($hasMacro, $host . " search URL is not defined correctly, must contain the macro {k}");
+            }
+
+            if (!array_key_exists($info['name'], $seenNames)) {
+                $seenNames[$info['name']] = true;
+
+                $hasNoPlaceholder = strpos($host, '{}') === false;
+                $this->assertTrue($hasNoPlaceholder, $host . " search URL is the master record and should not contain {}");
+            }
+
+            if (isset($info['charsets']) && $info['charsets'] !== false) {
+                $this->assertCharsetsWellFormed($host, $info['charsets']);
+            }
+        }
+    }
+
+    /**
+     * Asserts that a charsets entry is a non-empty string or array without spaces.
+     */
+    private function assertCharsetsWellFormed($host, $charsets)
+    {
+        $this->assertTrue(is_array($charsets) || is_string($charsets), $host . ' charsets must be either a string or an array');
+
+        if (is_string($charsets)) {
+            $this->assertTrue(trim($charsets) !== '', $host . ' charsets cannot be an empty string');
+            $this->assertTrue(strpos($charsets, ' ') === false, $host . ' charsets cannot contain spaces');
+        }
+
+        if (is_array($charsets)) {
+            $this->assertTrue(count($charsets) > 0, $host . ' charsets cannot be an empty array');
+
+            $serialized = serialize($charsets);
+            $this->assertTrue(strpos($serialized, '""') === false, $host . ' charsets in array cannot be empty stringss');
+            $this->assertTrue(strpos($serialized, ' ') === false, $host . ' charsets in array cannot contain spaces');
+        }
+    }
+} \ No newline at end of file
diff --git a/tests/PHPUnit/Framework/Fixture.php b/tests/PHPUnit/Framework/Fixture.php
index 04c52371da..d66cfdac6e 100644
--- a/tests/PHPUnit/Framework/Fixture.php
+++ b/tests/PHPUnit/Framework/Fixture.php
@@ -243,7 +243,6 @@ class Fixture extends \PHPUnit_Framework_Assert
static::fail("TEST INITIALIZATION FAILED: " . $e->getMessage() . "\n" . $e->getTraceAsString());
}
- include "DataFiles/SearchEngines.php";
include "DataFiles/Socials.php";
include "DataFiles/Providers.php";
diff --git a/tests/PHPUnit/Unit/CommonTest.php b/tests/PHPUnit/Unit/CommonTest.php
index 8aa85ed550..d8817ec3db 100644
--- a/tests/PHPUnit/Unit/CommonTest.php
+++ b/tests/PHPUnit/Unit/CommonTest.php
@@ -465,39 +465,4 @@ class CommonTest extends PHPUnit_Framework_TestCase
{
$this->assertEquals($expected, Common::extractLanguageCodeFromBrowserLanguage($browserLanguage, $validLanguages), "test with {$browserLanguage} failed, expected {$expected}");
}
-
- public function testSearchEnginesDefinedCorrectly()
- {
- include "DataFiles/SearchEngines.php";
-
- $searchEngines = array();
- foreach ($GLOBALS['Piwik_SearchEngines'] as $host => $info) {
- if (isset($info[2]) && $info[2] !== false) {
- $this->assertTrue(strrpos($info[2], "{k}") !== false, $host . " search URL is not defined correctly, must contain the macro {k}");
- }
-
- if (!array_key_exists($info[0], $searchEngines)) {
- $searchEngines[$info[0]] = true;
-
- $this->assertTrue(strpos($host, '{}') === false, $host . " search URL is the master record and should not contain {}");
- }
-
- if (isset($info[3]) && $info[3] !== false) {
- $this->assertTrue(is_array($info[3]) || is_string($info[3]), $host . ' encoding must be either a string or an array');
-
- if (is_string($info[3])) {
- $this->assertTrue(trim($info[3]) !== '', $host . ' encoding cannot be an empty string');
- $this->assertTrue(strpos($info[3], ' ') === false, $host . ' encoding cannot contain spaces');
-
- }
-
- if (is_array($info[3])) {
- $this->assertTrue(count($info[3]) > 0, $host . ' encodings cannot be an empty array');
- $this->assertTrue(strpos(serialize($info[3]), '""') === false, $host . ' encodings in array cannot be empty stringss');
- $this->assertTrue(strpos(serialize($info[3]), ' ') === false, $host . ' encodings in array cannot contain spaces');
- }
- }
- }
- }
-
}
diff --git a/tests/PHPUnit/Unit/UrlHelperTest.php b/tests/PHPUnit/Unit/UrlHelperTest.php
index e972d20d54..cc15d6eeb1 100644
--- a/tests/PHPUnit/Unit/UrlHelperTest.php
+++ b/tests/PHPUnit/Unit/UrlHelperTest.php
@@ -150,32 +150,6 @@ class UrlHelperTest extends \PHPUnit_Framework_TestCase
}
/**
- * Dataprovider for testExtractSearchEngineInformationFromUrl
- */
- public function getSearchEngineUrls()
- {
- return Spyc::YAMLLoad(PIWIK_PATH_TEST_TO_ROOT .'/tests/resources/extractSearchEngineInformationFromUrlTests.yml');
- }
-
- /**
- * @dataProvider getSearchEngineUrls
- * @group Core
- */
- public function testExtractSearchEngineInformationFromUrl($url, $engine, $keywords)
- {
- $this->includeDataFilesForSearchEngineTest();
- $returnedValue = UrlHelper::extractSearchEngineInformationFromUrl($url);
-
- $exptectedValue = false;
-
- if (!empty($engine)) {
- $exptectedValue = array('name' => $engine, 'keywords' => $keywords);
- }
-
- $this->assertEquals($exptectedValue, $returnedValue);
- }
-
- /**
* Dataprovider for testGetLossyUrl
*/
public function getLossyUrls()
@@ -203,11 +177,6 @@ class UrlHelperTest extends \PHPUnit_Framework_TestCase
$this->assertEquals($expected, UrlHelper::getLossyUrl($input));
}
- private function includeDataFilesForSearchEngineTest()
- {
- include "DataFiles/SearchEngines.php";
- }
-
/**
* @group Core
*/