Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/matomo-org/matomo.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/Referrers/SearchEngine.php')
-rw-r--r--plugins/Referrers/SearchEngine.php243
1 files changed, 108 insertions, 135 deletions
diff --git a/plugins/Referrers/SearchEngine.php b/plugins/Referrers/SearchEngine.php
index 81c53f8db7..df48d9663a 100644
--- a/plugins/Referrers/SearchEngine.php
+++ b/plugins/Referrers/SearchEngine.php
@@ -7,6 +7,7 @@
*
*/
namespace Piwik\Plugins\Referrers;
+
use Piwik\Cache;
use Piwik\Common;
use Piwik\Option;
@@ -31,35 +32,34 @@ class SearchEngine extends Singleton
*
* @return array Array of ( URL => array( searchEngineName, keywordParameter, path, charset ) )
*/
- public function getSearchEngineDefinitions()
+ public function getDefinitions()
{
- $cache = Cache::getEagerCache();
+ $cache = Cache::getEagerCache();
$cacheId = 'SearchEngine-' . self::OPTION_STORAGE_NAME;
if ($cache->contains($cacheId)) {
$list = $cache->fetch($cacheId);
} else {
- $list = $this->loadSearchEngineDefinitions();
+ $list = $this->loadDefinitions();
$cache->save($cacheId, $list);
}
return $list;
}
- private function loadSearchEngineDefinitions()
+ private function loadDefinitions()
{
if ($this->definitionList === null) {
// Read first from the auto-updated list in database
$list = Option::get(self::OPTION_STORAGE_NAME);
if ($list) {
- $this->definitionList = unserialize($list);
+ $this->definitionList = unserialize(base64_decode($list));
} else {
// Fallback to reading the bundled list
- $yml = file_get_contents(PIWIK_INCLUDE_PATH . self::DEFINITION_FILE);
+ $yml = file_get_contents(PIWIK_INCLUDE_PATH . self::DEFINITION_FILE);
$this->definitionList = $this->loadYmlData($yml);
- Option::set(self::OPTION_STORAGE_NAME, serialize($this->definitionList));
-
+ Option::set(self::OPTION_STORAGE_NAME, base64_encode(serialize($this->definitionList)));
}
}
@@ -92,7 +92,7 @@ class SearchEngine extends Singleton
$searchEngineData = $urlDefinitions;
unset($searchEngineData['urls']);
$searchEngineData['name'] = $name;
- $urlToInfo[$url] = $searchEngineData;
+ $urlToInfo[$url] = $searchEngineData;
}
}
}
@@ -102,18 +102,16 @@ class SearchEngine extends Singleton
/**
* Returns list of search engines by name
*
- * @see core/DataFiles/SearchEngines.php
- *
* @return array Array of ( searchEngineName => URL )
*/
- public function getSearchEngineNames()
+ public function getNames()
{
- $cacheId = 'SearchEngine.getSearchEngineNames';
- $cache = Cache::getTransientCache();
+ $cacheId = 'SearchEngine.getSearchEngineNames';
+ $cache = Cache::getTransientCache();
$nameToUrl = $cache->fetch($cacheId);
if (empty($nameToUrl)) {
- $searchEngines = $this->getSearchEngineDefinitions();
+ $searchEngines = $this->getDefinitions();
$nameToUrl = array();
foreach ($searchEngines as $url => $info) {
@@ -135,7 +133,7 @@ class SearchEngine extends Singleton
*/
public function getDefinitionByHost($host)
{
- $searchEngines = $this->getSearchEngineDefinitions();
+ $searchEngines = $this->getDefinitions();
if (!array_key_exists($host, $searchEngines)) {
return array();
@@ -145,55 +143,6 @@ class SearchEngine extends Singleton
}
/**
- * Returns defined parameters for the given search engine host
- * @param string $host
- * @return array
- */
- public function getParameterNamesByHost($host)
- {
- $definition = $this->getDefinitionByHost($host);
-
- if (empty($definition['params'])) {
- return array();
- }
-
- return $definition['params'];
- }
-
- /**
- * Returns defined backlink for the given search engine host
- * @param string $host
- * @return string|null
- */
- public function getBackLinkPatternByHost($host)
- {
- $definition = $this->getDefinitionByHost($host);
-
- if (empty($definition['backlink'])) {
- return null;
- }
-
- return $definition['backlink'];
- }
-
- /**
- * Returns defined charsets for given search engine host
- *
- * @param string $host
- * @return array
- */
- public function getCharsetsByHost($host)
- {
- $definition = $this->getDefinitionByHost($host);
-
- if (empty($definition['charsets'])) {
- return array();
- }
-
- return $definition['charsets'];
- }
-
- /**
* Extracts a keyword from a raw not encoded URL.
* Will only extract keyword if a known search engine has been detected.
* Returns the keyword:
@@ -201,7 +150,6 @@ class SearchEngine extends Singleton
* - strtolowered: "QUErY test!" will return "query test!"
* - trimmed: extra spaces before and after are removed
*
- * Lists of supported search engines can be found in /core/DataFiles/SearchEngines.php
* The function returns false when a keyword couldn't be found.
* eg. if the url is "http://www.google.com/partners.html" this will return false,
* as the google keyword parameter couldn't be found.
@@ -216,7 +164,7 @@ class SearchEngine extends Singleton
public function extractInformationFromUrl($referrerUrl)
{
$referrerParsed = @parse_url($referrerUrl);
- $referrerHost = '';
+ $referrerHost = '';
if (isset($referrerParsed['host'])) {
$referrerHost = $referrerParsed['host'];
}
@@ -230,55 +178,26 @@ class SearchEngine extends Singleton
$referrerPath = $referrerParsed['path'];
}
- // no search query
- if (!isset($referrerParsed['query'])) {
- $referrerParsed['query'] = '';
+ $query = '';
+ if (isset($referrerParsed['query'])) {
+ $query = $referrerParsed['query'];
}
- $query = $referrerParsed['query'];
// Google Referrers URLs sometimes have the fragment which contains the keyword
if (!empty($referrerParsed['fragment'])) {
$query .= '&' . $referrerParsed['fragment'];
}
- $searchEngines = $this->getSearchEngineDefinitions();
+ $referrerHost = $this->getEngineHostFromUrl($referrerHost, $referrerPath, $query);
- $hostPattern = UrlHelper::getLossyUrl($referrerHost);
- /*
- * Try to get the best matching 'host' in definitions
- * 1. check if host + path matches an definition
- * 2. check if host only matches
- * 3. check if host pattern + path matches
- * 4. check if host pattern matches
- * 5. special handling
- */
- if (array_key_exists($referrerHost . $referrerPath, $searchEngines)) {
- $referrerHost = $referrerHost . $referrerPath;
- } elseif (array_key_exists($referrerHost, $searchEngines)) {
- // no need to change host
- } elseif (array_key_exists($hostPattern . $referrerPath, $searchEngines)) {
- $referrerHost = $hostPattern . $referrerPath;
- } elseif (array_key_exists($hostPattern, $searchEngines)) {
- $referrerHost = $hostPattern;
- } elseif (!array_key_exists($referrerHost, $searchEngines)) {
- if (!strncmp($query, 'cx=partner-pub-', 15)) {
- // Google custom search engine
- $referrerHost = 'google.com/cse';
- } elseif (!strncmp($referrerPath, '/pemonitorhosted/ws/results/', 28)) {
- // private-label search powered by InfoSpace Metasearch
- $referrerHost = 'wsdsold.infospace.com';
- } elseif (strpos($referrerHost, '.images.search.yahoo.com') != false) {
- // Yahoo! Images
- $referrerHost = 'images.search.yahoo.com';
- } elseif (strpos($referrerHost, '.search.yahoo.com') != false) {
- // Yahoo!
- $referrerHost = 'search.yahoo.com';
- } else {
- return false;
- }
+ if (empty($referrerHost)) {
+ return false;
}
- $searchEngineName = $searchEngines[$referrerHost]['name'];
- $variableNames = $this->getParameterNamesByHost($referrerHost);
+
+ $definitions = $this->getDefinitionByHost($referrerHost);
+
+ $searchEngineName = $definitions['name'];
+ $variableNames = $definitions['params'];
$key = null;
if ($searchEngineName === 'Google Images'
@@ -293,7 +212,7 @@ class SearchEngine extends Singleton
&& (strpos($query, '&as_') !== false || strpos($query, 'as_') === 0)
) {
$keys = array();
- $key = UrlHelper::getParameterFromQueryString($query, 'as_q');
+ $key = UrlHelper::getParameterFromQueryString($query, 'as_q');
if (!empty($key)) {
array_push($keys, $key);
}
@@ -380,36 +299,90 @@ class SearchEngine extends Singleton
}
if (!empty($key)) {
- $charsets = $this->getCharsetsByHost($referrerHost);
-
- if (function_exists('iconv')
- && !empty($charsets)
- ) {
- $charset = $charsets[0];
- if (count($charsets) > 1
- && function_exists('mb_detect_encoding')
- ) {
- $charset = mb_detect_encoding($key, $charsets);
- if ($charset === false) {
- $charset = $charsets[0];
- }
- }
-
- $newkey = @iconv($charset, 'UTF-8//IGNORE', $key);
- if (!empty($newkey)) {
- $key = $newkey;
- }
+ if (!empty($definitions['charsets'])) {
+ $key = $this->convertCharset($key, $definitions['charsets']);
}
-
$key = Common::mb_strtolower($key);
}
return array(
- 'name' => $searchEngineName,
+ 'name' => $searchEngineName,
'keywords' => $key,
);
}
+ protected function getEngineHostFromUrl($host, $path, $query)
+ {
+ $searchEngines = $this->getDefinitions();
+
+ $hostPattern = UrlHelper::getLossyUrl($host);
+ /*
+ * Try to get the best matching 'host' in definitions
+ * 1. check if host + path matches an definition
+ * 2. check if host only matches
+ * 3. check if host pattern + path matches
+ * 4. check if host pattern matches
+ * 5. special handling
+ */
+ if (array_key_exists($host . $path, $searchEngines)) {
+ $host = $host . $path;
+ } elseif (array_key_exists($host, $searchEngines)) {
+ // no need to change host
+ } elseif (array_key_exists($hostPattern . $path, $searchEngines)) {
+ $host = $hostPattern . $path;
+ } elseif (array_key_exists($hostPattern, $searchEngines)) {
+ $host = $hostPattern;
+ } elseif (!array_key_exists($host, $searchEngines)) {
+ if (!strncmp($query, 'cx=partner-pub-', 15)) {
+ // Google custom search engine
+ $host = 'google.com/cse';
+ } elseif (!strncmp($path, '/pemonitorhosted/ws/results/', 28)) {
+ // private-label search powered by InfoSpace Metasearch
+ $host = 'wsdsold.infospace.com';
+ } elseif (strpos($host, '.images.search.yahoo.com') != false) {
+ // Yahoo! Images
+ $host = 'images.search.yahoo.com';
+ } elseif (strpos($host, '.search.yahoo.com') != false) {
+ // Yahoo!
+ $host = 'search.yahoo.com';
+ } else {
+ return false;
+ }
+ }
+
+ return $host;
+ }
+
+ /**
+ * Tries to convert the given string from one of the given charsets to UTF-8
+ * @param string $string
+ * @param array $charsets
+ * @return string
+ */
+ protected function convertCharset($string, $charsets)
+ {
+ if (function_exists('iconv')
+ && !empty($charsets)
+ ) {
+ $charset = $charsets[0];
+ if (count($charsets) > 1
+ && function_exists('mb_detect_encoding')
+ ) {
+ $charset = mb_detect_encoding($string, $charsets);
+ if ($charset === false) {
+ $charset = $charsets[0];
+ }
+ }
+
+ $newKey = @iconv($charset, 'UTF-8//IGNORE', $string);
+ if (!empty($newKey)) {
+ $string = $newKey;
+ }
+ }
+
+ return $string;
+ }
+
/**
* Return search engine URL by name
*
@@ -420,7 +393,7 @@ class SearchEngine extends Singleton
*/
public function getUrlFromName($name)
{
- $searchEngineNames = $this->getSearchEngineNames();
+ $searchEngineNames = $this->getNames();
if (isset($searchEngineNames[$name])) {
$url = 'http://' . $searchEngineNames[$name];
} else {
@@ -456,7 +429,7 @@ class SearchEngine extends Singleton
*/
public function getLogoFromUrl($url)
{
- $pathInPiwik = 'plugins/Referrers/images/searchEngines/%s.png';
+ $pathInPiwik = 'plugins/Referrers/images/searchEngines/%s.png';
$pathWithCode = sprintf($pathInPiwik, $this->getHostFromUrl($url));
$absolutePath = PIWIK_INCLUDE_PATH . '/' . $pathWithCode;
if (file_exists($absolutePath)) {
@@ -481,12 +454,12 @@ class SearchEngine extends Singleton
}
$keyword = urlencode($keyword);
$keyword = str_replace(urlencode('+'), urlencode(' '), $keyword);
- $host = substr($url, strpos($url, '//') + 2);
- $path = SearchEngine::getInstance()->getBackLinkPatternByHost($host);
- if (empty($path)) {
+ $host = substr($url, strpos($url, '//') + 2);
+ $definition = $this->getDefinitionByHost($host);
+ if (empty($definition['backlink'])) {
return false;
}
- $path = str_replace("{k}", $keyword, $path);
+ $path = str_replace("{k}", $keyword, $definition['backlink']);
return $url . (substr($url, -1) != '/' ? '/' : '') . $path;
}
}