Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/matomo-org/matomo.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
author sgiehl <stefan@piwik.org> 2015-10-31 21:51:52 +0300
committer sgiehl <stefan@piwik.org> 2015-11-19 01:39:25 +0300
commit 2e9aaa9496d38b586d9495c0a39473328630d903 (patch)
tree f19f4813aa8dacc5f3335f86a75188d32cfa22c8 /core/UrlHelper.php
parent 1f2820a442cce72e5d576b50aa3762174ebe3a77 (diff)
moved definitions of search engines to new repo and converted them to better readable yml format
Diffstat (limited to 'core/UrlHelper.php')
-rw-r--r-- core/UrlHelper.php 236
1 files changed, 0 insertions, 236 deletions
diff --git a/core/UrlHelper.php b/core/UrlHelper.php
index 4a0ac0fa0a..66a0e64e25 100644
--- a/core/UrlHelper.php
+++ b/core/UrlHelper.php
@@ -259,242 +259,6 @@ class UrlHelper
}
/**
- * Extracts a keyword from a raw not encoded URL.
- * Will only extract keyword if a known search engine has been detected.
- * Returns the keyword:
- * - in UTF8: automatically converted from other charsets when applicable
- * - strtolowered: "QUErY test!" will return "query test!"
- * - trimmed: extra spaces before and after are removed
- *
- * Lists of supported search engines can be found in /core/DataFiles/SearchEngines.php
- * The function returns false when a keyword couldn't be found.
- * eg. if the url is "http://www.google.com/partners.html" this will return false,
- * as the google keyword parameter couldn't be found.
- *
- * @see unit tests in /tests/core/Common.test.php
- * @param string $referrerUrl URL referrer URL, eg. $_SERVER['HTTP_REFERER']
- * @return array|bool false if a keyword couldn't be extracted,
- * or array(
- * 'name' => 'Google',
- * 'keywords' => 'my searched keywords')
- */
- public static function extractSearchEngineInformationFromUrl($referrerUrl)
- {
- $referrerParsed = @parse_url($referrerUrl);
- $referrerHost = '';
- if (isset($referrerParsed['host'])) {
- $referrerHost = $referrerParsed['host'];
- }
- if (empty($referrerHost)) {
- return false;
- }
- // some search engines (eg. Bing Images) use the same domain
- // as an existing search engine (eg. Bing), we must also use the url path
- $referrerPath = '';
- if (isset($referrerParsed['path'])) {
- $referrerPath = $referrerParsed['path'];
- }
-
- // no search query
- if (!isset($referrerParsed['query'])) {
- $referrerParsed['query'] = '';
- }
- $query = $referrerParsed['query'];
-
- // Google Referrers URLs sometimes have the fragment which contains the keyword
- if (!empty($referrerParsed['fragment'])) {
- $query .= '&' . $referrerParsed['fragment'];
- }
-
- $searchEngines = Common::getSearchEngineUrls();
-
- $hostPattern = self::getLossyUrl($referrerHost);
- /*
- * Try to get the best matching 'host' in definitions
- * 1. check if host + path matches an definition
- * 2. check if host only matches
- * 3. check if host pattern + path matches
- * 4. check if host pattern matches
- * 5. special handling
- */
- if (array_key_exists($referrerHost . $referrerPath, $searchEngines)) {
- $referrerHost = $referrerHost . $referrerPath;
- } elseif (array_key_exists($referrerHost, $searchEngines)) {
- // no need to change host
- } elseif (array_key_exists($hostPattern . $referrerPath, $searchEngines)) {
- $referrerHost = $hostPattern . $referrerPath;
- } elseif (array_key_exists($hostPattern, $searchEngines)) {
- $referrerHost = $hostPattern;
- } elseif (!array_key_exists($referrerHost, $searchEngines)) {
- if (!strncmp($query, 'cx=partner-pub-', 15)) {
- // Google custom search engine
- $referrerHost = 'google.com/cse';
- } elseif (!strncmp($referrerPath, '/pemonitorhosted/ws/results/', 28)) {
- // private-label search powered by InfoSpace Metasearch
- $referrerHost = 'wsdsold.infospace.com';
- } elseif (strpos($referrerHost, '.images.search.yahoo.com') != false) {
- // Yahoo! Images
- $referrerHost = 'images.search.yahoo.com';
- } elseif (strpos($referrerHost, '.search.yahoo.com') != false) {
- // Yahoo!
- $referrerHost = 'search.yahoo.com';
- } else {
- return false;
- }
- }
- $searchEngineName = $searchEngines[$referrerHost][0];
- $variableNames = null;
- if (isset($searchEngines[$referrerHost][1])) {
- $variableNames = $searchEngines[$referrerHost][1];
- }
- if (!$variableNames) {
- $searchEngineNames = Common::getSearchEngineNames();
- $url = $searchEngineNames[$searchEngineName];
- $variableNames = $searchEngines[$url][1];
- }
- if (!is_array($variableNames)) {
- $variableNames = array($variableNames);
- }
-
- $key = null;
- if ($searchEngineName === 'Google Images'
- || ($searchEngineName === 'Google' && strpos($referrerUrl, '/imgres') !== false)
- ) {
- if (strpos($query, '&prev') !== false) {
- $query = urldecode(trim(self::getParameterFromQueryString($query, 'prev')));
- $query = str_replace('&', '&amp;', strstr($query, '?'));
- }
- $searchEngineName = 'Google Images';
- } elseif ($searchEngineName === 'Google'
- && (strpos($query, '&as_') !== false || strpos($query, 'as_') === 0)
- ) {
- $keys = array();
- $key = self::getParameterFromQueryString($query, 'as_q');
- if (!empty($key)) {
- array_push($keys, $key);
- }
- $key = self::getParameterFromQueryString($query, 'as_oq');
- if (!empty($key)) {
- array_push($keys, str_replace('+', ' OR ', $key));
- }
- $key = self::getParameterFromQueryString($query, 'as_epq');
- if (!empty($key)) {
- array_push($keys, "\"$key\"");
- }
- $key = self::getParameterFromQueryString($query, 'as_eq');
- if (!empty($key)) {
- array_push($keys, "-$key");
- }
- $key = trim(urldecode(implode(' ', $keys)));
- }
-
- if ($searchEngineName === 'Google') {
- // top bar menu
- $tbm = self::getParameterFromQueryString($query, 'tbm');
- switch ($tbm) {
- case 'isch':
- $searchEngineName = 'Google Images';
- break;
- case 'vid':
- $searchEngineName = 'Google Video';
- break;
- case 'shop':
- $searchEngineName = 'Google Shopping';
- break;
- }
- }
-
- if (empty($key)) {
- foreach ($variableNames as $variableName) {
- if ($variableName[0] == '/') {
- // regular expression match
- if (preg_match($variableName, $referrerUrl, $matches)) {
- $key = trim(urldecode($matches[1]));
- break;
- }
- } else {
- // search for keywords now &vname=keyword
- $key = self::getParameterFromQueryString($query, $variableName);
- $key = trim(urldecode($key));
-
- // Special cases: empty or no keywords
- if (empty($key)
- && (
- // Google search with no keyword
- ($searchEngineName == 'Google'
- && (empty($query) && (empty($referrerPath) || $referrerPath == '/') && empty($referrerParsed['fragment']))
- )
-
- // Yahoo search with no keyword
- || ($searchEngineName == 'Yahoo!'
- && ($referrerParsed['host'] == 'r.search.yahoo.com')
- )
-
- // empty keyword parameter
- || strpos($query, sprintf('&%s=', $variableName)) !== false
- || strpos($query, sprintf('?%s=', $variableName)) !== false
-
- // search engines with no keyword
- || $searchEngineName == 'Ixquick'
- || $searchEngineName == 'Google Images'
- || $searchEngineName == 'DuckDuckGo')
- ) {
- $key = false;
- }
- if (!empty($key)
- || $key === false
- ) {
- break;
- }
- }
- }
- }
-
- // $key === false is the special case "No keyword provided" which is a Search engine match
- if ($key === null
- || $key === ''
- ) {
- return false;
- }
-
- if (!empty($key)) {
- if (function_exists('iconv')
- && isset($searchEngines[$referrerHost][3])
- ) {
- // accepts string, array, or comma-separated list string in preferred order
- $charsets = $searchEngines[$referrerHost][3];
- if (!is_array($charsets)) {
- $charsets = explode(',', $charsets);
- }
-
- if (!empty($charsets)) {
- $charset = $charsets[0];
- if (count($charsets) > 1
- && function_exists('mb_detect_encoding')
- ) {
- $charset = mb_detect_encoding($key, $charsets);
- if ($charset === false) {
- $charset = $charsets[0];
- }
- }
-
- $newkey = @iconv($charset, 'UTF-8//IGNORE', $key);
- if (!empty($newkey)) {
- $key = $newkey;
- }
- }
- }
-
- $key = Common::mb_strtolower($key);
- }
-
- return array(
- 'name' => $searchEngineName,
- 'keywords' => $key,
- );
- }
-
- /**
* Returns the query part from any valid url and adds additional parameters to the query part if needed.
*
* @param string $url Any url eg `"http://example.com/piwik/?foo=bar"`