Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/matomo-org/matomo.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorsgiehl <stefan@piwik.org>2016-05-06 21:49:23 +0300
committersgiehl <stefan@piwik.org>2016-07-13 20:22:14 +0300
commit10e8f5ebdf39180bb19636961f52ffc45a395076 (patch)
tree046689af51f8171d6ab1902a23a6c0f53e3a79ac
parent7e5d7148e91514a192c11ea2b5a94097de63b557 (diff)
Improve detection of empty keywords by allowing them to be declared in search engine definitions
-rw-r--r--plugins/Referrers/SearchEngine.php49
-rw-r--r--plugins/Referrers/images/searchEngines/search.disconnect.me.pngbin0 -> 505 bytes
-rw-r--r--plugins/Referrers/images/searchEngines/search.lookseek.com.pngbin0 -> 991 bytes
-rw-r--r--plugins/Referrers/images/searchEngines/search.smartshopping.com.pngbin0 -> 576 bytes
-rw-r--r--plugins/Referrers/images/searchEngines/searchlock.com.pngbin0 -> 3295 bytes
-rw-r--r--plugins/Referrers/images/searchEngines/startpage.com.pngbin0 -> 738 bytes
-rw-r--r--plugins/Referrers/images/searchEngines/www.only-search.com.pngbin0 -> 789 bytes
-rw-r--r--plugins/Referrers/tests/Unit/SearchEngineTest.php2
-rw-r--r--tests/resources/extractSearchEngineInformationFromUrlTests.yml79
9 files changed, 74 insertions, 56 deletions
diff --git a/plugins/Referrers/SearchEngine.php b/plugins/Referrers/SearchEngine.php
index 6b8cea6eb5..e2970c61ad 100644
--- a/plugins/Referrers/SearchEngine.php
+++ b/plugins/Referrers/SearchEngine.php
@@ -224,11 +224,10 @@ class SearchEngine extends Singleton
$searchEngineName = $definitions['name'];
$variableNames = $definitions['params'];
+ $keywordsHiddenFor = !empty($definitions['hiddenkeyword']) ? $definitions['hiddenkeyword'] : array();
$key = null;
- if ($searchEngineName === 'Google Images'
- || ($searchEngineName === 'Google' && strpos($referrerUrl, '/imgres') !== false)
- ) {
+ if ($searchEngineName === 'Google Images') {
if (strpos($query, '&prev') !== false) {
$query = urldecode(trim(UrlHelper::getParameterFromQueryString($query, 'prev')));
$query = str_replace('&', '&amp;', strstr($query, '?'));
@@ -286,27 +285,13 @@ class SearchEngine extends Singleton
$key = UrlHelper::getParameterFromQueryString($query, $variableName);
$key = trim(urldecode($key));
- // Special cases: empty or no keywords
+ // Special cases: empty keywords
if (empty($key)
&& (
- // Google / Yahoo search with no keyword
- (($searchEngineName == 'Google' || $searchEngineName == 'Yahoo!' || $searchEngineName == 'Yahoo! Japan')
- && (empty($query) && (empty($referrerPath) || $referrerPath == '/' || $referrerPath == '/search') && empty($referrerParsed['fragment']))
- )
-
- // Yahoo search with no keyword
- || ($searchEngineName == 'Yahoo!'
- && ($referrerParsed['host'] == 'r.search.yahoo.com')
- )
-
// empty keyword parameter
- || strpos($query, sprintf('&%s=', $variableName)) !== false
+ strpos($query, sprintf('&%s=', $variableName)) !== false
|| strpos($query, sprintf('?%s=', $variableName)) !== false
-
- // search engines with no keyword
- || $searchEngineName == 'Ixquick'
- || $searchEngineName == 'Google Images'
- || $searchEngineName == 'DuckDuckGo')
+ )
) {
$key = false;
}
@@ -319,6 +304,30 @@ class SearchEngine extends Singleton
}
}
+ // if no keyword found, but empty keywords are allowed
+ if (!empty($keywordsHiddenFor) && ($key === null || $key === '')) {
+
+ $pathWithQueryAndFragment = $referrerPath;
+ if (!empty($query)) {
+ $pathWithQueryAndFragment .= '?'.$query;
+ }
+ if (!empty($referrerParsed['fragment'])) {
+ $pathWithQueryAndFragment .= '#'.$referrerParsed['fragment'];
+ }
+
+ foreach ($keywordsHiddenFor as $path) {
+ if (strlen($path) > 1 && substr($path, 0, 1) == '/' && substr($path, -1, 1) == '/') {
+ if (preg_match($path, $pathWithQueryAndFragment)) {
+ $key = false;
+ break;
+ }
+ } elseif ($path == $pathWithQueryAndFragment) {
+ $key = false;
+ break;
+ }
+ }
+ }
+
// $key === false is the special case "No keyword provided" which is a Search engine match
if ($key === null || $key === '') {
return false;
diff --git a/plugins/Referrers/images/searchEngines/search.disconnect.me.png b/plugins/Referrers/images/searchEngines/search.disconnect.me.png
new file mode 100644
index 0000000000..54b0eac351
--- /dev/null
+++ b/plugins/Referrers/images/searchEngines/search.disconnect.me.png
Binary files differ
diff --git a/plugins/Referrers/images/searchEngines/search.lookseek.com.png b/plugins/Referrers/images/searchEngines/search.lookseek.com.png
new file mode 100644
index 0000000000..ac1ff37a17
--- /dev/null
+++ b/plugins/Referrers/images/searchEngines/search.lookseek.com.png
Binary files differ
diff --git a/plugins/Referrers/images/searchEngines/search.smartshopping.com.png b/plugins/Referrers/images/searchEngines/search.smartshopping.com.png
new file mode 100644
index 0000000000..320ede63d5
--- /dev/null
+++ b/plugins/Referrers/images/searchEngines/search.smartshopping.com.png
Binary files differ
diff --git a/plugins/Referrers/images/searchEngines/searchlock.com.png b/plugins/Referrers/images/searchEngines/searchlock.com.png
new file mode 100644
index 0000000000..54f98183bc
--- /dev/null
+++ b/plugins/Referrers/images/searchEngines/searchlock.com.png
Binary files differ
diff --git a/plugins/Referrers/images/searchEngines/startpage.com.png b/plugins/Referrers/images/searchEngines/startpage.com.png
new file mode 100644
index 0000000000..916df91599
--- /dev/null
+++ b/plugins/Referrers/images/searchEngines/startpage.com.png
Binary files differ
diff --git a/plugins/Referrers/images/searchEngines/www.only-search.com.png b/plugins/Referrers/images/searchEngines/www.only-search.com.png
new file mode 100644
index 0000000000..543c95d88e
--- /dev/null
+++ b/plugins/Referrers/images/searchEngines/www.only-search.com.png
Binary files differ
diff --git a/plugins/Referrers/tests/Unit/SearchEngineTest.php b/plugins/Referrers/tests/Unit/SearchEngineTest.php
index e9f0c926ea..1b5cab4dbc 100644
--- a/plugins/Referrers/tests/Unit/SearchEngineTest.php
+++ b/plugins/Referrers/tests/Unit/SearchEngineTest.php
@@ -121,7 +121,7 @@ class SearchEngineTest extends \PHPUnit_Framework_TestCase
public function testMissingSearchEngineKeyword($url, $searchEngine)
{
$name = parse_url('http://' . $url);
- $this->assertTrue(!empty($searchEngine['params']), $name['host']);
+ $this->assertTrue(!empty($searchEngine['params']) || !empty($searchEngine['hiddenkeyword']), $name['host']);
}
/**
diff --git a/tests/resources/extractSearchEngineInformationFromUrlTests.yml b/tests/resources/extractSearchEngineInformationFromUrlTests.yml
index 6043e0d32f..c300e768bc 100644
--- a/tests/resources/extractSearchEngineInformationFromUrlTests.yml
+++ b/tests/resources/extractSearchEngineInformationFromUrlTests.yml
@@ -284,25 +284,6 @@
engine: 'Baidu'
keywords: 'test3'
-# Google SSL hidden keyword not defined
-- url: 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&ved=0CC&url=http%3A%2F%2Fpiwik.org%2F&ei=&usg='
- engine: 'Google'
- keywords: false
-
-# Yet another change http://googlewebmastercentral.blogspot.ca/2012/03/upcoming-changes-in-googles-http.html
-- url: 'https://www.google.com/'
- engine: 'Google'
- keywords: false
-
-- url: 'https://www.google.co.uk/'
- engine: 'Google'
- keywords: false
-
-# without trailing slash
-- url: 'https://www.google.co.uk'
- engine: 'Google'
- keywords: false
-
- url: 'http://search.naver.com/search.naver?where=nexearch&query=FAU+&x=0&y=0&sm=top_hty&fbm=1&ie=utf8'
engine: 'Naver'
keywords: 'fau'
@@ -320,21 +301,6 @@
engine: 'Daum'
keywords: '검색 질문형 검색어 결혼 후 걱정 1위'
-# DDG
-- url: 'http://duckduckgo.com/post.html'
- engine: 'DuckDuckGo'
- keywords: false
-
-# Google images no keyword
-- url: 'http://www.google.com/imgres?hl=en&client=ubuntu&hs=xDb&sa=X&channel=fs&biw=1920&bih=1084&tbm=isch&prmd=imvns&tbnid=5i7iz7u4LPSSrM:&imgrefurl=http://helloworld/trac/wiki/HowToSetupDevelopmentEnvironmentWindows&docid=tWN9OesMyOTqsM&imgurl=http://helloworld.org/trac/raw-attachment/wiki/HowToSetupDevelopmentEnvironmentWindows/eclipse-preview.jpg&w=1000&h=627&ei=pURoT67BEdT74QTUzYiSCQ&zoom=1&iact=hc&vpx=1379&vpy=548&dur=513&hovh=178&hovw=284&tx=134&ty=105&sig=108396332168858896950&page=1&tbnh=142&tbnw=227&start=0&ndsp=37&ved=1t:429,r:5,s:0'
- engine: 'Google Images'
- keywords: false
-
-# Google images no keyword next try
-- url: 'http://www.google.fr/imgres?hl=en&biw=1680&bih=925&gbv=2&tbm=isch&tbnid=kBma1eg8aVOKoM:&imgrefurl=http://www.squido.com/research-keywords&docid=YSY3GQh3O8dkjM&imgurl=http://i3.squidocdn.com/resize/squidoo_images/590/draft_lens10233921module148408128photo_1298307262Research_keywords_6.jpg&w=590&h=412&ei=_OVZT4_3EInQ8gOWuqXbDg&zoom=1&iact=hc&vpx=164&vpy=205&dur=33&hovh=188&hovw=269&tx=137&ty=89&sig=113944581904793140725&page=1&tbnh=109&tbnw=156&start=0&ndsp=42&ved=1t:429,r:0,s:0www.google.fr/imgres?hl=en&biw=1680&bih=925&gbv=2&tbm=isch&tbnid=kBma1eg8aVOKoM:&imgrefurl=http://www.squido.com/research-keywords&docid=YSY3GQh3O8dkjM&imgurl=http://i3.squidocdn.com/resize/squidoo_images/590/draft_lens10233921module148408128photo_1298307262Research_keywords_6.jpg&w=590&h=412&ei=_OVZT4_3EInQ8gOWuqXbDg&zoom=1&iact=hc&vpx=164&vpy=205&dur=33&hovh=188&hovw=269&tx=137&ty=89&sig=113944581904793140725&page=1&tbnh=109&tbnw=156&start=0&ndsp=42&ved=1t:429,r:0,s:0'
- engine: 'Google Images'
- keywords: false
-
# conduit.com
- url: 'http://images.search.conduit.com/ImagePreview/?q=test+5&ctid=CT2431245&SearchSource=13&PageSource=HomePage&start=105&pos=33'
engine: 'Conduit.com'
@@ -484,15 +450,58 @@
engine: 'Sputnik'
keywords: 'микро'
-# No keyword
- url: 'http://yandex.ru/clck/jsredir?from=yandex.ru;yandsearch;web;;&text=&etext=385.4_2Hh6u_q9NEfpXpGpdVughcGncWYG-_kwHJA-7QxQ8v4xvt5Q2aAB7TvvUxHLtacHMltCoYGQFFmIdiXaIT-_yiHqjEJoZKVdHIXJylYsQ5TJuxRtqCDA0zUi_xlatVD6kx219rIP4Q4a7j9E7-2U88ZpCZwGXuhRws6LASTZUIJRfiPbVdxjIn2Qu5bCtcGKIQBqGa567Czx019cxaPvNAWQQ_8MIJjUgFHzg2vO_XvlSMmKOcooZNX5UzqgJAnaioMW7884jsYEKwXebrij39unXyWKnLXDX15607fkXqQFGIC_tp8zvjXq0ynizqcdQcfkHnZG-zxxPqCoALAWj47hwRCZtLGinfqMatmzFWG7Yo7eWxScEHyMI2J89OU2ZjpuHog0VyZpSb3hN17-CdHWEeN_ii1mLG_J24ftGMEpbWOeH-M3fZeAtCzmq0XUFchFAbVvm9Xmk8I2M-4A.66cd118e1c9292f7ec030c8580f6912eae4ac700&uuid=&state=AiuY0DBWFJ4ePaEse6rgeAjgs2pI3DW99KUdgowt9XsGes-COYeAtjuEaMUoBSHP2gxXC4630Mz4aEvXYUCXRTGAgAQwM7IGD-gsizkhSmBNCEfle91ZI3guOwMFOli3aeHzkqoQeuyYhvz_XwXodFz8gB8yMp6IgAL52sHwR5edKVNpZtbPIFNbLDRYIxJbYQciYGnLnCw_i584OfCtQO-zjBBGMlwoQFtGet-Xvmw&data=UlNrNmk5WktYejR0eWJFYk1LdmtxdE5aS05CUWU0alhkSkF1MEpOb0Jrc0dpbmNsUGhaVjljRWt6R0VackFURk5sM1psNlVKMWh6djhYazhRT1psQTdHamFGSFJacDFhQjdfbHJQU05jeDJMRHV0MTJmRG53Zw&b64e=2&sign=9072743a841f27dd5e766c4b57fa5138&keyno=0&l10n=ru'
engine: 'Yandex'
keywords: ''
+##########################################
+# No keyword tests
- url: 'http://r.search.yahoo.com/_ylt=A9mSs2YFMt1ThykAAaOA3YlQ;_ylu=X3oDMTBydWpobjZlBHNlYwNzcgRwb3MDMQRjb2xvA2lyMgR2dGlkAw--/RV=2/RE=1407033989/RO=10/RU=http://www.something.com//RK=0/RS=YOw3nEcdnM8kysqLyl4DzpAHnDo-'
engine: 'Yahoo!'
keywords: false
+- url: 'https://search.disconnect.me/searchTerms/serp?search=da616d2a-c376-4469-84be-8f38e4573e32'
+ engine: 'DisconnectSearch'
+ keywords: false
+
+- url: 'https://ixquick.com/do/asearch'
+ engine: 'Ixquick'
+ keywords: false
+
+# DuckDuckGo never provides a keyword
+- url: 'http://duckduckgo.com/post.html'
+ engine: 'DuckDuckGo'
+ keywords: false
+
+## Google Images provides none
+- url: 'http://www.google.com/imgres?hl=en&client=ubuntu&hs=xDb&sa=X&channel=fs&biw=1920&bih=1084&tbm=isch&prmd=imvns&tbnid=5i7iz7u4LPSSrM:&imgrefurl=http://helloworld/trac/wiki/HowToSetupDevelopmentEnvironmentWindows&docid=tWN9OesMyOTqsM&imgurl=http://helloworld.org/trac/raw-attachment/wiki/HowToSetupDevelopmentEnvironmentWindows/eclipse-preview.jpg&w=1000&h=627&ei=pURoT67BEdT74QTUzYiSCQ&zoom=1&iact=hc&vpx=1379&vpy=548&dur=513&hovh=178&hovw=284&tx=134&ty=105&sig=108396332168858896950&page=1&tbnh=142&tbnw=227&start=0&ndsp=37&ved=1t:429,r:5,s:0'
+ engine: 'Google Images'
+ keywords: false
+
+- url: 'http://www.google.fr/imgres?hl=en&biw=1680&bih=925&gbv=2&tbm=isch&tbnid=kBma1eg8aVOKoM:&imgrefurl=http://www.squido.com/research-keywords&docid=YSY3GQh3O8dkjM&imgurl=http://i3.squidocdn.com/resize/squidoo_images/590/draft_lens10233921module148408128photo_1298307262Research_keywords_6.jpg&w=590&h=412&ei=_OVZT4_3EInQ8gOWuqXbDg&zoom=1&iact=hc&vpx=164&vpy=205&dur=33&hovh=188&hovw=269&tx=137&ty=89&sig=113944581904793140725&page=1&tbnh=109&tbnw=156&start=0&ndsp=42&ved=1t:429,r:0,s:0www.google.fr/imgres?hl=en&biw=1680&bih=925&gbv=2&tbm=isch&tbnid=kBma1eg8aVOKoM:&imgrefurl=http://www.squido.com/research-keywords&docid=YSY3GQh3O8dkjM&imgurl=http://i3.squidocdn.com/resize/squidoo_images/590/draft_lens10233921module148408128photo_1298307262Research_keywords_6.jpg&w=590&h=412&ei=_OVZT4_3EInQ8gOWuqXbDg&zoom=1&iact=hc&vpx=164&vpy=205&dur=33&hovh=188&hovw=269&tx=137&ty=89&sig=113944581904793140725&page=1&tbnh=109&tbnw=156&start=0&ndsp=42&ved=1t:429,r:0,s:0'
+ engine: 'Google Images'
+ keywords: false
+
+# Google SSL hidden keyword not defined
+- url: 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&ved=0CC&url=http%3A%2F%2Fpiwik.org%2F&ei=&usg='
+ engine: 'Google'
+ keywords: false
+
+# Yet another change http://googlewebmastercentral.blogspot.ca/2012/03/upcoming-changes-in-googles-http.html
+- url: 'https://www.google.com/'
+ engine: 'Google'
+ keywords: false
+
+- url: 'https://www.google.co.uk/'
+ engine: 'Google'
+ keywords: false
+
+# without trailing slash
+- url: 'https://www.google.co.uk'
+ engine: 'Google'
+ keywords: false
+
+
# No search
- url: 'http://googleads.g.doubleclick.net/pagead/ads?client=ca-pub-x&output=html&h=15&slotname=2973049897&adk=3777420323&w=728&lmt=1381755030&flash=11.9.900.117&url=http%3A%2F%2Fexample.com%2F&dt=1381755030169&bpp=8&bdt=2592&shv=r20131008&cbv=r20130906&saldr=sa&correlator=1381755030200&frm=20&ga_vid=1659309719.1381755030&ga_sid=1381755030&ga_hid=1569070879&ga_fc=0&u_tz=660&u_his=3&u_java=1&u_h=768&u_w=1366&u_ah=728&u_aw=1366&u_cd=24&u_nplug=0&u_nmime=0&dff=times%20new%20roman&dfs=13&adx=311&ady=107&biw=1349&bih=673&oid=2&ref=http%3A%2F%2Fwww.google.com.au%2Furl%3Fsa%3Dt%26rct%3Dj%26q%3D%26esrc%3Ds%26frm%3D1%26source%3Dweb%26cd%3D10%26ved%3D0CGcQFjAJ%26url%3Dhttp%253A%252F%252Fexample.com%252F%26ei%3DXNtbUvrJPKXOiAfw1IH4Bw%26usg%3DAFQjCNE66zRf2zaUw8FKf0JWxiM1FiXHVg&vis=1&fu=0&ifi=1&pfi=32&dtd=122&xpc=tBekiCZTWM&p=http%3A//example.com&rl_rc=true&adsense_enabled=true&ad_type=text_image&oe=utf8&height=15&width=728&format=fp_al_lp&kw_type=radlink&prev_fmts=728x15_0ads_al&rt=ChBSW-iYAADltAqmmOfZAA2SEg1BbmltYXRlZCBUZXh0Ggj019wBciBqgSgBUhMI8OHhzq6WugIVhJOmCh2NYQBO&hl=en&kw0=Animated+Text&kw1=Animated+GIF&kw2=Animated+Graphics&kw3=Fonts&okw=Animated+Text'
engine: