diff options
11 files changed, 45 insertions, 16 deletions
diff --git a/core/Tracker/VisitExcluded.php b/core/Tracker/VisitExcluded.php index 243618bc51..8d412cef26 100644 --- a/core/Tracker/VisitExcluded.php +++ b/core/Tracker/VisitExcluded.php @@ -349,12 +349,13 @@ class VisitExcluded * Returns true if the specified user agent should be excluded for the current site or not. * * Visits whose user agent string contains one of the excluded_user_agents strings for the - * site being tracked (or one of the global strings) will be excluded. + * site being tracked (or one of the global strings) will be excluded. Regular expressions + * are also supported. * * @internal param string $this ->userAgent The user agent string. * @return bool */ - protected function isUserAgentExcluded() + protected function isUserAgentExcluded(): bool { $excludedAgents = $this->getAttributes('excluded_user_agents', 'global_excluded_user_agents'); @@ -364,6 +365,10 @@ class VisitExcluded if (stripos($this->userAgent, $excludedUserAgent) !== false) { return true; } + // if the string is a valid regex and the user agent matches it, this visit should be excluded; a non-matching regex must not stop the remaining entries from being checked + if (@preg_match($excludedUserAgent, '') !== false && preg_match($excludedUserAgent, $this->userAgent)) { + return 
true; + } } } diff --git a/plugins/IntranetMeasurable/tests/UI/expected-screenshots/IntranetMeasurable_intranet_create.png b/plugins/IntranetMeasurable/tests/UI/expected-screenshots/IntranetMeasurable_intranet_create.png index 946407d4c8..060b6eca41 100644 --- a/plugins/IntranetMeasurable/tests/UI/expected-screenshots/IntranetMeasurable_intranet_create.png +++ b/plugins/IntranetMeasurable/tests/UI/expected-screenshots/IntranetMeasurable_intranet_create.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7e4ac1b4f6062ecf0c3f675b58b859d8d1caef1b8eabb8214d4d8a325e00697e -size 295334 +oid sha256:f96fc9c2c0aaec0dd0264b6c24e09c40da7d2b3b17b218584b8e8bdf1d109e0e +size 301800 diff --git a/plugins/SitesManager/SitesManager.php b/plugins/SitesManager/SitesManager.php index fbf6af7278..8a40bc4a98 100644 --- a/plugins/SitesManager/SitesManager.php +++ b/plugins/SitesManager/SitesManager.php @@ -385,6 +385,7 @@ class SitesManager extends \Piwik\Plugin $translationKeys[] = "SitesManager_GlobalExcludedUserAgentHelp1"; $translationKeys[] = "SitesManager_GlobalListExcludedUserAgents_Desc"; $translationKeys[] = "SitesManager_GlobalExcludedUserAgentHelp2"; + $translationKeys[] = "SitesManager_GlobalExcludedUserAgentHelp3"; $translationKeys[] = "SitesManager_WebsitesManagement"; $translationKeys[] = "SitesManager_MainDescription"; $translationKeys[] = "SitesManager_YouCurrentlyHaveAccessToNWebsites"; diff --git a/plugins/SitesManager/lang/en.json b/plugins/SitesManager/lang/en.json index 3dd108c0b8..08b7131082 100644 --- a/plugins/SitesManager/lang/en.json +++ b/plugins/SitesManager/lang/en.json @@ -28,6 +28,7 @@ "Format_Utc": "UTC%s", "GlobalExcludedUserAgentHelp1": "Enter the list of user agents to exclude from being tracked by Matomo.", "GlobalExcludedUserAgentHelp2": "You can use this to exclude some bots from being tracked.", + "GlobalExcludedUserAgentHelp3": "Regular expressions such as %s are supported.", "GlobalListExcludedIps": "Global list of 
Excluded IPs", "GlobalListExcludedQueryParameters": "Global list of Query URL parameters to exclude", "GlobalListExcludedUserAgents": "Global list of user agents to exclude", @@ -46,7 +47,7 @@ "KeepURLFragmentsLong": "Keep Page URL fragments when tracking Page URLs", "ListOfIpsToBeExcludedOnAllWebsites": "The IPs below will be excluded from being tracked on all websites.", "ListOfQueryParametersToBeExcludedOnAllWebsites": "The Query URLs parameters below will be excluded from URLs on all websites.", - "ListOfQueryParametersToExclude": "Enter the list of URL Query Parameters, one per line, to exclude from the Page URLs reports. Regular expressions such as %s are suported.", + "ListOfQueryParametersToExclude": "Enter the list of URL Query Parameters, one per line, to exclude from the Page URLs reports. Regular expressions such as %s are supported.", "LogAnalytics": "Log Analytics", "LogAnalyticsDescription": "If the Javascript tracking method isn’t feasible, you can %1$suse server log analytics%2$s as an alternative method for tracking your website’s users.", "MainDescription": "Your Web Analytics reports need Websites! 
Add, update, delete Websites, and show the JavaScript to insert in your pages.", diff --git a/plugins/SitesManager/templates/help/excluded-user-agents-help.html b/plugins/SitesManager/templates/help/excluded-user-agents-help.html index 52c49a17cc..ef2fa2aad4 100644 --- a/plugins/SitesManager/templates/help/excluded-user-agents-help.html +++ b/plugins/SitesManager/templates/help/excluded-user-agents-help.html @@ -4,4 +4,5 @@ <br/><br/> {{ 'SitesManager_GlobalListExcludedUserAgents_Desc'|translate }} {{ 'SitesManager_GlobalExcludedUserAgentHelp2'|translate }} + {{ 'SitesManager_GlobalExcludedUserAgentHelp3'|translate:'/bot|spider|crawl|scanner/i' }} </div> diff --git a/plugins/SitesManager/tests/System/expected/test_SitesManager__SitesManager.getSiteSettings.xml b/plugins/SitesManager/tests/System/expected/test_SitesManager__SitesManager.getSiteSettings.xml index 0fd12d6bff..624774cb7d 100644 --- a/plugins/SitesManager/tests/System/expected/test_SitesManager__SitesManager.getSiteSettings.xml +++ b/plugins/SitesManager/tests/System/expected/test_SitesManager__SitesManager.getSiteSettings.xml @@ -99,7 +99,7 @@ https://www.example.org/</placeholder> </uiControlAttributes> <availableValues /> <description /> - <inlineHelp>Enter the list of URL Query Parameters, one per line, to exclude from the Page URLs reports. Regular expressions such as /^sess.*|.*[dD]ate$/ are suported.<br /><br />Matomo will automatically exclude the common session parameters (phpsessid, sessionid, ...).</inlineHelp> + <inlineHelp>Enter the list of URL Query Parameters, one per line, to exclude from the Page URLs reports. 
Regular expressions such as /^sess.*|.*[dD]ate$/ are supported.<br /><br />Matomo will automatically exclude the common session parameters (phpsessid, sessionid, ...).</inlineHelp> <templateFile /> <introduction /> <condition /> @@ -119,7 +119,7 @@ https://www.example.org/</placeholder> </uiControlAttributes> <availableValues /> <description /> - <inlineHelp>Enter the list of user agents to exclude from being tracked by Matomo.<br /><br />If the visitor's user agent string contains any of the strings you specify, the visitor will be excluded from Matomo.<br />You can use this to exclude some bots from being tracked.</inlineHelp> + <inlineHelp>Enter the list of user agents to exclude from being tracked by Matomo.<br /><br />If the visitor's user agent string contains any of the strings you specify, the visitor will be excluded from Matomo.<br />You can use this to exclude some bots from being tracked. Regular expressions such as /bot|spider|crawl|scanner/i are supported.</inlineHelp> <templateFile /> <introduction /> <condition /> diff --git a/plugins/SitesManager/tests/UI/expected-screenshots/SitesManager_global_settings.png b/plugins/SitesManager/tests/UI/expected-screenshots/SitesManager_global_settings.png index a5355451c6..2db69b918e 100644 --- a/plugins/SitesManager/tests/UI/expected-screenshots/SitesManager_global_settings.png +++ b/plugins/SitesManager/tests/UI/expected-screenshots/SitesManager_global_settings.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:922124586599585bbc71df0f9813ec59ea26e96de538f4d75cb009d4c272fe30 -size 247100 +oid sha256:de015ca1177509f696d12d07bfdc10c034f90d688c143d544b5fcb9fc997e8f7 +size 254454 diff --git a/plugins/WebsiteMeasurable/MeasurableSettings.php b/plugins/WebsiteMeasurable/MeasurableSettings.php index c4e384aa3c..749a32e8a3 100644 --- a/plugins/WebsiteMeasurable/MeasurableSettings.php +++ b/plugins/WebsiteMeasurable/MeasurableSettings.php @@ -235,7 +235,9 @@ class MeasurableSettings extends 
\Piwik\Settings\Measurable\MeasurableSettings . '<br /><br />' . Piwik::translate('SitesManager_GlobalListExcludedUserAgents_Desc') . '<br />' - . Piwik::translate('SitesManager_GlobalExcludedUserAgentHelp2'); + . Piwik::translate('SitesManager_GlobalExcludedUserAgentHelp2') . " " + . Piwik::translate('SitesManager_GlobalExcludedUserAgentHelp3', "/bot|spider|crawl|scanner/i") + ; $field->uiControl = FieldConfig::UI_CONTROL_TEXTAREA; $field->uiControlAttributes = array('cols' => '20', 'rows' => '4'); $field->transform = function ($value) use ($self) { diff --git a/tests/PHPUnit/Integration/Tracker/VisitTest.php b/tests/PHPUnit/Integration/Tracker/VisitTest.php index f7d34d3be6..8ca50e386a 100644 --- a/tests/PHPUnit/Integration/Tracker/VisitTest.php +++ b/tests/PHPUnit/Integration/Tracker/VisitTest.php @@ -234,6 +234,25 @@ class VisitTest extends IntegrationTestCase '12&^%345' => true, 'sfasdf' => false, )), + array( '/bot|spider|crawl|scanner/i', array( // case insensitive regex + 'Mozilla/5.0 (compatible; SISTRIX Crawler; http://crawler.sistrix.net/)' => true, + 'Googlebot/2.1 (+http://www.google.com/bot.html)' => true, + 'Mozilla/5.0 (compatible; adscanner/)' => true, + 'Baiduspider+(+http://www.baidu.com/search/spider.htm)' => true, + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36' => false, + 'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko' => false, + )), + array('/google|yahoo/', array( // case sensitive regex + 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' => true, + 'Mozilla/5.0 (compatible; Yahoo! Slurp/3.0; http://help.yahoo.com/help/us/ysearch/slurp)' => true, + 'Googlebot-Image/1.0' => false, + 'Yahoo! Slurp China' => false, + )), + array('/Mozilla/5.0/i)', array( // invalid regex + 'Mozilla/5.0 (compatible; Yahoo! 
Slurp/3.0; http://help.yahoo.com/help/us/ysearch/slurp)' => false, + 'Wget/1.13.4 (linux-gnu)' => false, + 'Mozilla/5.0 (compatible; AhrefsBot/7.0; +http://ahrefs.com/robot/)' => false, + )) ); } diff --git a/tests/PHPUnit/System/expected/test_apiGetReportMetadata__API.getAvailableMeasurableTypes.xml b/tests/PHPUnit/System/expected/test_apiGetReportMetadata__API.getAvailableMeasurableTypes.xml index 380f802785..91c5bb6333 100644 --- a/tests/PHPUnit/System/expected/test_apiGetReportMetadata__API.getAvailableMeasurableTypes.xml +++ b/tests/PHPUnit/System/expected/test_apiGetReportMetadata__API.getAvailableMeasurableTypes.xml @@ -104,7 +104,7 @@ https://www.example.org/</placeholder> </uiControlAttributes> <availableValues /> <description /> - <inlineHelp>Enter the list of URL Query Parameters, one per line, to exclude from the Page URLs reports. Regular expressions such as /^sess.*|.*[dD]ate$/ are suported.<br /><br />Matomo will automatically exclude the common session parameters (phpsessid, sessionid, ...).</inlineHelp> + <inlineHelp>Enter the list of URL Query Parameters, one per line, to exclude from the Page URLs reports. 
Regular expressions such as /^sess.*|.*[dD]ate$/ are supported.<br /><br />Matomo will automatically exclude the common session parameters (phpsessid, sessionid, ...).</inlineHelp> <templateFile /> <introduction /> <condition /> @@ -124,7 +124,7 @@ https://www.example.org/</placeholder> </uiControlAttributes> <availableValues /> <description /> - <inlineHelp>Enter the list of user agents to exclude from being tracked by Matomo.<br /><br />If the visitor's user agent string contains any of the strings you specify, the visitor will be excluded from Matomo.<br />You can use this to exclude some bots from being tracked.</inlineHelp> + <inlineHelp>Enter the list of user agents to exclude from being tracked by Matomo.<br /><br />If the visitor's user agent string contains any of the strings you specify, the visitor will be excluded from Matomo.<br />You can use this to exclude some bots from being tracked. Regular expressions such as /bot|spider|crawl|scanner/i are supported.</inlineHelp> <templateFile /> <introduction /> <condition /> @@ -389,7 +389,7 @@ https://www.example.org/</placeholder> </uiControlAttributes> <availableValues /> <description /> - <inlineHelp>Enter the list of URL Query Parameters, one per line, to exclude from the Page URLs reports. Regular expressions such as /^sess.*|.*[dD]ate$/ are suported.<br /><br />Matomo will automatically exclude the common session parameters (phpsessid, sessionid, ...).</inlineHelp> + <inlineHelp>Enter the list of URL Query Parameters, one per line, to exclude from the Page URLs reports. 
Regular expressions such as /^sess.*|.*[dD]ate$/ are supported.<br /><br />Matomo will automatically exclude the common session parameters (phpsessid, sessionid, ...).</inlineHelp> <templateFile /> <introduction /> <condition /> @@ -409,7 +409,7 @@ https://www.example.org/</placeholder> </uiControlAttributes> <availableValues /> <description /> - <inlineHelp>Enter the list of user agents to exclude from being tracked by Matomo.<br /><br />If the visitor's user agent string contains any of the strings you specify, the visitor will be excluded from Matomo.<br />You can use this to exclude some bots from being tracked.</inlineHelp> + <inlineHelp>Enter the list of user agents to exclude from being tracked by Matomo.<br /><br />If the visitor's user agent string contains any of the strings you specify, the visitor will be excluded from Matomo.<br />You can use this to exclude some bots from being tracked. Regular expressions such as /bot|spider|crawl|scanner/i are supported.</inlineHelp> <templateFile /> <introduction /> <condition /> diff --git a/tests/UI/expected-screenshots/MeasurableManager_add_measurable_view.png b/tests/UI/expected-screenshots/MeasurableManager_add_measurable_view.png index b7762124a8..fd15aa164e 100644 --- a/tests/UI/expected-screenshots/MeasurableManager_add_measurable_view.png +++ b/tests/UI/expected-screenshots/MeasurableManager_add_measurable_view.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bf5607d3659f13c612d520e55342efb42521804cd378d6983ebfb161c59ba5b7 -size 482292 +oid sha256:220b8c96106654860914095774a18e55a2777c3d060168166a8e7b1b30546c8e +size 488887 |