Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/matomo-org/matomo.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Matthieu Aubry <matt@piwik.org>, 2015-06-25 09:18:38 +0300
committer: Matthieu Aubry <matt@piwik.org>, 2015-06-25 09:18:38 +0300
commit: b801a758493ad0aeca33a9af3270f6bf3861af6b (patch)
tree: 9a4a26f5b1e2ea021c96125381f3f26c18a57a81
parent: 3c7fb382f4a3153ee3c78cad53dd374fd8c410b7 (diff)
parent: 3b5beec2069cec084ac49c36356473b47233fe0d (diff)
Merge pull request #8186 from piwik/spammer-list-update
Auto-update the referrer spammer blacklist
-rw-r--r-- core/Scheduler/Scheduler.php | 61
-rw-r--r-- core/Tracker/Visit/ReferrerSpamFilter.php | 32
-rw-r--r-- plugins/CoreAdminHome/Commands/RunScheduledTasks.php | 39
-rw-r--r-- plugins/CoreAdminHome/Tasks.php | 28
-rw-r--r-- tests/PHPUnit/Integration/Tracker/Visit/ReferrerSpamFilterTest.php | 101
-rw-r--r-- tests/PHPUnit/Unit/Scheduler/SchedulerTest.php | 31
6 files changed, 269 insertions, 23 deletions
diff --git a/core/Scheduler/Scheduler.php b/core/Scheduler/Scheduler.php
index 4080cc89d1..86b8f44614 100644
--- a/core/Scheduler/Scheduler.php
+++ b/core/Scheduler/Scheduler.php
@@ -121,17 +121,7 @@ class Scheduler
}
if ($shouldExecuteTask) {
- $this->logger->info("Scheduler: executing task {taskName}...", array('taskName' => $taskName));
-
- $timer = new Timer();
-
- $this->isRunningTask = true;
$message = $this->executeTask($task);
- $this->isRunningTask = false;
-
- $this->logger->info("Scheduler: finished. {timeElapsed}", array(
- 'taskName' => $taskName, 'timeElapsed' => $timer
- ));
$executionResults[] = array('task' => $taskName, 'output' => $message);
}
@@ -144,6 +134,25 @@ class Scheduler
}
/**
+ * Run a specific task now. Will ignore the schedule completely.
+ *
+ * @param string $taskName
+ * @return string Task output.
+ */
+ public function runTaskNow($taskName)
+ {
+ $tasks = $this->loader->loadTasks();
+
+ foreach ($tasks as $task) {
+ if ($task->getName() === $taskName) {
+ return $this->executeTask($task);
+ }
+ }
+
+ throw new \InvalidArgumentException('Task ' . $taskName . ' not found');
+ }
+
+ /**
* Determines a task's scheduled time and persists it, overwriting the previous scheduled time.
*
* Call this method if your task's scheduled time has changed due to, for example, an option that
@@ -184,6 +193,20 @@ class Scheduler
}
/**
+ * Returns the list of the task names.
+ *
+ * @return string[]
+ */
+ public function getTaskList()
+ {
+ $tasks = $this->loader->loadTasks();
+
+ return array_map(function (Task $task) {
+ return $task->getName();
+ }, $tasks);
+ }
+
+ /**
* Executes the given task
*
* @param Task $task
@@ -191,16 +214,28 @@ class Scheduler
*/
private function executeTask($task)
{
- $this->logger->debug('Running task {task}', array('task' => $task->getName()));
+ $this->logger->info("Scheduler: executing task {taskName}...", array(
+ 'taskName' => $task->getName(),
+ ));
+
+ $this->isRunningTask = true;
+
+ $timer = new Timer();
try {
- $timer = new Timer();
- call_user_func(array($task->getObjectInstance(), $task->getMethodName()), $task->getMethodParameter());
+ $callable = array($task->getObjectInstance(), $task->getMethodName());
+ call_user_func($callable, $task->getMethodParameter());
$message = $timer->__toString();
} catch (Exception $e) {
$message = 'ERROR: ' . $e->getMessage();
}
+ $this->isRunningTask = false;
+
+ $this->logger->info("Scheduler: finished. {timeElapsed}", array(
+ 'timeElapsed' => $timer,
+ ));
+
return $message;
}
}
diff --git a/core/Tracker/Visit/ReferrerSpamFilter.php b/core/Tracker/Visit/ReferrerSpamFilter.php
index 0c6ee6204a..2decd7854a 100644
--- a/core/Tracker/Visit/ReferrerSpamFilter.php
+++ b/core/Tracker/Visit/ReferrerSpamFilter.php
@@ -2,7 +2,9 @@
namespace Piwik\Tracker\Visit;
+use Piwik\Cache;
use Piwik\Common;
+use Piwik\Option;
use Piwik\Tracker\Request;
/**
@@ -10,6 +12,7 @@ use Piwik\Tracker\Request;
*/
class ReferrerSpamFilter
{
+ const OPTION_STORAGE_NAME = 'referrer_spam_blacklist';
/**
* @var string[]
*/
@@ -23,7 +26,7 @@ class ReferrerSpamFilter
*/
public function isSpam(Request $request)
{
- $spammers = $this->loadSpammerList();
+ $spammers = $this->getSpammerListFromCache();
$referrerUrl = $request->getParam('urlref');
@@ -37,14 +40,37 @@ class ReferrerSpamFilter
return false;
}
+ private function getSpammerListFromCache()
+ {
+ $cache = Cache::getEagerCache();
+ $cacheId = 'ReferrerSpamFilter-' . self::OPTION_STORAGE_NAME;
+
+ if ($cache->contains($cacheId)) {
+ $list = $cache->fetch($cacheId);
+ } else {
+ $list = $this->loadSpammerList();
+ $cache->save($cacheId, $list);
+ }
+
+ return $list;
+ }
+
private function loadSpammerList()
{
if ($this->spammerList !== null) {
return $this->spammerList;
}
- $file = PIWIK_INCLUDE_PATH . '/vendor/piwik/referrer-spam-blacklist/spammers.txt';
- $this->spammerList = file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
+ // Read first from the auto-updated list in database
+ $list = Option::get(self::OPTION_STORAGE_NAME);
+
+ if ($list) {
+ $this->spammerList = unserialize($list);
+ } else {
+ // Fallback to reading the bundled list
+ $file = PIWIK_INCLUDE_PATH . '/vendor/piwik/referrer-spam-blacklist/spammers.txt';
+ $this->spammerList = file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
+ }
return $this->spammerList;
}
diff --git a/plugins/CoreAdminHome/Commands/RunScheduledTasks.php b/plugins/CoreAdminHome/Commands/RunScheduledTasks.php
index 2d6a0e2439..4c231dd3f9 100644
--- a/plugins/CoreAdminHome/Commands/RunScheduledTasks.php
+++ b/plugins/CoreAdminHome/Commands/RunScheduledTasks.php
@@ -9,9 +9,11 @@
namespace Piwik\Plugins\CoreAdminHome\Commands;
+use Piwik\Container\StaticContainer;
use Piwik\FrontController;
use Piwik\Plugin\ConsoleCommand;
-use Piwik\Plugins\CoreAdminHome\API;
+use Piwik\Scheduler\Scheduler;
+use Symfony\Component\Console\Input\InputArgument;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Output\OutputInterface;
@@ -20,9 +22,10 @@ class RunScheduledTasks extends ConsoleCommand
{
protected function configure()
{
- $this->setName('core:run-scheduled-tasks');
- $this->setAliases(array('scheduled-tasks:run'));
+ $this->setName('scheduled-tasks:run');
+ $this->setAliases(array('core:run-scheduled-tasks'));
$this->setDescription('Will run all scheduled tasks due to run at this time.');
+ $this->addArgument('task', InputArgument::OPTIONAL, 'Optionally pass the name of a task to run (will run even if not scheduled to run now)');
$this->addOption('force', null, InputOption::VALUE_NONE, 'If set, it will execute all tasks even the ones not due to run at this time.');
}
@@ -34,7 +37,18 @@ class RunScheduledTasks extends ConsoleCommand
$this->forceRunAllTasksIfRequested($input);
FrontController::getInstance()->init();
- API::getInstance()->runScheduledTasks();
+
+ // TODO use dependency injection
+ /** @var Scheduler $scheduler */
+ $scheduler = StaticContainer::get('Piwik\Scheduler\Scheduler');
+
+ $task = $input->getArgument('task');
+
+ if ($task) {
+ $this->runSingleTask($scheduler, $task, $output);
+ } else {
+ $scheduler->run();
+ }
$this->writeSuccessMessage($output, array('Scheduled Tasks executed'));
}
@@ -47,4 +61,19 @@ class RunScheduledTasks extends ConsoleCommand
define('DEBUG_FORCE_SCHEDULED_TASKS', true);
}
}
-} \ No newline at end of file
+
+ private function runSingleTask(Scheduler $scheduler, $task, OutputInterface $output)
+ {
+ try {
+ $message = $scheduler->runTaskNow($task);
+ } catch (\InvalidArgumentException $e) {
+ $message = $e->getMessage() . PHP_EOL
+ . 'Available tasks:' . PHP_EOL
+ . implode(PHP_EOL, $scheduler->getTaskList());
+
+ throw new \Exception($message);
+ }
+
+ $output->writeln($message);
+ }
+}
diff --git a/plugins/CoreAdminHome/Tasks.php b/plugins/CoreAdminHome/Tasks.php
index 7a3ff406b5..2fe5a83ed9 100644
--- a/plugins/CoreAdminHome/Tasks.php
+++ b/plugins/CoreAdminHome/Tasks.php
@@ -10,11 +10,13 @@ namespace Piwik\Plugins\CoreAdminHome;
use Piwik\ArchiveProcessor\Rules;
use Piwik\Archive\ArchivePurger;
-use Piwik\Container\StaticContainer;
use Piwik\DataAccess\ArchiveTableCreator;
use Piwik\Date;
use Piwik\Db;
+use Piwik\Http;
+use Piwik\Option;
use Piwik\Plugins\CoreAdminHome\Tasks\ArchivesToPurgeDistributedList;
+use Piwik\Tracker\Visit\ReferrerSpamFilter;
use Psr\Log\LoggerInterface;
class Tasks extends \Piwik\Plugin\Tasks
@@ -45,13 +47,15 @@ class Tasks extends \Piwik\Plugin\Tasks
// lowest priority since tables should be optimized after they are modified
$this->daily('optimizeArchiveTable', null, self::LOWEST_PRIORITY);
+
+ $this->weekly('updateSpammerBlacklist');
}
public function purgeOutdatedArchives()
{
if ($this->willPurgingCausePotentialProblemInUI()) {
$this->logger->info("Purging temporary archives: skipped (browser triggered archiving not enabled & not running after core:archive)");
- return false;
+ return;
}
$archiveTables = ArchiveTableCreator::getTablesArchivesInstalled();
@@ -101,6 +105,26 @@ class Tasks extends \Piwik\Plugin\Tasks
}
/**
+ * Update the referrer spam blacklist
+ *
+ * @see https://github.com/piwik/referrer-spam-blacklist
+ */
+ public function updateSpammerBlacklist()
+ {
+ $url = 'https://raw.githubusercontent.com/piwik/referrer-spam-blacklist/master/spammers.txt';
+ $list = Http::sendHttpRequest($url, 30);
+ $list = preg_split("/\r\n|\n|\r/", $list);
+ if (count($list) < 10) {
+ throw new \Exception(sprintf(
+ 'The spammers list downloaded from %s contains less than 10 entries, considering it a fail',
+ $url
+ ));
+ }
+
+ Option::set(ReferrerSpamFilter::OPTION_STORAGE_NAME, serialize($list));
+ }
+
+ /**
* we should only purge outdated & custom range archives if we know cron archiving has just run,
* or if browser triggered archiving is enabled. if cron archiving has run, then we know the latest
* archives are in the database, and we can remove temporary ones. if browser triggered archiving is
diff --git a/tests/PHPUnit/Integration/Tracker/Visit/ReferrerSpamFilterTest.php b/tests/PHPUnit/Integration/Tracker/Visit/ReferrerSpamFilterTest.php
new file mode 100644
index 0000000000..b5eec03bce
--- /dev/null
+++ b/tests/PHPUnit/Integration/Tracker/Visit/ReferrerSpamFilterTest.php
@@ -0,0 +1,101 @@
+<?php
+/**
+ * Piwik - free/libre analytics platform
+ *
+ * @link http://piwik.org
+ * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
+ */
+
+namespace Piwik\Tests\Integration\Tracker\Visit;
+
+use Piwik\Cache;
+use Piwik\Option;
+use Piwik\Tests\Framework\TestCase\IntegrationTestCase;
+use Piwik\Tracker\Request;
+use Piwik\Tracker\Visit\ReferrerSpamFilter;
+
+/**
+ * @group Tracker
+ * @group Visit
+ */
+class ReferrerSpamFilterTest extends IntegrationTestCase
+{
+ /**
+ * @var ReferrerSpamFilter
+ */
+ private $filter;
+
+ public function setUp()
+ {
+ parent::setUp();
+
+ Cache::flushAll();
+ $this->filter = new ReferrerSpamFilter;
+ }
+
+ public function tearDown()
+ {
+ parent::tearDown();
+
+ Cache::flushAll();
+ }
+
+ /**
+ * @test
+ */
+ public function should_detect_spam()
+ {
+ $request = new Request(array(
+ 'urlref' => 'semalt.com',
+ ));
+
+ $this->assertTrue($this->filter->isSpam($request));
+ }
+
+ /**
+ * @test
+ */
+ public function should_ignore_valid_referrers()
+ {
+ $request = new Request(array(
+ 'urlref' => 'google.com',
+ ));
+
+ $this->assertFalse($this->filter->isSpam($request));
+ }
+
+ /**
+ * @test
+ */
+ public function should_ignore_requests_with_empty_referrers()
+ {
+ $request = new Request(array());
+
+ $this->assertFalse($this->filter->isSpam($request));
+ }
+
+ /**
+ * @test
+ */
+ public function should_load_spammer_list_from_options_if_exists()
+ {
+ // We store google.com in the spammer blacklist
+ $list = serialize(array(
+ 'google.com',
+ ));
+ Option::set(ReferrerSpamFilter::OPTION_STORAGE_NAME, $list);
+
+ $request = new Request(array(
+ 'urlref' => 'semalt.com',
+ ));
+ $this->assertFalse($this->filter->isSpam($request));
+
+ // Now Google is blacklisted
+ $request = new Request(array(
+ 'urlref' => 'google.com',
+ ));
+ $this->assertTrue($this->filter->isSpam($request));
+
+ Option::delete(ReferrerSpamFilter::OPTION_STORAGE_NAME);
+ }
+}
diff --git a/tests/PHPUnit/Unit/Scheduler/SchedulerTest.php b/tests/PHPUnit/Unit/Scheduler/SchedulerTest.php
index 2547d3e5e9..236b8eb8ed 100644
--- a/tests/PHPUnit/Unit/Scheduler/SchedulerTest.php
+++ b/tests/PHPUnit/Unit/Scheduler/SchedulerTest.php
@@ -180,6 +180,37 @@ class SchedulerTest extends \PHPUnit_Framework_TestCase
self::resetPiwikOption();
}
+ /**
+ * @dataProvider runDataProvider
+ */
+ public function testRunTaskNow($expectedTimetable, $expectedExecutedTasks, $timetableBeforeTaskExecution, $configuredTasks)
+ {
+ $taskLoader = $this->getMock('Piwik\Scheduler\TaskLoader');
+ $taskLoader->expects($this->atLeastOnce())
+ ->method('loadTasks')
+ ->willReturn($configuredTasks);
+
+ // stub the piwik option object to control the returned option value
+ self::stubPiwikOption(serialize($timetableBeforeTaskExecution));
+
+ $timetable = new Timetable();
+ $initialTimetable = $timetable->getTimetable();
+
+ $scheduler = new Scheduler($taskLoader, new NullLogger());
+
+ foreach ($configuredTasks as $task) {
+ /** @var Task $task */
+ $result = $scheduler->runTaskNow($task->getName());
+
+ $this->assertNotEmpty($result);
+ }
+
+ // assert the timetable is NOT updated
+ $this->assertSame($initialTimetable, $timetable->getTimetable());
+
+ self::resetPiwikOption();
+ }
+
private static function stubPiwikOption($timetable)
{
self::getReflectedPiwikOptionInstance()->setValue(new PiwikOption($timetable));