diff options
author | Matthieu Aubry <matt@piwik.org> | 2015-06-25 09:18:38 +0300 |
---|---|---|
committer | Matthieu Aubry <matt@piwik.org> | 2015-06-25 09:18:38 +0300 |
commit | b801a758493ad0aeca33a9af3270f6bf3861af6b (patch) | |
tree | 9a4a26f5b1e2ea021c96125381f3f26c18a57a81 | |
parent | 3c7fb382f4a3153ee3c78cad53dd374fd8c410b7 (diff) | |
parent | 3b5beec2069cec084ac49c36356473b47233fe0d (diff) |
Merge pull request #8186 from piwik/spammer-list-update
Auto-update the referrer spammer blacklist
-rw-r--r-- | core/Scheduler/Scheduler.php | 61 | ||||
-rw-r--r-- | core/Tracker/Visit/ReferrerSpamFilter.php | 32 | ||||
-rw-r--r-- | plugins/CoreAdminHome/Commands/RunScheduledTasks.php | 39 | ||||
-rw-r--r-- | plugins/CoreAdminHome/Tasks.php | 28 | ||||
-rw-r--r-- | tests/PHPUnit/Integration/Tracker/Visit/ReferrerSpamFilterTest.php | 101 | ||||
-rw-r--r-- | tests/PHPUnit/Unit/Scheduler/SchedulerTest.php | 31 |
6 files changed, 269 insertions, 23 deletions
diff --git a/core/Scheduler/Scheduler.php b/core/Scheduler/Scheduler.php index 4080cc89d1..86b8f44614 100644 --- a/core/Scheduler/Scheduler.php +++ b/core/Scheduler/Scheduler.php @@ -121,17 +121,7 @@ class Scheduler } if ($shouldExecuteTask) { - $this->logger->info("Scheduler: executing task {taskName}...", array('taskName' => $taskName)); - - $timer = new Timer(); - - $this->isRunningTask = true; $message = $this->executeTask($task); - $this->isRunningTask = false; - - $this->logger->info("Scheduler: finished. {timeElapsed}", array( - 'taskName' => $taskName, 'timeElapsed' => $timer - )); $executionResults[] = array('task' => $taskName, 'output' => $message); } @@ -144,6 +134,25 @@ class Scheduler } /** + * Run a specific task now. Will ignore the schedule completely. + * + * @param string $taskName + * @return string Task output. + */ + public function runTaskNow($taskName) + { + $tasks = $this->loader->loadTasks(); + + foreach ($tasks as $task) { + if ($task->getName() === $taskName) { + return $this->executeTask($task); + } + } + + throw new \InvalidArgumentException('Task ' . $taskName . ' not found'); + } + + /** * Determines a task's scheduled time and persists it, overwriting the previous scheduled time. * * Call this method if your task's scheduled time has changed due to, for example, an option that @@ -184,6 +193,20 @@ class Scheduler } /** + * Returns the list of the task names. + * + * @return string[] + */ + public function getTaskList() + { + $tasks = $this->loader->loadTasks(); + + return array_map(function (Task $task) { + return $task->getName(); + }, $tasks); + } + + /** * Executes the given task * * @param Task $task @@ -191,16 +214,28 @@ class Scheduler */ private function executeTask($task) { - $this->logger->debug('Running task {task}', array('task' => $task->getName())); + $this->logger->info("Scheduler: executing task {taskName}...", array( + 'taskName' => $task->getName(), + )); + + $this->isRunningTask = true; + + $timer = new Timer(); try { - $timer = new Timer(); - call_user_func(array($task->getObjectInstance(), $task->getMethodName()), $task->getMethodParameter()); + $callable = array($task->getObjectInstance(), $task->getMethodName()); + call_user_func($callable, $task->getMethodParameter()); $message = $timer->__toString(); } catch (Exception $e) { $message = 'ERROR: ' . $e->getMessage(); } + $this->isRunningTask = false; + + $this->logger->info("Scheduler: finished. {timeElapsed}", array( + 'timeElapsed' => $timer, + )); + return $message; } } diff --git a/core/Tracker/Visit/ReferrerSpamFilter.php b/core/Tracker/Visit/ReferrerSpamFilter.php index 0c6ee6204a..2decd7854a 100644 --- a/core/Tracker/Visit/ReferrerSpamFilter.php +++ b/core/Tracker/Visit/ReferrerSpamFilter.php @@ -2,7 +2,9 @@ namespace Piwik\Tracker\Visit; +use Piwik\Cache; use Piwik\Common; +use Piwik\Option; use Piwik\Tracker\Request; /** @@ -10,6 +12,7 @@ use Piwik\Tracker\Request; */ class ReferrerSpamFilter { + const OPTION_STORAGE_NAME = 'referrer_spam_blacklist'; /** * @var string[] */ @@ -23,7 +26,7 @@ class ReferrerSpamFilter */ public function isSpam(Request $request) { - $spammers = $this->loadSpammerList(); + $spammers = $this->getSpammerListFromCache(); $referrerUrl = $request->getParam('urlref'); @@ -37,14 +40,37 @@ class ReferrerSpamFilter return false; } + private function getSpammerListFromCache() + { + $cache = Cache::getEagerCache(); + $cacheId = 'ReferrerSpamFilter-' . self::OPTION_STORAGE_NAME; + + if ($cache->contains($cacheId)) { + $list = $cache->fetch($cacheId); + } else { + $list = $this->loadSpammerList(); + $cache->save($cacheId, $list); + } + + return $list; + } + private function loadSpammerList() { if ($this->spammerList !== null) { return $this->spammerList; } - $file = PIWIK_INCLUDE_PATH . '/vendor/piwik/referrer-spam-blacklist/spammers.txt'; - $this->spammerList = file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); + // Read first from the auto-updated list in database + $list = Option::get(self::OPTION_STORAGE_NAME); + + if ($list) { + $this->spammerList = unserialize($list); + } else { + // Fallback to reading the bundled list + $file = PIWIK_INCLUDE_PATH . '/vendor/piwik/referrer-spam-blacklist/spammers.txt'; + $this->spammerList = file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); + } return $this->spammerList; } diff --git a/plugins/CoreAdminHome/Commands/RunScheduledTasks.php b/plugins/CoreAdminHome/Commands/RunScheduledTasks.php index 2d6a0e2439..4c231dd3f9 100644 --- a/plugins/CoreAdminHome/Commands/RunScheduledTasks.php +++ b/plugins/CoreAdminHome/Commands/RunScheduledTasks.php @@ -9,9 +9,11 @@ namespace Piwik\Plugins\CoreAdminHome\Commands; +use Piwik\Container\StaticContainer; use Piwik\FrontController; use Piwik\Plugin\ConsoleCommand; -use Piwik\Plugins\CoreAdminHome\API; +use Piwik\Scheduler\Scheduler; +use Symfony\Component\Console\Input\InputArgument; use Symfony\Component\Console\Input\InputInterface; use Symfony\Component\Console\Input\InputOption; use Symfony\Component\Console\Output\OutputInterface; @@ -20,9 +22,10 @@ class RunScheduledTasks extends ConsoleCommand { protected function configure() { - $this->setName('core:run-scheduled-tasks'); - $this->setAliases(array('scheduled-tasks:run')); + $this->setName('scheduled-tasks:run'); + $this->setAliases(array('core:run-scheduled-tasks')); $this->setDescription('Will run all scheduled tasks due to run at this time.'); + $this->addArgument('task', InputArgument::OPTIONAL, 'Optionally pass the name of a task to run (will run even if not scheduled to run now)'); $this->addOption('force', null, InputOption::VALUE_NONE, 'If set, it will execute all tasks even the ones not due to run at this time.'); } @@ -34,7 +37,18 @@ class RunScheduledTasks extends ConsoleCommand $this->forceRunAllTasksIfRequested($input); FrontController::getInstance()->init(); - API::getInstance()->runScheduledTasks(); + + // TODO use dependency injection + /** @var Scheduler $scheduler */ + $scheduler = StaticContainer::get('Piwik\Scheduler\Scheduler'); + + $task = $input->getArgument('task'); + + if ($task) { + $this->runSingleTask($scheduler, $task, $output); + } else { + $scheduler->run(); + } $this->writeSuccessMessage($output, array('Scheduled Tasks executed')); } @@ -47,4 +61,19 @@ class RunScheduledTasks extends ConsoleCommand define('DEBUG_FORCE_SCHEDULED_TASKS', true); } } -}
\ No newline at end of file + + private function runSingleTask(Scheduler $scheduler, $task, OutputInterface $output) + { + try { + $message = $scheduler->runTaskNow($task); + } catch (\InvalidArgumentException $e) { + $message = $e->getMessage() . PHP_EOL + . 'Available tasks:' . PHP_EOL + . implode(PHP_EOL, $scheduler->getTaskList()); + + throw new \Exception($message); + } + + $output->writeln($message); + } +} diff --git a/plugins/CoreAdminHome/Tasks.php b/plugins/CoreAdminHome/Tasks.php index 7a3ff406b5..2fe5a83ed9 100644 --- a/plugins/CoreAdminHome/Tasks.php +++ b/plugins/CoreAdminHome/Tasks.php @@ -10,11 +10,13 @@ namespace Piwik\Plugins\CoreAdminHome; use Piwik\ArchiveProcessor\Rules; use Piwik\Archive\ArchivePurger; -use Piwik\Container\StaticContainer; use Piwik\DataAccess\ArchiveTableCreator; use Piwik\Date; use Piwik\Db; +use Piwik\Http; +use Piwik\Option; use Piwik\Plugins\CoreAdminHome\Tasks\ArchivesToPurgeDistributedList; +use Piwik\Tracker\Visit\ReferrerSpamFilter; use Psr\Log\LoggerInterface; class Tasks extends \Piwik\Plugin\Tasks @@ -45,13 +47,15 @@ class Tasks extends \Piwik\Plugin\Tasks // lowest priority since tables should be optimized after they are modified $this->daily('optimizeArchiveTable', null, self::LOWEST_PRIORITY); + + $this->weekly('updateSpammerBlacklist'); } public function purgeOutdatedArchives() { if ($this->willPurgingCausePotentialProblemInUI()) { $this->logger->info("Purging temporary archives: skipped (browser triggered archiving not enabled & not running after core:archive)"); - return false; + return; } $archiveTables = ArchiveTableCreator::getTablesArchivesInstalled(); @@ -101,6 +105,26 @@ class Tasks extends \Piwik\Plugin\Tasks } /** + * Update the referrer spam blacklist + * + * @see https://github.com/piwik/referrer-spam-blacklist + */ + public function updateSpammerBlacklist() + { + $url = 'https://raw.githubusercontent.com/piwik/referrer-spam-blacklist/master/spammers.txt'; + $list = Http::sendHttpRequest($url, 30); + $list = preg_split("/\r\n|\n|\r/", $list); + if (count($list) < 10) { + throw new \Exception(sprintf( + 'The spammers list downloaded from %s contains less than 10 entries, considering it a fail', + $url + )); + } + + Option::set(ReferrerSpamFilter::OPTION_STORAGE_NAME, serialize($list)); + } + + /** * we should only purge outdated & custom range archives if we know cron archiving has just run, * or if browser triggered archiving is enabled. if cron archiving has run, then we know the latest * archives are in the database, and we can remove temporary ones. if browser triggered archiving is diff --git a/tests/PHPUnit/Integration/Tracker/Visit/ReferrerSpamFilterTest.php b/tests/PHPUnit/Integration/Tracker/Visit/ReferrerSpamFilterTest.php new file mode 100644 index 0000000000..b5eec03bce --- /dev/null +++ b/tests/PHPUnit/Integration/Tracker/Visit/ReferrerSpamFilterTest.php @@ -0,0 +1,101 @@ +<?php +/** + * Piwik - free/libre analytics platform + * + * @link http://piwik.org + * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later + */ + +namespace Piwik\Tests\Integration\Tracker\Visit; + +use Piwik\Cache; +use Piwik\Option; +use Piwik\Tests\Framework\TestCase\IntegrationTestCase; +use Piwik\Tracker\Request; +use Piwik\Tracker\Visit\ReferrerSpamFilter; + +/** + * @group Tracker + * @group Visit + */ +class ReferrerSpamFilterTest extends IntegrationTestCase +{ + /** + * @var ReferrerSpamFilter + */ + private $filter; + + public function setUp() + { + parent::setUp(); + + Cache::flushAll(); + $this->filter = new ReferrerSpamFilter; + } + + public function tearDown() + { + parent::tearDown(); + + Cache::flushAll(); + } + + /** + * @test + */ + public function should_detect_spam() + { + $request = new Request(array( + 'urlref' => 'semalt.com', + )); + + $this->assertTrue($this->filter->isSpam($request)); + } + + /** + * @test + */ + public function should_ignore_valid_referrers() + { + $request = new Request(array( + 'urlref' => 'google.com', + )); + + $this->assertFalse($this->filter->isSpam($request)); + } + + /** + * @test + */ + public function should_ignore_requests_with_empty_referrers() + { + $request = new Request(array()); + + $this->assertFalse($this->filter->isSpam($request)); + } + + /** + * @test + */ + public function should_load_spammer_list_from_options_if_exists() + { + // We store google.com in the spammer blacklist + $list = serialize(array( + 'google.com', + )); + Option::set(ReferrerSpamFilter::OPTION_STORAGE_NAME, $list); + + $request = new Request(array( + 'urlref' => 'semalt.com', + )); + $this->assertFalse($this->filter->isSpam($request)); + + // Now Google is blacklisted + $request = new Request(array( + 'urlref' => 'google.com', + )); + $this->assertTrue($this->filter->isSpam($request)); + + Option::delete(ReferrerSpamFilter::OPTION_STORAGE_NAME); + } +} diff --git a/tests/PHPUnit/Unit/Scheduler/SchedulerTest.php b/tests/PHPUnit/Unit/Scheduler/SchedulerTest.php index 2547d3e5e9..236b8eb8ed 100644 --- a/tests/PHPUnit/Unit/Scheduler/SchedulerTest.php +++ b/tests/PHPUnit/Unit/Scheduler/SchedulerTest.php @@ -180,6 +180,37 @@ class SchedulerTest extends \PHPUnit_Framework_TestCase self::resetPiwikOption(); } + /** + * @dataProvider runDataProvider + */ + public function testRunTaskNow($expectedTimetable, $expectedExecutedTasks, $timetableBeforeTaskExecution, $configuredTasks) + { + $taskLoader = $this->getMock('Piwik\Scheduler\TaskLoader'); + $taskLoader->expects($this->atLeastOnce()) + ->method('loadTasks') + ->willReturn($configuredTasks); + + // stub the piwik option object to control the returned option value + self::stubPiwikOption(serialize($timetableBeforeTaskExecution)); + + $timetable = new Timetable(); + $initialTimetable = $timetable->getTimetable(); + + $scheduler = new Scheduler($taskLoader, new NullLogger()); + + foreach ($configuredTasks as $task) { + /** @var Task $task */ + $result = $scheduler->runTaskNow($task->getName()); + + $this->assertNotEmpty($result); + } + + // assert the timetable is NOT updated + $this->assertSame($initialTimetable, $timetable->getTimetable()); + + self::resetPiwikOption(); + } + private static function stubPiwikOption($timetable) { self::getReflectedPiwikOptionInstance()->setValue(new PiwikOption($timetable)); |