diff options
author | Matthieu Aubry <matt@piwik.org> | 2015-06-16 10:29:28 +0300 |
---|---|---|
committer | Matthieu Aubry <matt@piwik.org> | 2015-06-16 10:29:28 +0300 |
commit | 36529f5175c36670e982940ac7488ac37908fa8b (patch) | |
tree | af98eb8f5826b64883c6c5c19d7faebd23ffe5d9 | |
parent | f7cc14a78ffa3cc8e26b3d457f9e36863f3cbafb (diff) | |
parent | eed36ace76a745c3f604577367f3b4a00ede75e2 (diff) |
Merge pull request #7887 from piwik/6785_log_delete_command
Fixes #6785, #7180 add log delete command and make log deletion delete only based on time and not idvisit.
17 files changed, 1369 insertions, 289 deletions
diff --git a/core/DataAccess/RawLogDao.php b/core/DataAccess/RawLogDao.php index 0105a1047f..184b33909f 100644 --- a/core/DataAccess/RawLogDao.php +++ b/core/DataAccess/RawLogDao.php @@ -9,13 +9,28 @@ namespace Piwik\DataAccess; use Piwik\Common; +use Piwik\Container\StaticContainer; use Piwik\Db; +use Piwik\Piwik; +use Piwik\Plugin\Dimension\DimensionMetadataProvider; /** * DAO that queries log tables. */ class RawLogDao { + const DELETE_UNUSED_ACTIONS_TEMP_TABLE_NAME = 'tmp_log_actions_to_keep'; + + /** + * @var DimensionMetadataProvider + */ + private $dimensionMetadataProvider; + + public function __construct(DimensionMetadataProvider $provider = null) + { + $this->dimensionMetadataProvider = $provider ?: StaticContainer::get('Piwik\Plugin\Dimension\DimensionMetadataProvider'); + } + /** * @param array $values * @param string $idVisit @@ -45,38 +60,152 @@ class RawLogDao /** * @param string $from * @param string $to - * @param array $fields - * @param int $fromId - * @param int $limit - * @return array[] + * @return int */ - public function getVisitsWithDatesLimit($from, $to, $fields = array(), $fromId = 0, $limit = 1000) + public function countVisitsWithDatesLimit($from, $to) { - $sql = "SELECT " . implode(', ', $fields) + $sql = "SELECT COUNT(*) AS num_rows" . " FROM " . Common::prefixTable('log_visit') - . " WHERE visit_first_action_time >= ? AND visit_last_action_time < ?" - . " AND idvisit > ?" - . sprintf(" LIMIT %d", $limit); + . " WHERE visit_last_action_time >= ? AND visit_last_action_time < ?"; - $bind = array($from, $to, $fromId); + $bind = array($from, $to); - return Db::fetchAll($sql, $bind); + return (int) Db::fetchOne($sql, $bind); } /** - * @param string $from - * @param string $to - * @return int + * Iterates over logs in a log table in chunks. Parameters to this function are as backend agnostic + * as possible w/o dramatically increasing code complexity. + * + * @param string $logTable The log table name. Unprefixed, eg, `log_visit`. + * @param array[] $conditions An array describing the conditions logs must match in the query. Translates to + * the WHERE part of a SELECT statement. Each element must contain three elements: + * + * * the column name + * * the operator (ie, '=', '<>', '<', etc.) + * * the operand (ie, a value) + * + * The elements are AND-ed together. + * + * Example: + * + * ``` + * array( + * array('visit_first_action_time', '>=', ...), + * array('visit_first_action_time', '<', ...) + * ) + * ``` + * @param int $iterationStep The number of rows to query at a time. + * @param callable $callback The callback that processes each chunk of rows. */ - public function countVisitsWithDatesLimit($from, $to) + public function forAllLogs($logTable, $fields, $conditions, $iterationStep, $callback) { - $sql = "SELECT COUNT(*) AS num_rows" - . " FROM " . Common::prefixTable('log_visit') - . " WHERE visit_first_action_time >= ? AND visit_last_action_time < ?"; + $idField = $this->getIdFieldForLogTable($logTable); + list($query, $bind) = $this->createLogIterationQuery($logTable, $idField, $fields, $conditions, $iterationStep); - $bind = array($from, $to); + $lastId = 0; + do { + $rows = Db::fetchAll($query, array_merge(array($lastId), $bind)); + if (!empty($rows)) { + $lastId = $rows[count($rows) - 1][$idField]; - return (int) Db::fetchOne($sql, $bind); + $callback($rows); + } + } while (count($rows) == $iterationStep); + } + + /** + * Deletes visits with the supplied IDs from log_visit. This method does not cascade, so rows in other tables w/ + * the same visit ID will still exist. + * + * @param int[] $idVisits + * @return int The number of deleted rows. + */ + public function deleteVisits($idVisits) + { + $sql = "DELETE FROM `" . Common::prefixTable('log_visit') . "` WHERE idvisit IN " + . $this->getInFieldExpressionWithInts($idVisits); + + $statement = Db::query($sql); + return $statement->rowCount(); + } + + /** + * Deletes visit actions for the supplied visit IDs from log_link_visit_action. + * + * @param int[] $visitIds + * @return int The number of deleted rows. + */ + public function deleteVisitActionsForVisits($visitIds) + { + $sql = "DELETE FROM `" . Common::prefixTable('log_link_visit_action') . "` WHERE idvisit IN " + . $this->getInFieldExpressionWithInts($visitIds); + + $statement = Db::query($sql); + return $statement->rowCount(); + } + + /** + * Deletes conversions for the supplied visit IDs from log_conversion. This method does not cascade, so + * conversion items will not be deleted. + * + * @param int[] $visitIds + * @return int The number of deleted rows. + */ + public function deleteConversions($visitIds) + { + $sql = "DELETE FROM `" . Common::prefixTable('log_conversion') . "` WHERE idvisit IN " + . $this->getInFieldExpressionWithInts($visitIds); + + $statement = Db::query($sql); + return $statement->rowCount(); + } + + /** + * Deletes conversion items for the supplied visit IDs from log_conversion_item. + * + * @param int[] $visitIds + * @return int The number of deleted rows. + */ + public function deleteConversionItems($visitIds) + { + $sql = "DELETE FROM `" . Common::prefixTable('log_conversion_item') . "` WHERE idvisit IN " + . $this->getInFieldExpressionWithInts($visitIds); + + $statement = Db::query($sql); + return $statement->rowCount(); + } + + /** + * Deletes all unused entries from the log_action table. This method uses a temporary table to store used + * actions, and then deletes rows from log_action that are not in this temporary table. + * + * Table locking is required to avoid concurrency issues. + * + * @throws \Exception If table locking permission is not granted to the current MySQL user. + */ + public function deleteUnusedLogActions() + { + if (!Db::isLockPrivilegeGranted()) { + throw new \Exception("RawLogDao.deleteUnusedLogActions() requires table locking permission in order to complete without error."); + } + + // get current max ID in log tables w/ idaction references. + $maxIds = $this->getMaxIdsInLogTables(); + + $this->createTempTableForStoringUsedActions(); + + // do large insert (inserting everything before maxIds) w/o locking tables... + $this->insertActionsToKeep($maxIds, $deleteOlderThanMax = true); + + // ... then do small insert w/ locked tables to minimize the amount of time tables are locked. + $this->lockLogTables(); + $this->insertActionsToKeep($maxIds, $deleteOlderThanMax = false); + + // delete before unlocking tables so there's no chance a new log row that references an + // unused action will be inserted. + $this->deleteUnusedActions(); + Db::unlockAllTables(); } /** @@ -106,4 +235,149 @@ class RawLogDao { return Db::query($sql, array_merge(array_values($values), array($idVisit))); } -} + + private function getIdFieldForLogTable($logTable) + { + switch ($logTable) { + case 'log_visit': + return 'idvisit'; + case 'log_link_visit_action': + return 'idlink_va'; + case 'log_conversion': + return 'idvisit'; + case 'log_conversion_item': + return 'idvisit'; + case 'log_action': + return 'idaction'; + default: + throw new \InvalidArgumentException("Unknown log table '$logTable'."); + } + } + + // TODO: instead of creating a log query like this, we should re-use segments. to do this, however, there must be a 1-1 + // mapping for dimensions => segments, and each dimension should automatically have a segment. + private function createLogIterationQuery($logTable, $idField, $fields, $conditions, $iterationStep) + { + $bind = array(); + + $sql = "SELECT " . implode(', ', $fields) . " FROM `" . Common::prefixTable($logTable) . "` WHERE $idField > ?"; + + foreach ($conditions as $condition) { + list($column, $operator, $value) = $condition; + + if (is_array($value)) { + $sql .= " AND $column IN (" . Common::getSqlStringFieldsArray($value) . ")"; + + $bind = array_merge($bind, $value); + } else { + $sql .= " AND $column $operator ?"; + + $bind[] = $value; + } + } + + $sql .= " ORDER BY $idField ASC LIMIT " . (int)$iterationStep; + + return array($sql, $bind); + } + + private function getInFieldExpressionWithInts($idVisits) + { + $sql = "("; + + $isFirst = true; + foreach ($idVisits as $idVisit) { + if ($isFirst) { + $isFirst = false; + } else { + $sql .= ', '; + } + + $sql .= (int)$idVisit; + } + + $sql .= ")"; + + return $sql; + } + + + private function getMaxIdsInLogTables() + { + $tables = array('log_conversion', 'log_link_visit_action', 'log_visit', 'log_conversion_item'); + $idColumns = $this->getTableIdColumns(); + + $result = array(); + foreach ($tables as $table) { + $idCol = $idColumns[$table]; + $result[$table] = Db::fetchOne("SELECT MAX($idCol) FROM " . Common::prefixTable($table)); + } + + return $result; + } + + private function createTempTableForStoringUsedActions() + { + $sql = "CREATE TEMPORARY TABLE " . Common::prefixTable(self::DELETE_UNUSED_ACTIONS_TEMP_TABLE_NAME) . " ( + idaction INT(11), + PRIMARY KEY (idaction) + )"; + Db::query($sql); + } + + // protected for testing purposes + protected function insertActionsToKeep($maxIds, $olderThan = true, $insertIntoTempIterationStep = 100000) + { + $tempTableName = Common::prefixTable(self::DELETE_UNUSED_ACTIONS_TEMP_TABLE_NAME); + + $idColumns = $this->getTableIdColumns(); + foreach ($this->dimensionMetadataProvider->getActionReferenceColumnsByTable() as $table => $columns) { + $idCol = $idColumns[$table]; + + foreach ($columns as $col) { + $select = "SELECT $col FROM " . Common::prefixTable($table) . " WHERE $idCol >= ? AND $idCol < ?"; + $sql = "INSERT IGNORE INTO $tempTableName $select"; + + if ($olderThan) { + $start = 0; + $finish = $maxIds[$table]; + } else { + $start = $maxIds[$table]; + $finish = Db::fetchOne("SELECT MAX($idCol) FROM " . Common::prefixTable($table)); + } + + Db::segmentedQuery($sql, $start, $finish, $insertIntoTempIterationStep); + } + } + } + + private function lockLogTables() + { + Db::lockTables( + $readLocks = Common::prefixTables('log_conversion', 'log_link_visit_action', 'log_visit', 'log_conversion_item'), + $writeLocks = Common::prefixTables('log_action') + ); + } + + private function deleteUnusedActions() + { + list($logActionTable, $tempTableName) = Common::prefixTables("log_action", self::DELETE_UNUSED_ACTIONS_TEMP_TABLE_NAME); + + $deleteSql = "DELETE LOW_PRIORITY QUICK IGNORE $logActionTable + FROM $logActionTable + LEFT JOIN $tempTableName tmp ON tmp.idaction = $logActionTable.idaction + WHERE tmp.idaction IS NULL"; + + Db::query($deleteSql); + } + + private function getTableIdColumns() + { + return array( + 'log_link_visit_action' => 'idlink_va', + 'log_conversion' => 'idvisit', + 'log_visit' => 'idvisit', + 'log_conversion_item' => 'idvisit' + ); + } +}
\ No newline at end of file diff --git a/core/LogDeleter.php b/core/LogDeleter.php new file mode 100644 index 0000000000..fc61ec9358 --- /dev/null +++ b/core/LogDeleter.php @@ -0,0 +1,110 @@ +<?php +/** + * Piwik - free/libre analytics platform + * + * @link http://piwik.org + * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later + */ + +namespace Piwik; + +use Piwik\DataAccess\RawLogDao; + +/** + * Service that deletes log entries. Methods in this class cascade, so deleting visits will delete visit actions, + * conversions and conversion items. + */ +class LogDeleter +{ + /** + * @var RawLogDao + */ + private $rawLogDao; + + public function __construct(RawLogDao $rawLogDao) + { + $this->rawLogDao = $rawLogDao; + } + + /** + * Deletes visits by ID. This method cascades, so conversions, conversion items and visit actions for + * the visits are also deleted. + * + * @param int[] $visitIds + * @return int The number of deleted visits. + */ + public function deleteVisits($visitIds) + { + $this->deleteConversions($visitIds); + $this->rawLogDao->deleteVisitActionsForVisits($visitIds); + + return $this->rawLogDao->deleteVisits($visitIds); + } + + /** + * Deletes conversions by visit ID. This method cascades, so conversion items are also deleted. + * + * @param int[] $visitIds The list of visits to delete conversions for. + * @return int The number rows deleted. + */ + public function deleteConversions($visitIds) + { + $this->deleteConversionItems($visitIds); + return $this->rawLogDao->deleteConversions($visitIds); + } + + /** + * Deletes conversion items by visit ID. + * + * @param int[] $visitIds The list of visits to delete conversions for. + * @return int The number rows deleted. + */ + public function deleteConversionItems($visitIds) + { + return $this->rawLogDao->deleteConversionItems($visitIds); + } + + /** + * Deletes visits within the specified date range and belonging to the specified site (if any). Visits are + * deleted in chunks, so only `$iterationStep` visits are deleted at a time. + * + * @param string|null $startDatetime A datetime string. Visits that occur at this time or after are deleted. If not supplied, + * visits from the beginning of time are deleted. + * @param string|null $endDatetime A datetime string. Visits that occur before this time are deleted. If not supplied, + * visits from the end of time are deleted. + * @param int|null $idSite The site to delete visits from. + * @param int $iterationStep The number of visits to delete at a single time. + * @param callable $afterChunkDeleted Callback executed after every chunk of visits are deleted. + * @return int The number of visits deleted. + */ + public function deleteVisitsFor($startDatetime, $endDatetime, $idSite = null, $iterationStep = 1000, $afterChunkDeleted = null) + { + $fields = array('idvisit'); + $conditions = array(); + + if (!empty($startDatetime)) { + $conditions[] = array('visit_last_action_time', '>=', $startDatetime); + } + + if (!empty($endDatetime)) { + $conditions[] = array('visit_last_action_time', '<', $endDatetime); + } + + if (!empty($idSite)) { + $conditions[] = array('idsite', '=', $idSite); + } + + $logsDeleted = 0; + $logPurger = $this; + $this->rawLogDao->forAllLogs('log_visit', $fields, $conditions, $iterationStep, function ($logs) use ($logPurger, &$logsDeleted, $afterChunkDeleted) { + $ids = array_map(function ($row) { return reset($row); }, $logs); + $logsDeleted += $logPurger->deleteVisits($ids); + + if (!empty($afterChunkDeleted)) { + $afterChunkDeleted($logsDeleted); + } + }); + + return $logsDeleted; + } +}
\ No newline at end of file diff --git a/plugins/PrivacyManager/DimensionMetadataProvider.php b/core/Plugin/Dimension/DimensionMetadataProvider.php index 6f2b4e9afd..089c60c0a6 100644 --- a/plugins/PrivacyManager/DimensionMetadataProvider.php +++ b/core/Plugin/Dimension/DimensionMetadataProvider.php @@ -6,9 +6,7 @@ * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later */ -namespace Piwik\Plugins\PrivacyManager; - -use Piwik\Plugin\Dimension\ActionDimension; +namespace Piwik\Plugin\Dimension; /** * Provides metadata about dimensions for the LogDataPurger class. diff --git a/plugins/CoreAdminHome/Commands/DeleteLogsData.php b/plugins/CoreAdminHome/Commands/DeleteLogsData.php new file mode 100644 index 0000000000..0860a2c910 --- /dev/null +++ b/plugins/CoreAdminHome/Commands/DeleteLogsData.php @@ -0,0 +1,197 @@ +<?php +/** + * Piwik - free/libre analytics platform + * + * @link http://piwik.org + * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later + */ + +namespace Piwik\Plugins\CoreAdminHome\Commands; + +use Piwik\Common; +use Piwik\Container\StaticContainer; +use Piwik\DataAccess\RawLogDao; +use Piwik\Date; +use Piwik\Db; +use Piwik\LogDeleter; +use Piwik\Plugin\ConsoleCommand; +use Piwik\Site; +use Piwik\Timer; +use Symfony\Component\Console\Input\InputInterface; +use Symfony\Component\Console\Input\InputOption; +use Symfony\Component\Console\Output\OutputInterface; +use Symfony\Component\Console\Question\ConfirmationQuestion; + +/** + * Command to selectively delete visits. + */ +class DeleteLogsData extends ConsoleCommand +{ + private static $logTables = array( + 'log_visit', + 'log_link_visit_action', + 'log_conversion', + 'log_conversion_item', + 'log_action' + ); + + /** + * @var RawLogDao + */ + private $rawLogDao; + + /** + * @var LogDeleter + */ + private $logDeleter; + + public function __construct(LogDeleter $logDeleter = null, RawLogDao $rawLogDao = null) + { + parent::__construct(); + + $this->logDeleter = $logDeleter ?: StaticContainer::get('Piwik\LogDeleter'); + $this->rawLogDao = $rawLogDao ?: StaticContainer::get('Piwik\DataAccess\RawLogDao'); + } + + protected function configure() + { + $this->setName('core:delete-logs-data'); + $this->setDescription('Delete data from one of the log tables: ' . implode(', ', self::$logTables) . '.'); + $this->addOption('dates', null, InputOption::VALUE_REQUIRED, 'Delete log data with a date within this date range. Eg, 2012-01-01,2013-01-01'); + $this->addOption('idsite', null, InputOption::VALUE_OPTIONAL, + 'Delete log data belonging to the site with this ID. Comma separated list of website id. Eg, 1, 2, 3, etc. By default log data from all sites is purged.'); + $this->addOption('limit', null, InputOption::VALUE_REQUIRED, "The number of rows to delete at a time. The larger the number, " + . "the more time is spent deleting logs, and the less progress will be printed to the screen.", 1000); + $this->addOption('optimize-tables', null, InputOption::VALUE_NONE, + "If supplied, the command will optimize log tables after deleting logs. Note: this can take a very long time."); + } + + protected function execute(InputInterface $input, OutputInterface $output) + { + list($from, $to) = $this->getDateRangeToDeleteFrom($input); + $idSite = $this->getSiteToDeleteFrom($input); + $step = $this->getRowIterationStep($input); + + $output->writeln( sprintf( + "<info>Preparing to delete all visits belonging to %s between $from and $to.</info>", + $idSite ? "website $idSite" : "ALL websites" + )); + + $confirm = $this->askForDeleteConfirmation($input, $output); + if (!$confirm) { + return; + } + + $timer = new Timer(); + + try { + $logsDeleted = $this->logDeleter->deleteVisitsFor($from, $to, $idSite, $step, function () use ($output) { + $output->write('.'); + }); + } catch (\Exception $ex) { + $output->writeln(""); + + throw $ex; + } + + $this->writeSuccessMessage($output, array( + "Successfully deleted $logsDeleted visits. <comment>" . $timer . "</comment>")); + + if ($input->getOption('optimize-tables')) { + $this->optimizeTables($output); + } + } + + /** + * @param InputInterface $input + * @return Date[] + */ + private function getDateRangeToDeleteFrom(InputInterface $input) + { + $dates = $input->getOption('dates'); + if (empty($dates)) { + throw new \InvalidArgumentException("No date range supplied in --dates option. Deleting all logs by default is not allowed, you must specify a date range."); + } + + $parts = explode(',', $dates); + $parts = array_map('trim', $parts); + + if (count($parts) !== 2) { + throw new \InvalidArgumentException("Invalid date range supplied: $dates"); + } + + list($start, $end) = $parts; + + try { + /** @var Date[] $dateObjects */ + $dateObjects = array(Date::factory($start), Date::factory($end)); + } catch (\Exception $ex) { + throw new \InvalidArgumentException("Invalid date range supplied: $dates (" . $ex->getMessage() . ")", $code = 0, $ex); + } + + if ($dateObjects[0]->getTimestamp() > $dateObjects[1]->getTimestamp()) { + throw new \InvalidArgumentException("Invalid date range supplied: $dates (first date is older than the last date)"); + } + + $dateObjects = array($dateObjects[0]->getDatetime(), $dateObjects[1]->getDatetime()); + + return $dateObjects; + } + + private function getSiteToDeleteFrom(InputInterface $input) + { + $idSite = $input->getOption('idsite'); + + if(is_null($idSite)) { + return $idSite; + } + // validate the site ID + try { + new Site($idSite); + } catch (\Exception $ex) { + throw new \InvalidArgumentException("Invalid site ID: $idSite", $code = 0, $ex); + } + + return $idSite; + } + + private function getRowIterationStep(InputInterface $input) + { + $step = (int) $input->getOption('limit'); + + if ($step <= 0) { + throw new \InvalidArgumentException("Invalid row limit supplied: $step. Must be a number greater than 0."); + } + + return $step; + } + + private function askForDeleteConfirmation(InputInterface $input, OutputInterface $output) + { + $helper = $this->getHelper('question'); + $question = new ConfirmationQuestion('<comment>You are about to delete log data. This action cannot be undone, are you sure you want to continue? (Y/N)</comment> ', false); + + return $helper->ask($input, $output, $question); + } + + private function optimizeTables(OutputInterface $output) + { + foreach (self::$logTables as $table) { + $output->write("Optimizing table $table... "); + + $timer = new Timer(); + + $prefixedTable = Common::prefixTable($table); + + $done = Db::optimizeTables($prefixedTable); + + if($done) { + $output->writeln("done. <comment>" . $timer . "</comment>"); + } else { + $output->writeln("skipped! <comment>" . $timer . "</comment>"); + } + } + + $this->writeSuccessMessage($output, array("Table optimization finished.")); + } +}
\ No newline at end of file diff --git a/plugins/CoreAdminHome/tests/Integration/Commands/DeleteLogsDataTest.php b/plugins/CoreAdminHome/tests/Integration/Commands/DeleteLogsDataTest.php new file mode 100644 index 0000000000..7c474d9c8b --- /dev/null +++ b/plugins/CoreAdminHome/tests/Integration/Commands/DeleteLogsDataTest.php @@ -0,0 +1,153 @@ +<?php +/** + * Piwik - free/libre analytics platform + * + * @link http://piwik.org + * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later + */ +namespace Piwik\Plugins\CoreAdminHome\tests\Integration\Commands; + +use Piwik\Common; +use Piwik\Container\StaticContainer; +use Piwik\DataAccess\RawLogDao; +use Piwik\Tests\Fixtures\ManySitesImportedLogs; +use Piwik\Tests\Framework\TestCase\ConsoleCommandTestCase; +use Piwik\Tests\Framework\TestCase\IntegrationTestCase; +use Symfony\Component\Console\Helper\QuestionHelper; + +/** + * @group Core + */ +class DeleteLogsDataTest extends ConsoleCommandTestCase +{ + /** + * @var ManySitesImportedLogs + */ + public static $fixture; + + /** + * @dataProvider getTestDataForInvalidDateRangeTest + */ + public function test_Command_Fails_WhenInvalidDateRangeSupplied($dateRange) + { + $this->setCommandInput('N'); + + $result = $this->applicationTester->run(array( + 'command' => 'core:delete-logs-data', + '--dates' => $dateRange, + '--idsite' => self::$fixture->idSite, + '-vvv' => true + )); + + $this->assertNotEquals(0, $result, $this->getCommandDisplayOutputErrorMessage()); + $this->assertContains('Invalid date range supplied', $this->applicationTester->getDisplay()); + } + + public function getTestDataForInvalidDateRangeTest() + { + return array( + array('completegarbage'), + array('2012-01-01,garbage'), + array('garbage,2012-01-01'), + array('2012-02-01,2012-01-01'), // first date is older than the last date + array(',') + ); + } + + public function test_Command_Fails_WhenInvalidSiteIdSupplied() + { + $this->setCommandInput('N'); + + $result = $this->applicationTester->run(array( + 'command' => 'core:delete-logs-data', + '--dates' => '2012-01-01,2012-01-02', + '--idsite' => 43, + '-vvv' => true + )); + + $this->assertNotEquals(0, $result, $this->getCommandDisplayOutputErrorMessage()); + $this->assertContains('Invalid site ID', $this->applicationTester->getDisplay()); + } + + /** + * @dataProvider getTestDataForInvalidIterationStepTest + */ + public function test_Command_Fails_WhenInvalidIterationStepSupplied($limit) + { + $this->setCommandInput('N'); + + $result = $this->applicationTester->run(array( + 'command' => 'core:delete-logs-data', + '--dates' => '2012-01-01,2012-01-02', + '--idsite' => self::$fixture->idSite, + '--limit' => $limit, + '-vvv' => true + )); + + $this->assertNotEquals(0, $result, $this->getCommandDisplayOutputErrorMessage()); + $this->assertContains('Invalid row limit supplied', $this->applicationTester->getDisplay()); + } + + public function getTestDataForInvalidIterationStepTest() + { + return array( + array(0), + array(-45) + ); + } + + public function test_Command_SkipsLogDeletionIfUserDoesNotConfirm() + { + $this->setCommandInput('N'); + + $dateRange = '2012-08-09,2012-08-11'; + $this->assertVisitsFoundInLogs($dateRange); + + $result = $this->applicationTester->run(array( + 'command' => 'core:delete-logs-data', + '--dates' => $dateRange, + '--idsite' => self::$fixture->idSite, + '-vvv' => true + )); + + $this->assertEquals(0, $result, $this->getCommandDisplayOutputErrorMessage()); + $this->assertNotRegExp("/Successfully deleted [0-9]+ rows from all log tables/", $this->applicationTester->getDisplay()); + } + + public function test_Command_CorrectlyDeletesRequestedLogFiles() + { + $this->setCommandInput('Y'); + + $dateRange = '2012-08-09,2012-08-11'; + $this->assertVisitsFoundInLogs($dateRange); + + $options = array('interactive' => true); + $result = $this->applicationTester->run(array( + 'command' => 'core:delete-logs-data', + '--dates' => $dateRange, + '--idsite' => self::$fixture->idSite, + '-vvv' => true + ), $options); + + $this->assertEquals(0, $result, $this->getCommandDisplayOutputErrorMessage()); + $this->assertContains("Successfully deleted 15 visits", $this->applicationTester->getDisplay()); + } + + private function setCommandInput($value) + { + /** @var QuestionHelper $dialog */ + $dialog = $this->application->getHelperSet()->get('question'); + $dialog->setInputStream($this->getInputStream("$value\n")); + } + + protected function assertVisitsFoundInLogs($dateRange) + { + list($from, $to) = explode(",", $dateRange); + + /** @var RawLogDao $dao */ + $dao = StaticContainer::get('Piwik\DataAccess\RawLogDao'); + $this->assertNotEmpty($dao->countVisitsWithDatesLimit($from, $to)); + } +} + +DeleteLogsDataTest::$fixture = new ManySitesImportedLogs();
\ No newline at end of file diff --git a/plugins/PrivacyManager/LogDataPurger.php b/plugins/PrivacyManager/LogDataPurger.php index 21421f11d2..937d421672 100755 --- a/plugins/PrivacyManager/LogDataPurger.php +++ b/plugins/PrivacyManager/LogDataPurger.php @@ -9,34 +9,43 @@ namespace Piwik\Plugins\PrivacyManager; use Piwik\Common; +use Piwik\DataAccess\RawLogDao; use Piwik\Date; use Piwik\Db; use Piwik\Log; -use Piwik\Piwik; +use Piwik\LogDeleter; /** * Purges the log_visit, log_conversion and related tables of old visit data. */ class LogDataPurger { - const TEMP_TABLE_NAME = 'tmp_log_actions_to_keep'; - /** * The max set of rows each table scan select should query at one time. */ public static $selectSegmentSize = 100000; /** - * @param DimensionMetadataProvider + * LogDeleter service used to delete visits. + * + * @var LogDeleter + */ + private $logDeleter; + + /** + * DAO class that is used to delete unused actions. + * + * @var RawLogDao */ - private $dimensionMetadataProvider; + private $rawLogDao; /** * Constructor. */ - public function __construct(DimensionMetadataProvider $dimensionMetadataProvider) + public function __construct(LogDeleter $logPurger, RawLogDao $rawLogDao) { - $this->dimensionMetadataProvider = $dimensionMetadataProvider; + $this->logPurger = $logPurger; + $this->rawLogDao = $rawLogDao; } /** @@ -50,32 +59,17 @@ class LogDataPurger * @param int $deleteLogsOlderThan The number of days after which log entires are considered old. * Visits and related data whose age is greater than this number * will be purged. - * @param int $maxRowsToDeletePerQuery The maximum number of rows to delete in one query. Used to - * make sure log tables aren't locked for too long. */ - public function purgeData($deleteLogsOlderThan, $maxRowsToDeletePerQuery) + public function purgeData($deleteLogsOlderThan) { - $maxIdVisit = $this->getDeleteIdVisitOffset($deleteLogsOlderThan); - - // break if no ID was found (nothing to delete for given period) - if (empty($maxIdVisit)) { - return; - } + $dateUpperLimit = Date::factory("today")->subDay($deleteLogsOlderThan); + $this->logPurger->deleteVisitsFor($start = null, $dateUpperLimit->getDatetime()); $logTables = self::getDeleteTableLogTables(); - // delete data from log tables - $where = "WHERE idvisit <= ?"; - foreach ($logTables as $logTable) { - // deleting from log_action must be handled differently, so we do it later - if ($logTable != Common::prefixTable('log_action')) { - Db::deleteAllRows($logTable, $where, "idvisit ASC", $maxRowsToDeletePerQuery, array($maxIdVisit)); - } - } - // delete unused actions from the log_action table (but only if we can lock tables) if (Db::isLockPrivilegeGranted()) { - $this->purgeUnusedLogActions(); + $this->rawLogDao->deleteUnusedLogActions(); } else { $logMessage = get_class($this) . ": LOCK TABLES privilege not granted; skipping unused actions purge"; Log::warning($logMessage); @@ -95,6 +89,9 @@ class LogDataPurger * Visits and related data whose age is greater than this number * will be purged. * @return array + * + * TODO: purge estimate uses max idvisit w/ time, but purge does not, so estimate may be less accurate. + * to be more accurate, it should use the same strategy as purgeData(), but this could be very slow. */ public function getPurgeEstimate($deleteLogsOlderThan) { @@ -118,29 +115,6 @@ class LogDataPurger } /** - * Safely delete all unused log_action rows. - */ - private function purgeUnusedLogActions() - { - $this->createTempTable(); - - // get current max ID in log tables w/ idaction references. - $maxIds = $this->getMaxIdsInLogTables(); - - // do large insert (inserting everything before maxIds) w/o locking tables... - $this->insertActionsToKeep($maxIds, $deleteOlderThanMax = true); - - // ... then do small insert w/ locked tables to minimize the amount of time tables are locked. - $this->lockLogTables(); - $this->insertActionsToKeep($maxIds, $deleteOlderThanMax = false); - - // delete before unlocking tables so there's no chance a new log row that references an - // unused action will be inserted. - $this->deleteUnusedActions(); - Db::unlockAllTables(); - } - - /** * get highest idVisit to delete rows from * @return string */ @@ -173,101 +147,6 @@ class LogDataPurger return (int) Db::fetchOne($sql, array($maxIdVisit)); } - private function createTempTable() - { - $sql = "CREATE TEMPORARY TABLE " . Common::prefixTable(self::TEMP_TABLE_NAME) . " ( - idaction INT(11), - PRIMARY KEY (idaction) - )"; - Db::query($sql); - } - - private function getMaxIdsInLogTables() - { - $tables = array('log_conversion', 'log_link_visit_action', 'log_visit', 'log_conversion_item'); - $idColumns = $this->getTableIdColumns(); - - $result = array(); - foreach ($tables as $table) { - $idCol = $idColumns[$table]; - $result[$table] = Db::fetchOne("SELECT MAX($idCol) FROM " . Common::prefixTable($table)); - } - - return $result; - } - - private function insertActionsToKeep($maxIds, $olderThan = true) - { - $tempTableName = Common::prefixTable(self::TEMP_TABLE_NAME); - - $idColumns = $this->getTableIdColumns(); - $idActionColumnsByTable = $this->dimensionMetadataProvider->getActionReferenceColumnsByTable(); - foreach ($idActionColumnsByTable as $table => $columns) { - $idCol = $idColumns[$table]; - - foreach ($columns as $col) { - $select = "SELECT $col FROM " . Common::prefixTable($table) . " WHERE $idCol >= ? AND $idCol < ?"; - $sql = "INSERT IGNORE INTO $tempTableName $select"; - - if ($olderThan) { - $start = 0; - $finish = $maxIds[$table]; - } else { - $start = $maxIds[$table]; - $finish = Db::fetchOne("SELECT MAX($idCol) FROM " . Common::prefixTable($table)); - } - - Db::segmentedQuery($sql, $start, $finish, self::$selectSegmentSize); - } - } - - // allow code to be executed after data is inserted. for concurrency testing purposes. - if ($olderThan) { - /** - * @ignore - */ - Piwik::postEvent("LogDataPurger.ActionsToKeepInserted.olderThan"); - } else { - /** - * @ignore - */ - Piwik::postEvent("LogDataPurger.ActionsToKeepInserted.newerThan"); - } - } - - private function lockLogTables() - { - Db::lockTables( - $readLocks = Common::prefixTables('log_conversion', - 'log_link_visit_action', - 'log_visit', - 'log_conversion_item'), - $writeLocks = Common::prefixTables('log_action') - ); - } - - private function deleteUnusedActions() - { - list($logActionTable, $tempTableName) = Common::prefixTables("log_action", self::TEMP_TABLE_NAME); - - $deleteSql = "DELETE LOW_PRIORITY QUICK IGNORE $logActionTable - FROM $logActionTable - LEFT JOIN $tempTableName tmp ON tmp.idaction = $logActionTable.idaction - WHERE tmp.idaction IS NULL"; - - Db::query($deleteSql); - } - - private function getTableIdColumns() - { - return array( - 'log_link_visit_action' => 'idlink_va', - 'log_conversion' => 'idvisit', - 'log_visit' => 'idvisit', - 'log_conversion_item' => 'idvisit' - ); - } - // let's hardcode, since these are not dynamically created tables public static function getDeleteTableLogTables() { diff --git a/plugins/PrivacyManager/PrivacyManager.php b/plugins/PrivacyManager/PrivacyManager.php index 775ba48af7..b8ff45e480 100644 --- a/plugins/PrivacyManager/PrivacyManager.php +++ b/plugins/PrivacyManager/PrivacyManager.php @@ -326,7 +326,7 @@ class PrivacyManager extends Plugin // execute the purge /** @var LogDataPurger $logDataPurger */ $logDataPurger = StaticContainer::get('Piwik\Plugins\PrivacyManager\LogDataPurger'); - $logDataPurger->purgeData($settings['delete_logs_older_than'], $settings['delete_logs_max_rows_per_query']); + $logDataPurger->purgeData($settings['delete_logs_older_than']); return true; } diff --git a/tests/PHPUnit/System/PrivacyManagerTest.php b/plugins/PrivacyManager/tests/Integration/DataPurgingTest.php index bf0a6fcce1..1f79d34b82 100644 --- a/tests/PHPUnit/System/PrivacyManagerTest.php +++ b/plugins/PrivacyManager/tests/Integration/DataPurgingTest.php @@ -5,35 +5,53 @@ * @link http://piwik.org * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later */ -namespace Piwik\Tests\System; +namespace Piwik\Plugins\PrivacyManager\tests\Integration; use Piwik\Archive; use Piwik\Common; use Piwik\Config; -use Piwik\DataAccess\ArchiveTableCreator; -use Piwik\DataTable\Manager; +use Piwik\DataAccess\RawLogDao; use Piwik\Date; use Piwik\Db; +use Piwik\LogDeleter; +use Piwik\DbHelper; use Piwik\Option; use Piwik\Plugins\Goals\API as APIGoals; use Piwik\Plugins\Goals\Archiver; -use Piwik\Plugins\PrivacyManager\DimensionMetadataProvider; +use Piwik\Plugin\Dimension\DimensionMetadataProvider; use Piwik\Plugins\PrivacyManager\LogDataPurger; use Piwik\Plugins\PrivacyManager\PrivacyManager; use Piwik\Plugins\PrivacyManager\ReportsPurger; use Piwik\Plugins\VisitorInterest\API as APIVisitorInterest; -use Piwik\Site; use Piwik\Tests\Framework\TestCase\IntegrationTestCase; -use Piwik\Tracker\Cache; use Piwik\Tracker\GoalManager; use Piwik\Tests\Framework\Fixture; -use Piwik\Translate; + +class DataPurgingTest_RawLogDao extends RawLogDao +{ + public $insertActionsOlderThanCallback; + public $insertActionsNewerThanCallback; + + protected function insertActionsToKeep($maxIds, $olderThan = true, $insertIntoTempIterationStep = 100000) + { + parent::insertActionsToKeep($maxIds, $olderThan, 2); // we use 2 to force iterations during tests + + // allow code to be executed after data is inserted. for concurrency testing purposes. + if ($olderThan && $this->insertActionsOlderThanCallback) { + $callback = $this->insertActionsOlderThanCallback; + $callback(); + } else if ($this->insertActionsNewerThanCallback) { + $callback = $this->insertActionsNewerThanCallback; + $callback(); + } + } +} /** - * @group PrivacyManagerTest + * @group PrivacyManager * @group Plugins */ -class PrivacyManagerTest extends IntegrationTestCase +class DataPurgingTest extends IntegrationTestCase { // constants used in checking whether numeric tables are populated correctly. // 'done' entries exist for every period, even if there's no metric data, so we need the @@ -120,7 +138,9 @@ class PrivacyManagerTest extends IntegrationTestCase public function tearDown() { - $tempTableName = Common::prefixTable(LogDataPurger::TEMP_TABLE_NAME); + parent::tearDown(); + + $tempTableName = Common::prefixTable(RawLogDao::DELETE_UNUSED_ACTIONS_TEMP_TABLE_NAME); Db::query("DROP TABLE IF EXISTS " . $tempTableName); parent::tearDown(); @@ -219,6 +239,21 @@ class PrivacyManagerTest extends IntegrationTestCase $this->assertEquals(self::FEB_METRIC_ARCHIVE_COUNT + 1, $this->_getTableCount($archiveTables['blob'][1])); // February } + public function test_LogDataPurging_WorksWhenVisitsInPastTracked() + { + DbHelper::deleteArchiveTables(); + + self::trackVisitInPast(); + self::_addReportData(); + + $this->_setTimeToRun(); + $this->assertTrue( $this->instance->deleteLogData() ); + + $this->checkLogDataPurged(); + + // NOTE: it is not expected that the data purging estimate will work when visits in the past are tracked + } + /** * Make sure nothing happens when deleting logs & reports are both disabled. */ @@ -251,10 +286,8 @@ class PrivacyManagerTest extends IntegrationTestCase */ public function testPurgeDataDeleteLogsNoData() { - \Piwik\DbHelper::truncateAllTables(); - foreach (ArchiveTableCreator::getTablesArchivesInstalled() as $table) { - Db::exec("DROP TABLE $table"); - } + DbHelper::truncateAllTables(); + DbHelper::deleteArchiveTables(); // get purge data prediction $prediction = PrivacyManager::getPurgeEstimate(); @@ -482,9 +515,9 @@ class PrivacyManagerTest extends IntegrationTestCase */ public function testPurgeLogDataConcurrency() { - \Piwik\Piwik::addAction("LogDataPurger.ActionsToKeepInserted.olderThan", array($this, 'addReferenceToUnusedAction')); - - $purger = new LogDataPurger(new DimensionMetadataProvider()); + $rawLogDao = new DataPurgingTest_RawLogDao(new DimensionMetadataProvider()); + $rawLogDao->insertActionsOlderThanCallback = array($this, 'addReferenceToUnusedAction'); + $purger = new LogDataPurger(new LogDeleter($rawLogDao), $rawLogDao); $this->unusedIdAction = Db::fetchOne( "SELECT idaction FROM " . Common::prefixTable('log_action') . " WHERE name = ?", @@ -649,11 +682,25 @@ class PrivacyManagerTest extends IntegrationTestCase Fixture::checkBulkTrackingResponse($t->doBulkTrack()); } + protected static function trackVisitInPast() + { + $start = Date::factory(self::$dateTime); + + // add a visit in the past so the idvisit will be greater than the others, but the time will be older + // this tests issue #7180 + $t = Fixture::getTracker(self::$idSite, $start, $defaultInit = true); + // we subtract 5 so it will be on the same day as another visit. this way, we won't create another day archive + // and change the counts in asserts + $t->setForceVisitDateTime($start->subDay(self::$daysAgoStart - 5)); + $t->setUrl("http://whatever.com/days_in_past"); + $t->doTrackPageView('visit in past'); + } + protected static function _addReportData() { $date = Date::factory(self::$dateTime); - $archive = Archive::build(self::$idSite, 'year', $date); + Archive::build(self::$idSite, 'year', $date); APIVisitorInterest::getInstance()->getNumberOfVisitsPerVisitDuration(self::$idSite, 'year', $date); @@ -748,7 +795,6 @@ class PrivacyManagerTest extends IntegrationTestCase $this->assertEquals(45, $this->_getTableCount('log_action')); $archiveTables = self::_getArchiveTableNames(); - //var_export(Db::fetchAll("SELECT * FROM " . Common::prefixTable($archiveTables['numeric'][0]))); $janMetricCount = $this->_getExpectedNumericArchiveCountJan(); $this->assertEquals($janMetricCount, $this->_getTableCount($archiveTables['numeric'][0])); // January diff --git a/plugins/UserCountry/Commands/AttributeHistoricalDataWithLocations.php b/plugins/UserCountry/Commands/AttributeHistoricalDataWithLocations.php index 42b3750dcd..5e617f53e6 100644 --- a/plugins/UserCountry/Commands/AttributeHistoricalDataWithLocations.php +++ b/plugins/UserCountry/Commands/AttributeHistoricalDataWithLocations.php @@ -115,26 +115,10 @@ class AttributeHistoricalDataWithLocations extends ConsoleCommand protected function processSpecifiedLogsInChunks(OutputInterface $output, $from, $to, $segmentLimit) { - $visitFieldsToSelect = array_merge(array('idvisit', 'location_ip'), array_keys(VisitorGeolocator::$logVisitFieldsToUpdate)); - - $lastId = 0; - do { - $logs = $this->dao->getVisitsWithDatesLimit($from, $to, $visitFieldsToSelect, $lastId, $segmentLimit); - if (!empty($logs)) { - $lastId = $logs[count($logs) - 1]['idvisit']; - - $this->reattributeVisitLogs($output, $logs); - } - } while (count($logs) == $segmentLimit); - } - - protected function reattributeVisitLogs(OutputInterface $output, $logRows) - { - foreach ($logRows as $row) { - $this->visitorGeolocator->attributeExistingVisit($row); - - $this->onVisitProcessed($output); - } + $self = $this; + $this->visitorGeolocator->reattributeVisitLogs($from, $to, $idSite = null, $segmentLimit, function () use ($output, $self) { + $self->onVisitProcessed($output); + }); } /** diff --git a/plugins/UserCountry/VisitorGeolocator.php b/plugins/UserCountry/VisitorGeolocator.php index be960c02f9..1c517075a1 100644 --- a/plugins/UserCountry/VisitorGeolocator.php +++ b/plugins/UserCountry/VisitorGeolocator.php @@ -234,6 +234,41 @@ class VisitorGeolocator } /** + * Re-geolocate visits within a date range for a specified site (if any). + * + * @param string $from A datetime string to treat as the lower bound. Visits newer than this date are processed. + * @param string $to A datetime string to treat as the upper bound. Visits older than this date are processed. + * @param int|null $idSite If supplied, only visits for this site are re-attributed. + * @param int $iterationStep The number of visits to re-attribute at the same time. + * @param callable|null $onLogProcessed If supplied, this callback is called after every row is processed. + * The processed visit and the updated values are passed to the callback. + */ + public function reattributeVisitLogs($from, $to, $idSite = null, $iterationStep = 1000, $onLogProcessed = null) + { + $visitFieldsToSelect = array_merge(array('idvisit', 'location_ip'), array_keys(VisitorGeolocator::$logVisitFieldsToUpdate)); + + $conditions = array( + array('visit_last_action_time', '>=', $from), + array('visit_last_action_time', '<', $to) + ); + + if (!empty($idSite)) { + $conditions[] = array('idsite', '=', $idSite); + } + + $self = $this; + $this->dao->forAllLogs('log_visit', $visitFieldsToSelect, $conditions, $iterationStep, function ($logs) use ($self, $onLogProcessed) { + foreach ($logs as $row) { + $updatedValues = $self->attributeExistingVisit($row); + + if (!empty($onLogProcessed)) { + $onLogProcessed($row, $updatedValues); + } + } + }); + } + + /** * @return LocationProvider */ public function getProvider() diff --git a/plugins/UserCountry/tests/Integration/VisitorGeolocatorTest.php b/plugins/UserCountry/tests/Integration/VisitorGeolocatorTest.php index 9c414a5708..ef1be64b1a 100644 --- a/plugins/UserCountry/tests/Integration/VisitorGeolocatorTest.php +++ b/plugins/UserCountry/tests/Integration/VisitorGeolocatorTest.php @@ -15,6 +15,7 @@ use Piwik\Network\IPUtils; use Piwik\Plugins\UserCountry\VisitorGeolocator; use Piwik\Plugins\UserCountry\LocationProvider; use Piwik\Tests\Framework\TestCase\IntegrationTestCase; +use Piwik\Tests\Framework\TestDataHelper\LogHelper; use Piwik\Tracker\Cache; use Piwik\Tracker\Visit; use Piwik\Tests\Framework\Mock\LocationProvider as MockLocationProvider; @@ -28,6 +29,18 @@ class VisitorGeolocatorTest extends IntegrationTestCase { const TEST_IP = '1.2.3.4'; + /** + * @var LogHelper + */ + private $logInserter; + + public function setUp() + { + parent::setUp(); + + $this->logInserter = new LogHelper(); + } + public function test_getLocation_shouldReturnLocationForProvider_IfLocationIsSetForCurrentProvider() { $location = array( @@ -218,7 +231,7 @@ class VisitorGeolocatorTest extends IntegrationTestCase $geolocator = new VisitorGeolocator($mockLocationProvider); $valuesUpdated = $geolocator->attributeExistingVisit($visit, $useCache = false); - $this->assertEquals($expectedVisitProperties, $this->getVisit($visit['idvisit']), $message = '', $delta = 0.001); + $this->assertEquals($expectedVisitProperties, $this->logInserter->getVisit($visit['idvisit']), $message = '', $delta = 0.001); $expectedUpdateValues = $expectedUpdateValues === null ? $expectedVisitProperties : $expectedUpdateValues; $this->assertEquals($expectedUpdateValues, $valuesUpdated, $message = '', $delta = 0.001); @@ -247,6 +260,99 @@ class VisitorGeolocatorTest extends IntegrationTestCase $this->assertNull($result); } + public function test_reattributeVisitLogs_ReattributesVisitsInDateRangeAndFromSite_AndCallsCallbackWithEveryProcessedRow() + { + foreach (array(1, 2) as $idSite) { + foreach (array('2012-01-01', '2012-01-02', '2012-01-03', '2012-01-04') as $date) { + $this->insertVisit(array( + 'visit_last_action_time' => $date, + 'idsite' => $idSite + )); + } + } + + $mockLocationProvider = $this->getProviderMockThatGeolocates(array( + 'location_country' => 'US', + 'location_region' => 'rg', + 'location_city' => 'the city' + )); + $geolocator = new VisitorGeolocator($mockLocationProvider); + + $reattributedVisits = array(); + $geolocator->reattributeVisitLogs('2012-01-02', '2012-01-04', 2, $segmentLimit = 1000, function ($row) use (&$reattributedVisits) { + $reattributedVisits[] = $row['idvisit']; + }); + + sort($reattributedVisits); + + $expectedVisitsVisited = array(6, 7); + $this->assertEquals($expectedVisitsVisited, $reattributedVisits); + + // check that no visits were re-attributed for site 1 + $actualVisits = Db::fetchAll("SELECT visit_last_action_time, idsite, location_country, location_region, location_city FROM " + . Common::prefixTable('log_visit') . " ORDER BY idsite ASC, visit_last_action_time ASC"); + $expectedVisits = array( + array( + 'visit_last_action_time' => '2012-01-01 00:00:00', + 'idsite' => '1', + 'location_country' => 'xx', + 'location_region' => null, + 'location_city' => null + ), + array( + 'visit_last_action_time' => '2012-01-02 00:00:00', + 'idsite' => '1', + 'location_country' => 'xx', + 'location_region' => null, + 'location_city' => null + ), + array( + 'visit_last_action_time' => '2012-01-03 00:00:00', + 'idsite' => '1', + 'location_country' => 'xx', + 'location_region' => null, + 'location_city' => null + ), + array( + 'visit_last_action_time' => '2012-01-04 00:00:00', + 'idsite' => '1', + 'location_country' => 'xx', + 'location_region' => null, + 'location_city' => null + ), + array( + 'visit_last_action_time' => '2012-01-01 00:00:00', + 'idsite' => '2', + 'location_country' => 'xx', + 'location_region' => null, + 'location_city' => null + ), + array( + 'visit_last_action_time' => '2012-01-02 00:00:00', + 'idsite' => '2', + 'location_country' => 'us', + 'location_region' => 'rg', + 'location_city' => 'the city' + ), + array( + 'visit_last_action_time' => '2012-01-03 00:00:00', + 'idsite' => '2', + 'location_country' => 'us', + 'location_region' => 'rg', + 'location_city' => 'the city' + ), + array( + 'visit_last_action_time' => '2012-01-04 00:00:00', + 'idsite' => '2', + 'location_country' => 'xx', + 'location_region' => null, + 'location_city' => null + ), + ); + + $this->assertEquals($expectedVisits, $actualVisits); + } + /** * @return PHPUnit_Framework_MockObject_MockObject|LocationProvider */ @@ -261,7 +367,7 @@ class VisitorGeolocatorTest extends IntegrationTestCase private function getProviderMockThatGeolocates($locationResult) { $mock = $this->getProviderMock(); - $mock->expects($this->once())->method('getLocation')->will($this->returnCallback(function ($info) use ($locationResult) { + $mock->expects($this->any())->method('getLocation')->will($this->returnCallback(function ($info) use ($locationResult) { if ($info['ip'] == VisitorGeolocatorTest::TEST_IP) { return $locationResult; } else { @@ -274,76 +380,17 @@ class VisitorGeolocatorTest extends IntegrationTestCase private function insertVisit($visit = array()) { $defaultProperties = array( - 'idsite' => 1, - 'idvisitor' => Common::hex2bin('ea95f303f2165aa0'), - 'visit_last_action_time' => '2012-01-01 00:00:00', - 'config_id' => Common::hex2bin('ea95f303f2165aa0'), - 'location_ip' => IPUtils::stringToBinaryIP(self::TEST_IP), - 'visitor_localtime' => '2012-01-01 00:00:00', - 'location_country' => 'xx', - 'config_os' => 'xxx', - 'visit_total_events' => 0, - 'visitor_days_since_last' => 0, - 'config_quicktime' => 0, - 'config_pdf' => 0, - 'config_realplayer' => 0, - 'config_silverlight' => 0, - 'config_windowsmedia' => 0, - 'config_java' => 0, - 'config_gears' => 0, - 'config_resolution' => 0, - 'config_resolution' => '', - 'config_cookie' => 0, - 'config_director' => 0, - 'config_flash' => 0, - 'config_browser_version' => '', - 'visitor_count_visits' => 1, - 'visitor_returning' => 0, - 'visit_total_time' => 123, - 'visit_entry_idaction_name' => 0, - 'visit_entry_idaction_url' => 0, - 'visitor_days_since_order' => 0, - 'visitor_days_since_first' => 0, - 'visit_first_action_time' => '2012-01-01 00:00:00', - 'visit_goal_buyer' => 0, - 'visit_goal_converted' => 0, - 'visit_exit_idaction_name' => 0, - 'referer_url' => '', - 'location_browser_lang' => 'xx', - 'config_browser_engine' => '', - 'config_browser_name' => '', - 'referer_type' => 0, - 'referer_name' => '', - 'visit_total_actions' => 0, - 'visit_total_searches' => 0 + 'location_ip' => IPUtils::stringToBinaryIP(self::TEST_IP) ); - $visit = array_merge($defaultProperties, $visit); - - $this->insertInto('log_visit', $visit); - - $idVisit = Db::fetchOne("SELECT LAST_INSERT_ID()"); - return $this->getVisit($idVisit, $allColumns = true); - } - - private function getVisit($idVisit, $allColumns = false) - { - $columns = $allColumns ? "*" : "location_country, location_region, location_city, location_latitude, location_longitude"; - $visit = Db::fetchRow("SELECT $columns FROM " . Common::prefixTable('log_visit') . " WHERE idvisit = ?", array($idVisit)); - - return $visit; + return $this->logInserter->insertVisit(array_merge($defaultProperties, $visit)); } private function insertTwoConversions($visit) { $conversionProperties = array( - 'idvisit' => $visit['idvisit'], 'idsite' => $visit['idsite'], 'idvisitor' => $visit['idvisitor'], - 'server_time' => '2012-01-01 00:00:00', - 'idgoal' => 1, - 'buster' => 1, - 'url' => '', 'location_longitude' => $visit['location_longitude'], 'location_latitude' => $visit['location_latitude'], 'location_region' => $visit['location_region'], @@ -351,23 +398,12 @@ class VisitorGeolocatorTest extends IntegrationTestCase 'location_city' => $visit['location_city'], 'visitor_count_visits' => $visit['visitor_count_visits'], 'visitor_returning' => $visit['visitor_returning'], - 'visitor_days_since_order' => 0, - 'visitor_days_since_first' => 0 ); - $this->insertInto('log_conversion', $conversionProperties); + $this->logInserter->insertConversion($visit['idvisit'], $conversionProperties); $conversionProperties['buster'] = 2; - $this->insertInto('log_conversion', $conversionProperties); - } - - private function insertInto($table, $row) - { - $columns = implode(', ', array_keys($row)); - $columnsPlaceholders = Common::getSqlStringFieldsArray($row); - $values = array_values($row); - - Db::query("INSERT INTO " . Common::prefixTable($table) . " ($columns) VALUES ($columnsPlaceholders)", $values); + $this->logInserter->insertConversion($visit['idvisit'], $conversionProperties); } private function getConversions($visit) diff --git a/tests/PHPUnit/Framework/Fixture.php b/tests/PHPUnit/Framework/Fixture.php index 0cbd4e9394..f60a43e0c8 100644 --- a/tests/PHPUnit/Framework/Fixture.php +++ b/tests/PHPUnit/Framework/Fixture.php @@ -716,9 +716,16 @@ class Fixture extends \PHPUnit_Framework_Assert $outfileName = $deflatedOut . '.gz'; if (file_exists($deflatedOut)) { + $filesize = filesize($deflatedOut); + if($filesize == 0) { + throw new Exception("The file $deflatedOut is empty. Suggestion: delete it and try again."); + } + + // Valid geoip db found return; } + Log::warning("Geoip database $outfileName is not found. Downloading from $url..."); $dump = fopen($url, 'rb'); $outfile = fopen($outfileName, 'wb'); if(!$outfile) { diff --git a/tests/PHPUnit/Framework/TestCase/ConsoleCommandTestCase.php b/tests/PHPUnit/Framework/TestCase/ConsoleCommandTestCase.php index 2da07556e1..31452e4651 100644 --- a/tests/PHPUnit/Framework/TestCase/ConsoleCommandTestCase.php +++ b/tests/PHPUnit/Framework/TestCase/ConsoleCommandTestCase.php @@ -10,9 +10,33 @@ namespace Piwik\Tests\Framework\TestCase; use Piwik\Console; use Symfony\Component\Console\Application; +use Symfony\Component\Console\Input\InputInterface; +use Symfony\Component\Console\Output\OutputInterface; use Symfony\Component\Console\Tester\ApplicationTester; /** + * This class is used to workaround a Symfony issue. For tests that need to test interactivity, + * we need to create a memory stream, set it as the input stream, and force symfony to think it + * is truly interactive. The forcing is done by ApplicationTester. Unfortunately, the Application::configureIO + * method will reverse this change if the `posix_isatty` method exists. It will call it on the stream, + * and since the stream will never be an actual tty, the interactivity will be overwritten. + * + * This class gets whether the input is interactive before configureIO is called, and restores + * it after the method is called. + */ +class TestConsole extends Console +{ + protected function configureIO(InputInterface $input, OutputInterface $output) + { + $isInteractive = $input->isInteractive(); + + parent::configureIO($input, $output); + + $input->setInteractive($isInteractive); + } +} + +/** * Base class for test cases that test Piwik console commands. Derives from SystemTestCase * so the entire Piwik environment is set up. * @@ -51,7 +75,7 @@ class ConsoleCommandTestCase extends SystemTestCase { parent::setUp(); - $this->application = new Console(); + $this->application = new TestConsole(); $this->application->setAutoExit(false); $this->applicationTester = new ApplicationTester($this->application); diff --git a/tests/PHPUnit/Framework/TestCase/IntegrationTestCase.php b/tests/PHPUnit/Framework/TestCase/IntegrationTestCase.php index 9f2c5eb688..1dafb3580b 100644 --- a/tests/PHPUnit/Framework/TestCase/IntegrationTestCase.php +++ b/tests/PHPUnit/Framework/TestCase/IntegrationTestCase.php @@ -13,7 +13,6 @@ use Piwik\Config; use Piwik\Db; use Piwik\Tests\Framework\Fixture; use Piwik\Cache as PiwikCache; -use Piwik\Tests\Framework\Mock\TestConfig; /** * Tests extending IntegrationTestCase are much slower to run: the setUp will diff --git a/tests/PHPUnit/Framework/TestDataHelper/LogHelper.php b/tests/PHPUnit/Framework/TestDataHelper/LogHelper.php new file mode 100644 index 0000000000..b55dfae1a1 --- /dev/null +++ b/tests/PHPUnit/Framework/TestDataHelper/LogHelper.php @@ -0,0 +1,160 @@ +<?php +/** + * Piwik - free/libre analytics platform + * + * @link http://piwik.org + * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later + */ + +namespace Piwik\Tests\Framework\TestDataHelper; + +use Piwik\Common; +use Piwik\Network\IPUtils; +use Piwik\Db; + +/** + * Test helper that inserts rows into log tables. Defines defaults for all non null columns so + * developers can specify as little as needed. + */ +class LogHelper +{ + public function insertVisit($visit = array()) + { + $defaultProperties = array( + 'idsite' => 1, + 'idvisitor' => $this->getDummyVisitorId(), + 'visit_last_action_time' => '2012-01-01 00:00:00', + 'config_id' => $this->getDummyVisitorId(), + 'location_ip' => IPUtils::stringToBinaryIP('1.2.3.4'), + 'visitor_localtime' => '2012-01-01 00:00:00', + 'location_country' => 'xx', + 'config_os' => 'xxx', + 'visit_total_events' => 0, + 'visitor_days_since_last' => 0, + 'config_quicktime' => 0, + 'config_pdf' => 0, + 'config_realplayer' => 0, + 'config_silverlight' => 0, + 'config_windowsmedia' => 0, + 'config_java' => 0, + 'config_gears' => 0, + 'config_resolution' => 0, + 'config_resolution' => '', + 'config_cookie' => 0, + 'config_director' => 0, + 'config_flash' => 0, + 'config_browser_version' => '', + 'visitor_count_visits' => 1, + 'visitor_returning' => 0, + 'visit_total_time' => 123, + 'visit_entry_idaction_name' => 0, + 'visit_entry_idaction_url' => 0, + 'visitor_days_since_order' => 0, + 'visitor_days_since_first' => 0, + 'visit_first_action_time' => '2012-01-01 00:00:00', + 'visit_goal_buyer' => 0, + 'visit_goal_converted' => 0, + 'visit_exit_idaction_name' => 0, + 'referer_url' => '', + 'location_browser_lang' => 'xx', + 'config_browser_engine' => '', + 'config_browser_name' => '', + 'referer_type' => 0, + 'referer_name' => '', + 'visit_total_actions' => 0, + 'visit_total_searches' => 0 + ); + + $visit = array_merge($defaultProperties, $visit); + + $this->insertInto('log_visit', $visit); + + $idVisit = Db::fetchOne("SELECT LAST_INSERT_ID()"); + return $this->getVisit($idVisit, $allColumns = true); + } + + private function insertInto($table, $row) + { + $columns = implode(', ', array_keys($row)); + $columnsPlaceholders = Common::getSqlStringFieldsArray($row); + $values = array_values($row); + + Db::query("INSERT INTO " . Common::prefixTable($table) . " ($columns) VALUES ($columnsPlaceholders)", $values); + } + + public function getVisit($idVisit, $allColumns = false) + { + $columns = $allColumns ? "*" : "location_country, location_region, location_city, location_latitude, location_longitude"; + $visit = Db::fetchRow("SELECT $columns FROM " . Common::prefixTable('log_visit') . " WHERE idvisit = ?", array($idVisit)); + + return $visit; + } + + public function insertConversion($idVisit, $properties = array()) + { + $defaultProperties = array( + 'idvisit' => $idVisit, + 'idsite' => 1, + 'idvisitor' => $this->getDummyVisitorId(), + 'server_time' => '2012-01-01 00:00:00', + 'idgoal' => 1, + 'buster' => 1, + 'url' => '', + 'location_country' => 'xx', + 'visitor_count_visits' => 0, + 'visitor_returning' => 0, + 'visitor_days_since_order' => 0, + 'visitor_days_since_first' => 0 + ); + + $properties = array_merge($defaultProperties, $properties); + + $this->insertInto('log_conversion', $properties); + } + + private function getDummyVisitorId() + { + return Common::hex2bin('ea95f303f2165aa0'); + } + + public function insertVisitAction($idVisit, $properties = array()) + { + $defaultProperties = array( + 'idsite' => 1, + 'idvisitor' => $this->getDummyVisitorId(), + 'idvisit' => $idVisit, + 'idaction_name_ref' => 1, + 'server_time' => '2012-01-01 00:00:00', + 'time_spent_ref_action' => 1 + ); + + $properties = array_merge($defaultProperties, $properties); + + $this->insertInto('log_link_visit_action', $properties); + } + + public function insertConversionItem($idVisit, $idOrder, $properties = array()) + { + $defaultProperties = array( + 'idsite' => 1, + 'idvisitor' => $this->getDummyVisitorId(), + 'server_time' => '2012-01-01 00:00:00', + 'idvisit' => $idVisit, + 'idorder' => $idOrder, + 'idaction_sku' => 1, + 'idaction_name' => 2, + 'idaction_category' => 3, + 'idaction_category2' => 4, + 'idaction_category3' => 5, + 'idaction_category4' => 6, + 'idaction_category5' => 7, + 'price' => 40, + 'quantity' => 4, + 'deleted' => 0 + ); + + $properties = array_merge($defaultProperties, $properties); + + $this->insertInto('log_conversion_item', $properties); + } +} diff --git a/tests/PHPUnit/Integration/LogDeleterTest.php b/tests/PHPUnit/Integration/LogDeleterTest.php new file mode 100644 index 0000000000..a3f0a2f841 --- /dev/null +++ b/tests/PHPUnit/Integration/LogDeleterTest.php @@ -0,0 +1,178 @@ +<?php +/** + * Piwik - free/libre analytics platform + * + * @link http://piwik.org + * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later + */ + +namespace Piwik\Tests\Integration; + +use Piwik\Common; +use Piwik\DataAccess\RawLogDao; +use Piwik\Db; +use Piwik\LogDeleter; +use Piwik\Tests\Framework\TestCase\IntegrationTestCase; +use Piwik\Tests\Framework\TestDataHelper\LogHelper; + +/** + * @group Core + */ +class LogDeleterTest extends IntegrationTestCase +{ + /** + * @var LogHelper + */ + private $logInserter; + + /** + * @var LogDeleter + */ + private $logDeleter; + + public function setUp() + { + parent::setUp(); + + $this->logDeleter = new LogDeleter(new RawLogDao()); + + $this->logInserter = new LogHelper(); + $this->insertTestData(); + } + + public function test_deleteVisits_RemovesVisitsAndOtherRelatedLogs() + { + $this->logDeleter->deleteVisits(array(2, 3)); + + $this->assertVisitExists(1); + $this->assertVisitNotExists(2); + $this->assertVisitNotExists(3); + $this->assertVisitExists(4); + } + + public function test_deleteConversions_RemovesConversionsAndConversionItems() + { + $this->logDeleter->deleteConversions(array(2, 3)); + + $this->assertConversionsNotExists(2); + $this->assertConversionsNotExists(3); + + $this->assertVisitExists(1); + $this->assertVisitExists(2, $checkAggregates = false); + $this->assertVisitExists(3, $checkAggregates = false); + $this->assertVisitExists(4); + } + + public function test_deleteConversionItems_RemovesConversionItems() + { + $this->logDeleter->deleteConversionItems(array(2, 3)); + + $this->assertConversionItemsNotExist(2); + $this->assertConversionItemsNotExist(3); + + $this->assertConversionsExists(2, $checkAggregates = false); + $this->assertConversionsExists(3, $checkAggregates = false); + + $this->assertVisitExists(1); + $this->assertVisitExists(2, $checkAggregates = false); + $this->assertVisitExists(3, $checkAggregates = false); + $this->assertVisitExists(4); + } + + public function test_deleteVisitsFor_DeletesVisitsForSpecifiedRangeAndSites_AndInvokesCallbackAfterEveryChunkIsDeleted() + { + $iterationCount = 0; + $this->logDeleter->deleteVisitsFor('2012-01-01', '2012-01-02 05:05:05', 2, $iterationStep = 1, function () use (&$iterationCount) { + ++$iterationCount; + }); + + $this->assertEquals(2, $iterationCount); + + // visits for idSite = 1 do not get deleted + $this->assertVisitExists(1); + $this->assertVisitExists(2); + + // visits for idSite = 2 do get deleted + $this->assertVisitNotExists(3); + $this->assertVisitNotExists(4); + } + + private function insertTestData() + { + // two visits for site = 1 + $this->insertVisit($idSite = 1, $dateTime = '2012-01-01 00:00:00'); + $this->insertVisit($idSite = 1, $dateTime = '2012-01-02 00:00:00'); + + // two visits for site = 2 + $this->insertVisit($idSite = 2, $dateTime = '2012-01-01 00:00:00'); + $this->insertVisit($idSite = 2, $dateTime = '2012-01-02 00:00:00'); + } + + private function insertVisit($idSite, $dateTime) + { + $visit = $this->logInserter->insertVisit(array('idsite' => $idSite, 'visit_last_action_time' => $dateTime)); + + $orderId = 'idorder_' . $visit['idvisit']; + + // insert two actions + $this->logInserter->insertVisitAction($visit['idvisit'], array('idsite' => $idSite)); + $this->logInserter->insertVisitAction($visit['idvisit'], array('idsite' => $idSite)); + + // insert two conversions + $this->logInserter->insertConversion($visit['idvisit'], array('idsite' => $idSite, 'buster' => 1)); + $this->logInserter->insertConversion($visit['idvisit'], array('idsite' => $idSite, 'buster' => 2, 'idorder' => $orderId)); + + // insert two conversion items for last conversion + $this->logInserter->insertConversionItem($visit['idvisit'], $orderId, array('idsite' => $idSite)); + $this->logInserter->insertConversionItem($visit['idvisit'], $orderId, array('idsite' => $idSite, 'idaction_sku' => 123)); + } + + private function assertVisitExists($idVisit, $checkAggregates = true) + { + $this->assertEquals(1, $this->getRowCountWithIdVisit('log_visit', $idVisit)); + $this->assertEquals(2, $this->getRowCountWithIdVisit('log_link_visit_action', $idVisit)); + + if ($checkAggregates) { + $this->assertConversionsExists($idVisit); + } + } + + private function assertConversionsExists($idVisit, $checkAggregates = true) + { + $this->assertEquals(2, $this->getRowCountWithIdVisit('log_conversion', $idVisit)); + + if ($checkAggregates) { + $this->assertConversionItemsExist($idVisit); + } + } + + private function assertConversionItemsExist($idVisit) + { + $this->assertEquals(2, $this->getRowCountWithIdVisit('log_conversion_item', $idVisit)); + } + + private function assertVisitNotExists($idVisit) + { + $this->assertEquals(0, $this->getRowCountWithIdVisit('log_visit', $idVisit)); + $this->assertEquals(0, $this->getRowCountWithIdVisit('log_link_visit_action', $idVisit)); + + $this->assertConversionsNotExists($idVisit); + } + + private function getRowCountWithIdVisit($table, $idVisit) + { + return Db::fetchOne("SELECT COUNT(*) FROM " . Common::prefixTable($table) . " WHERE idvisit = $idVisit"); + } + + private function assertConversionsNotExists($idVisit) + { + $this->assertEquals(0, $this->getRowCountWithIdVisit('log_conversion', $idVisit)); + + $this->assertConversionItemsNotExist($idVisit); + } + + private function assertConversionItemsNotExist($idVisit) + { + $this->assertEquals(0, $this->getRowCountWithIdVisit('log_conversion_item', $idVisit)); + } +}
\ No newline at end of file diff --git a/plugins/PrivacyManager/tests/Unit/DimensionMetadataProviderTest.php b/tests/PHPUnit/Unit/Plugin/Dimension/DimensionMetadataProviderTest.php index 7c015a9206..e5a7823f98 100644 --- a/plugins/PrivacyManager/tests/Unit/DimensionMetadataProviderTest.php +++ b/tests/PHPUnit/Unit/Plugin/Dimension/DimensionMetadataProviderTest.php @@ -6,9 +6,9 @@ * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later */ -namespace Piwik\Plugins\PrivacyManager\tests\Unit; +namespace Piwik\Tests\Unit\Plugin\Dimension; -use Piwik\Plugins\PrivacyManager\DimensionMetadataProvider; +use Piwik\Plugin\Dimension\DimensionMetadataProvider; use Piwik\Tests\Framework\TestCase\UnitTestCase; use Piwik\Plugin\Manager as PluginManager; @@ -23,7 +23,7 @@ class DimensionMetadataProviderTest extends UnitTestCase $manager->loadPlugins(array('Events', 'Contents')); } - public function test_getActionReferenceColumnsByTable_DetectsActionReferenceDimensions_AndIncludesHardocdedColumns() + public function test_getActionReferenceColumnsByTable_DetectsActionReferenceDimensions_AndIncludesHardcodedColumns() { $dimensionMetadataProvider = new DimensionMetadataProvider(); |