Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/matomo-org/matomo.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Thomas Steur <tsteur@users.noreply.github.com> 2019-09-03 12:18:33 +0300
committer diosmosis <diosmosis@users.noreply.github.com> 2019-09-03 12:18:33 +0300
commit 17cc46f0a26e85ce6d3645aa60e0c1ee1611d683 (patch)
tree d040247f0ece505435f576822815200c2e4f6499
parent bb145dbae3764152e0eeca1d4995a96ee82ef9bf (diff)
Faster raw log data deletion (#14844)
* Faster raw log data deletion
* cannot use DB reader with this new performance feature
* delete 2K visits at once instead of only 1K
* fix index not defined
* forgot to commit file
-rw-r--r-- core/DataAccess/RawLogDao.php 57
-rw-r--r-- core/LogDeleter.php 4
-rw-r--r-- plugins/UserCountry/VisitorGeolocator.php 2
3 files changed, 46 insertions, 17 deletions
diff --git a/core/DataAccess/RawLogDao.php b/core/DataAccess/RawLogDao.php
index ea4d6a82ac..a3d7f0ab5a 100644
--- a/core/DataAccess/RawLogDao.php
+++ b/core/DataAccess/RawLogDao.php
@@ -103,23 +103,41 @@ class RawLogDao
* ```
* @param int $iterationStep The number of rows to query at a time.
* @param callable $callback The callback that processes each chunk of rows.
+ * @param string $willDelete Set to true if you will make sure to delete all rows that were fetched. If you are in
+ * doubt and not sure if to set true or false, use "false". Setting it to true will
+ * enable an internal performance improvement but it can result in an endless loop if not
+ * used properly.
*/
- public function forAllLogs($logTable, $fields, $conditions, $iterationStep, $callback, $useReader = false)
+ public function forAllLogs($logTable, $fields, $conditions, $iterationStep, $callback, $willDelete)
{
- $idField = $this->getIdFieldForLogTable($logTable);
- list($query, $bind) = $this->createLogIterationQuery($logTable, $idField, $fields, $conditions, $iterationStep);
-
$lastId = 0;
- if ($useReader) {
- $db = Db::getReader();
+
+ if ($willDelete) {
+ // we don't want to look at eg idvisit so the query will be mostly index covered as the
+ // "where idvisit > 0 ... ORDER BY idvisit ASC" will be gone... meaning we don't need to look at a huge range
+ // of visits...
+ $idField = null;
+ $bindFunction = function ($bind, $lastId) {
+ return $bind;
+ };
} else {
- $db = Db::get();
+ // when we are not deleting, we need to ensure to iterate over each visitor step by step... meaning we
+ // need to remember which visit we have already looked at and which one not. Therefore we need to apply
+ // "where idvisit > $lastId" in the query and "order by idvisit ASC"
+ $idField = $this->getIdFieldForLogTable($logTable);
+ $bindFunction = function ($bind, $lastId) {
+ return array_merge(array($lastId), $bind);
+ };
}
+
+ list($query, $bind) = $this->createLogIterationQuery($logTable, $idField, $fields, $conditions, $iterationStep);
+
do {
- $rows = $db->fetchAll($query, array_merge(array($lastId), $bind));
+ $rows = Db::fetchAll($query, call_user_func($bindFunction, $bind, $lastId));
if (!empty($rows)) {
- $lastId = $rows[count($rows) - 1][$idField];
-
+ if ($idField) {
+ $lastId = $rows[count($rows) - 1][$idField];
+ }
$callback($rows);
}
} while (count($rows) == $iterationStep);
@@ -255,23 +273,34 @@ class RawLogDao
{
$bind = array();
- $sql = "SELECT " . implode(', ', $fields) . " FROM `" . Common::prefixTable($logTable) . "` WHERE $idField > ?";
+ $sql = "SELECT " . implode(', ', $fields) . " FROM `" . Common::prefixTable($logTable) . "` WHERE ";
+
+ $parts = array();
+
+ if ($idField) {
+ $parts[] = "$idField > ?";
+ }
foreach ($conditions as $condition) {
list($column, $operator, $value) = $condition;
if (is_array($value)) {
- $sql .= " AND $column IN (" . Common::getSqlStringFieldsArray($value) . ")";
+ $parts[] = "$column IN (" . Common::getSqlStringFieldsArray($value) . ")";
$bind = array_merge($bind, $value);
} else {
- $sql .= " AND $column $operator ?";
+ $parts[]= "$column $operator ?";
$bind[] = $value;
}
}
+ $sql .= implode(' AND ', $parts);
+
+ if ($idField) {
+ $sql .= " ORDER BY $idField ASC";
+ }
- $sql .= " ORDER BY $idField ASC LIMIT " . (int)$iterationStep;
+ $sql .= " LIMIT " . (int)$iterationStep;
return array($sql, $bind);
}
diff --git a/core/LogDeleter.php b/core/LogDeleter.php
index 4fa9c1d030..e97416e446 100644
--- a/core/LogDeleter.php
+++ b/core/LogDeleter.php
@@ -70,7 +70,7 @@ class LogDeleter
* @param callable $afterChunkDeleted Callback executed after every chunk of visits are deleted.
* @return int The number of visits deleted.
*/
- public function deleteVisitsFor($startDatetime, $endDatetime, $idSite = null, $iterationStep = 1000, $afterChunkDeleted = null)
+ public function deleteVisitsFor($startDatetime, $endDatetime, $idSite = null, $iterationStep = 2000, $afterChunkDeleted = null)
{
$fields = array('idvisit');
$conditions = array();
@@ -102,7 +102,7 @@ class LogDeleter
if (!empty($afterChunkDeleted)) {
$afterChunkDeleted($logsDeleted);
}
- }, $useReader = true);
+ }, $willDelete = true);
return $logsDeleted;
}
diff --git a/plugins/UserCountry/VisitorGeolocator.php b/plugins/UserCountry/VisitorGeolocator.php
index 3aa52bd3f2..5f992af396 100644
--- a/plugins/UserCountry/VisitorGeolocator.php
+++ b/plugins/UserCountry/VisitorGeolocator.php
@@ -265,7 +265,7 @@ class VisitorGeolocator
$onLogProcessed($row, $updatedValues);
}
}
- });
+ }, $willDelete = false);
}
/**