Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/matomo-org/matomo.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStefan Giehl <stefan@matomo.org>2022-04-12 11:52:24 +0300
committerGitHub <noreply@github.com>2022-04-12 11:52:24 +0300
commitb24d41ff0d6a863dc9c7ba8f0e4ba27d206ca092 (patch)
tree1f48e7bd541f57749ca009767265dddd3718fc23
parentb12ef7e8fa0bc4f693244f4110c64564613f380c (diff)
Prevent possible errors when exporting datasets in GDPR tool (#19056)
* Only use dimensions for formatting if they don't define a special sql segment * apply PSR12 code formatting
-rw-r--r--plugins/PrivacyManager/Model/DataSubjects.php129
1 files changed, 79 insertions, 50 deletions
diff --git a/plugins/PrivacyManager/Model/DataSubjects.php b/plugins/PrivacyManager/Model/DataSubjects.php
index 3c63448a90..56c29591e7 100644
--- a/plugins/PrivacyManager/Model/DataSubjects.php
+++ b/plugins/PrivacyManager/Model/DataSubjects.php
@@ -1,4 +1,5 @@
<?php
+
/**
* Matomo - free/libre analytics platform
*
@@ -6,6 +7,7 @@
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
*
*/
+
namespace Piwik\Plugins\PrivacyManager\Model;
use Piwik\Columns\Dimension;
@@ -21,6 +23,7 @@ use Piwik\Plugin\LogTablesProvider;
use Piwik\Site;
use Piwik\Tracker\LogTable;
use Piwik\Tracker\PageUrl;
+use Psr\Log\LoggerInterface;
class DataSubjects
{
@@ -49,7 +52,7 @@ class DataSubjects
public function deleteDataSubjectsForDeletedSites($allExistingIdSites)
{
if (empty($allExistingIdSites)) {
- return array();
+ return [];
}
$allExistingIdSites = array_map('intval', $allExistingIdSites);
@@ -66,15 +69,15 @@ class DataSubjects
if (empty($idSitesNoLongerExisting)) {
// nothing to be deleted... if there is no entry for that table in log_visit or log_link_visit_action
// then there shouldn't be anything to be deleted in other tables either
- return array();
+ return [];
}
$logTables = $this->getLogTablesToDeleteFrom();
// It's quicker to call the delete queries one site at a time instead of using the IN operator and potentially
// creating a huge result set
foreach ($idSitesNoLongerExisting as $idSiteNoLongerExisting) {
- $r = $this->deleteLogDataFrom($logTables, function($tableToSelectFrom) use ($idSiteNoLongerExisting) {
- return [$tableToSelectFrom . '.idsite = '. $idSiteNoLongerExisting, []];
+ $r = $this->deleteLogDataFrom($logTables, function ($tableToSelectFrom) use ($idSiteNoLongerExisting) {
+ return [$tableToSelectFrom . '.idsite = ' . $idSiteNoLongerExisting, []];
});
foreach ($r as $k => $v) {
if (!array_key_exists($k, $results)) {
@@ -91,16 +94,16 @@ class DataSubjects
public function deleteDataSubjects($visits)
{
if (empty($visits)) {
- return array();
+ return [];
}
- $results = array();
+ $results = [];
/**
* Lets you delete data subjects to make your plugin GDPR compliant.
* This can be useful if you have developed a plugin which stores any data for visits but doesn't
* use any core logic to store this data. If core API's are used, for example log tables, then the data may
- * be deleted automatically.
+ * be deleted automatically.
*
* **Example**
*
@@ -114,7 +117,7 @@ class DataSubjects
* @param array &$visits An array with multiple visit entries containing an idvisit and idsite each. The data
* for these visits is requested to be deleted.
*/
- Piwik::postEvent('PrivacyManager.deleteDataSubjects', array(&$results, $visits));
+ Piwik::postEvent('PrivacyManager.deleteDataSubjects', [&$results, $visits]);
$datesToInvalidateByIdSite = $this->getDatesToInvalidate($visits);
@@ -141,16 +144,16 @@ class DataSubjects
private function getDatesToInvalidate($visits)
{
- $idVisitsByIdSites = array();
+ $idVisitsByIdSites = [];
foreach ($visits as $visit) {
$idSite = (int)$visit['idsite'];
if (!isset($idVisitsByIdSites[$idSite])) {
- $idVisitsByIdSites[$idSite] = array();
+ $idVisitsByIdSites[$idSite] = [];
}
$idVisitsByIdSites[$idSite][] = (int)$visit['idvisit'];
}
- $datesToInvalidate = array();
+ $datesToInvalidate = [];
foreach ($idVisitsByIdSites as $idSite => $idVisits) {
$timezone = Site::getTimezoneFor($idSite);
@@ -159,7 +162,7 @@ class DataSubjects
. ' AND idvisit IN (' . implode(',', $idVisits) . ')';
$resultSet = Db::fetchAll($sql);
- $dates = array();
+ $dates = [];
foreach ($resultSet as $row) {
$date = Date::factory($row['visit_last_action_time'], $timezone);
$dates[$date->toString('Y-m-d')] = 1;
@@ -194,14 +197,14 @@ class DataSubjects
foreach ($logTables as $logTable) {
$logTableName = $logTable->getName();
- $from = array($logTableName);
+ $from = [$logTableName];
$tableToSelect = $this->findNeededTables($logTable, $from);
if (!$tableToSelect) {
throw new \Exception('Cannot join table ' . $logTable->getName());
}
- list($where, $bind) = $generateWhere($tableToSelect);
+ [$where, $bind] = $generateWhere($tableToSelect);
$sql = "DELETE $logTableName FROM " . $this->makeFromStatement($from) . " WHERE $where";
@@ -229,11 +232,11 @@ class DataSubjects
$bName = $b->getName();
if ($bName === 'log_visit') {
return -1;
- } else if ($aName === 'log_visit') {
+ } elseif ($aName === 'log_visit') {
return 1;
- } else if ($bName === 'log_link_visit_action') {
+ } elseif ($bName === 'log_link_visit_action') {
return -1;
- } else if ($aName === 'log_link_visit_action') {
+ } elseif ($aName === 'log_link_visit_action') {
return 1;
}
@@ -267,7 +270,7 @@ class DataSubjects
public function exportDataSubjects($visits)
{
if (empty($visits)) {
- return array();
+ return [];
}
$logTables = $this->logTablesProvider->getAllLogTables();
@@ -277,7 +280,7 @@ class DataSubjects
$dimensions = Dimension::getAllDimensions();
- $results = array();
+ $results = [];
foreach ($logTables as $logTable) {
$logTableName = $logTable->getName();
@@ -285,7 +288,7 @@ class DataSubjects
continue; // we export these entries further below
}
- $from = array($logTableName);
+ $from = [$logTableName];
$tableToSelect = $this->findNeededTables($logTable, $from);
if (!$tableToSelect) {
@@ -294,17 +297,21 @@ class DataSubjects
continue;
}
- list($where, $bind) = $this->visitsToWhereAndBind($tableToSelect, $visits);
+ [$where, $bind] = $this->visitsToWhereAndBind($tableToSelect, $visits);
- $select = array();
+ $select = [];
$cols = DbHelper::getTableColumns(Common::prefixTable($logTableName));
ksort($cols); // make sure test results will be always in same order
- $binaryFields = array();
- $dimensionPerCol = array();
+ $binaryFields = [];
+ $dimensionPerCol = [];
foreach ($cols as $col => $config) {
foreach ($dimensions as $dimension) {
- if ($dimension->getDbTableName() === $logTableName && $dimension->getColumnName() === $col) {
+ if (
+ $dimension->getDbTableName() === $logTableName
+ && $dimension->getColumnName() === $col
+ && $dimension->getSqlSegment() === $logTableName . '.' . $col
+ ) {
if ($dimension->getType() === Dimension::TYPE_BINARY) {
$binaryFields[] = $col;
}
@@ -328,7 +335,7 @@ class DataSubjects
$idFields = $logTable->getIdColumn();
if (!empty($idFields)) {
if (!is_array($idFields)) {
- $idFields = array($idFields);
+ $idFields = [$idFields];
}
$sql .= ' ORDER BY ';
foreach ($idFields as $field) {
@@ -348,8 +355,27 @@ class DataSubjects
}
foreach ($result[$index] as $rowColumn => $rowValue) {
if (isset($dimensionPerCol[$rowColumn])) {
- $result[$index][$rowColumn] = $dimensionPerCol[$rowColumn]->formatValue($rowValue, $result[$index]['idsite'], new Formatter());
- } else if (!empty($rowValue)) {
+ try {
+ $result[$index][$rowColumn] = $dimensionPerCol[$rowColumn]->formatValue(
+ $rowValue,
+ $result[$index]['idsite'],
+ new Formatter()
+ );
+ } catch (\Exception $e) {
+ // if formatting failes for some reason use the raw value
+ StaticContainer::get(LoggerInterface::class)->error(
+ 'Failed to format column {column} with dimension {dimension}: {exception}',
+ [
+ 'column' => $rowColumn,
+ 'dimension' => get_class($dimensionPerCol[$rowColumn]),
+ 'exception' => $e,
+ 'ignoreInScreenWriter' => true,
+ ]
+ );
+
+ $result[$index][$rowColumn] = $rowValue;
+ }
+ } elseif (!empty($rowValue)) {
// we try to auto detect uncompressed values so plugins have to do less themselves. makes it a bit slower but should be fine
$testValue = @gzuncompress($rowValue);
if ($testValue !== false) {
@@ -372,10 +398,10 @@ class DataSubjects
$dimensionLogTable = $this->logTablesProvider->getLogTable($dimensionTable);
if ($join && $join instanceof ActionNameJoin && $dimensionColumn && $dimensionTable && $dimensionLogTable && $dimensionLogTable->getColumnToJoinOnIdVisit()) {
- $from = array('log_action', array('table' => $dimensionTable, 'joinOn' => "log_action.idaction = `$dimensionTable`.`$dimensionColumn`"));
+ $from = ['log_action', ['table' => $dimensionTable, 'joinOn' => "log_action.idaction = `$dimensionTable`.`$dimensionColumn`"]];
$tableToSelect = $this->findNeededTables($dimensionLogTable, $from);
- list($where, $bind) = $this->visitsToWhereAndBind($tableToSelect, $visits);
+ [$where, $bind] = $this->visitsToWhereAndBind($tableToSelect, $visits);
$from = $this->makeFromStatement($from);
$sql = "SELECT log_action.idaction, log_action.name, log_action.url_prefix FROM $from WHERE $where";
@@ -394,7 +420,7 @@ class DataSubjects
usort($result, function ($a1, $a2) {
return $a1['idaction'] > $a2['idaction'] ? 1 : -1;
});
- $results['log_action_' . $dimensionTable.'_' . $dimensionColumn] = $result;
+ $results['log_action_' . $dimensionTable . '_' . $dimensionColumn] = $result;
}
}
}
@@ -419,7 +445,7 @@ class DataSubjects
* @param array &$visits An array with multiple visit entries containing an idvisit and idsite each. The data
* for these visits is requested to be exported.
*/
- Piwik::postEvent('PrivacyManager.exportDataSubjects', array(&$results, $visits));
+ Piwik::postEvent('PrivacyManager.exportDataSubjects', [&$results, $visits]);
krsort($results); // make sure test results are always in same order
@@ -433,12 +459,12 @@ class DataSubjects
if ($logTable->getColumnToJoinOnIdVisit()) {
$tableToSelect = 'log_visit';
if ($logTableName !== 'log_visit') {
- $from[] = array('table' => 'log_visit', 'joinOn' => sprintf('%s.%s = %s.%s', $logTableName, $logTable->getColumnToJoinOnIdVisit(), 'log_visit', 'idvisit'));
+ $from[] = ['table' => 'log_visit', 'joinOn' => sprintf('%s.%s = %s.%s', $logTableName, $logTable->getColumnToJoinOnIdVisit(), 'log_visit', 'idvisit')];
}
} elseif ($logTable->getColumnToJoinOnIdAction()) {
$tableToSelect = 'log_link_visit_action';
if ($logTableName !== 'log_link_visit_action') {
- $from[] = array('table' => 'log_link_visit_action', 'joinOn' => sprintf('%s.%s = %s.%s', $logTableName, $logTable->getColumnToJoinOnIdAction(), 'log_link_visit_action', 'idaction_url'));
+ $from[] = ['table' => 'log_link_visit_action', 'joinOn' => sprintf('%s.%s = %s.%s', $logTableName, $logTable->getColumnToJoinOnIdAction(), 'log_link_visit_action', 'idaction_url')];
}
} else {
$tableToSelect = $this->joinNonCoreTable($logTable, $from);
@@ -463,15 +489,20 @@ class DataSubjects
private function visitsToWhereAndBind($tableToSelect, $visits)
{
- $where = array();
- $bind = array();
- $in = array();
+ $where = [];
+ $bind = [];
+ $in = [];
foreach ($visits as $visit) {
if (empty($visit['idsite'])) {
$in[] = (int) $visit['idvisit'];
} else {
- $where[] = sprintf('(%s.idsite = %d AND %s.idvisit = %d)',
- $tableToSelect, (int) $visit['idsite'], $tableToSelect, (int) $visit['idvisit']);
+ $where[] = sprintf(
+ '(%s.idsite = %d AND %s.idvisit = %d)',
+ $tableToSelect,
+ (int) $visit['idsite'],
+ $tableToSelect,
+ (int) $visit['idvisit']
+ );
}
}
$where = implode(' OR ', $where);
@@ -479,10 +510,10 @@ class DataSubjects
if (!empty($where)) {
$where .= ' OR ';
}
- $where .= $tableToSelect . '.idvisit in (' . implode(',',$in) . ')';
+ $where .= $tableToSelect . '.idvisit in (' . implode(',', $in) . ')';
}
- return array($where, $bind);
+ return [$where, $bind];
}
private function joinNonCoreTable(LogTable $logTable, &$from)
@@ -498,26 +529,26 @@ class DataSubjects
$joinTable = $this->logTablesProvider->getLogTable($tableName);
if ($joinTable->getColumnToJoinOnIdVisit()) {
- $from[] = array(
+ $from[] = [
'table' => $joinTable->getName(),
'joinOn' => sprintf('%s.%s = %s.%s', $logTableName, $joinColumn, $joinTable->getName(), $joinColumn)
- );
+ ];
if ($joinTable->getName() !== 'log_visit') {
- $from[] = array(
+ $from[] = [
'table' => 'log_visit',
'joinOn' => sprintf('%s.%s = %s.%s', $joinTable->getName(), $joinTable->getColumnToJoinOnIdVisit(), 'log_visit', $joinTable->getColumnToJoinOnIdVisit())
- );
+ ];
}
$tableToSelect = 'log_visit';
return $tableToSelect;
} else {
- $subFroms = array();
+ $subFroms = [];
$tableToSelect = $this->joinNonCoreTable($joinTable, $subFroms);
if ($tableToSelect) {
- $from[] = array(
+ $from[] = [
'table' => $joinTable->getName(),
'joinOn' => sprintf('%s.%s = %s.%s', $logTableName, $joinColumn, $joinTable->getName(), $joinColumn)
- );
+ ];
foreach ($subFroms as $subFrom) {
$from[] = $subFrom;
}
@@ -525,7 +556,6 @@ class DataSubjects
}
}
}
-
}
/**
@@ -541,5 +571,4 @@ class DataSubjects
});
return $deleteCounts;
}
-
}