Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/matomo-org/matomo.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/PrivacyManager/Dao/LogDataAnonymizer.php')
-rw-r--r-- plugins/PrivacyManager/Dao/LogDataAnonymizer.php 274
1 files changed, 274 insertions, 0 deletions
diff --git a/plugins/PrivacyManager/Dao/LogDataAnonymizer.php b/plugins/PrivacyManager/Dao/LogDataAnonymizer.php
new file mode 100644
index 0000000000..d4d7319acc
--- /dev/null
+++ b/plugins/PrivacyManager/Dao/LogDataAnonymizer.php
@@ -0,0 +1,274 @@
+<?php
+/**
+ * Matomo - free/libre analytics platform
+ *
+ * @link https://matomo.org
+ * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
+ *
+ */
+namespace Piwik\Plugins\PrivacyManager\Dao;
+
+use Piwik\Common;
+use Piwik\Db;
+use Piwik\DbHelper;
+use Piwik\Network\IP;
+use Piwik\Plugins\PrivacyManager\Config;
+use Piwik\Plugins\PrivacyManager\IPAnonymizer;
+use Piwik\Plugins\PrivacyManager\Tracker\RequestProcessor;
+use Piwik\Plugins\UserCountry\LocationProvider;
+use Piwik\Plugins\UserCountry\VisitorGeolocator;
+use Piwik\SettingsPiwik;
+use Piwik\Tracker\Model;
+use Exception;
+
+/**
+ * Data access object that anonymizes already tracked raw log data.
+ *
+ * It can re-mask visitor IPs, re-geolocate visits from the masked IP, anonymize user IDs
+ * in the log_visit table, and reset ("unset") arbitrary columns of the log_visit,
+ * log_conversion and log_link_visit_action tables to their default value.
+ */
+class LogDataAnonymizer
+{
+    /**
+     * Number of log_visit rows that are selected and processed per batch in
+     * anonymizeVisitInformation().
+     */
+    const NUM_ROWS_UPDATE_AT_ONCE = 10000;
+
+    /**
+     * Columns that are never reported as unsettable by getAvailableColumnsWithDefaultValue(),
+     * independent of how they are defined in the table.
+     *
+     * @var string[]
+     */
+    protected $COLUMNS_BLACKLISTED = [
+        'idvisit', 'idvisitor', 'idsite', 'visit_last_action_time', 'config_id', 'location_ip',
+        'idlink_va', 'server_time', 'idgoal', 'buster', 'idorder',
+    ];
+
+    /**
+     * Prefixed name of the log_visit table.
+     *
+     * @var string
+     */
+    private $logVisitTable;
+
+    public function __construct()
+    {
+        $this->logVisitTable = Common::prefixTable('log_visit');
+    }
+
+    /**
+     * Anonymizes IP, location and/or user ID of all visits of the given sites whose
+     * visit_last_action_time lies within [$startDate, $endDate] (both bounds inclusive).
+     *
+     * Visits are processed in batches of NUM_ROWS_UPDATE_AT_ONCE rows. The location is always
+     * looked up from the anonymized IP, also when $anonymizeIp itself is disabled.
+     *
+     * @param int[]|null $idSites Site IDs to restrict to; when empty, every site that has rows in log_visit is used.
+     * @param string $startDate Lower bound, bound as-is into the query (presumably a datetime string - confirm with callers).
+     * @param string $endDate Upper bound, bound as-is into the query.
+     * @param bool $anonymizeIp Whether to store the masked IP.
+     * @param bool $anonimizeLocation Whether to replace the stored location with the one resolved from the masked IP.
+     * @param bool $anonymizeUserId Whether to replace non-empty user IDs with their anonymized form.
+     * @return int Number of visits for which an update was issued.
+     */
+    public function anonymizeVisitInformation($idSites, $startDate, $endDate, $anonymizeIp, $anonimizeLocation, $anonymizeUserId)
+    {
+        if (!$anonymizeIp && !$anonimizeLocation && !$anonymizeUserId) {
+            return 0; // nothing to do
+        }
+
+        $idSites = $this->getIdSitesInClause($this->logVisitTable, $idSites);
+        if ($idSites === null) {
+            return 0; // no visit tracked yet, the idsite in() would otherwise fail
+        }
+
+        $numVisitsToUpdate = $this->getNumVisitsInTimeRange($idSites, $startDate, $endDate);
+        if (empty($numVisitsToUpdate)) {
+            return 0;
+        }
+
+        // never mask with less than the minimum length, whatever is configured
+        $privacyConfig = new Config();
+        $minimumIpAddressMaskLength = 2;
+        $ipMask = max($minimumIpAddressMaskLength, $privacyConfig->ipAddressMaskLength);
+
+        $trackerModel = new Model();
+        $geolocator = new VisitorGeolocator();
+
+        // log_visit column => key of the geolocation result; identical for every row
+        $locationColumns = [
+            'location_longitude' => LocationProvider::LONGITUDE_KEY,
+            'location_latitude' => LocationProvider::LATITUDE_KEY,
+            'location_city' => LocationProvider::CITY_NAME_KEY,
+            'location_region' => LocationProvider::REGION_CODE_KEY,
+            'location_country' => LocationProvider::COUNTRY_CODE_KEY,
+        ];
+
+        $numRecordsUpdated = 0;
+
+        for ($offset = 0; $offset < $numVisitsToUpdate; $offset += self::NUM_ROWS_UPDATE_AT_ONCE) {
+            // the last batch only fetches the rows that remain of the counted total
+            $limit = min(self::NUM_ROWS_UPDATE_AT_ONCE, $numVisitsToUpdate - $offset);
+
+            // the ORDER BY keeps the paging stable; none of the ordering columns is modified below
+            $sql = sprintf('SELECT idsite, idvisit, location_ip, user_id, location_longitude, location_latitude, location_city, location_region, location_country FROM %s WHERE idsite in (%s) and visit_last_action_time >= ? and visit_last_action_time <= ? ORDER BY idsite, visit_last_action_time, idvisit LIMIT %d OFFSET %d', $this->logVisitTable, $idSites, $limit, $offset);
+            $rows = Db::query($sql, [$startDate, $endDate])->fetchAll();
+
+            foreach ($rows as $row) {
+                $ipObject = IP::fromBinaryIP($row['location_ip']);
+                $ipString = $ipObject->toString();
+                $ipAnonymized = IPAnonymizer::applyIPMask($ipObject, $ipMask);
+                $update = [];
+
+                if ($anonymizeIp && $ipString !== $ipAnonymized->toString()) {
+                    // stored IP differs from its masked form, needs updating
+                    $update['location_ip'] = $ipAnonymized->toBinary();
+                }
+
+                if ($anonymizeUserId && isset($row['user_id']) && $row['user_id'] !== false && $row['user_id'] !== '') {
+                    $update['user_id'] = RequestProcessor::anonymizeUserId($row['user_id']);
+                }
+
+                if ($anonimizeLocation) {
+                    $location = $geolocator->getLocation(['ip' => $ipAnonymized->toString()]);
+
+                    foreach ($locationColumns as $name => $val) {
+                        // a missing or false lookup value resets the column to null
+                        $newLocationData = null;
+                        if (isset($location[$val]) && $location[$val] !== false) {
+                            $newLocationData = $location[$val];
+                        }
+                        if ($newLocationData !== $row[$name]) {
+                            $update[$name] = $newLocationData;
+                        }
+                    }
+                }
+
+                if (!empty($update)) {
+                    $trackerModel->updateVisit($row['idsite'], $row['idvisit'], $update);
+                    $numRecordsUpdated++;
+                }
+            }
+
+            unset($rows); // release the batch before fetching the next one
+        }
+
+        return $numRecordsUpdated;
+    }
+
+    /**
+     * Resets the given log_visit columns to their default value for all visits in the date range.
+     *
+     * @param int[]|null $idSites Site IDs to restrict to; when empty, all sites present in the table are used.
+     * @param string $startDate Inclusive lower bound compared against visit_last_action_time.
+     * @param string $endDate Inclusive upper bound compared against visit_last_action_time.
+     * @param string[] $columns Names of the columns to reset.
+     * @return int Row count reported for the UPDATE statement, 0 when nothing had to be done.
+     * @throws Exception If one of the columns does not exist or cannot be unset.
+     */
+    public function unsetLogVisitTableColumns($idSites, $startDate, $endDate, $columns)
+    {
+        return $this->unsetLogTableColumns('log_visit', 'visit_last_action_time', $idSites, $startDate, $endDate, $columns);
+    }
+
+    /**
+     * Resets those of the given visit columns that also exist (and are unsettable) in
+     * log_conversion, for all conversions in the date range.
+     *
+     * @param int[]|null $idSites Site IDs to restrict to; when empty, all sites present in the table are used.
+     * @param string $startDate Inclusive lower bound compared against server_time.
+     * @param string $endDate Inclusive upper bound compared against server_time.
+     * @param string[] $visitColumns Visit column names; names unknown to log_conversion are ignored.
+     * @return int Row count reported for the UPDATE statement, 0 when nothing had to be done.
+     */
+    public function unsetLogConversionTableColumns($idSites, $startDate, $endDate, $visitColumns)
+    {
+        $table = 'log_conversion';
+        $logTableFields = $this->getAvailableColumnsWithDefaultValue(Common::prefixTable($table));
+
+        $columnsToUnset = [];
+        foreach ($visitColumns as $column) {
+            // we do not fail if a specified column does not exist here as this is applied to visit columns
+            // and some visit columns may not exist in log_conversion. We do not want to fail in this case
+            if (array_key_exists($column, $logTableFields)) {
+                $columnsToUnset[] = $column;
+            }
+        }
+
+        return $this->unsetLogTableColumns($table, 'server_time', $idSites, $startDate, $endDate, $columnsToUnset);
+    }
+
+    /**
+     * Resets the given log_link_visit_action columns to their default value for all actions
+     * in the date range.
+     *
+     * @param int[]|null $idSites Site IDs to restrict to; when empty, all sites present in the table are used.
+     * @param string $startDate Inclusive lower bound compared against server_time.
+     * @param string $endDate Inclusive upper bound compared against server_time.
+     * @param string[] $columns Names of the columns to reset.
+     * @return int Row count reported for the UPDATE statement, 0 when nothing had to be done.
+     * @throws Exception If one of the columns does not exist or cannot be unset.
+     */
+    public function unsetLogLinkVisitActionColumns($idSites, $startDate, $endDate, $columns)
+    {
+        return $this->unsetLogTableColumns('log_link_visit_action', 'server_time', $idSites, $startDate, $endDate, $columns);
+    }
+
+    /**
+     * Verifies that every given column can be unset in log_visit.
+     *
+     * @param string[] $visitColumns
+     * @return null
+     * @throws Exception If a column does not exist or cannot be unset.
+     */
+    public function checkAllVisitColumns($visitColumns)
+    {
+        $this->areAllColumnsValid('log_visit', $visitColumns);
+        return null;
+    }
+
+    /**
+     * Verifies that every given column can be unset in log_link_visit_action.
+     *
+     * @param string[] $linkVisitActionColumns
+     * @return null
+     * @throws Exception If a column does not exist or cannot be unset.
+     */
+    public function checkAllLinkVisitActionColumns($linkVisitActionColumns)
+    {
+        $this->areAllColumnsValid('log_link_visit_action', $linkVisitActionColumns);
+        return null;
+    }
+
+    /**
+     * @return array Unsettable log_visit columns, mapped to the value they are reset to.
+     */
+    public function getAvailableVisitColumnsToAnonymize()
+    {
+        return $this->getAvailableColumnsWithDefaultValue(Common::prefixTable('log_visit'));
+    }
+
+    /**
+     * @return array Unsettable log_link_visit_action columns, mapped to the value they are reset to.
+     */
+    public function getAvailableLinkVisitActionColumnsToAnonymize()
+    {
+        return $this->getAvailableColumnsWithDefaultValue(Common::prefixTable('log_link_visit_action'));
+    }
+
+    /**
+     * Throws if one of the columns is not unsettable in the given table; an empty list is valid.
+     *
+     * @param string $table Unprefixed table name.
+     * @param string[] $columns
+     * @throws Exception
+     */
+    private function areAllColumnsValid($table, $columns)
+    {
+        if (empty($columns)) {
+            return;
+        }
+
+        $table = Common::prefixTable($table);
+        $logTableFields = $this->getAvailableColumnsWithDefaultValue($table);
+
+        foreach ($columns as $column) {
+            if (!array_key_exists($column, $logTableFields)) {
+                throw new Exception(sprintf('The column "%s" seems to not exist in %s or cannot be unset. Use one of %s', $column, $table, implode(', ', array_keys($logTableFields))));
+            }
+        }
+    }
+
+    /**
+     * Sets the given columns to their default value for all rows of the given sites whose
+     * date column lies within [$startDate, $endDate].
+     *
+     * @param string $table Unprefixed table name.
+     * @param string $dateColumn Column the date range is applied to.
+     * @param int[]|null $idSites Site IDs to restrict to; when empty, all sites present in the table are used.
+     * @param string $startDate
+     * @param string $endDate
+     * @param string[] $columns Names of the columns to reset.
+     * @return int Row count reported for the UPDATE statement, 0 when there are no columns or no sites.
+     * @throws Exception If a column does not exist or has no value it could be reset to.
+     */
+    private function unsetLogTableColumns($table, $dateColumn, $idSites, $startDate, $endDate, $columns)
+    {
+        if (empty($columns)) {
+            return 0;
+        }
+
+        $table = Common::prefixTable($table);
+
+        $idSites = $this->getIdSitesInClause($table, $idSites);
+        if ($idSites === null) {
+            return 0; // no visit tracked yet, the idsite in() would otherwise fail
+        }
+
+        $logTableFields = $this->getAvailableColumnsWithDefaultValue($table);
+
+        $col = [];
+        $bind = [];
+        foreach ($columns as $column) {
+            // only names present in the table definition are ever concatenated into the SQL
+            if (!array_key_exists($column, $logTableFields)) {
+                throw new Exception(sprintf('The column "%s" cannot be unset because it has no default value or it does not exist in "%s". Use one of %s', $column, $table, implode(', ', array_keys($logTableFields))));
+            }
+            $col[] = $column . ' = ?';
+            $bind[] = $logTableFields[$column];
+        }
+        $col = implode(',', $col);
+        $bind[] = $startDate;
+        $bind[] = $endDate;
+
+        $sql = sprintf('UPDATE %s SET %s WHERE idsite in (%s) and %s >= ? and %s <= ?', $table, $col, $idSites, $dateColumn, $dateColumn);
+        return Db::query($sql, $bind)->rowCount();
+    }
+
+    /**
+     * Counts the visits of the given sites within the date range.
+     *
+     * @param string $idSites Comma separated list of integer site IDs, used as-is inside "idsite in (...)".
+     * @param string $startDate
+     * @param string $endDate
+     * @return int
+     */
+    private function getNumVisitsInTimeRange($idSites, $startDate, $endDate)
+    {
+        $sql = sprintf('SELECT count(*) FROM %s WHERE idsite in (%s) and visit_last_action_time >= ? and visit_last_action_time <= ?', $this->logVisitTable, $idSites);
+
+        // cast so callers can safely do integer arithmetic with the count
+        return (int) Db::query($sql, [$startDate, $endDate])->fetchColumn();
+    }
+
+    /**
+     * Returns the columns of a table that can be unset, mapped to the value they are reset to.
+     *
+     * Blacklisted columns and NOT NULL columns whose default is null are left out. Relies on
+     * DbHelper::getTableColumns() returning a 'Null' and optionally a 'Default' entry per column.
+     *
+     * @param string $table Prefixed table name.
+     * @return array Column name => default value (null for nullable columns without a default entry).
+     */
+    private function getAvailableColumnsWithDefaultValue($table)
+    {
+        $columns = DbHelper::getTableColumns($table);
+        $values = [];
+
+        foreach ($columns as $column => $config) {
+            $hasDefaultKey = array_key_exists('Default', $config);
+
+            if (in_array($column, $this->COLUMNS_BLACKLISTED, true)) {
+                continue;
+            }
+
+            $nullable = strtoupper($config['Null']);
+
+            if ($nullable === 'NO' && $hasDefaultKey && $config['Default'] === null) {
+                // we cannot unset this column as it may result in an error or random data
+                continue;
+            }
+
+            if ($hasDefaultKey) {
+                $values[$column] = $config['Default'];
+            } elseif ($nullable === 'YES') {
+                $values[$column] = null;
+            }
+        }
+
+        return $values;
+    }
+
+    /**
+     * Builds the content of an "idsite in (...)" clause.
+     *
+     * @param string $table Prefixed table that is queried for site IDs when none are given.
+     * @param int[]|null $idSites Requested site IDs; every entry is cast to int.
+     * @return string|null Comma separated integer site IDs, or null when there is no site to match.
+     */
+    private function getIdSitesInClause($table, $idSites)
+    {
+        if (empty($idSites)) {
+            $idSites = $this->getAllIdSitesString($table);
+        } else {
+            $idSites = array_map('intval', $idSites);
+        }
+
+        if (empty($idSites)) {
+            return null;
+        }
+
+        return implode(', ', $idSites);
+    }
+
+    /**
+     * Returns all distinct site IDs that have rows in the given table. Despite the method
+     * name, the result is an array and not a string.
+     *
+     * @param string $table Prefixed table name.
+     * @return int[]
+     */
+    private function getAllIdSitesString($table)
+    {
+        // we need the idSites in order to use the index
+        $sites = Db::query(sprintf('SELECT DISTINCT idsite FROM %s', $table))->fetchAll();
+        $idSites = [];
+        foreach ($sites as $site) {
+            $idSites[] = (int) $site['idsite'];
+        }
+        return $idSites;
+    }
+}