Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/matomo-org/matomo.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'core/Segment.php')
-rw-r--r--core/Segment.php124
1 files changed, 116 insertions, 8 deletions
diff --git a/core/Segment.php b/core/Segment.php
index 8b46e4a811..8911d06247 100644
--- a/core/Segment.php
+++ b/core/Segment.php
@@ -1,6 +1,6 @@
<?php
/**
- * Piwik - free/libre analytics platform
+ * Matomo - free/libre analytics platform
*
* @link https://matomo.org
* @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
@@ -76,6 +76,16 @@ class Segment
protected $idSites = null;
/**
+ * @var Date
+ */
+ protected $startDate = null;
+
+ /**
+ * @var Date
+ */
+ protected $endDate = null;
+
+ /**
* @var LogQueryBuilder
*/
private $segmentQueryBuilder;
@@ -93,12 +103,20 @@ class Segment
/**
* Constructor.
*
+ * When using segments that contain a != or !@ condition on a non-visit dimension (e.g. action, conversion, ...), a
+ * subquery is needed to get correct results. To avoid subqueries that fetch too much data, a startDate and/or an
+ * endDate must be set in this case. Those dates are used to limit the subquery (along with the idSites, if any are
+ * given). If neither startDate nor endDate is given for such a segment, a query that directly joins the
+ * corresponding tables is generated instead, and a warning is logged because the results might be incorrect.
+ *
* @param string $segmentCondition The segment condition, eg, `'browserCode=ff;countryCode=CA'`.
* @param array $idSites The list of sites the segment will be used with. Some segments are
* dependent on the site, such as goal segments.
+ * @param Date|null $startDate start date used to limit subqueries
+ * @param Date|null $endDate end date used to limit subqueries
* @throws
*/
- public function __construct($segmentCondition, $idSites)
+ public function __construct($segmentCondition, $idSites, Date $startDate = null, Date $endDate = null)
{
$this->segmentQueryBuilder = StaticContainer::get('Piwik\DataAccess\LogQueryBuilder');
@@ -111,6 +129,14 @@ class Segment
$this->originalString = $segmentCondition;
+ if ($startDate instanceof Date) {
+ $this->startDate = $startDate;
+ }
+
+ if ($endDate instanceof Date) {
+ $this->endDate = $endDate;
+ }
+
// The segment expression can be urlencoded. Unfortunately, both the encoded and decoded versions
// can usually be parsed successfully. To pick the right one, we try both and pick the one w/ more
// successfully parsed subexpressions.
@@ -195,6 +221,12 @@ class Segment
$string = substr($string, 0, self::SEGMENT_TRUNCATE_LIMIT);
$this->string = $string;
+
+ if (empty($idSites)) {
+ $idSites = [];
+ } else if (!is_array($idSites)) {
+ $idSites = [$idSites];
+ }
$this->idSites = $idSites;
$segment = new SegmentExpression($string);
$this->segmentExpression = $segment;
@@ -209,8 +241,7 @@ class Segment
$cleanedExpressions = array();
foreach ($expressions as $expression) {
$operand = $expression[SegmentExpression::INDEX_OPERAND];
- $cleanedExpression = $this->getCleanedExpression($operand);
- $expression[SegmentExpression::INDEX_OPERAND] = $cleanedExpression;
+ $expression[SegmentExpression::INDEX_OPERAND] = $this->getCleanedExpression($operand);
$cleanedExpressions[] = $expression;
}
@@ -226,7 +257,8 @@ class Segment
$availableSegment = $this->getSegmentByName($name);
- if (!empty($availableSegment['unionOfSegments'])) {
+ // We leave segments using the !@ and != operators untouched when they are not on the log_visit table, as they will be built using a subquery
+ if (!$this->doesSegmentNeedSubquery($operand[SegmentExpression::INDEX_OPERAND_OPERATOR], $name) && !empty($availableSegment['unionOfSegments'])) {
$count = 0;
foreach ($availableSegment['unionOfSegments'] as $segmentNameOfUnion) {
$count++;
@@ -252,6 +284,51 @@ class Segment
return $expressionsWithUnions;
}
+    /**
+     * Tells whether the named segment is backed by a column of the log_visit table.
+     *
+     * A segment that declares `unionOfSegments` counts as a visit segment as soon as one member
+     * of the union has an `sqlSegment` starting with `log_visit.`. Any other segment is judged by
+     * its own `sqlSegment`.
+     *
+     * @param string $name name of the segment to look up
+     * @return bool
+     */
+    private function isVisitSegment($name)
+    {
+        $segment = $this->getSegmentByName($name);
+
+        // Plain segment: only its own column decides
+        if (empty($segment['unionOfSegments'])) {
+            return strpos($segment['sqlSegment'], 'log_visit.') === 0;
+        }
+
+        // Union segment: a single log_visit member is enough
+        foreach ($segment['unionOfSegments'] as $unionMemberName) {
+            $unionMember = $this->getSegmentByName($unionMemberName);
+            if (strpos($unionMember['sqlSegment'], 'log_visit.') === 0) {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    /**
+     * Decides whether a segment condition has to be resolved through a subquery.
+     *
+     * A subquery is required when the operator is MATCH_DOES_NOT_CONTAIN or MATCH_NOT_EQUAL and
+     * the segment is not a visit segment (see isVisitSegment()). If a subquery would be required
+     * but neither a start date nor an end date is set on this object, a warning including a
+     * stack trace is logged and `false` is returned instead.
+     *
+     * @param string $operator match type of the condition
+     * @param string $segmentName name of the segment the condition applies to
+     * @return bool
+     */
+    private function doesSegmentNeedSubquery($operator, $segmentName)
+    {
+        // Strict comparison keeps this check in line with getInvertedOperatorForSubQuery(), which
+        // matches the same constants using ===. A loose match here would let through an operator
+        // that the inversion then rejects with an exception.
+        $isNegatingOperator = in_array($operator, [
+            SegmentExpression::MATCH_DOES_NOT_CONTAIN,
+            SegmentExpression::MATCH_NOT_EQUAL
+        ], true);
+
+        // The segment lookup only happens for negating operators (short-circuit)
+        $requiresSubQuery = $isNegatingOperator && !$this->isVisitSegment($segmentName);
+
+        if ($requiresSubQuery && empty($this->startDate) && empty($this->endDate)) {
+            // The exception is never thrown; it is only created to capture the current stack trace
+            $e = new Exception();
+            Log::warning("Avoiding segment subquery due to missing start date and/or an end date. Please ensure a start date and/or end date is set when initializing a segment if it's used to build a query. Stacktrace:\n" . $e->getTraceAsString());
+            return false;
+        }
+
+        return $requiresSubQuery;
+    }
+
+    /**
+     * Returns the positive counterpart of a negating operator.
+     *
+     * MATCH_DOES_NOT_CONTAIN maps to MATCH_CONTAINS and MATCH_NOT_EQUAL maps to MATCH_EQUAL.
+     *
+     * @param string $operator operator to invert
+     * @return string the inverted operator
+     * @throws Exception if the operator is neither MATCH_DOES_NOT_CONTAIN nor MATCH_NOT_EQUAL
+     */
+    private function getInvertedOperatorForSubQuery($operator)
+    {
+        if ($operator === SegmentExpression::MATCH_DOES_NOT_CONTAIN) {
+            return SegmentExpression::MATCH_CONTAINS;
+        }
+
+        if ($operator === SegmentExpression::MATCH_NOT_EQUAL) {
+            return SegmentExpression::MATCH_EQUAL;
+        }
+
+        // Every other operator has no inverted form defined here
+        throw new Exception("Operator not support for subqueries");
+    }
+
/**
* Returns `true` if the segment is empty, `false` if otherwise.
*/
@@ -277,9 +354,6 @@ class Segment
}
$idSites = $this->idSites;
- if (!is_array($idSites)) {
- $idSites = array($this->idSites);
- }
return Rules::isRequestAuthorizedToArchive()
|| Rules::isBrowserArchivingAvailableForSegments()
@@ -297,6 +371,40 @@ class Segment
$segment = $this->getSegmentByName($name);
$sqlName = $segment['sqlSegment'];
+ // Build subqueries for segments that are not on log_visit table but use !@ or != as operator
+ // This is required to ensure segments like actionUrl!@value really do not include any visit having an action containing `value`
+ if ($this->doesSegmentNeedSubquery($matchType, $name)) {
+ $operator = $this->getInvertedOperatorForSubQuery($matchType);
+ $stringSegment = $name . $operator . $value;
+ $segmentObj = new Segment($stringSegment, $this->idSites, $this->startDate, $this->endDate);
+
+ $select = 'log_visit.idvisit';
+ $from = 'log_visit';
+ $datetimeField = 'visit_last_action_time';
+ $where = [];
+ $bind = [];
+ if (!empty($this->idSites)) {
+ $where[] = "$from.idsite IN (" . Common::getSqlStringFieldsArray($this->idSites) . ")";
+ $bind = $this->idSites;
+ }
+ if ($this->startDate instanceof Date) {
+ $where[] = "$from.$datetimeField >= ?";
+ $bind[] = $this->startDate->toString(Date::DATE_TIME_FORMAT);
+ }
+ if ($this->endDate instanceof Date) {
+ $where[] = "$from.$datetimeField <= ?";
+ $bind[] = $this->endDate->toString(Date::DATE_TIME_FORMAT);
+ }
+
+ $logQueryBuilder = StaticContainer::get('Piwik\DataAccess\LogQueryBuilder');
+ $forceGroupByBackup = $logQueryBuilder->getForcedInnerGroupBySubselect();
+ $logQueryBuilder->forceInnerGroupBySubselect(LogQueryBuilder::FORCE_INNER_GROUP_BY_NO_SUBSELECT);
+ $query = $segmentObj->getSelectQuery($select, $from, implode(' AND ', $where), $bind);
+ $logQueryBuilder->forceInnerGroupBySubselect($forceGroupByBackup);
+
+ return ['log_visit.idvisit', SegmentExpression::MATCH_ACTIONS_NOT_CONTAINS, $query];
+ }
+
if ($matchType != SegmentExpression::MATCH_IS_NOT_NULL_NOR_EMPTY
&& $matchType != SegmentExpression::MATCH_IS_NULL_OR_EMPTY) {