Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/matomo-org/matomo.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Benaka Moorthi <benaka.moorthi@gmail.com> 2013-05-31 11:36:25 +0400
committer Benaka Moorthi <benaka.moorthi@gmail.com> 2013-05-31 11:36:25 +0400
commit 6abbacb9575087161ade571de2b0ad373b03d115 (patch)
tree 4da27e6b31ffc2f62e0cb0b7ee2cecfd15de2c22 /core/Archive.php
parent cbc929d0e6a10337c29c3459048611c23a2d2d65 (diff)
Refactored archive data querying code. Removed tree-like data structure from Piwik_Archive, added memory optimizations for archiving (not storing archive processing instances), added optimization for querying data indexed by site + date, added ability to select multiple blobs/metrics at once (from different plugins), added optimization to Piwik_ArchiveProcessing_Period where numerics/records are selected all at once instead of one at a time.
Also added data access layer object for archive querying code, allowed anonymous function objects to be used as DataTable filters, and removed 'timestamp' DataTable metadata.
Diffstat (limited to 'core/Archive.php')
-rw-r--r-- core/Archive.php 816
1 files changed, 670 insertions, 146 deletions
diff --git a/core/Archive.php b/core/Archive.php
index 5492c1b4a8..42690fb379 100644
--- a/core/Archive.php
+++ b/core/Archive.php
@@ -12,9 +12,14 @@
/**
* The archive object is used to query specific data for a day or a period of statistics for a given website.
*
+ * Limitations:
+ * - If you query w/ a range period, you can only query for ONE at a time.
+ * - If you query w/ a non-range period, you can query for multiple periods, but they must
+ * all be of the same type (ie, day, week, month, year).
+ *
* Example:
* <pre>
- * $archive = Piwik_Archive::build($idSite = 1, $period = 'week', '2008-03-08' );
+ * $archive = Piwik_Archive::build($idSite = 1, $period = 'week', '2008-03-08');
* $dataTable = $archive->getDataTable('Provider_hostnameExt');
* $dataTable->queueFilter('ReplaceColumnNames');
* return $dataTable;
@@ -22,17 +27,19 @@
*
* Example bis:
* <pre>
- * $archive = Piwik_Archive::build($idSite = 3, $period = 'day', $date = 'today' );
+ * $archive = Piwik_Archive::build($idSite = 3, $period = 'day', $date = 'today');
* $nbVisits = $archive->getNumeric('nb_visits');
* return $nbVisits;
* </pre>
*
* If the requested statistics are not yet processed, Archive uses ArchiveProcessing to archive the statistics.
- *
+ *
+ * TODO: create ticket for this: when building archives, should use each site's timezone (ONLY FOR 'now').
+ *
* @package Piwik
* @subpackage Piwik_Archive
*/
-abstract class Piwik_Archive
+class Piwik_Archive
{
/**
* When saving DataTables in the DB, we sometimes replace the columns name by these IDs so we save up lots of bytes
@@ -185,86 +192,230 @@ abstract class Piwik_Archive
const LABEL_ECOMMERCE_CART = 'ecommerceAbandonedCart';
const LABEL_ECOMMERCE_ORDER = 'ecommerceOrder';
-
+
/**
- * Website Piwik_Site
- *
- * @var Piwik_Site
+ * The list of site IDs to query archive data for.
+ *
+ * @var array
*/
- protected $site = null;
-
+ private $siteIds;
+
+ /**
+ * The list of Piwik_Period's to query archive data for.
+ *
+ * @var array
+ */
+ private $periods;
+
/**
- * Segment applied to the visits set
+ * Segment applied to the visits set.
+ *
* @var Piwik_Segment
*/
- protected $segment = false;
+ private $segment;
+
+ /**
+ * List of archive IDs for the sites, periods and segment we are querying with.
+ * Archive IDs are indexed by done flag and period, ie:
+ *
+ * array(
+ * 'done.Referers' => array(
+ * '2010-01-01' => 1,
+ * '2010-01-02' => 2,
+ * ),
+ * 'done.VisitsSummary' => array(
+ * '2010-01-01' => 3,
+ * '2010-01-02' => 4,
+ * ),
+ * )
+ *
+ * or,
+ *
+ * array(
+ * 'done.all' => array(
+ * '2010-01-01' => 1,
+ * '2010-01-02' => 2
+ * )
+ * )
+ *
+ * @var array
+ */
+ private $idarchives = array();
+
+ /**
+ * If set to true, the result of all get functions (ie, getNumeric, getBlob, etc.)
+ * will be indexed by the site ID, even if we're only querying data for one site.
+ *
+ * @var bool
+ */
+ private $forceIndexedBySite;
+
+ /**
+ * If set to true, the result of all get functions (ie, getNumeric, getBlob, etc.)
+ * will be indexed by the period, even if we're only querying data for one period.
+ *
+ * @var bool
+ */
+ private $forceIndexedByDate;
+
+ /**
+ * Data Access Layer object.
+ *
+ * @var Piwik_DataAccess_ArchiveQuery
+ */
+ private $dataAccess;
+
+ /**
+ * Cache of Piwik_ArchiveProcessing instances used when launching the archiving
+ * process.
+ *
+ * @var array
+ */
+ private $processingCache = array();
+
+ /**
+ * Constructor.
+ *
+ * @param array|int $siteIds List of site IDs to query data for.
+ * @param array|Piwik_Period $periods List of periods to query data for.
+ * @param Piwik_Segment $segment The segment used to narrow the visits set.
+ * @param bool $forceIndexedBySite Whether to force index the result of a query by site ID.
+ * @param bool $forceIndexedByDate Whether to force index the result of a query by period.
+ */
+ public function __construct($siteIds, $periods, Piwik_Segment $segment, $forceIndexedBySite = false,
+ $forceIndexedByDate = false)
+ {
+ $this->siteIds = $this->getAsNonEmptyArray($siteIds, 'siteIds');
+
+ $periods = $this->getAsNonEmptyArray($periods, 'periods');
+ $this->periods = array();
+ foreach ($periods as $period) {
+ $this->periods[$period->getRangeString()] = $period;
+ }
+
+ $this->segment = $segment;
+ $this->forceIndexedBySite = $forceIndexedBySite;
+ $this->forceIndexedByDate = $forceIndexedByDate;
+ $this->dataAccess = new Piwik_DataAccess_ArchiveQuery();
+ }
+
+ /**
+ * Destructor.
+ */
+ public function __destruct()
+ {
+ $this->periods = null;
+ $this->siteIds = null;
+ $this->segment = null;
+ $this->idarchives = array();
+ $this->processingCache = array();
+ }
+
+ /**
+ * Returns the IDs of sites we are querying archive data for.
+ *
+ * @return array
+ */
+ public function getSiteIds()
+ {
+ return $this->siteIds;
+ }
+
+ /**
+ * Returns the periods we are querying archive data for.
+ *
+ * @return array
+ */
+ public function getPeriods()
+ {
+ return $this->periods;
+ }
+
+ /**
+ * Returns the segment used to limit the visit set.
+ *
+ * @return Piwik_Segment|null
+ */
+ public function getSegment()
+ {
+ return $this->segment;
+ }
/**
- * Builds an Archive object or returns the same archive if previously built.
+ * Builds an Archive object using query parameter values.
*
- * @param int|string $idSite integer, or comma separated list of integer
- * @param string $period 'week' 'day' etc.
- * @param Piwik_Date|string $strDate 'YYYY-MM-DD' or magic keywords 'today' @see Piwik_Date::factory()
- * @param bool|string $segment Segment definition - defaults to false for Backward Compatibility
- * @param bool|string $_restrictSitesToLogin Used only when running as a scheduled task
+ * @param int|string $idSite Integer, or comma separated list of integer site IDs.
+ * @param string $period 'day', 'week', 'month', 'year' or 'range'
+ * @param Piwik_Date|string $strDate 'YYYY-MM-DD', magic keywords (ie, 'today'; @see Piwik_Date::factory())
+ * or date range (ie, 'YYYY-MM-DD,YYYY-MM-DD').
+ * @param false|string $segment Segment definition - defaults to false for backward compatibility.
+ * @param false|string $_restrictSitesToLogin Used only when running as a scheduled task.
* @return Piwik_Archive
*/
public static function build($idSite, $period, $strDate, $segment = false, $_restrictSitesToLogin = false)
{
- if ($idSite === 'all') {
- $sites = Piwik_SitesManager_API::getInstance()->getSitesIdWithAtLeastViewAccess($_restrictSitesToLogin);
- } else {
- $sites = Piwik_Site::getIdSitesFromIdSitesString($idSite);
- }
-
- if (!($segment instanceof Piwik_Segment)) {
- $segment = new Piwik_Segment($segment, $idSite);
+ $forceIndexedBySite = false;
+ $forceIndexedByDate = false;
+
+ // determine site IDs to query from
+ if (is_array($idSite)
+ || $idSite == 'all'
+ ) {
+ $forceIndexedBySite = true;
}
-
- // idSite=1,3 or idSite=all
- if ($idSite === 'all'
- || is_array($idSite)
- || count($sites) > 1
+ $sites = Piwik_Site::getIdSitesFromIdSitesString($idSite, $_restrictSitesToLogin);
+
+ // if a period date string is detected: either 'last30', 'previous10' or 'YYYY-MM-DD,YYYY-MM-DD'
+ if (is_string($strDate)
+ && self::isMultiplePeriod($strDate, $period)
) {
- $archive = new Piwik_Archive_Array_IndexedBySite($sites, $period, $strDate, $segment, $_restrictSitesToLogin);
- } // if a period date string is detected: either 'last30', 'previous10' or 'YYYY-MM-DD,YYYY-MM-DD'
- elseif (is_string($strDate) && self::isMultiplePeriod($strDate, $period)) {
- $oSite = new Piwik_Site($idSite);
- $archive = new Piwik_Archive_Array_IndexedByDate($oSite, $period, $strDate, $segment);
- } // case we request a single archive
- else {
- $oSite = new Piwik_Site($idSite);
+ $oPeriod = new Piwik_Period_Range($period, $strDate);
+ $allPeriods = $oPeriod->getSubperiods();
+ $forceIndexedByDate = true;
+ } else {
+ if (count($sites) == 1) {
+ $oSite = new Piwik_Site($sites[0]);
+ } else {
+ $oSite = null;
+ }
+
$oPeriod = Piwik_Archive::makePeriodFromQueryParams($oSite, $period, $strDate);
-
- $archive = new Piwik_Archive_Single();
- $archive->setPeriod($oPeriod);
- $archive->setSite($oSite);
- $archive->setSegment($segment);
+ $allPeriods = array($oPeriod);
}
- return $archive;
+
+ return new Piwik_Archive(
+ $sites, $allPeriods, new Piwik_Segment($segment, $sites), $forceIndexedBySite, $forceIndexedByDate);
}
/**
* Creates a period instance using a Piwik_Site instance and two strings describing
* the period & date.
*
- * @param Piwik_Site $site
+ * @param Piwik_Site|null $site
* @param string $strPeriod The period string: day, week, month, year, range
* @param string $strDate The date or date range string.
* @return Piwik_Period
*/
public static function makePeriodFromQueryParams($site, $strPeriod, $strDate)
{
- $tz = $site->getTimezone();
+ if ($site === null) {
+ $tz = 'UTC';
+ } else {
+ $tz = $site->getTimezone();
+ }
if ($strPeriod == 'range') {
$oPeriod = new Piwik_Period_Range('range', $strDate, $tz, Piwik_Date::factory('today', $tz));
} else {
$oDate = $strDate;
if (!($strDate instanceof Piwik_Date)) {
- if ($strDate == 'now' || $strDate == 'today') {
+ if ($strDate == 'now'
+ || $strDate == 'today'
+ ) {
$strDate = date('Y-m-d', Piwik_Date::factory('now', $tz)->getTimestamp());
- } elseif ($strDate == 'yesterday' || $strDate == 'yesterdaySameTime') {
+ } elseif ($strDate == 'yesterday'
+ || $strDate == 'yesterdaySameTime'
+ ) {
$strDate = date('Y-m-d', Piwik_Date::factory('now', $tz)->subDay(1)->getTimestamp());
}
$oDate = Piwik_Date::factory($strDate);
@@ -275,63 +426,197 @@ abstract class Piwik_Archive
return $oPeriod;
}
-
- abstract public function prepareArchive();
-
+
/**
- * Returns the value of the element $name from the current archive
+ * Returns the value of the element $name from the current archive
* The value to be returned is a numeric value and is stored in the archive_numeric_* tables
*
- * @param string $name For example Referers_distinctKeywords
- * @return float|int|false False if no value with the given name
+ * @param string|array $names One or more archive names, eg, 'nb_visits', 'Referers_distinctKeywords',
+ * etc.
+ * @return numeric|array|false False if no value with the given name, numeric if only one site
+ * and date and we're not forcing an index, and array if multiple
+ * sites/dates are queried.
*/
- abstract public function getNumeric($name);
-
+ public function getNumeric($names)
+ {
+ $data = $this->get($names, 'numeric');
+
+ $resultIndices = $this->getResultIndices();
+ $result = $data->getArray($resultIndices);
+
+ // if only one metric is returned, just return it as a numeric value
+ if (empty($resultIndices)
+ && count($result) <= 1
+ ) {
+ $result = (float)reset($result); // convert to float in case $result is empty
+ }
+
+ return $result;
+ }
+
/**
- * Returns the value of the element $name from the current archive
- *
- * The value to be returned is a blob value and is stored in the archive_numeric_* tables
- *
+ * Returns the value of the elements in $names from the current archive.
+ *
+ * The value to be returned is a blob value and is stored in the archive_blob_* tables.
+ *
* It can return anything from strings, to serialized PHP arrays or PHP objects, etc.
*
- * @param string $name For example Referers_distinctKeywords
- * @return mixed False if no value with the given name
+ * @param string|array $names One or more archive names, eg, 'Referers_keywordBySearchEngine'.
+ * @return string|array|false False if no value with the given name, string if only one site
+ * and date and we're not forcing an index, and array if multiple
+ * sites/dates are queried.
*/
- abstract public function getBlob($name);
-
+ public function getBlob($names, $idSubtable = null)
+ {
+ $data = $this->get($names, 'blob', $idSubtable);
+ return $data->getArray($this->getResultIndices());
+ }
+
/**
- *
- * @param $fields
- * @return Piwik_DataTable
+ * Returns the numeric values of the elements in $names as a DataTable.
+ *
+ * Note: Every DataTable instance returned will have at most one row in it.
+ *
+ * @param string|array $names One or more archive names, eg, 'nb_visits', 'Referers_distinctKeywords',
+ * etc.
+ * @return Piwik_DataTable|false False if no value with the given names. Based on the number
+ * of sites/periods, the result can be a DataTable_Array, which
+ * contains DataTable instances.
*/
- abstract public function getDataTableFromNumeric($fields);
+ public function getDataTableFromNumeric($names)
+ {
+ $data = $this->get($names, 'numeric');
+ return $data->getDataTable($this->getResultIndices());
+ }
/**
* This method will build a dataTable from the blob value $name in the current archive.
+ *
+ * For example $name = 'Referers_searchEngineByKeyword' will return a
+ * Piwik_DataTable containing all the keywords. If a $idSubtable is given, the method
+ * will return the subTable of $name. If 'all' is supplied for $idSubtable every subtable
+ * will be returned.
+ *
+ * @param string $name The name of the record to get.
+ * @param int|string|null $idSubtable The subtable ID (if any) or 'all' if requesting every datatable.
+ * @return Piwik_DataTable|false
+ */
+ public function getDataTable($name, $idSubtable = null)
+ {
+ $data = $this->get($name, 'blob', $idSubtable);
+ return $data->getDataTable($this->getResultIndices());
+ }
+
+ /**
+ * Same as getDataTable() except that it will also load in memory all the subtables
+ * for the DataTable $name. You can then access the subtables by using the
+ * Piwik_DataTable_Manager::getTable() function.
*
- * For example $name = 'Referers_searchEngineByKeyword' will return a Piwik_DataTable containing all the keywords
- * If a idSubTable is given, the method will return the subTable of $name
- *
- * @param string $name
- * @param int $idSubTable or null if requesting the parent table
+ * @param string $name The name of the record to get.
+ * @param int|string|null $idSubtable The subtable ID (if any) or 'all' if requesting every datatable.
+ * @param bool $addMetadataSubtableId Whether to add the DB subtable ID as metadata to each datatable,
+ * or not.
* @return Piwik_DataTable
- * @throws exception If the value cannot be found
*/
- abstract public function getDataTable($name, $idSubTable = null);
+ public function getDataTableExpanded($name, $idSubtable = null, $addMetadataSubtableId = true)
+ {
+ $data = $this->get($name, 'blob', 'all');
+ return $data->getExpandedDataTable($this->getResultIndices(), $idSubtable, $addMetadataSubtableId);
+ }
+
+ /**
+ * Returns true if we shouldn't launch the archiving process and false if we should.
+ *
+ * @return bool
+ */
+ public function isArchivingDisabled()
+ {
+ return Piwik_ArchiveProcessing::isArchivingDisabledFor($this->segment, $this->getPeriodLabel());
+ }
/**
- * Same as getDataTable() except that it will also load in memory
- * all the subtables for the DataTable $name.
- * You can then access the subtables by using the Piwik_DataTable_Manager getTable()
+ * Returns true if Segmentation is allowed for this user
*
- * @param string $name
- * @param int|null $idSubTable null if requesting the parent table
- * @return Piwik_DataTable
+ * @return bool
*/
- abstract public function getDataTableExpanded($name, $idSubTable = null);
+ public static function isSegmentationEnabled()
+ {
+ return !Piwik::isUserIsAnonymous()
+ || Piwik_Config::getInstance()->General['anonymous_user_enable_use_segments_API'];
+ }
+ /**
+ * Indicate if $dateString and $period correspond to multiple periods
+ *
+ * @static
+ * @param $dateString
+ * @param $period
+ * @return boolean
+ */
+ public static function isMultiplePeriod($dateString, $period)
+ {
+ return (preg_match('/^(last|previous){1}([0-9]*)$/D', $dateString, $regs)
+ || Piwik_Period_Range::parseDateRange($dateString))
+ && $period != 'range';
+ }
/**
+ * Indicate if $idSiteString corresponds to multiple sites.
+ *
+ * @param string $idSiteString
+ * @return bool
+ */
+ public static function isMultipleSites($idSiteString)
+ {
+ return $idSiteString == 'all' || strpos($idSiteString, ',') !== false;
+ }
+
+ /**
+ * Returns the report names for a list of metric/record names.
+ *
+ * @see getRequestedReport
+ *
+ * @param array $archiveNames
+ */
+ public function getRequestedReports($archiveNames)
+ {
+ $result = array();
+ foreach ($archiveNames as $name) {
+ $result[] = self::getRequestedReport($name);
+ }
+ return array_unique($result);
+ }
+
+ /**
+ * Returns the report name for a metric/record name.
+ *
+ * A report name has the following format: {$pluginName}_{$reportId}, eg. VisitFrequency_Metrics.
+ * The report ID is not used anywhere in Piwik.
+ */
+ public static function getRequestedReport($archiveName)
+ {
+ // Core metrics are always processed in Core, for the requested date/period/segment
+ if (in_array($archiveName, Piwik_ArchiveProcessing::getCoreMetrics())
+ || $archiveName == 'max_actions'
+ ) {
+ return 'VisitsSummary_CoreMetrics';
+ }
+ // VisitFrequency metrics don't follow the same naming convention (HACK)
+ else if(strpos($archiveName, '_returning') > 0
+ // ignore Goal_visitor_returning_1_1_nb_conversions
+ && strpos($archiveName, 'Goal_') === false
+ ) {
+ return 'VisitFrequency_Metrics';
+ }
+ // Goal_* metrics are processed by the Goals plugin (HACK)
+ else if(strpos($archiveName, 'Goal_') === 0) {
+ return 'Goals_Metrics';
+ } else {
+ return $archiveName;
+ }
+ }
+
+ /**
* Helper - Loads a DataTable from the Archive.
* Optionally loads the table recursively,
* or optionally fetches a given subtable with $idSubtable
@@ -363,98 +648,337 @@ abstract class Piwik_Archive
return $dataTable;
}
-
- protected function formatNumericValue($value)
+
+ /**
+ * Queries archive tables for data and returns the result.
+ */
+ private function get($archiveNames, $archiveDataType, $idSubtable = null)
{
- // If there is no dot, we return as is
- // Note: this could be an integer bigger than 32 bits
- if (strpos($value, '.') === false) {
- if ($value === false) {
- return 0;
+ if (!is_array($archiveNames)) {
+ $archiveNames = array($archiveNames);
+ }
+
+ // apply idSubtable
+ if ($idSubtable !== null
+ && $idSubtable != 'all'
+ ) {
+ foreach ($archiveNames as &$name) {
+ $name .= "_$idSubtable";
}
- return (float)$value;
}
-
- // Round up the value with 2 decimals
- // we cast the result as float because returns false when no visitors
- $value = round((float)$value, 2);
- return $value;
- }
-
- public function getSegment()
- {
- return $this->segment;
+
+ $result = new Piwik_Archive_DataCollection(
+ $archiveNames, $archiveDataType, $this->siteIds, $this->periods, $defaultRow = null);
+
+ $archiveIds = $this->getArchiveIds($archiveNames);
+ if (empty($archiveIds)) {
+ return $result;
+ }
+
+ $archiveData = $this->dataAccess->getArchiveData($archiveIds, $archiveNames, $archiveDataType, $idSubtable);
+ foreach ($archiveData as $row) {
+ // values are grouped by idsite (site ID), date1-date2 (date range), then name (field name)
+ $idSite = $row['idsite'];
+ $periodStr = $row['date1'].",".$row['date2'];
+
+ if ($archiveDataType == 'numeric') {
+ $value = $this->formatNumericValue($row['value']);
+ } else {
+ $value = $this->uncompress($row['value']);
+ $result->addMetadata($idSite, $periodStr, 'ts_archived', $row['ts_archived']);
+ }
+
+ $resultRow = &$result->get($idSite, $periodStr);
+ $resultRow[$row['name']] = $value;
+ }
+
+ return $result;
}
-
- public function setSegment(Piwik_Segment $segment)
+
+ /**
+ * Returns archive IDs for the sites, periods and archive names that are being
+ * queried. This function will use the idarchive cache if it has the right data,
+ * query archive tables for IDs w/o launching archiving, or launch archiving and
+ * get the idarchive from Piwik_ArchiveProcessing instances.
+ */
+ private function getArchiveIds($archiveNames)
{
- $this->segment = $segment;
+ $requestedReports = $this->getRequestedReports($archiveNames);
+
+ // figure out which archives haven't been processed (if an archive has been processed,
+ // then we have the archive IDs in $this->idarchives)
+ $doneFlags = array();
+ $archiveGroups = array();
+ foreach ($requestedReports as $report) {
+ $doneFlag = Piwik_ArchiveProcessing::getDoneStringFlagFor(
+ $this->segment, $this->getPeriodLabel(), $report);
+
+ $doneFlags[$doneFlag] = true;
+ if (!isset($this->idarchives[$doneFlag])) {
+ $archiveGroups[] = $this->getArchiveGroupOfReport($report);
+ }
+ }
+
+ $archiveGroups = array_unique($archiveGroups);
+
+ // cache id archives for plugins we haven't processed yet
+ if (!empty($archiveGroups)) {
+ if (!$this->isArchivingDisabled()) {
+ $this->cacheArchiveIdsAfterLaunching($archiveGroups, $requestedReports);
+ } else {
+ $this->cacheArchiveIdsWithoutLaunching($requestedReports);
+ }
+ }
+
+ // order idarchives by the table month they belong to
+ $idArchivesByMonth = array();
+ foreach (array_keys($doneFlags) as $doneFlag) {
+ if (empty($this->idarchives[$doneFlag])) {
+ continue;
+ }
+
+ foreach ($this->idarchives[$doneFlag] as $dateRange => $idarchives) {
+ foreach ($idarchives as $id) {
+ $idArchivesByMonth[$dateRange][] = $id;
+ }
+ }
+ }
+
+ return $idArchivesByMonth;
}
-
+
/**
- * Sets the site
- *
- * @param Piwik_Site $site
+ * Gets the IDs of the archives we're querying for and stores them in $this->idarchives.
+ * This function will launch the archiving process for each period/site/plugin if
+ * metrics/reports have not been calculated/archived already.
+ *
+ * @param array $archiveGroups @see getArchiveGroupOfReport
+ * @param array $requestedReports @see getRequestedReport
*/
- public function setSite(Piwik_Site $site)
+ private function cacheArchiveIdsAfterLaunching($archiveGroups, $requestedReports)
{
- $this->site = $site;
+ $today = Piwik_Date::today();
+
+ // for every individual query permutation, launch the archiving process and get the archive ID
+ foreach ($this->periods as $period) {
+ $periodStr = $period->getRangeString();
+
+ $twoDaysBeforePeriod = $period->getDateStart()->subDay(2);
+ $twoDaysAfterPeriod = $period->getDateEnd()->addDay(2);
+
+ foreach ($this->siteIds as $idSite) {
+ $site = new Piwik_Site($idSite);
+
+ // if the END of the period is BEFORE the website creation date
+ // we already know there are no stats for this period
+ // we add two days to make sure we don't miss the day of the website creation
+ if ($twoDaysAfterPeriod->isEarlier($site->getCreationDate())) {
+ $archiveDesc = $this->getArchiveDescriptor($idSite, $period);
+ Piwik::log("Archive $archiveDesc skipped, archive is before the website was created.");
+ continue;
+ }
+
+ // if the starting date is in the future we know there is no visit
+ if ($twoDaysBeforePeriod->isLater($today)) {
+ $archiveDesc = $this->getArchiveDescriptor($idSite, $period);
+ Piwik::log("Archive $archiveDesc skipped, archive is after today.");
+ continue;
+ }
+
+ // prepare the ArchiveProcessing instance
+ $processing = $this->getArchiveProcessingInstance($period);
+ $processing->setSite($site);
+ $processing->setPeriod($period);
+ $processing->setSegment($this->segment);
+
+ $processing->isThereSomeVisits = null;
+
+ // process for each requested report as well
+ foreach ($archiveGroups as $pluginOrAll) {
+ if ($pluginOrAll == 'all') {
+ $pluginOrAll = $this->getPluginForReport(reset($requestedReports));
+ }
+ $report = $pluginOrAll.'_reportsAndMetrics';
+
+ $doneFlag = Piwik_ArchiveProcessing::getDoneStringFlagFor(
+ $this->segment, $period->getLabel(), $report);
+ $this->initializeArchiveIdCache($doneFlag);
+
+ $processing->init();
+ $processing->setRequestedReport($report);
+
+ // launch archiving if the requested data hasn't been archived
+ $idArchive = $processing->loadArchive();
+ if (empty($idArchive)) {
+ $processing->launchArchiving();
+ $idArchive = $processing->getIdArchive();
+ }
+
+ if (!$processing->isThereSomeVisits()) {
+ continue;
+ }
+
+ $this->idarchives[$doneFlag][$periodStr][] = $idArchive;
+ }
+ }
+ }
}
-
+
/**
- * Gets the site
- *
- * @return Piwik_Site
+ * Gets the IDs of the archives we're querying for and stores them in $this->idarchives.
+ * This function will not launch the archiving process (and is thus much, much faster
+ * than cacheArchiveIdsAfterLaunching).
+ *
+ * @param array $requestedReports @see getRequestedReport
*/
- public function getSite()
+ private function cacheArchiveIdsWithoutLaunching($requestedReports)
{
- return $this->site;
+ $periodType = $this->getPeriodLabel();
+
+ $idarchivesByReport = $this->dataAccess->getArchiveIds(
+ $this->siteIds, $this->periods, $this->segment, $requestedReports);
+
+ // initialize archive ID cache for each report
+ foreach ($requestedReports as $report) {
+ $doneFlag = Piwik_ArchiveProcessing::getDoneStringFlagFor($this->segment, $periodType, $report);
+ $this->initializeArchiveIdCache($doneFlag);
+ }
+
+ foreach ($idarchivesByReport as $doneFlag => $idarchivesByDate) {
+ foreach ($idarchivesByDate as $dateRange => $idarchives) {
+ foreach ($idarchives as $idarchive) {
+ $this->idarchives[$doneFlag][$dateRange][] = $idarchive;
+ }
+ }
+ }
}
-
+
/**
- * Returns the Id site associated with this archive
- *
- * @return int
+ * Returns an ArchiveProcessing instance that should be used for a specific
+ * period.
+ *
+ * @param Piwik_Period $period
*/
- public function getIdSite()
+ private function getArchiveProcessingInstance($period)
+ {
+ $label = $period->getLabel();
+ if (!isset($this->processingCache[$label])) {
+ $this->processingCache[$label] = Piwik_ArchiveProcessing::factory($label);
+ }
+ return $this->processingCache[$label];
+ }
+
+ private function getPeriodLabel()
{
- return $this->site->getId();
+ return reset($this->periods)->getLabel();
}
-
+
/**
- * Returns true if Segmentation is allowed for this user
- *
- * @return bool
+ * Returns an array describing what metadata to use when indexing a query result.
+ * For use with Piwik_Archive_DataCollection.
+ *
+ * @return array
*/
- public static function isSegmentationEnabled()
+ private function getResultIndices()
{
- return !Piwik::isUserIsAnonymous()
- || Piwik_Config::getInstance()->General['anonymous_user_enable_use_segments_API'];
+ $indices = array();
+
+ if (count($this->siteIds) > 1
+ || $this->forceIndexedBySite
+ ) {
+ $indices['site'] = 'idSite';
+ }
+
+ if (count($this->periods) > 1
+ || $this->forceIndexedByDate
+ ) {
+ $indices['period'] = 'date';
+ }
+
+ return $indices;
}
+ private function formatNumericValue($value)
+ {
+ // If there is no dot, we return as is
+ // Note: this could be an integer bigger than 32 bits
+ if (strpos($value, '.') === false) {
+ if ($value === false) {
+ return 0;
+ }
+ return (float)$value;
+ }
+
+ // Round up the value with 2 decimals
+ // we cast the result as float because returns false when no visitors
+ return round((float)$value, 2);
+ }
+
+ private function getArchiveDescriptor($idSite, $period)
+ {
+ return "site $idSite, {$period->getLabel()} ({$period->getPrettyString()})";
+ }
+
+ private function uncompress($data)
+ {
+ return @gzuncompress($data);
+ }
+
+ private function getAsNonEmptyArray($array, $paramName)
+ {
+ if (!is_array($array)) {
+ $array = array($array);
+ }
+
+ if (empty($array)) {
+ throw new Exception("Piwik_Archive::__construct: \$$paramName is empty.");
+ }
+
+ return $array;
+ }
+
/**
- * Indicate if $dateString and $period correspond to multiple periods
- *
- * @static
- * @param $dateString
- * @param $period
- * @return boolean
+ * Initializes the archive ID cache ($this->idarchives) for a particular 'done' flag.
+ *
+ * It is necessary that each archive ID caching function call this method for each
+ * unique 'done' flag it encounters, since the getArchiveIds function determines
+ * whether archiving should be launched based on whether $this->idarchives has
+ * an entry for a specific 'done' flag.
+ *
+ * If this function is not called, then periods with no visits will not add
+ * entries to the cache. If the archive is used again, SQL will be executed to
+ * try and find the archive IDs even though we know there are none.
*/
- public static function isMultiplePeriod($dateString, $period)
+ private function initializeArchiveIdCache($doneFlag)
{
- return (preg_match('/^(last|previous){1}([0-9]*)$/D', $dateString, $regs)
- || Piwik_Period_Range::parseDateRange($dateString))
- && $period != 'range';
+ if (!isset($this->idarchives[$doneFlag])) {
+ $this->idarchives[$doneFlag] = array();
+ }
}
-
+
/**
- * Indicate if $idSiteString corresponds to multiple sites.
- *
- * @param string $idSiteString
- * @return bool
+ * Returns the archiving group identifier of a report.
+ *
+ * More than one plugin can be called at once. In such a case we don't want to
+ * launch archiving three times for three plugins if doing it once is enough,
+ * so getArchiveIds makes sure to get the archive group of all reports.
+ *
+ * If the period isn't a range, then all plugins' archiving code is executed.
+ * If the period is a range, then archiving code is executed individually for
+ * each plugin.
*/
- public static function isMultipleSites($idSiteString)
+ private function getArchiveGroupOfReport($report)
{
- return $idSiteString == 'all' || strpos($idSiteString, ',') !== false;
+ if ($this->getPeriodLabel() != 'range') {
+ return 'all';
+ }
+
+ return $this->getPluginForReport($report);
+ }
+
+ private function getPluginForReport($report)
+ {
+ $parts = explode('_', $report);
+ return $parts[0];
}
}