diff options
author | Benaka Moorthi <benaka.moorthi@gmail.com> | 2013-05-31 11:36:25 +0400 |
---|---|---|
committer | Benaka Moorthi <benaka.moorthi@gmail.com> | 2013-05-31 11:36:25 +0400 |
commit | 6abbacb9575087161ade571de2b0ad373b03d115 (patch) | |
tree | 4da27e6b31ffc2f62e0cb0b7ee2cecfd15de2c22 /core/Archive.php | |
parent | cbc929d0e6a10337c29c3459048611c23a2d2d65 (diff) |
Refactored archive data querying code. Removed tree-like data structure from Piwik_Archive, added memory optimizations for archiving (not storing archive processing instances), added optimization for querying data indexed by site + date, added ability to select multiple blobs/metrics at once (from different plugins), added optimization to Piwik_ArchiveProcessing_Period where numerics/records are selected all at once instead of one at a time.
Also added data access layer object for archive querying code, allowed anonymous function objects to be used as DataTable filters, and removed 'timestamp' DataTable metadata.
Diffstat (limited to 'core/Archive.php')
-rw-r--r-- | core/Archive.php | 816 |
1 files changed, 670 insertions, 146 deletions
diff --git a/core/Archive.php b/core/Archive.php index 5492c1b4a8..42690fb379 100644 --- a/core/Archive.php +++ b/core/Archive.php @@ -12,9 +12,14 @@ /** * The archive object is used to query specific data for a day or a period of statistics for a given website. * + * Limitations: + * - If you query w/ a range period, you can only query for ONE at a time. + * - If you query w/ a non-range period, you can query for multiple periods, but they must + * all be of the same type (ie, day, week, month, year). + * * Example: * <pre> - * $archive = Piwik_Archive::build($idSite = 1, $period = 'week', '2008-03-08' ); + * $archive = Piwik_Archive::build($idSite = 1, $period = 'week', '2008-03-08'); * $dataTable = $archive->getDataTable('Provider_hostnameExt'); * $dataTable->queueFilter('ReplaceColumnNames'); * return $dataTable; @@ -22,17 +27,19 @@ * * Example bis: * <pre> - * $archive = Piwik_Archive::build($idSite = 3, $period = 'day', $date = 'today' ); + * $archive = Piwik_Archive::build($idSite = 3, $period = 'day', $date = 'today'); * $nbVisits = $archive->getNumeric('nb_visits'); * return $nbVisits; * </pre> * * If the requested statistics are not yet processed, Archive uses ArchiveProcessing to archive the statistics. - * + * + * TODO: create ticket for this: when building archives, should use each site's timezone (ONLY FOR 'now'). + * * @package Piwik * @subpackage Piwik_Archive */ -abstract class Piwik_Archive +class Piwik_Archive { /** * When saving DataTables in the DB, we sometimes replace the columns name by these IDs so we save up lots of bytes @@ -185,86 +192,230 @@ abstract class Piwik_Archive const LABEL_ECOMMERCE_CART = 'ecommerceAbandonedCart'; const LABEL_ECOMMERCE_ORDER = 'ecommerceOrder'; - + /** - * Website Piwik_Site - * - * @var Piwik_Site + * The list of site IDs to query archive data for. + * + * @var array */ - protected $site = null; - + private $siteIds; + + /** + * The list of Piwik_Period's to query archive data for. + * + * @var array + */ + private $periods; + /** - * Segment applied to the visits set + * Segment applied to the visits set. + * * @var Piwik_Segment */ - protected $segment = false; + private $segment; + + /** + * List of archive IDs for the sites, periods and segment we are querying with. + * Archive IDs are indexed by done flag and period, ie: + * + * array( + * 'done.Referers' => array( + * '2010-01-01' => 1, + * '2010-01-02' => 2, + * ), + * 'done.VisitsSummary' => array( + * '2010-01-01' => 3, + * '2010-01-02' => 4, + * ), + * ) + * + * or, + * + * array( + * 'done.all' => array( + * '2010-01-01' => 1, + * '2010-01-02' => 2 + * ) + * ) + * + * @var array + */ + private $idarchives = array(); + + /** + * If set to true, the result of all get functions (ie, getNumeric, getBlob, etc.) + * will be indexed by the site ID, even if we're only querying data for one site. + * + * @var bool + */ + private $forceIndexedBySite; + + /** + * If set to true, the result of all get functions (ie, getNumeric, getBlob, etc.) + * will be indexed by the period, even if we're only querying data for one period. + * + * @var bool + */ + private $forceIndexedByDate; + + /** + * Data Access Layer object. + * + * @var Piwik_DataAccess_ArchiveQuery + */ + private $dataAccess; + + /** + * Cache of Piwik_ArchiveProcessing instances used when launching the archiving + * process. + * + * @var array + */ + private $processingCache = array(); + + /** + * Constructor. + * + * @param array|int $siteIds List of site IDs to query data for. + * @param array|Piwik_Period $periods List of periods to query data for. + * @param Piwik_Segment $segment The segment used to narrow the visits set. + * @param bool $forceIndexedBySite Whether to force index the result of a query by site ID. + * @param bool $forceIndexedByDate Whether to force index the result of a query by period. + */ + public function __construct($siteIds, $periods, Piwik_Segment $segment, $forceIndexedBySite = false, + $forceIndexedByDate = false) + { + $this->siteIds = $this->getAsNonEmptyArray($siteIds, 'siteIds'); + + $periods = $this->getAsNonEmptyArray($periods, 'periods'); + $this->periods = array(); + foreach ($periods as $period) { + $this->periods[$period->getRangeString()] = $period; + } + + $this->segment = $segment; + $this->forceIndexedBySite = $forceIndexedBySite; + $this->forceIndexedByDate = $forceIndexedByDate; + $this->dataAccess = new Piwik_DataAccess_ArchiveQuery(); + } + + /** + * Destructor. + */ + public function __destruct() + { + $this->periods = null; + $this->siteIds = null; + $this->segment = null; + $this->idarchives = array(); + $this->processingCache = array(); + } + + /** + * Returns the IDs of sites we are querying archive data for. + * + * @return array + */ + public function getSiteIds() + { + return $this->siteIds; + } + + /** + * Returns the periods we are querying archive data for. + * + * @return array + */ + public function getPeriods() + { + return $this->periods; + } + + /** + * Returns the segment used to limit the visit set. + * + * @return Piwik_Segment|null + */ + public function getSegment() + { + return $this->segment; + } /** - * Builds an Archive object or returns the same archive if previously built. + * Builds an Archive object using query parameter values. * - * @param int|string $idSite integer, or comma separated list of integer - * @param string $period 'week' 'day' etc. - * @param Piwik_Date|string $strDate 'YYYY-MM-DD' or magic keywords 'today' @see Piwik_Date::factory() - * @param bool|string $segment Segment definition - defaults to false for Backward Compatibility - * @param bool|string $_restrictSitesToLogin Used only when running as a scheduled task + * @param int|string $idSite Integer, or comma separated list of integer site IDs. + * @param string $period 'day', 'week', 'month', 'year' or 'range' + * @param Piwik_Date|string $strDate 'YYYY-MM-DD', magic keywords (ie, 'today'; @see Piwik_Date::factory()) + * or date range (ie, 'YYYY-MM-DD,YYYY-MM-DD'). + * @param false|string $segment Segment definition - defaults to false for backward compatibility. + * @param false|string $_restrictSitesToLogin Used only when running as a scheduled task. * @return Piwik_Archive */ public static function build($idSite, $period, $strDate, $segment = false, $_restrictSitesToLogin = false) { - if ($idSite === 'all') { - $sites = Piwik_SitesManager_API::getInstance()->getSitesIdWithAtLeastViewAccess($_restrictSitesToLogin); - } else { - $sites = Piwik_Site::getIdSitesFromIdSitesString($idSite); - } - - if (!($segment instanceof Piwik_Segment)) { - $segment = new Piwik_Segment($segment, $idSite); + $forceIndexedBySite = false; + $forceIndexedByDate = false; + + // determine site IDs to query from + if (is_array($idSite) + || $idSite == 'all' + ) { + $forceIndexedBySite = true; } - - // idSite=1,3 or idSite=all - if ($idSite === 'all' - || is_array($idSite) - || count($sites) > 1 + $sites = Piwik_Site::getIdSitesFromIdSitesString($idSite, $_restrictSitesToLogin); + + // if a period date string is detected: either 'last30', 'previous10' or 'YYYY-MM-DD,YYYY-MM-DD' + if (is_string($strDate) + && self::isMultiplePeriod($strDate, $period) ) { - $archive = new Piwik_Archive_Array_IndexedBySite($sites, $period, $strDate, $segment, $_restrictSitesToLogin); - } // if a period date string is detected: either 'last30', 'previous10' or 'YYYY-MM-DD,YYYY-MM-DD' - elseif (is_string($strDate) && self::isMultiplePeriod($strDate, $period)) { - $oSite = new Piwik_Site($idSite); - $archive = new Piwik_Archive_Array_IndexedByDate($oSite, $period, $strDate, $segment); - } // case we request a single archive - else { - $oSite = new Piwik_Site($idSite); + $oPeriod = new Piwik_Period_Range($period, $strDate); + $allPeriods = $oPeriod->getSubperiods(); + $forceIndexedByDate = true; + } else { + if (count($sites) == 1) { + $oSite = new Piwik_Site($sites[0]); + } else { + $oSite = null; + } + $oPeriod = Piwik_Archive::makePeriodFromQueryParams($oSite, $period, $strDate); - - $archive = new Piwik_Archive_Single(); - $archive->setPeriod($oPeriod); - $archive->setSite($oSite); - $archive->setSegment($segment); + $allPeriods = array($oPeriod); } - return $archive; + + return new Piwik_Archive( + $sites, $allPeriods, new Piwik_Segment($segment, $sites), $forceIndexedBySite, $forceIndexedByDate); } /** * Creates a period instance using a Piwik_Site instance and two strings describing * the period & date. * - * @param Piwik_Site $site + * @param Piwik_Site|null $site * @param string $strPeriod The period string: day, week, month, year, range * @param string $strDate The date or date range string. * @return Piwik_Period */ public static function makePeriodFromQueryParams($site, $strPeriod, $strDate) { - $tz = $site->getTimezone(); + if ($site === null) { + $tz = 'UTC'; + } else { + $tz = $site->getTimezone(); + } if ($strPeriod == 'range') { $oPeriod = new Piwik_Period_Range('range', $strDate, $tz, Piwik_Date::factory('today', $tz)); } else { $oDate = $strDate; if (!($strDate instanceof Piwik_Date)) { - if ($strDate == 'now' || $strDate == 'today') { + if ($strDate == 'now' + || $strDate == 'today' + ) { $strDate = date('Y-m-d', Piwik_Date::factory('now', $tz)->getTimestamp()); - } elseif ($strDate == 'yesterday' || $strDate == 'yesterdaySameTime') { + } elseif ($strDate == 'yesterday' + || $strDate == 'yesterdaySameTime' + ) { $strDate = date('Y-m-d', Piwik_Date::factory('now', $tz)->subDay(1)->getTimestamp()); } $oDate = Piwik_Date::factory($strDate); @@ -275,63 +426,197 @@ abstract class Piwik_Archive return $oPeriod; } - - abstract public function prepareArchive(); - + /** - * Returns the value of the element $name from the current archive + * Returns the value of the element $name from the current archive * The value to be returned is a numeric value and is stored in the archive_numeric_* tables * - * @param string $name For example Referers_distinctKeywords - * @return float|int|false False if no value with the given name + * @param string|array $names One or more archive names, eg, 'nb_visits', 'Referers_distinctKeywords', + * etc. + * @return numeric|array|false False if no value with the given name, numeric if only one site + * and date and we're not forcing an index, and array if multiple + * sites/dates are queried. */ - abstract public function getNumeric($name); - + public function getNumeric($names) + { + $data = $this->get($names, 'numeric'); + + $resultIndices = $this->getResultIndices(); + $result = $data->getArray($resultIndices); + + // if only one metric is returned, just return it as a numeric value + if (empty($resultIndices) + && count($result) <= 1 + ) { + $result = (float)reset($result); // convert to float in case $result is empty + } + + return $result; + } + /** - * Returns the value of the element $name from the current archive - * - * The value to be returned is a blob value and is stored in the archive_numeric_* tables - * + * Returns the value of the elements in $names from the current archive. + * + * The value to be returned is a blob value and is stored in the archive_blob_* tables. + * * It can return anything from strings, to serialized PHP arrays or PHP objects, etc. * - * @param string $name For example Referers_distinctKeywords - * @return mixed False if no value with the given name + * @param string|array $names One or more archive names, eg, 'Referers_keywordBySearchEngine'. + * @return string|array|false False if no value with the given name, numeric if only one site + * and date and we're not forcing an index, and array if multiple + * sites/dates are queried. */ - abstract public function getBlob($name); - + public function getBlob($names, $idSubtable = null) + { + $data = $this->get($names, 'blob', $idSubtable); + return $data->getArray($this->getResultIndices()); + } + /** - * - * @param $fields - * @return Piwik_DataTable + * Returns the numeric values of the elements in $names as a DataTable. + * + * Note: Every DataTable instance returned will have at most one row in it. + * + * @param string|array $names One or more archive names, eg, 'nb_visits', 'Referers_distinctKeywords', + * etc. + * @return Piwik_DataTable|false False if no value with the given names. Based on the number + * of sites/periods, the result can be a DataTable_Array, which + * contains DataTable instances. */ - abstract public function getDataTableFromNumeric($fields); + public function getDataTableFromNumeric($names) + { + $data = $this->get($names, 'numeric'); + return $data->getDataTable($this->getResultIndices()); + } /** * This method will build a dataTable from the blob value $name in the current archive. + * + * For example $name = 'Referers_searchEngineByKeyword' will return a + * Piwik_DataTable containing all the keywords. If a $idSubtable is given, the method + * will return the subTable of $name. If 'all' is supplied for $idSubtable every subtable + * will be returned. + * + * @param string $name The name of the record to get. + * @param int|string|null $idSubtable The subtable ID (if any) or 'all' if requesting every datatable. + * @return Piwik_DataTable|false + */ + public function getDataTable($name, $idSubtable = null) + { + $data = $this->get($name, 'blob', $idSubtable); + return $data->getDataTable($this->getResultIndices()); + } + + /** + * Same as getDataTable() except that it will also load in memory all the subtables + * for the DataTable $name. You can then access the subtables by using the + * Piwik_DataTable_Manager::getTable() function. * - * For example $name = 'Referers_searchEngineByKeyword' will return a Piwik_DataTable containing all the keywords - * If a idSubTable is given, the method will return the subTable of $name - * - * @param string $name - * @param int $idSubTable or null if requesting the parent table + * @param string $name The name of the record to get. + * @param int|string|null $idSubtable The subtable ID (if any) or 'all' if requesting every datatable. + * @param bool $addMetadataSubtableId Whether to add the DB subtable ID as metadata to each datatable, + * or not. * @return Piwik_DataTable - * @throws exception If the value cannot be found */ - abstract public function getDataTable($name, $idSubTable = null); + public function getDataTableExpanded($name, $idSubtable = null, $addMetadataSubtableId = true) + { + $data = $this->get($name, 'blob', 'all'); + return $data->getExpandedDataTable($this->getResultIndices(), $idSubtable, $addMetadataSubtableId); + } + + /** + * Returns true if we shouldn't launch the archiving process and false if we should. + * + * @return bool + */ + public function isArchivingDisabled() + { + return Piwik_ArchiveProcessing::isArchivingDisabledFor($this->segment, $this->getPeriodLabel()); + } /** - * Same as getDataTable() except that it will also load in memory - * all the subtables for the DataTable $name. - * You can then access the subtables by using the Piwik_DataTable_Manager getTable() + * Returns true if Segmentation is allowed for this user * - * @param string $name - * @param int|null $idSubTable null if requesting the parent table - * @return Piwik_DataTable + * @return bool */ - abstract public function getDataTableExpanded($name, $idSubTable = null); + public static function isSegmentationEnabled() + { + return !Piwik::isUserIsAnonymous() + || Piwik_Config::getInstance()->General['anonymous_user_enable_use_segments_API']; + } + /** + * Indicate if $dateString and $period correspond to multiple periods + * + * @static + * @param $dateString + * @param $period + * @return boolean + */ + public static function isMultiplePeriod($dateString, $period) + { + return (preg_match('/^(last|previous){1}([0-9]*)$/D', $dateString, $regs) + || Piwik_Period_Range::parseDateRange($dateString)) + && $period != 'range'; + } /** + * Indicate if $idSiteString corresponds to multiple sites. + * + * @param string $idSiteString + * @return bool + */ + public static function isMultipleSites($idSiteString) + { + return $idSiteString == 'all' || strpos($idSiteString, ',') !== false; + } + + /** + * Returns the report names for a list of metric/record names. + * + * @see getRequestedReport + * + * @param array $archiveNames + */ + public function getRequestedReports($archiveNames) + { + $result = array(); + foreach ($archiveNames as $name) { + $result[] = self::getRequestedReport($name); + } + return array_unique($result); + } + + /** + * Returns the report name for a metric/record name. + * + * A report name has the following format: {$pluginName}_{$reportId}, eg. VisitFrequency_Metrics. + * The report ID is not used anywhere in Piwik. + */ + public static function getRequestedReport($archiveName) + { + // Core metrics are always processed in Core, for the requested date/period/segment + if (in_array($archiveName, Piwik_ArchiveProcessing::getCoreMetrics()) + || $archiveName == 'max_actions' + ) { + return 'VisitsSummary_CoreMetrics'; + } + // VisitFrequency metrics don't follow the same naming convention (HACK) + else if(strpos($archiveName, '_returning') > 0 + // ignore Goal_visitor_returning_1_1_nb_conversions + && strpos($archiveName, 'Goal_') === false + ) { + return 'VisitFrequency_Metrics'; + } + // Goal_* metrics are processed by the Goals plugin (HACK) + else if(strpos($archiveName, 'Goal_') === 0) { + return 'Goals_Metrics'; + } else { + return $archiveName; + } + } + + /** * Helper - Loads a DataTable from the Archive. * Optionally loads the table recursively, * or optionally fetches a given subtable with $idSubtable @@ -363,98 +648,337 @@ abstract class Piwik_Archive return $dataTable; } - - protected function formatNumericValue($value) + + /** + * Queries archive tables for data and returns the result. + */ + private function get($archiveNames, $archiveDataType, $idSubtable = null) { - // If there is no dot, we return as is - // Note: this could be an integer bigger than 32 bits - if (strpos($value, '.') === false) { - if ($value === false) { - return 0; + if (!is_array($archiveNames)) { + $archiveNames = array($archiveNames); + } + + // apply idSubtable + if ($idSubtable !== null + && $idSubtable != 'all' + ) { + foreach ($archiveNames as &$name) { + $name .= "_$idSubtable"; } - return (float)$value; } - - // Round up the value with 2 decimals - // we cast the result as float because returns false when no visitors - $value = round((float)$value, 2); - return $value; - } - - public function getSegment() - { - return $this->segment; + + $result = new Piwik_Archive_DataCollection( + $archiveNames, $archiveDataType, $this->siteIds, $this->periods, $defaultRow = null); + + $archiveIds = $this->getArchiveIds($archiveNames); + if (empty($archiveIds)) { + return $result; + } + + $archiveData = $this->dataAccess->getArchiveData($archiveIds, $archiveNames, $archiveDataType, $idSubtable); + foreach ($archiveData as $row) { + // values are grouped by idsite (site ID), date1-date2 (date range), then name (field name) + $idSite = $row['idsite']; + $periodStr = $row['date1'].",".$row['date2']; + + if ($archiveDataType == 'numeric') { + $value = $this->formatNumericValue($row['value']); + } else { + $value = $this->uncompress($row['value']); + $result->addMetadata($idSite, $periodStr, 'ts_archived', $row['ts_archived']); + } + + $resultRow = &$result->get($idSite, $periodStr); + $resultRow[$row['name']] = $value; + } + + return $result; } - - public function setSegment(Piwik_Segment $segment) + + /** + * Returns archive IDs for the sites, periods and archive names that are being + * queried. This function will use the idarchive cache if it has the right data, + * query archive tables for IDs w/o launching archiving, or launch archiving and + * get the idarchive from Piwik_ArchiveProcessing instances. + */ + private function getArchiveIds($archiveNames) { - $this->segment = $segment; + $requestedReports = $this->getRequestedReports($archiveNames); + + // figure out which archives haven't been processed (if an archive has been processed, + // then we have the archive IDs in $this->idarchives) + $doneFlags = array(); + $archiveGroups = array(); + foreach ($requestedReports as $report) { + $doneFlag = Piwik_ArchiveProcessing::getDoneStringFlagFor( + $this->segment, $this->getPeriodLabel(), $report); + + $doneFlags[$doneFlag] = true; + if (!isset($this->idarchives[$doneFlag])) { + $archiveGroups[] = $this->getArchiveGroupOfReport($report); + } + } + + $archiveGroups = array_unique($archiveGroups); + + // cache id archives for plugins we haven't processed yet + if (!empty($archiveGroups)) { + if (!$this->isArchivingDisabled()) { + $this->cacheArchiveIdsAfterLaunching($archiveGroups, $requestedReports); + } else { + $this->cacheArchiveIdsWithoutLaunching($requestedReports); + } + } + + // order idarchives by the table month they belong to + $idArchivesByMonth = array(); + foreach (array_keys($doneFlags) as $doneFlag) { + if (empty($this->idarchives[$doneFlag])) { + continue; + } + + foreach ($this->idarchives[$doneFlag] as $dateRange => $idarchives) { + foreach ($idarchives as $id) { + $idArchivesByMonth[$dateRange][] = $id; + } + } + } + + return $idArchivesByMonth; } - + /** - * Sets the site - * - * @param Piwik_Site $site + * Gets the IDs of the archives we're querying for and stores them in $this->archives. + * This function will launch the archiving process for each period/site/plugin if + * metrics/reports have not been calculated/archived already. + * + * @param array $archiveGroups @see getArchiveGroupOfReport + * @param array $requestedReports @see getRequestedReport */ - public function setSite(Piwik_Site $site) + private function cacheArchiveIdsAfterLaunching($archiveGroups, $requestedReports) { - $this->site = $site; + $today = Piwik_Date::today(); + + // for every individual query permutation, launch the archiving process and get the archive ID + foreach ($this->periods as $period) { + $periodStr = $period->getRangeString(); + + $twoDaysBeforePeriod = $period->getDateStart()->subDay(2); + $twoDaysAfterPeriod = $period->getDateEnd()->addDay(2); + + foreach ($this->siteIds as $idSite) { + $site = new Piwik_Site($idSite); + + // if the END of the period is BEFORE the website creation date + // we already know there are no stats for this period + // we add one day to make sure we don't miss the day of the website creation + if ($twoDaysAfterPeriod->isEarlier($site->getCreationDate())) { + $archiveDesc = $this->getArchiveDescriptor($idSite, $period); + Piwik::log("Archive $archiveDesc skipped, archive is before the website was created."); + continue; + } + + // if the starting date is in the future we know there is no visit + if ($twoDaysBeforePeriod->isLater($today)) { + $archiveDesc = $this->getArchiveDescriptor($idSite, $period); + Piwik::log("Archive $archiveDesc skipped, archive is after today."); + continue; + } + + // prepare the ArchiveProcessing instance + $processing = $this->getArchiveProcessingInstance($period); + $processing->setSite($site); + $processing->setPeriod($period); + $processing->setSegment($this->segment); + + $processing->isThereSomeVisits = null; + + // process for each requested report as well + foreach ($archiveGroups as $pluginOrAll) { + if ($pluginOrAll == 'all') { + $pluginOrAll = $this->getPluginForReport(reset($requestedReports)); + } + $report = $pluginOrAll.'_reportsAndMetrics'; + + $doneFlag = Piwik_ArchiveProcessing::getDoneStringFlagFor( + $this->segment, $period->getLabel(), $report); + $this->initializeArchiveIdCache($doneFlag); + + $processing->init(); + $processing->setRequestedReport($report); + + // launch archiving if the requested data hasn't been archived + $idArchive = $processing->loadArchive(); + if (empty($idArchive)) { + $processing->launchArchiving(); + $idArchive = $processing->getIdArchive(); + } + + if (!$processing->isThereSomeVisits()) { + continue; + } + + $this->idarchives[$doneFlag][$periodStr][] = $idArchive; + } + } + } } - + /** - * Gets the site - * - * @return Piwik_Site + * Gets the IDs of the archives we're querying for and stores them in $this->archives. + * This function will not launch the archiving process (and is thus much, much faster + * than cacheArchiveIdsAfterLaunching). + * + * @param array $requestedReports @see getRequestedReport */ - public function getSite() + private function cacheArchiveIdsWithoutLaunching($requestedReports) { - return $this->site; + $periodType = $this->getPeriodLabel(); + + $idarchivesByReport = $this->dataAccess->getArchiveIds( + $this->siteIds, $this->periods, $this->segment, $requestedReports); + + // initialize archive ID cache for each report + foreach ($requestedReports as $report) { + $doneFlag = Piwik_ArchiveProcessing::getDoneStringFlagFor($this->segment, $periodType, $report); + $this->initializeArchiveIdCache($doneFlag); + } + + foreach ($idarchivesByReport as $doneFlag => $idarchivesByDate) { + foreach ($idarchivesByDate as $dateRange => $idarchives) { + foreach ($idarchives as $idarchive) { + $this->idarchives[$doneFlag][$dateRange][] = $idarchive; + } + } + } } - + /** - * Returns the Id site associated with this archive - * - * @return int + * Returns an ArchiveProcessing instance that should be used for a specific + * period. + * + * @param Piwik_Period $period */ - public function getIdSite() + private function getArchiveProcessingInstance($period) + { + $label = $period->getLabel(); + if (!isset($this->processingCache[$label])) { + $this->processingCache[$label] = Piwik_ArchiveProcessing::factory($label); + } + return $this->processingCache[$label]; + } + + private function getPeriodLabel() { - return $this->site->getId(); + return reset($this->periods)->getLabel(); } - + /** - * Returns true if Segmentation is allowed for this user - * - * @return bool + * Returns an array describing what metadata to use when indexing a query result. + * For use with Piwik_Archive_DataCollection. + * + * @return array */ - public static function isSegmentationEnabled() + private function getResultIndices() { - return !Piwik::isUserIsAnonymous() - || Piwik_Config::getInstance()->General['anonymous_user_enable_use_segments_API']; + $indices = array(); + + if (count($this->siteIds) > 1 + || $this->forceIndexedBySite + ) { + $indices['site'] = 'idSite'; + } + + if (count($this->periods) > 1 + || $this->forceIndexedByDate + ) { + $indices['period'] = 'date'; + } + + return $indices; } + private function formatNumericValue($value) + { + // If there is no dot, we return as is + // Note: this could be an integer bigger than 32 bits + if (strpos($value, '.') === false) { + if ($value === false) { + return 0; + } + return (float)$value; + } + + // Round up the value with 2 decimals + // we cast the result as float because returns false when no visitors + return round((float)$value, 2); + } + + private function getArchiveDescriptor($idSite, $period) + { + return "site $idSite, {$period->getLabel()} ({$period->getPrettyString()})"; + } + + private function uncompress($data) + { + return @gzuncompress($data); + } + + private function getAsNonEmptyArray($array, $paramName) + { + if (!is_array($array)) { + $array = array($array); + } + + if (empty($array)) { + throw new Exception("Piwik_Archive::__construct: \$$paramName is empty."); + } + + return $array; + } + /** - * Indicate if $dateString and $period correspond to multiple periods - * - * @static - * @param $dateString - * @param $period - * @return boolean + * Initializes the archive ID cache ($this->idarchives) for a particular 'done' flag. + * + * It is necessary that each archive ID caching function call this method for each + * unique 'done' flag it encounters, since the getArchiveIds function determines + * whether archiving should be launched based on whether $this->idarchives has a + * an entry for a specific 'done' flag. + * + * If this function is not called, then periods with no visits will not add + * entries to the cache. If the archive is used again, SQL will be executed to + * try and find the archive IDs even though we know there are none. */ - public static function isMultiplePeriod($dateString, $period) + private function initializeArchiveIdCache($doneFlag) { - return (preg_match('/^(last|previous){1}([0-9]*)$/D', $dateString, $regs) - || Piwik_Period_Range::parseDateRange($dateString)) - && $period != 'range'; + if (!isset($this->idarchives[$doneFlag])) { + $this->idarchives[$doneFlag] = array(); + } } - + /** - * Indicate if $idSiteString corresponds to multiple sites. - * - * @param string $idSiteString - * @return bool + * Returns the archiving group identifier of a report. + * + * More than one plugin can be called at once. In such a case we don't want to + * launch archiving three times for three plugins if doing it once is enough, + * so getArchiveIds makes sure to get the archive group of all reports. + * + * If the period isn't a range, then all plugins' archiving code is executed. + * If the period is a range, then archiving code is executed individually for + * each plugin. */ - public static function isMultipleSites($idSiteString) + private function getArchiveGroupOfReport($report) { - return $idSiteString == 'all' || strpos($idSiteString, ',') !== false; + if ($this->getPeriodLabel() != 'range') { + return 'all'; + } + + return $this->getPluginForReport($report); + } + + private function getPluginForReport($report) + { + $parts = explode('_', $report); + return $parts[0]; } } |