Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/nextcloud/mail.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/lib/IMAP
diff options
context:
space:
mode:
authorChristoph Wurst <christoph@winzerhof-wurst.at>2020-04-06 17:22:00 +0300
committerChristoph Wurst <christoph@winzerhof-wurst.at>2020-04-07 18:03:14 +0300
commit72a5703e84f5271237826b12f3552481baefaf47 (patch)
tree4751a4359d7394f7a181c4d485efadd2b469625b /lib/IMAP
parente6824a3705f7be7c763b5994c3c04420807c17f9 (diff)
Lower the memory footprint of the initial message cache sync
The initial message sync has to fetch potentially large amounts of data and insert that into the database. To work around limitations with sync requests triggered by web requests the process had already been made interruptible and resumable. This means we never insert all the data right away. Yet, the IMAP code fetched all UIDs before we capped it to a maximum number of results per sync attempt. Depending on the mailbox size this operation could require and allocate a lot of memory. On some setups with lower memory limits, the process was aborted by the web server due to PHP memory exhaustion. This patch modifies the IMAP code to optimize the memory usage by limiting the amount of data that is fetched with each initial sync attempt. The algorithm works as follows. IMAP allows us to search in a range with a lower and an upper bound UID. While we know the highest known UID from the current cache values, we can't derive the range for the next page from that as UIDs are not continuous but might have holes due to deleted messages. If we assume that messages of a mailbox are roughly distributed equally across the assigned UIDs we can guess the max UID for the next range. So we ask the server for min and max UIDs. The min or our known highest UID is always the lower bound. Then we can calculate the distribution rate from the min, max and number of messages and build the upper bound. On average this will fetch about the expected number of messages. It could be more, but it could also be less. It shouldn't matter in most cases. Signed-off-by: Christoph Wurst <christoph@winzerhof-wurst.at>
Diffstat (limited to 'lib/IMAP')
-rw-r--r--lib/IMAP/MessageMapper.php119
1 files changed, 88 insertions, 31 deletions
diff --git a/lib/IMAP/MessageMapper.php b/lib/IMAP/MessageMapper.php
index 4e32aa0c4..a048bfaaf 100644
--- a/lib/IMAP/MessageMapper.php
+++ b/lib/IMAP/MessageMapper.php
@@ -23,12 +23,14 @@ declare(strict_types=1);
namespace OCA\Mail\IMAP;
+use Generator;
use Horde_Imap_Client;
use Horde_Imap_Client_Base;
use Horde_Imap_Client_Data_Fetch;
use Horde_Imap_Client_Exception;
use Horde_Imap_Client_Fetch_Query;
use Horde_Imap_Client_Ids;
+use Horde_Imap_Client_Search_Query;
use Horde_Imap_Client_Socket;
use Horde_Mime_Mail;
use Horde_Mime_Part;
@@ -73,42 +75,97 @@ class MessageMapper {
* @param Horde_Imap_Client_Socket $client
* @param Mailbox $mailbox
*
+ * @param int $maxResults
* @param int|null $highestKnownUid
*
- * @return IMAPMessage[]
+ * @return array
* @throws Horde_Imap_Client_Exception
*/
public function findAll(Horde_Imap_Client_Socket $client,
Mailbox $mailbox,
- int $highestKnownUid = null,
- int $maxResults = null): array {
+ int $maxResults,
+ ?int $highestKnownUid = 0): array {
+ /**
+ * To prevent memory exhaustion, we don't want to just ask for a list of
+ * all UIDs and limit them client-side. Instead we can (hopefully
+ * efficiently) query the min and max UID as well as the number of
+ * messages. Based on that we assume that UIDs are somewhat distributed
+ * equally and build a page to fetch.
+ *
+ * This logic might return fewer or more results than $maxResults
+ */
+
+ $metaResults = $client->search(
+ $mailbox->getName(),
+ null,
+ [
+ 'results' => [
+ Horde_Imap_Client::SEARCH_RESULTS_MIN,
+ Horde_Imap_Client::SEARCH_RESULTS_MAX,
+ Horde_Imap_Client::SEARCH_RESULTS_COUNT,
+ ]
+ ]
+ );
+ /** @var int $min */
+ $min = $metaResults['min'];
+ /** @var int $max */
+ $max = $metaResults['max'];
+ /** @var int $total */
+ $total = $metaResults['count'];
+
+ if ($total === 0) {
+ // Nothing to fetch for this mailbox
+ return [
+ 'messages' => [],
+ 'all' => true,
+ ];
+ }
+
+ // The inclusive range of UIDs
+ $totalRange = $max - $min + 1;
+ // Here we assume somewhat equally distributed UIDs
+ // +1 is added to fetch all messages with the rare case of strictly
+ // continuous UIDs and fractions
+ $estimatedPageSize = (int)(($totalRange / $total) * $maxResults) + 1;
+ // Determine max UID to fetch, but don't exceed the known maximum
+ $upper = min(
+ $max,
+ $highestKnownUid + $estimatedPageSize
+ );
+
$query = new Horde_Imap_Client_Fetch_Query();
$query->uid();
-
- return $this->findByIds(
- $client,
- $mailbox->getName(),
- array_slice(
- array_filter(
- array_map(
- function(Horde_Imap_Client_Data_Fetch $data) {
- return $data->getUid();
- },
- iterator_to_array($client->fetch(
- $mailbox->getName(),
- $query,
- []
- ))
- ),
- function(int $uid) use ($highestKnownUid) {
- // Don't load the ones we already know
- return $highestKnownUid === null || $uid > $highestKnownUid;
- }
+ $uidsToFetch = array_slice(
+ array_filter(
+ array_map(
+ function (Horde_Imap_Client_Data_Fetch $data) {
+ return $data->getUid();
+ },
+ iterator_to_array($client->fetch(
+ $mailbox->getName(),
+ $query,
+ [
+ 'ids' => new Horde_Imap_Client_Ids(($highestKnownUid + 1) . ':' . $upper)
+ ]
+ ))
),
- 0,
- $maxResults
- )
+
+ function (int $uid) use ($highestKnownUid) {
+ // Don't load the ones we already know
+ return $highestKnownUid === null || $uid > $highestKnownUid;
+ }
+ ),
+ 0,
+ $maxResults
);
+ return [
+ 'messages' => $this->findByIds(
+ $client,
+ $mailbox->getName(),
+ $uidsToFetch
+ ),
+ 'all' => $upper === $max,
+ ];
}
/**
@@ -263,9 +320,9 @@ class MessageMapper {
* @throws Horde_Imap_Client_Exception
*/
public function removeFlag(Horde_Imap_Client_Socket $client,
- Mailbox $mailbox,
- int $uid,
- string $flag): void {
+ Mailbox $mailbox,
+ int $uid,
+ string $flag): void {
$client->store(
$mailbox->getName(),
[
@@ -448,7 +505,7 @@ class MessageMapper {
'ids' => new Horde_Imap_Client_Ids($uids),
]);
- return array_map(function(Horde_Imap_Client_Data_Fetch $fetchData) use ($mailbox, $client) {
+ return array_map(function (Horde_Imap_Client_Data_Fetch $fetchData) use ($mailbox, $client) {
$hasAttachments = false;
$text = '';
@@ -493,7 +550,7 @@ class MessageMapper {
}
return new MessageStructureData($hasAttachments, $text);
- }, iterator_to_array($structures->getIterator()));
+ }, iterator_to_array($structures->getIterator()));
}
}