diff options
author | Christoph Wurst <christoph@winzerhof-wurst.at> | 2020-06-26 22:20:04 +0300 |
---|---|---|
committer | Christoph Wurst <christoph@winzerhof-wurst.at> | 2020-07-08 11:40:15 +0300 |
commit | 9f77500c3cd41872b0e4b53c7a7121c2b5cd45c2 (patch) | |
tree | 3550f4b192376e82f2444653edfc6d0e7a763816 /lib/IMAP | |
parent | 6bf5354d68ba155f955c68a95f522299b0269d43 (diff) |
Implement the jwz threading algorithm
Signed-off-by: Christoph Wurst <christoph@winzerhof-wurst.at>
Diffstat (limited to 'lib/IMAP')
-rw-r--r-- | lib/IMAP/Threading/Container.php | 140 | ||||
-rw-r--r-- | lib/IMAP/Threading/Message.php | 75 | ||||
-rw-r--r-- | lib/IMAP/Threading/ThreadBuilder.php | 220 |
3 files changed, 435 insertions, 0 deletions
diff --git a/lib/IMAP/Threading/Container.php b/lib/IMAP/Threading/Container.php new file mode 100644 index 000000000..77d695ea3 --- /dev/null +++ b/lib/IMAP/Threading/Container.php @@ -0,0 +1,140 @@ +<?php + +declare(strict_types=1); + +/** + * @copyright 2020 Christoph Wurst <christoph@winzerhof-wurst.at> + * + * @author 2020 Christoph Wurst <christoph@winzerhof-wurst.at> + * + * @license GNU AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +namespace OCA\Mail\IMAP\Threading; + +use RuntimeException; +use function array_key_exists; +use function spl_object_id; + +class Container { + + /** @var Message|null */ + private $message; + + /** @var bool */ + private $root; + + /** @var Container|null */ + private $parent; + + /** @var Container[] */ + private $children = []; + + private function __construct(?Message $message, + bool $root = false) { + $this->message = $message; + $this->root = $root; + } + + public static function root(): self { + return new self( + null, + true + ); + } + + public static function empty(): self { + return new self( + null + ); + } + + public static function with(Message $message): self { + return new self( + $message + ); + } + + public function fill(Message $message): void { + $this->message = $message; + } + + public function hasMessage(): bool { + return $this->message !== null; + } + + public function getMessage(): ?Message { + return $this->message; + } + + public function isRoot(): bool { + return $this->root; + } + + public function hasParent(): bool { + return $this->parent !== null; + } + + public function getParent(): Container { + if ($this->isRoot()) { + throw new RuntimeException('Container root has no parent'); + } + return $this->parent; + } + + public function setParent(?Container $parent): void { + $this->unlink(); + $this->parent = $parent; + if ($parent !== null) { + $parent->children[spl_object_id($this)] = $this; + } + } + + public function hasAncestor(Container $container): bool { + if ($this->parent === $container) { + return true; + } + if ($this->parent !== null) { + return $this->parent->hasAncestor($container); + } + return false; + } + + public function unlink(): void { + if ($this->parent !== null) { + $this->parent->removeChild($this); + } + $this->parent = null; + } + + private function removeChild(Container $child): void { + $objId = spl_object_id($child); + if (array_key_exists($objId, $this->children)) { + unset($this->children[$objId]); + } + } + + public function hasChildren(): bool { + return !empty($this->children); + } + + /** + * @return Container[] + */ + public function getChildren(): array { + return $this->children; + } +} diff --git a/lib/IMAP/Threading/Message.php b/lib/IMAP/Threading/Message.php new file mode 100644 index 000000000..7b3684963 --- /dev/null +++ b/lib/IMAP/Threading/Message.php @@ -0,0 +1,75 @@ +<?php + +declare(strict_types=1); + +/** + * @copyright 2020 Christoph Wurst <christoph@winzerhof-wurst.at> + * + * @author 2020 Christoph Wurst <christoph@winzerhof-wurst.at> + * + * @license GNU AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +namespace OCA\Mail\IMAP\Threading; + +use function str_replace; +use function strpos; + +class Message { + + /** @var string */ + private $subject; + + /** @var string */ + private $id; + + /** @var string[] */ + private $references; + + /** + * @param string[] $references + */ + public function __construct(string $subject, + string $id, + array $references) { + $this->subject = $subject; + $this->id = $id; + $this->references = $references; + } + + public function hasReSubject(): bool { + return strpos($this->getSubject(), 'Re:') === 0; + } + + public function getSubject(): string { + return $this->subject; + } + + public function getBaseSubject(): string { + return str_replace('Re:', '', $this->getSubject()); + } + + public function getId(): string { + return $this->id; + } + + /** + * @return string[] + */ + public function getReferences(): array { + return $this->references; + } +} diff --git a/lib/IMAP/Threading/ThreadBuilder.php b/lib/IMAP/Threading/ThreadBuilder.php new file mode 100644 index 000000000..1667fcc35 --- /dev/null +++ b/lib/IMAP/Threading/ThreadBuilder.php @@ -0,0 +1,220 @@ +<?php + +declare(strict_types=1); + +/** + * @copyright 2020 Christoph Wurst <christoph@winzerhof-wurst.at> + * + * @author 2020 Christoph Wurst <christoph@winzerhof-wurst.at> + * + * @license GNU AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +namespace OCA\Mail\IMAP\Threading; + +use function array_key_exists; +use function count; + +class ThreadBuilder { + + /** + * @param Message[] $messages + * + * @return Container[] + */ + public function build(array $messages): array { + // Step 1 + $idTable = $this->buildIdTable($messages); + + // Step 2 + $rootContainer = $this->buildRootContainer($idTable); + + // Step 3 + unset($idTable); + + // Step 4 + $this->pruneContainers($rootContainer); + + // Step 5 + $this->groupBySubject($rootContainer); + + // Return the children with reset numeric keys + return array_values($rootContainer->getChildren()); + } + + /** + * @param Message[] $messages + * + * @return Container[] + */ + private function buildIdTable(array $messages): array { + /** @var Container[] $idTable */ + $idTable = []; + + foreach ($messages as $message) { + /** @var Message $message */ + // Step 1.A + $container = $idTable[$message->getId()] ?? null; + if ($container !== null && !$container->hasMessage()) { + $container->fill($message); + } else { + $container = $idTable[$message->getId()] = Container::with($message); + } + + // Step 1.B + $parent = null; + foreach ($message->getReferences() as $reference) { + $refContainer = $idTable[$reference] ?? null; + if ($refContainer === null) { + $refContainer = $idTable[$reference] = Container::empty(); + } + if (!$refContainer->hasParent() + && !($parent !== null && !$parent->hasAncestor($refContainer)) + && !($parent !== null && !$refContainer->hasAncestor($parent))) { + // TODO: Do not add a link if adding that link would introduce a loop: that is, before asserting A->B, search down the children of B to see if A is reachable, and also search down the children of A to see if B is reachable. If either is already reachable as a child of the other, don't add the link. + $refContainer->setParent($parent); + } + + $parent = $refContainer; + } + + // Step 1.C + //$parentId = $message->getReferences()[count($message->getReferences()) - 1] ?? null; + //$container->setParent($idTable[$parentId] ?? null); + if ($parent === null || !$parent->hasAncestor($container)) { + $container->setParent($parent); + } + } + return $idTable; + } + + /** + * @param Container[] $idTable + * + * @return Container + */ + private function buildRootContainer(array $idTable): Container { + $rootContainer = Container::empty(); + foreach ($idTable as $id => $container) { + if (!$container->hasParent()) { + $container->setParent($rootContainer); + } + } + return $rootContainer; + } + + /** + * @param Container $container + */ + private function pruneContainers(Container $root): void { + /** @var Container $container */ + foreach ($root->getChildren() as $id => $container) { + // Step 4.A + if (!$container->hasMessage() && !$container->hasChildren()) { + $container->unlink(); + continue; + } + + // Step 4.B + if (!$container->hasMessage() && $container->hasChildren()) { + if (!$container->getParent()->isRoot() && count($container->getChildren()) > 1) { + // Do not promote + continue; + } + + foreach ($container->getChildren() as $child) { + $parent = $container->getParent(); + $child->setParent($parent); + $container->unlink(); + } + } + + // Descend recursively (we don't care about the returned array here + // but only for the root set) + $this->pruneContainers($container); + } + } + + /** + * @param Container $root + */ + private function groupBySubject(Container $root): void { + // Step 5.A + /** @var Container[] $subjectTable */ + $subjectTable = []; + + // Step 5.B + foreach ($root->getChildren() as $container) { + $subject = $this->getSubTreeSubject($container); + if ($subject === '') { + continue; + } + + $existingContainer = $subjectTable[$subject] ?? null; + $existingMessage = $existingContainer !== null ? $existingContainer->getMessage() : null; + $thisMessage = $container->getMessage(); + if (!array_key_exists($subject, $subjectTable) + || (!$container->hasMessage() && $existingContainer !== null && $existingContainer->hasMessage()) + || ($existingMessage !== null && $existingMessage->hasReSubject() && $thisMessage !== null && !$thisMessage->hasReSubject())) { + $subjectTable[$subject] = $container; + } + } + + // Step 5.C + foreach ($root->getChildren() as $container) { + $subject = $this->getSubTreeSubject($container); + $subjectContainer = $subjectTable[$subject] ?? null; + if ($subjectContainer === null || $subjectContainer === $container) { + continue; + } + + if (!$container->hasMessage() && !$subjectContainer->hasMessage()) { + // Merge the current subject container into this one and replace it + foreach ($subjectContainer->getChildren() as $child) { + $child->setParent($container); + } + $subjectTable[$subject] = $container; + } elseif (!$container->hasMessage() && $subjectContainer->hasMessage()) { + $subjectContainer->setParent($container); + } elseif ($container->hasMessage() && !$subjectContainer->hasMessage()) { + $container->setParent($subjectContainer); + } elseif ($subjectContainer->hasMessage() && !$subjectContainer->getMessage()->hasReSubject() + && $container->hasMessage() && $container->getMessage()->hasReSubject()) { + $container->setParent($subjectContainer); + $subjectTable[$subject]; + } else { + $new = Container::empty(); + $container->setParent($new); + $subjectContainer->setParent($new); + $new->setParent($root); + $subjectTable[$subject] = $new; + } + } + } + + private function getSubTreeSubject(Container $container): string { + if (($message = $container->getMessage()) !== null) { + return $message->getBaseSubject(); + } + + $firstChild = $container->getChildren()[0] ?? null; + if ($firstChild === null || ($message = $firstChild->getMessage()) === null) { + // should not happen + return ''; + } + return $message->getBaseSubject(); + } +} |