diff options
author | Christoph Wurst <christoph@winzerhof-wurst.at> | 2019-10-10 14:36:35 +0300 |
---|---|---|
committer | Christoph Wurst <christoph@winzerhof-wurst.at> | 2020-01-08 16:01:06 +0300 |
commit | 150716df3480175ac223f59f9349eba8cedf6524 (patch) | |
tree | 4151035d6b00aba594799fbb4c1a4b5c935c206d /lib | |
parent | b57fa26fc1401cf3afc4289e35c6ed5d0cc863b4 (diff) |
Use KItinerary to extract information from emails and attachments
Signed-off-by: Christoph Wurst <christoph@winzerhof-wurst.at>
Diffstat (limited to 'lib')
-rwxr-xr-x | lib/Controller/MessagesController.php | 13 | ||||
-rw-r--r-- | lib/IMAP/MessageMapper.php | 132 | ||||
-rw-r--r-- | lib/Integration/KItinerary/ItineraryExtractor.php | 88 | ||||
-rw-r--r-- | lib/Integration/Psr/LoggerAdapter.php | 158 | ||||
-rw-r--r-- | lib/Model/IMAPMessage.php | 3 | ||||
-rw-r--r-- | lib/Service/ItineraryService.php | 106 |
6 files changed, 496 insertions, 4 deletions
diff --git a/lib/Controller/MessagesController.php b/lib/Controller/MessagesController.php index d4ca2c4a6..22b5d7b03 100755 --- a/lib/Controller/MessagesController.php +++ b/lib/Controller/MessagesController.php @@ -31,7 +31,6 @@ declare(strict_types=1); namespace OCA\Mail\Controller; use Exception; -use OCA\Mail\Account; use OCA\Mail\Contracts\IMailManager; use OCA\Mail\Contracts\IMailSearch; use OCA\Mail\Exception\ServiceException; @@ -40,6 +39,7 @@ use OCA\Mail\Http\HtmlResponse; use OCA\Mail\Model\IMAPMessage; use OCA\Mail\Service\AccountService; use OCA\Mail\Service\IMailBox; +use OCA\Mail\Service\ItineraryService; use OCP\AppFramework\Controller; use OCP\AppFramework\Db\DoesNotExistException; use OCP\AppFramework\Http; @@ -47,7 +47,6 @@ use OCP\AppFramework\Http\ContentSecurityPolicy; use OCP\AppFramework\Http\JSONResponse; use OCP\AppFramework\Http\Response; use OCP\AppFramework\Http\TemplateResponse; -use OCP\AppFramework\Utility\ITimeFactory; use OCP\Files\Folder; use OCP\Files\IMimeTypeDetector; use OCP\IL10N; @@ -67,6 +66,9 @@ class MessagesController extends Controller { /** @var IMailSearch */ private $mailSearch; + /** @var ItineraryService */ + private $itineraryService; + /** @var string */ private $currentUserId; @@ -101,6 +103,7 @@ class MessagesController extends Controller { AccountService $accountService, IMailManager $mailManager, IMailSearch $mailSearch, + ItineraryService $itineraryService, string $UserId, $userFolder, ILogger $logger, @@ -112,6 +115,7 @@ class MessagesController extends Controller { $this->accountService = $accountService; $this->mailManager = $mailManager; $this->mailSearch = $mailSearch; + $this->itineraryService = $itineraryService; $this->currentUserId = $UserId; $this->userFolder = $userFolder; $this->logger = $logger; @@ -180,6 +184,11 @@ class MessagesController extends Controller { base64_decode($folderId), $id ); + $json['itineraries'] = $this->itineraryService->extract( + $account, + base64_decode($folderId), + $id + ); $json['attachments'] = array_map(function ($a) use ($accountId, $folderId, $id) { return $this->enrichDownloadUrl($accountId, $folderId, $id, $a); }, $json['attachments']); diff --git a/lib/IMAP/MessageMapper.php b/lib/IMAP/MessageMapper.php index 93c4376a9..f42f8bc35 100644 --- a/lib/IMAP/MessageMapper.php +++ b/lib/IMAP/MessageMapper.php @@ -31,11 +31,13 @@ use Horde_Imap_Client_Fetch_Query; use Horde_Imap_Client_Ids; use Horde_Imap_Client_Socket; use Horde_Mime_Mail; +use Horde_Mime_Part; use OCA\Mail\Db\Mailbox; use OCA\Mail\Exception\ServiceException; use OCA\Mail\Model\IMAPMessage; use OCP\AppFramework\Db\DoesNotExistException; use OCP\ILogger; +use function iterator_to_array; class MessageMapper { @@ -214,4 +216,134 @@ class MessageMapper { ); } + public function getHtmlBody(Horde_Imap_Client_Socket $client, + string $mailbox, + int $id): ?string { + $messageQuery = new Horde_Imap_Client_Fetch_Query(); + $messageQuery->envelope(); + $messageQuery->structure(); + + $result = $client->fetch($mailbox, $messageQuery, [ + 'ids' => new Horde_Imap_Client_Ids([$id]), + ]); + + if (($message = $result->first()) === null) { + throw new DoesNotExistException('Message does not exist'); + } + + $structure = $message->getStructure(); + $htmlPartId = $structure->findBody('html'); + if ($htmlPartId === null) { + // No HTML part + return null; + } + $partsQuery = new Horde_Imap_Client_Fetch_Query(); + $partsQuery->fullText(); + foreach ($structure->partIterator() as $structurePart) { + /** @var Horde_Mime_Part $structurePart */ + $partsQuery->bodyPart($structurePart->getMimeId(), [ + 'decode' => true, + 'peek' => true, + ]); + $partsQuery->bodyPartSize($structurePart->getMimeId()); + if ($structurePart->getMimeId() === $htmlPartId) { + $partsQuery->mimeHeader($structurePart->getMimeId(), [ + 'peek' => true + ]); + } + + } + + $parts = $client->fetch($mailbox, $partsQuery, [ + 'ids' => new Horde_Imap_Client_Ids([$id]), + ]); + + foreach ($parts as $part) { + /** @var Horde_Imap_Client_Data_Fetch $part */ + $stream = $part->getBodyPart($htmlPartId, true); + $partData = $structure->getPart($htmlPartId); + $partData->setContents($stream, [ + 'usestream' => true, + ]); + + $body = $part->getBodyPart($htmlPartId); + if ($body !== null) { + $structurePart = $structure[$htmlPartId]; + $mimeHeaders = $part->getMimeHeader($htmlPartId, Horde_Imap_Client_Data_Fetch::HEADER_PARSE); + if ($enc = $mimeHeaders->getValue('content-transfer-encoding')) { + $structure->setTransferEncoding($enc); + } + $structure->setContents($body); + $decoded = $structure->getContents(); + + return $decoded; + } + } + + return null; + } + + public function getRawAttachments(Horde_Imap_Client_Socket $client, + string $mailbox, + int $id): array { + $messageQuery = new Horde_Imap_Client_Fetch_Query(); + $messageQuery->structure(); + + $result = $client->fetch($mailbox, $messageQuery, [ + 'ids' => new Horde_Imap_Client_Ids([$id]), + ]); + + if (($structureResult = $result->first()) === null) { + throw new DoesNotExistException('Message does not exist'); + } + + $structure = $structureResult->getStructure(); + $partsQuery = new Horde_Imap_Client_Fetch_Query(); + $partsQuery->fullText(); + foreach ($structure->partIterator() as $part) { + /** @var Horde_Mime_Part $part */ + if ($part->getMimeId() === "0") { + // Ignore message header + continue; + } + + $partsQuery->bodyPart($part->getMimeId(), [ + 'peek' => true, + ]); + $partsQuery->mimeHeader($part->getMimeId(), [ + 'peek' => true + ]); + $partsQuery->bodyPartSize($part->getMimeId()); + } + + $parts = $client->fetch($mailbox, $partsQuery, [ + 'ids' => new Horde_Imap_Client_Ids([$id]), + ]); + if (($messageData = $parts->first()) === null) { + throw new DoesNotExistException('Message does not exist'); + } + + $attachments = []; + foreach ($structure->partIterator() as $key => $part) { + /** @var Horde_Mime_Part $part */ + + if (!$part->isAttachment()) { + continue; + } + + $stream = $messageData->getBodyPart($key, true); + $mimeHeaders = $messageData->getMimeHeader($key, Horde_Imap_Client_Data_Fetch::HEADER_PARSE); + if ($enc = $mimeHeaders->getValue('content-transfer-encoding')) { + $part->setTransferEncoding($enc); + } + $part->setContents($stream, [ + 'usestream' => true, + ]); + $decoded = $part->getContents(); + + $attachments[] = $decoded; + } + return $attachments; + } + } diff --git a/lib/Integration/KItinerary/ItineraryExtractor.php b/lib/Integration/KItinerary/ItineraryExtractor.php new file mode 100644 index 000000000..9e6e6e932 --- /dev/null +++ b/lib/Integration/KItinerary/ItineraryExtractor.php @@ -0,0 +1,88 @@ +<?php declare(strict_types=1); + +/** + * @copyright 2019 Christoph Wurst <christoph@winzerhof-wurst.at> + * + * @author 2019 Christoph Wurst <christoph@winzerhof-wurst.at> + * + * @license GNU AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +namespace OCA\Mail\Integration\KItinerary; + +use ChristophWurst\KItinerary\Adapter; +use ChristophWurst\KItinerary\Exception\KItineraryRuntimeException; +use ChristophWurst\KItinerary\Flatpak\FlatpakAdapter; +use ChristophWurst\KItinerary\Itinerary; +use ChristophWurst\KItinerary\ItineraryExtractor as Extractor; +use ChristophWurst\KItinerary\Bin\BinaryAdapter; +use OCA\Mail\Integration\Psr\LoggerAdapter; +use OCP\ILogger; + +class ItineraryExtractor { + + /** @var BinaryAdapter */ + private $binAdapter; + + /** @var FlatpakAdapter */ + private $flatpakAdapter; + + /** @var ILogger */ + private $logger; + + /** @var Adapter */ + private $adapter; + + public function __construct(BinaryAdapter $binAdapter, + FlatpakAdapter $flatpakAdapter, + ILogger $logger) { + $this->binAdapter = $binAdapter; + $this->flatpakAdapter = $flatpakAdapter; + $this->logger = $logger; + } + + private function findAvailableAdapter(): ?Adapter { + if ($this->binAdapter->isAvailable()) { + $this->binAdapter->setLogger(new LoggerAdapter($this->logger)); + return $this->binAdapter; + } + if ($this->flatpakAdapter->isAvailable()) { + return $this->flatpakAdapter; + } + return null; + } + + public function extract(string $content): Itinerary { + if ($this->adapter === null) { + $this->adapter = $this->findAvailableAdapter() ?? false; + } + if ($this->adapter === false) { + $this->logger->warning('KItinerary binary adapter is not available, can\'t extract information'); + + return new Itinerary(); + } + + try { + return (new Extractor($this->adapter))->extractFromString($content); + } catch (KItineraryRuntimeException $e) { + $this->logger->logException($e, [ + 'message' => 'Could not extract itinerary function from KItinerary integration', + ]); + return new Itinerary(); + } + } + +} diff --git a/lib/Integration/Psr/LoggerAdapter.php b/lib/Integration/Psr/LoggerAdapter.php new file mode 100644 index 000000000..34ad54558 --- /dev/null +++ b/lib/Integration/Psr/LoggerAdapter.php @@ -0,0 +1,158 @@ +<?php declare(strict_types=1); + +/** + * @copyright 2019 Christoph Wurst <christoph@winzerhof-wurst.at> + * + * @author 2019 Christoph Wurst <christoph@winzerhof-wurst.at> + * + * @license GNU AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +namespace OCA\Mail\Integration\Psr; + +use OCP\ILogger; +use Psr\Log\LoggerInterface; + +class LoggerAdapter implements LoggerInterface { + + /** @var ILogger */ + private $logger; + + public function __construct(ILogger $logger) { + $this->logger = $logger; + } + + /** + * System is unusable. + * + * @param string $message + * @param array $context + * + * @return void + */ + public function emergency($message, array $context = array()) { + $this->logger->emergency($message, $context); + } + + /** + * Action must be taken immediately. + * + * Example: Entire website down, database unavailable, etc. This should + * trigger the SMS alerts and wake you up. + * + * @param string $message + * @param array $context + * + * @return void + */ + public function alert($message, array $context = array()) { + $this->logger->alert($message, $context); + } + + /** + * Critical conditions. + * + * Example: Application component unavailable, unexpected exception. + * + * @param string $message + * @param array $context + * + * @return void + */ + public function critical($message, array $context = array()) { + $this->logger->critical($message, $context); + } + + /** + * Runtime errors that do not require immediate action but should typically + * be logged and monitored. + * + * @param string $message + * @param array $context + * + * @return void + */ + public function error($message, array $context = array()) { + $this->logger->error($message, $context); + } + + /** + * Exceptional occurrences that are not errors. + * + * Example: Use of deprecated APIs, poor use of an API, undesirable things + * that are not necessarily wrong. + * + * @param string $message + * @param array $context + * + * @return void + */ + public function warning($message, array $context = array()) { + $this->logger->warning($message, $context); + } + + /** + * Normal but significant events. + * + * @param string $message + * @param array $context + * + * @return void + */ + public function notice($message, array $context = array()) { + $this->logger->notice($message, $context); + } + + /** + * Interesting events. + * + * Example: User logs in, SQL logs. + * + * @param string $message + * @param array $context + * + * @return void + */ + public function info($message, array $context = array()) { + $this->logger->info($message, $context); + } + + /** + * Detailed debug information. + * + * @param string $message + * @param array $context + * + * @return void + */ + public function debug($message, array $context = array()) { + $this->logger->debug($message, $context); + } + + /** + * Logs with an arbitrary level. + * + * @param mixed $level + * @param string $message + * @param array $context + * + * @return void + */ + public function log($level, $message, array $context = array()) { + $this->logger->log($level, $message, $context); + } + +} diff --git a/lib/Model/IMAPMessage.php b/lib/Model/IMAPMessage.php index 74ec3aa04..c37451ba1 100644 --- a/lib/Model/IMAPMessage.php +++ b/lib/Model/IMAPMessage.php @@ -29,7 +29,6 @@ declare(strict_types=1); namespace OCA\Mail\Model; -use Closure; use Exception; use Horde_Imap_Client; use Horde_Imap_Client_Data_Envelope; @@ -48,7 +47,6 @@ use OCA\Mail\Service\Html; use OCP\AppFramework\Db\DoesNotExistException; use OCP\Files\File; use OCP\Files\SimpleFS\ISimpleFile; -use OCP\Util; use function base64_encode; use function mb_convert_encoding; @@ -413,6 +411,7 @@ class IMAPMessage implements IMessage, JsonSerializable { public function jsonSerialize(): array { return [ 'id' => $this->getUid(), + 'messageId' => $this->getMessageId(), 'from' => $this->getFrom()->jsonSerialize(), 'to' => $this->getTo()->jsonSerialize(), 'cc' => $this->getCC()->jsonSerialize(), diff --git a/lib/Service/ItineraryService.php b/lib/Service/ItineraryService.php new file mode 100644 index 000000000..a318bf4e4 --- /dev/null +++ b/lib/Service/ItineraryService.php @@ -0,0 +1,106 @@ +<?php declare(strict_types=1); + +/** + * @copyright 2019 Christoph Wurst <christoph@winzerhof-wurst.at> + * + * @author 2019 Christoph Wurst <christoph@winzerhof-wurst.at> + * + * @license GNU AGPL version 3 or any later version + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +namespace OCA\Mail\Service; + +use ChristophWurst\KItinerary\Itinerary; +use OCA\Mail\Account; +use OCA\Mail\Db\MailboxMapper; +use OCA\Mail\IMAP\IMAPClientFactory; +use OCA\Mail\IMAP\MessageMapper; +use OCA\Mail\Integration\KItinerary\ItineraryExtractor; +use OCP\ICacheFactory; +use OCP\ILogger; +use function array_reduce; +use function count; +use function json_encode; + +class ItineraryService { + + /** @var IMAPClientFactory */ + private $clientFactory; + + /** @var MailboxMapper */ + private $mailboxMapper; + + /** @var MessageMapper */ + private $messageMapper; + + /** @var ItineraryExtractor */ + private $extractor; + + /** @var ILogger */ + private $logger; + + public function __construct(IMAPClientFactory $clientFactory, + MailboxMapper $mailboxMapper, + MessageMapper $messageMapper, + ItineraryExtractor $extractor, + ICacheFactory $cacheFactory, + ILogger $logger) { + $this->clientFactory = $clientFactory; + $this->mailboxMapper = $mailboxMapper; + $this->messageMapper = $messageMapper; + $this->extractor = $extractor; + $this->cache = $cacheFactory->createLocal(); + $this->logger = $logger; + } + + public function extract(Account $account, string $mailbox, int $id): Itinerary { + $mailbox = $this->mailboxMapper->find($account, $mailbox); + + $cacheKey = 'mail_itinerary_' . $account->getId() . '_' . $mailbox->getMailbox() . '_' . $id; + if ($cached = ($this->cache->get($cacheKey))) { + return Itinerary::fromJson($cached); + } + + $client = $this->clientFactory->getClient($account); + + $itinerary = new Itinerary(); + $htmlBody = $this->messageMapper->getHtmlBody($client, $mailbox->getMailbox(), $id); + if ($htmlBody !== null) { + $itinerary = $itinerary->merge( + $this->extractor->extract($htmlBody) + ); + $this->logger->debug('Extracted ' . count($itinerary) . ' itinerary entries from the message HTML body'); + } else { + $this->logger->debug('Message does not have an HTML body, can\'t extract itinerary info'); + } + $attachments = $this->messageMapper->getRawAttachments($client, $mailbox->getMailbox(), $id); + $itinerary = array_reduce($attachments, function(Itinerary $combined, string $attachment) { + $extracted = $this->extractor->extract($attachment); + $this->logger->debug('Extracted ' . count($extracted) . ' itinerary entries from an attachment'); + return $combined->merge($extracted); + }, $itinerary); + + // Lastly, we put the extracted data through the tool again, so it can combine + // and pick the most relevant information + $final = $this->extractor->extract(json_encode($itinerary)); + $this->logger->debug('Reduced ' . count($itinerary) . ' itinerary entries to ' . count($final) . ' entries'); + + $this->cache->set($cacheKey, json_encode($final)); + + return $final; + } + +} |