#!/usr/bin/perl ################################################################################ # dmarcts-report-parser - A Perl based tool to parse DMARC reports from an IMAP # mailbox or from the filesystem, and insert the information into a database. # ( Formerly known as imap-dmarcts ) # # Copyright (C) 2016 TechSneeze.com and John Bieling # # Available at: # https://github.com/techsneeze/dmarcts-report-parser # # This program is free software: you can redistribute it and/or modify it under # the terms of the GNU General Public License as published by the Free Software # Foundation, either version 3 of the License, or (at your option) any later # version. # # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along with # this program. If not, see . ################################################################################ ################################################################################ # The subroutines storeXMLInDatabase() and getXMLFromMessage() are based on # John R. Levine's rddmarc (http://www.taugh.com/rddmarc/). The following # special conditions apply to those subroutines: # # Copyright 2012, Taughannock Networks. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # Redistributions in binary form must reproduce the above copyright notice, this # list of conditions and the following disclaimer in the documentation and/or # other materials provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ################################################################################ # Always be safe use strict; use warnings; # Use these modules use Getopt::Long; use IO::Compress::Gzip qw(gzip $GzipError); #use Data::Dumper; use Mail::IMAPClient; use Mail::Mbox::MessageParser; use MIME::Base64 qw(encode_base64); use MIME::Words qw(decode_mimewords); use MIME::Parser; use MIME::Parser::Filer; use XML::Simple; use DBI; use Socket; use Socket6; use PerlIO::gzip; use File::Basename (); use File::MimeInfo; use IO::Socket::SSL; #use IO::Socket::SSL 'debug3'; ################################################################################ ### usage ###################################################################### ################################################################################ sub show_usage { print "\n"; print " Usage: \n"; print " ./dmarcts-report-parser.pl [OPTIONS] [PATH] \n"; print "\n"; print " This script needs a configuration file called in \n"; print " the current working directory, which defines a database server with credentials \n"; print " and (if used) an IMAP server with credentials. \n"; print "\n"; print " Additionaly, one of the following source options must be provided: \n"; print " -i : Read reports from messages on IMAP server as defined in the \n"; print " config file. \n"; print " -m : Read reports from mbox file(s) provided in PATH. \n"; print " -e : Read reports from MIME email file(s) provided in PATH. \n"; print " -x : Read reports from xml file(s) provided in PATH. \n"; print " -z : Read reports from zip file(s) provided in PATH. \n"; print "\n"; print " The following optional options are allowed: \n"; print " -d : Print debug info. \n"; print " -r : Replace existing reports rather than skipping them. \n"; print " --delete : Delete processed message files (the XML is stored in the \n"; print " database for later reference). \n"; print " --info : Print out number of XML files or emails processed. \n"; print "\n"; } ################################################################################ ### main ####################################################################### ################################################################################ # Define all possible configuration options. our ($debug, $delete_reports, $delete_failed, $reports_replace, $maxsize_xml, $compress_xml, $dbtype, $dbname, $dbuser, $dbpass, $dbhost, $dbport, $db_tx_support, $imapserver, $imapport, $imapuser, $imappass, $imapignoreerror, $imapssl, $imaptls, $imapmovefolder, $imapmovefoldererr, $imapreadfolder, $imapopt, $tlsverify, $processInfo); # defaults $maxsize_xml = 50000; $dbtype = 'mysql'; $db_tx_support = 1; # used in messages my $scriptname = 'dmarcts-report-parser.pl'; # allowed values for the DB columns, also used to build the enum() in the # CREATE TABLE statements in checkDatabase(), in order defined here use constant ALLOWED_DISPOSITION => qw( none quarantine reject unknown ); use constant ALLOWED_DKIM_ALIGN => qw( fail pass unknown ); use constant ALLOWED_SPF_ALIGN => qw( fail pass unknown ); use constant ALLOWED_DKIMRESULT => qw( none pass fail neutral policy temperror permerror unknown ); use constant ALLOWED_SPFRESULT => qw( none neutral pass fail softfail temperror permerror unknown ); # Load script configuration options from local config file. The file is expected # to be in the current working directory. my $conf_file = 'dmarcts-report-parser.conf'; # Get command line options. my %options = (); use constant { TS_IMAP => 0, TS_MESSAGE_FILE => 1, TS_XML_FILE => 2, TS_MBOX_FILE => 3, TS_ZIP_FILE => 4 }; GetOptions( \%options, 'd', 'r', 'x', 'm', 'e', 'i', 'z', 'delete', 'info', 'c' => \$conf_file ); # locate conf file or die if ( -e $conf_file ) { #$conf_file = "./$conf_file"; } elsif( -e (File::Basename::dirname($0) . "/$conf_file" ) ) { $conf_file = ( File::Basename::dirname($0) . "/$conf_file" ); } else { show_usage(); die "$scriptname: Could not read config file '$conf_file' from current working directory or path (" . File::Basename::dirname($0) . ')' } # load conf file with error handling if ( substr($conf_file, 0, 1) ne '/' and substr($conf_file, 0, 1) ne '.') { $conf_file = "./$conf_file"; } my $conf_return = do $conf_file; die "$scriptname: couldn't parse $conf_file: $@" if $@; die "$scriptname: couldn't do $conf_file: $!" unless defined $conf_return; # check config if (!defined $imapreadfolder ) { die "$scriptname: \$imapreadfolder not defined. Check config file"; } if (!defined $imapignoreerror ) { $imapignoreerror = 0; # maintain compatibility to old version } # Evaluate command line options my $source_options = 0; our $reports_source; if (exists $options{m}) { $source_options++; $reports_source = TS_MBOX_FILE; } if (exists $options{x}) { $source_options++; $reports_source = TS_XML_FILE; } if (exists $options{e}) { $source_options++; $reports_source = TS_MESSAGE_FILE; } if (exists $options{i}) { $source_options++; $reports_source = TS_IMAP; } if (exists $options{z}) { $source_options++; $reports_source = TS_ZIP_FILE; } if (exists $options{c}) { $source_options++; } if ($source_options > 1) { show_usage(); die "$scriptname: Only one source option can be used (-i, -x, -m, -e or -z).\n"; } elsif ($source_options == 0) { show_usage(); die "$scriptname: Please provide a source option (-i, -x, -m, -e or -z).\n"; } if ($ARGV[0]) { if ($reports_source == TS_IMAP) { show_usage(); die "$scriptname: The IMAP source option (-i) may not be used together with a PATH.\n"; } } else { if ($reports_source != TS_IMAP && $source_options == 1) { show_usage(); die "$scriptname: The provided source option requires a PATH.\n"; } } # Override config options by command line options. if (exists $options{r}) {$reports_replace = 1;} if (exists $options{d}) {$debug = 1;} if (exists $options{delete}) {$delete_reports = 1;} if (exists $options{info}) {$processInfo = 1;} # Setup connection to database server. our %dbx; my $dbx_file = File::Basename::dirname($0) . "/dbx_$dbtype.pl"; my $dbx_return = do $dbx_file; die "$scriptname: couldn't load DB definition for type $dbtype: $@" if $@; die "$scriptname: couldn't load DB definition for type $dbtype: $!" unless defined $dbx_return; my $dbh = DBI->connect("DBI:$dbtype:database=$dbname;host=$dbhost;port=$dbport", $dbuser, $dbpass) or die "$scriptname: Cannot connect to database\n"; if ($db_tx_support) { $dbh->{AutoCommit} = 0; } checkDatabase($dbh); # Process messages based on $reports_source. if ($reports_source == TS_IMAP) { my $socketargs = ''; my $processedReport = 0; # Disable verify mode for TLS support. if ($imaptls == 1) { if ( $tlsverify == 0 ) { print "use tls without verify servercert.\n" if $debug; $imapopt = [ SSL_verify_mode => SSL_VERIFY_NONE ]; } else { print "use tls with verify servercert.\n" if $debug; $imapopt = [ SSL_verify_mode => SSL_VERIFY_PEER ]; } # The whole point of setting this socket arg is so that we don't get the nasty warning } else { print "using ssl without verify servercert.\n" if $debug; $socketargs = [ SSL_verify_mode => SSL_VERIFY_NONE ]; } print "connection to $imapserver with Ssl => $imapssl, User => $imapuser, Ignoresizeerrors => $imapignoreerror\n" if $debug; # Setup connection to IMAP server. my $imap = Mail::IMAPClient->new( Server => $imapserver, Port => $imapport, Ssl => $imapssl, Starttls => $imapopt, Debug => $debug, Socketargs => $socketargs ) # module uses eval, so we use $@ instead of $! or die "$scriptname: IMAP Failure: $@"; # This connection is finished this way because of the tradgedy of exchange... $imap->User($imapuser); $imap->Password($imappass); $imap->connect(); # Ignore Size Errors if we're using Exchange $imap->Ignoresizeerrors($imapignoreerror); # Set $imap to UID mode, which will force imap functions to use/return # UIDs, instead of message sequence numbers. UIDs are not allowed to # change during a session and are not allowed to be used twice. Looping # over message sequence numbers and deleting a msg in between could have # unwanted side effects. $imap->Uid(1); # How many msgs are we going to process? print "Processing ". $imap->message_count($imapreadfolder)." messages in folder <$imapreadfolder>.\n" if $debug; # Only select and search $imapreadfolder, if we actually # have something to do. if ($imap->message_count($imapreadfolder)) { # Select the mailbox to get messages from. $imap->select($imapreadfolder) or die "$scriptname: IMAP Select Error: $!"; # Store each message as an array element. my @msgs = $imap->search('ALL') or die "$scriptname: Couldn't get all messages\n"; # Loop through IMAP messages. foreach my $msg (@msgs) { my $processResult = processXML(TS_MESSAGE_FILE, $imap->message_string($msg), "IMAP message with UID #".$msg); $processedReport++; if ($processResult & 4) { # processXML returned a value with database error bit enabled, do nothing at all! if ($imapmovefoldererr) { # if we can, move to error folder moveToImapFolder($imap, $msg, $imapmovefoldererr); } else { # do nothing at all next; } } elsif ($processResult & 2) { # processXML return a value with delete bit enabled. $imap->delete_message($msg) or warn "$scriptname: Could not delete IMAP message. [$@]\n"; } elsif ($imapmovefolder) { if ($processResult & 1 || !$imapmovefoldererr) { # processXML processed the XML OK, or it failed and there is no error imap folder moveToImapFolder($imap, $msg, $imapmovefolder); } elsif ($imapmovefoldererr) { # processXML failed and error folder set moveToImapFolder($imap, $msg, $imapmovefoldererr); } } elsif ($imapmovefoldererr && !($processResult & 1)) { # processXML failed, error imap folder set, but imapmovefolder unset. An unlikely setup, but still... moveToImapFolder($imap, $msg, $imapmovefoldererr); } } # Expunge and close the folder. $imap->expunge($imapreadfolder); $imap->close($imapreadfolder); } # We're all done with IMAP here. $imap->logout(); if ( $debug || $processInfo ) { print "$scriptname: Processed $processedReport emails.\n"; } } else { # TS_MESSAGE_FILE or TS_XML_FILE or TS_MBOX_FILE my $counts = 0; foreach my $a (@ARGV) { # Linux bash supports wildcard expansion BEFORE the script is # called, so here we only see a list of files. Other OS behave # different, so we should not depend on that feature: Use glob # on each argument to manually expand the argument, if possible. my @file_list = glob($a); foreach my $f (@file_list) { my $filecontent; if ($reports_source == TS_MBOX_FILE) { my $parser = Mail::Mbox::MessageParser->new({"file_name" => $f, "debug" => $debug, "enable_cache" => 0}); my $num = 0; do { $num++; $filecontent = $parser->read_next_email(); if (defined($filecontent)) { if (processXML(TS_MESSAGE_FILE, $filecontent, "message #$num of mbox file <$f>") & 2) { # processXML return a value with delete bit enabled warn "$scriptname: Removing message #$num from mbox file <$f> is not yet supported.\n"; } $counts++; } } while(defined($filecontent)); } elsif ($reports_source == TS_ZIP_FILE) { # filecontent is zip file $filecontent = getXMLFromZip($f); if (processXML(TS_ZIP_FILE, $filecontent, "xml file <$f>") & 2) { # processXML return a value with delete bit enabled unlink($f); } $counts++; } elsif (open(FILE, "<", $f)) { $filecontent = join("", ); close FILE; if ($reports_source == TS_MESSAGE_FILE) { # filecontent is a mime message with zip or xml part if (processXML(TS_MESSAGE_FILE, $filecontent, "message file <$f>") & 2) { # processXML return a value with delete bit enabled unlink($f); } $counts++; } elsif ($reports_source == TS_XML_FILE) { # filecontent is xml file if (processXML(TS_XML_FILE, $filecontent, "xml file <$f>") & 2) { # processXML return a value with delete bit enabled unlink($f); } $counts++; } else { warn "$scriptname: Unknown reports_source <$reports_source> for file <$f>. Skipped.\n"; } } else { warn "$scriptname: Could not open file <$f>: $!. Skipped.\n"; # Could not retrieve filecontent, the skipped message # will be processed every time the script is run even if # delete_reports and delete_failed is given. The user # has to look at the actual file. } } } if ($debug || $processInfo) { print "$scriptname: Processed $counts messages(s).\n"; } } ################################################################################ ### subroutines ################################################################ ################################################################################ sub moveToImapFolder { my $imap = $_[0]; my $msg = $_[1]; my $imapfolder = $_[2]; print "Moving (copy and delete) IMAP message file to IMAP folder: $imapfolder\n" if $debug; # Try to create $imapfolder, if it does not exist. if (!$imap->exists($imapfolder)) { $imap->create($imapfolder) or warn "$scriptname: Could not create IMAP folder: $imapfolder.\n"; } # Try to move the message to $imapfolder. my $newid = $imap->copy($imapfolder, [ $msg ]); if (!$newid) { warn "$scriptname: Error on moving (copy and delete) processed IMAP message: Could not COPY message to IMAP folder: <$imapfolder>!\n"; warn "$scriptname: Messsage will not be moved/deleted. [$@]\n"; } else { $imap->delete_message($msg) or do { warn "$scriptname: Error on moving (copy and delete) processed IMAP message: Could not DELETE message\n"; warn "$scriptname: after copying it to <$imapfolder>. [$@]\n"; } } } sub processXML { my ($type, $filecontent, $f) = (@_); if ($debug) { print "\n"; print "----------------------------------------------------------------\n"; print "Processing $f \n"; print "----------------------------------------------------------------\n"; print "Type: $type\n"; print "FileContent: $filecontent\n"; print "MSG: $f\n"; print "----------------------------------------------------------------\n"; } my $xml; #TS_XML_FILE or TS_MESSAGE_FILE if ($type == TS_MESSAGE_FILE) {$xml = getXMLFromMessage($filecontent);} elsif ($type == TS_ZIP_FILE) {$xml = $filecontent;} else {$xml = getXMLFromXMLString($filecontent);} # If !$xml, the file/mail is probably not a DMARC report. # So do not storeXMLInDatabase. if ($xml && storeXMLInDatabase($xml) <= 0) { # If storeXMLInDatabase returns false, there was some sort # of database storage failure and we MUST NOT delete the # file, because it has not been pushed into the database. # The user must investigate this issue. warn "$scriptname: Skipping $f due to database errors.\n"; return 5; #xml ok(1), but database error(4), thus no delete (!2) } # Delete processed message, if the --delete option # is given. Failed reports are only deleted, if delete_failed is given. if ($delete_reports && ($xml || $delete_failed)) { if ($xml) { print "Removing after report has been processed.\n" if $debug; return 3; #xml ok (1), delete file (2) } else { # A mail which does not look like a DMARC report # has been processed and should now be deleted. # Print its content so it gets send as cron # message, so the user can still investigate. warn "$scriptname: The $f does not seem to contain a valid DMARC report. Skipped and Removed. Content:\n"; warn $filecontent."\n"; return 2; #xml not ok (!1), delete file (2) } } if ($xml) { return 1; } else { warn "$scriptname: The $f does not seem to contain a valid DMARC report. Skipped.\n"; return 0; } } ################################################################################ # Walk through a mime message and return a reference to the XML data containing # the fields of the first ZIPed XML file embedded into the message. The XML # itself is not checked to be a valid DMARC report. sub getXMLFromMessage { my ($message) = (@_); # fixup type in trustwave SEG mails $message =~ s/ContentType:/Content-Type:/; my $parser = new MIME::Parser; $parser->output_dir("/tmp"); $parser->filer->ignore_filename(1); my $ent = $parser->parse_data($message); my $body = $ent->bodyhandle; my $mtype = $ent->mime_type; my $subj = decode_mimewords($ent->get('subject')); chomp($subj); # Subject always contains a \n. if ($debug) { print "Subject: $subj\n"; print "MimeType: $mtype\n"; } my $location; my $isgzip = 0; if(lc $mtype eq "application/zip") { if ($debug) { print "This is a ZIP file \n"; } $location = $body->path; } elsif (lc $mtype eq "application/gzip" or lc $mtype eq "application/x-gzip") { if ($debug) { print "This is a GZIP file \n"; } $location = $body->path; $isgzip = 1; } elsif (lc $mtype =~ "multipart/") { # At the moment, nease.net messages are multi-part, so we need # to breakdown the attachments and find the zip. if ($debug) { print "This is a multipart attachment \n"; } #print Dumper($ent->parts); my $num_parts = $ent->parts; for (my $i=0; $i < $num_parts; $i++) { my $part = $ent->parts($i); # Find a zip file to work on... if(lc $part->mime_type eq "application/gzip" or lc $part->mime_type eq "application/x-gzip") { $location = $ent->parts($i)->{ME_Bodyhandle}->{MB_Path}; $isgzip = 1; print "$location\n" if $debug; last; # of parts } elsif(lc $part->mime_type eq "application/x-zip-compressed" or $part->mime_type eq "application/zip") { $location = $ent->parts($i)->{ME_Bodyhandle}->{MB_Path}; print "$location\n" if $debug; } elsif(lc $part->mime_type eq "application/octet-stream") { $location = $ent->parts($i)->{ME_Bodyhandle}->{MB_Path}; $isgzip = 1 if $location =~ /\.gz$/; print "$location\n" if $debug; } else { # Skip the attachment otherwise. if ($debug) { print "Skipped an unknown attachment (".lc $part->mime_type.")\n"; } next; # of parts } } } else { ## Clean up dangling mime parts in /tmp of messages without ZIP. my $num_parts = $ent->parts; for (my $i=0; $i < $num_parts; $i++) { if ($debug) { if ($ent->parts($i)->{ME_Bodyhandle} && $ent->parts($i)->{ME_Bodyhandle}->{MB_Path}) { print $ent->parts($i)->{ME_Bodyhandle}->{MB_Path}; } else { print "undef"; } print "\n"; } if($ent->parts($i)->{ME_Bodyhandle}) {$ent->parts($i)->{ME_Bodyhandle}->purge;} } } # If a ZIP has been found, extract XML and parse it. my $xml; if(defined($location)) { if ($debug) { print "body is in " . $location . "\n"; } # Open the zip file and process the XML contained inside. my $unzip = ""; if($isgzip) { open(XML, "<:gzip", $location) or $unzip = "ungzip"; } else { open(XML, "-|", "unzip", "-p", $location) or $unzip = "unzip"; # Will never happen. # Sadly unzip -p never failes, but we can check if the # filehandle points to an empty file and pretend it did # not open/failed. if (eof XML) { $unzip = "unzip"; } } # Read XML if possible (if open) if ($unzip eq "") { $xml = getXMLFromXMLString(join("", )); if (!$xml) { warn "$scriptname: Subject: $subj\n:"; warn "$scriptname: The XML found in ZIP file (temp. location: <$location>) does not seem to be valid XML! \n"; } close XML; } else { warn "$scriptname: Subject: $subj\n:"; warn "$scriptname: Failed to $unzip ZIP file (temp. location: <$location>)! \n"; close XML; } } else { warn "$scriptname: Subject: $subj\n:"; warn "$scriptname: Could not find an embedded ZIP! \n"; } if($body) {$body->purge;} if($ent) {$ent->purge;} return $xml; } ################################################################################ sub getXMLFromZip { my $filename = $_[0]; my $mtype = mimetype($filename); if ($debug) { print "Filename: $filename, MimeType: $mtype\n"; } my $isgzip = 0; if(lc $mtype eq "application/zip") { if ($debug) { print "This is a ZIP file \n"; } } elsif (lc $mtype eq "application/gzip" or lc $mtype eq "application/x-gzip") { if ($debug) { print "This is a GZIP file \n"; } $isgzip = 1; } else { if ($debug) { print "This is not an archive file \n"; } } # If a ZIP has been found, extract XML and parse it. my $xml; if(defined($filename)) { # Open the zip file and process the XML contained inside. my $unzip = ""; if($isgzip) { open(XML, "<:gzip", $filename) or $unzip = "ungzip"; } else { open(XML, "-|", "unzip", "-p", $filename) or $unzip = "unzip"; # Will never happen. # Sadly unzip -p never failes, but we can check if the # filehandle points to an empty file and pretend it did # not open/failed. if (eof XML) { $unzip = "unzip"; } } # Read XML if possible (if open) if ($unzip eq "") { $xml = getXMLFromXMLString(join("", )); if (!$xml) { warn "$scriptname: The XML found in ZIP file (<$filename>) does not seem to be valid XML! \n"; } close XML; } else { warn "$scriptname: Failed to $unzip ZIP file (<$filename>)! \n"; close XML; } } else { warn "$scriptname: Could not find an <$filename>! \n"; } return $xml; } ################################################################################ sub getXMLFromXMLString { my $raw_xml = $_[0]; eval { my $xs = XML::Simple->new(); my $ref = $xs->XMLin($raw_xml, SuppressEmpty => ''); $ref->{'raw_xml'} = $raw_xml; return $ref; } or do { return undef; } } ################################################################################ # Extract fields from the XML report data hash and store them into the database. # return 1 when ok, 0, for serious error and -1 for minor errors sub storeXMLInDatabase { my $xml = $_[0]; # $xml is a reference to the xml data my $from = $xml->{'report_metadata'}->{'date_range'}->{'begin'}; my $to = $xml->{'report_metadata'}->{'date_range'}->{'end'}; my $org = $xml->{'report_metadata'}->{'org_name'}; my $id = $xml->{'report_metadata'}->{'report_id'}; my $email = $xml->{'report_metadata'}->{'email'}; my $extra = $xml->{'report_metadata'}->{'extra_contact_info'}; my $domain = undef; my $policy_adkim = undef; my $policy_aspf = undef; my $policy_p = undef; my $policy_sp = undef; my $policy_pct = undef; if (ref $xml->{'policy_published'} eq "HASH") { $domain = $xml->{'policy_published'}->{'domain'}; $policy_adkim = $xml->{'policy_published'}->{'adkim'}; $policy_aspf = $xml->{'policy_published'}->{'aspf'}; $policy_p = $xml->{'policy_published'}->{'p'}; $policy_sp = $xml->{'policy_published'}->{'sp'}; $policy_pct = $xml->{'policy_published'}->{'pct'}; } else { $domain = $xml->{'policy_published'}[0]->{'domain'}; $policy_adkim = $xml->{'policy_published'}[0]->{'adkim'}; $policy_aspf = $xml->{'policy_published'}[0]->{'aspf'}; $policy_p = $xml->{'policy_published'}[0]->{'p'}; $policy_sp = $xml->{'policy_published'}[0]->{'sp'}; $policy_pct = $xml->{'policy_published'}[0]->{'pct'}; } my $record = $xml->{'record'}; if ( ! defined($record) ) { warn "$scriptname: $org: $id: No records in report. Skipped.\n"; return 0; } # see if already stored my $sth = $dbh->prepare(qq{SELECT org, serial FROM report WHERE reportid=?}); $sth->execute($id); while ( my ($xorg,$sid) = $sth->fetchrow_array() ) { if ($reports_replace) { # $sid is the serial of a report with reportid=$id # Remove this $sid from rptrecord and report table, but # try to continue on failure rather than skipping. print "$scriptname: $org: $id: Replacing data.\n"; $dbh->do(qq{DELETE from rptrecord WHERE serial=?}, undef, $sid); if ($dbh->errstr) { warn "$scriptname: $org: $id: Cannot remove report data from database. Try to continue.\n"; } $dbh->do(qq{DELETE from report WHERE serial=?}, undef, $sid); if ($dbh->errstr) { warn "$scriptname: $org: $id: Cannot remove report from database. Try to continue.\n"; } } else { print "$scriptname: $org: $id: Already have report, skipped\n"; # Do not store in DB, but return true, so the message can # be moved out of the way, if configured to do so. return 1; } } my $sql = qq{INSERT INTO report(mindate,maxdate,domain,org,reportid,email,extra_contact_info,policy_adkim, policy_aspf, policy_p, policy_sp, policy_pct, raw_xml) VALUES($dbx{epoch_to_timestamp_fn}(?),$dbx{epoch_to_timestamp_fn}(?),?,?,?,?,?,?,?,?,?,?,?)}; my $storexml = $xml->{'raw_xml'}; if ($compress_xml) { my $gzipdata; if(!gzip(\$storexml => \$gzipdata)) { warn "$scriptname: $org: $id: Cannot add gzip XML to database ($GzipError). Skipped.\n"; rollback($dbh); return 0; $storexml = ""; } else { $storexml = encode_base64($gzipdata, ""); } } if (length($storexml) > $maxsize_xml) { warn "$scriptname: $org: $id: Skipping storage of large XML (".length($storexml)." bytes) as defined in config file.\n"; $storexml = ""; } $dbh->do($sql, undef, $from, $to, $domain, $org, $id, $email, $extra, $policy_adkim, $policy_aspf, $policy_p, $policy_sp, $policy_pct, $storexml); if ($dbh->errstr) { warn "$scriptname: $org: $id: Cannot add report to database. Skipped.\n"; rollback($dbh); return 0; } my $serial = $dbh->last_insert_id(undef, undef, 'report', undef); if ($debug){ print " serial $serial \n"; } sub dorow($$$$) { my ($serial,$recp,$org,$id) = @_; my %r = %$recp; my $ip = $r{'row'}->{'source_ip'}; if ( $ip eq '' ) { warn "$scriptname: $org: $id: source_ip is empty. Skipped.\n"; rollback($dbh); return 0; } my $count = $r{'row'}->{'count'}; my $disp = $r{'row'}->{'policy_evaluated'}->{'disposition'}; if ( ! grep { $_ eq $disp } ALLOWED_DISPOSITION ) { $disp = 'unknown'; }; # some reports don't have dkim/spf, "unknown" is default for these my $dkim_align = $r{'row'}->{'policy_evaluated'}->{'dkim'}; if ( ! grep { $_ eq $dkim_align } ALLOWED_DKIM_ALIGN ) { $dkim_align = 'unknown'; }; my $spf_align = $r{'row'}->{'policy_evaluated'}->{'spf'}; if ( ! grep { $_ eq $spf_align } ALLOWED_SPF_ALIGN ) { $spf_align = 'unknown'; }; my $identifier_hfrom = $r{'identifiers'}->{'header_from'}; my ($dkim, $dkimresult, $spf, $spfresult, $reason); if(ref $r{'auth_results'} ne "HASH"){ warn "$scriptname: $org: $id: Report has no auth_results data. Skipped.\n"; rollback($dbh); return 0; } my $rp = $r{'auth_results'}->{'dkim'}; if(ref $rp eq "HASH") { $dkim = $rp->{'domain'}; $dkim = undef if ref $dkim eq "HASH"; $dkimresult = $rp->{'result'}; } else { # array, i.e. multiple dkim results (usually from multiple domains) # glom sigs together $dkim = join '/',map { my $d = $_->{'domain'}; ref $d eq "HASH"?"": $d } @$rp; # report results my $rp_len = scalar(@$rp); for ( my $i=0; $i < $rp_len; $i++ ) { if ( $rp->[$i]->{'result'} eq "pass" ) { # If any one dkim result is a "pass", this should yield an overall "pass" and immediately exit the for loop, ignoring any remaing results # See # RFC 6376, DomainKeys Identified Mail (DKIM) Signatures # Section 4.2: https://tools.ietf.org/html/rfc6376#section-4.2 and # Section 6.1: https://tools.ietf.org/html/rfc6376#section-6.1 # And the GitHub issues at # https://github.com/techsneeze/dmarcts-report-viewer/issues/47 # https://github.com/techsneeze/dmarcts-report-parser/pull/78 $dkimresult = "pass"; last; } else { for ( my $j=$i+1; $j < $rp_len; $j++ ) { if ( $rp->[$i]->{'result'} eq $rp->[$j]->{'result'} ) { # Compare each dkim result to the next one to see if all of the dkim results are the same. # If all of the dkim results are the same, that will be the overall result. # If any of them are different, and don't contain a "pass" result, then $dkimresult will be empty $dkimresult = $rp->[0]->{'result'}; } else { $dkimresult = 'unknown'; } } } } } if ( ! defined($dkimresult) || ! grep { $_ eq $dkimresult } ALLOWED_DKIMRESULT ) { $dkimresult = 'unknown'; }; $rp = $r{'auth_results'}->{'spf'}; if(ref $rp eq "HASH") { $spf = $rp->{'domain'}; $spf = undef if ref $spf eq "HASH"; $spfresult = $rp->{'result'}; } else { # array, i.e. multiple dkim results (usually from multiple domains) # glom sigs together $spf = join '/',map { my $d = $_->{'domain'}; ref $d eq "HASH"?"": $d } @$rp; # report results my $rp_len = scalar(@$rp); for ( my $i=0; $i < $rp_len; $i++ ) { if ( $rp->[$i]->{'result'} eq "pass" ) { # If any one spf result is a "pass", this should yield an overall "pass" and immediately exit the for loop, ignoring any remaing results $spfresult = "pass"; last; } else { for ( my $j=$i+1; $j < $rp_len; $j++ ) { if ( $rp->[$i]->{'result'} eq $rp->[$j]->{'result'} ) { # Compare each spf result to the next one to see if all of the spf results are the same. # If all of the spf results are the same, that will be the overall result. # If any of them are different, and don't contain a "pass" result, then $spfresult will be empty $spfresult = $rp->[0]->{'result'}; } else { $spfresult = 'unknown'; } } } } } if ( ! defined($spfresult) || ! grep { $_ eq $spfresult } ALLOWED_SPFRESULT ) { $spfresult = 'unknown'; }; $rp = $r{'row'}->{'policy_evaluated'}->{'reason'}; if(ref $rp eq "HASH") { $reason = $rp->{'type'}; } else { $reason = join '/',map { $_->{'type'} } @$rp; } #print "ip=$ip, count=$count, disp=$disp, r=$reason,"; #print "dkim=$dkim/$dkimresult, spf=$spf/$spfresult\n"; # What type of IP address? my ($nip, $iptype, $ipval); if ($debug) { print "ip=$ip\n"; } if($nip = inet_pton(AF_INET, $ip)) { $ipval = unpack "N", $nip; $iptype = "ip"; } elsif($nip = inet_pton(AF_INET6, $ip)) { $ipval = $dbx{to_hex_string}($nip); $iptype = "ip6"; } else { warn "$scriptname: $org: $id: ??? mystery ip $ip\n"; rollback($dbh); return 0; } $dbh->do(qq{INSERT INTO rptrecord(serial,$iptype,rcount,disposition,spf_align,dkim_align,reason,dkimdomain,dkimresult,spfdomain,spfresult,identifier_hfrom) VALUES(?,$ipval,?,?,?,?,?,?,?,?,?,?)},undef,$serial,$count,$disp,$spf_align,$dkim_align,$reason,$dkim,$dkimresult,$spf,$spfresult,$identifier_hfrom); if ($dbh->errstr) { warn "$scriptname: $org: $id: Cannot add report data to database. Skipped.\n"; rollback($dbh); return 0; } return 1; } my $res = 1; if(ref $record eq "HASH") { if ($debug){ print "single record\n"; } $res = -1 if !dorow($serial,$record,$org,$id); } elsif(ref $record eq "ARRAY") { if ($debug){ print "multi record\n"; } foreach my $row (@$record) { $res = -1 if !dorow($serial,$row,$org,$id); } } else { warn "$scriptname: $org: $id: mystery type " . ref($record) . "\n"; } if ($debug && $res <= 0) { print "Result $res XML: $xml->{raw_xml}\n"; } if ($res <= 0) { if ($db_tx_support) { warn "$scriptname: $org: $id: Cannot add records to rptrecord. Rolling back DB transaction.\n"; rollback($dbh); } else { warn "$scriptname: $org: $id: errors while adding to rptrecord, serial $serial records likely obsolete.\n"; } } else { if ($db_tx_support) { $dbh->commit; if ($dbh->errstr) { warn "$scriptname: $org: $id: Cannot commit transaction.\n"; } } } return $res; } ################################################################################ # Tries to roll back the transaction (if enabled). # If an error happens, warn the user, but continue execution. sub rollback { my $dbh = $_[0]; if ($db_tx_support) { $dbh->rollback; if ($dbh->errstr) { warn "$scriptname: Cannot rollback transaction.\n"; } } } ################################################################################ # Check, if the database contains needed tables and columns. The idea is, that # the user only has to create the database/database_user. All needed tables and # columns are created automatically. Furthermore, if new columns are introduced, # the user does not need to make any changes to the database himself. sub checkDatabase { my $dbh = $_[0]; my $tables = $dbx{tables}; # Create missing tables and missing columns. for my $table ( keys %{$tables} ) { if (!db_tbl_exists($dbh, $table)) { # Table does not exist, build CREATE TABLE cmd from tables hash. print "$scriptname: Adding missing table <" . $table . "> to the database.\n"; my $sql_create_table = "CREATE TABLE " . $table . " (\n"; for (my $i=0; $i <= $#{$tables->{$table}{"column_definitions"}}; $i+=3) { my $col_name = $tables->{$table}{"column_definitions"}[$i]; my $col_type = $tables->{$table}{"column_definitions"}[$i+1]; my $col_opts = $tables->{$table}{"column_definitions"}[$i+2]; # add comma if second or later entry if ($i != 0) { $sql_create_table .= ",\n"; } $sql_create_table .= "$col_name $col_type $col_opts"; } # Add additional_definitions, if defined. if ($tables->{$table}{"additional_definitions"} ne "") { $sql_create_table .= ",\n" . $tables->{$table}{"additional_definitions"}; } # Add options. $sql_create_table .= ") " . $tables->{$table}{"table_options"} . ";"; # Create table. print "$sql_create_table\n" if $debug; $dbh->do($sql_create_table); # Create indexes. foreach my $sql_idx (@{$tables->{$table}{indexes}}) { print "$sql_idx\n" if $debug; $dbh->do($sql_idx); } } else { #Table exists, get current columns in this table from DB. my %db_col_exists = db_column_info($dbh, $table); # Check if all needed columns are present, if not add them at the desired position. my $insert_pos; for (my $i=0; $i <= $#{$tables->{$table}{"column_definitions"}}; $i+=3) { my $col_name = $tables->{$table}{"column_definitions"}[$i]; my $col_type = $tables->{$table}{"column_definitions"}[$i+1]; my $col_opts = $tables->{$table}{"column_definitions"}[$i+2]; if (!$db_col_exists{$col_name}) { # add column my $sql_add_column = $dbx{add_column}($table, $col_name, $col_type, $col_opts, $insert_pos); print "$sql_add_column\n" if $debug; $dbh->do($sql_add_column); } elsif ($db_col_exists{$col_name} !~ /^\Q$col_type\E/) { # modify column my $sql_modify_column = $dbx{modify_column}($table, $col_name, $col_type, $col_opts); print "$sql_modify_column\n" if $debug; $dbh->do($sql_modify_column); } $insert_pos = $col_name; } } } $dbh->commit; } ################################################################################ # Checks if the table exists in the database sub db_tbl_exists { my ($dbh, $table) = @_; my @res = $dbh->tables(undef, undef, $table, undef); return scalar @res > 0; } ################################################################################ # Gets columns and their data types in a given table sub db_column_info { my ($dbh, $table) = @_; my $db_info = $dbh->column_info(undef, undef, $table, undef)->fetchall_hashref('COLUMN_NAME'); my %columns; foreach my $column (keys(%$db_info)) { $columns{$column} = $db_info->{$column}{$dbx{column_info_type_col}}; } return %columns; }