diff options
author | TechSneeze <dave@techsneeze.com> | 2019-06-28 09:17:06 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-06-28 09:17:06 +0300 |
commit | 841f6903ade2653b4601b56c5a3e8b1fa120f9bb (patch) | |
tree | ead0910961eaf6dbfe37e6c2c7845086ecaf3767 | |
parent | 45c652c8a2e65ca902dc5306b53335ffeff3abe8 (diff) | |
parent | 6797fe30b3fe0b9a7d4c1485814bae116e59f007 (diff) |
Merge pull request #64 from islander/zip-parser
Add support for parsing zipped reports from PATH
-rw-r--r-- | README.md | 7 |
-rwxr-xr-x | dmarcts-report-parser.pl | 90 |
2 files changed, 90 insertions, 7 deletions
@@ -18,19 +18,19 @@ To install dependencies... ### on Debian: ``` -apt-get install libmail-imapclient-perl libmime-tools-perl libxml-simple-perl \ +apt-get install libfile-mimeinfo-perl libmail-imapclient-perl libmime-tools-perl libxml-simple-perl \ libclass-dbi-mysql-perl libio-socket-inet6-perl libio-socket-ip-perl libperlio-gzip-perl \ libmail-mbox-messageparser-perl unzip ``` ### on Fedora (Fedora 23): ``` -sudo dnf install perl-Mail-IMAPClient perl-MIME-tools perl-XML-Simple perl-DBI \ +sudo dnf install perl-File-MimeInfo perl-Mail-IMAPClient perl-MIME-tools perl-XML-Simple perl-DBI \ perl-Socket6 perl-PerlIO-gzip perl-DBD-MySQL unzip ``` ### on CentOS (CentOS 7): ``` yum install https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm -yum install perl-Mail-IMAPClient perl-MIME-tools perl-XML-Simple perl-DBI \ +yum install perl-File-MimeInfo perl-Mail-IMAPClient perl-MIME-tools perl-XML-Simple perl-DBI \ perl-Socket6 perl-PerlIO-gzip perl-DBD-MySQL unzip perl-Mail-Mbox-MessageParser ``` @@ -104,6 +104,7 @@ One of the following source options must be provided: # -m : Read reports from mbox file(s) provided in PATH. # -e : Read reports from MIME email file(s) provided in PATH. # -x : Read reports from xml file(s) provided in PATH. +# -z : Read reports from zip file(s) provided in PATH. ``` The following options are always allowed: diff --git a/dmarcts-report-parser.pl b/dmarcts-report-parser.pl index 7015948..818b4b3 100755 --- a/dmarcts-report-parser.pl +++ b/dmarcts-report-parser.pl @@ -72,6 +72,7 @@ use Socket; use Socket6; use PerlIO::gzip; use File::Basename (); +use File::MimeInfo; use IO::Socket::SSL; #use IO::Socket::SSL 'debug3'; @@ -153,8 +154,8 @@ if (!defined $imapignoreerror ) { # Get command line options. 
my %options = (); -use constant { TS_IMAP => 0, TS_MESSAGE_FILE => 1, TS_XML_FILE => 2, TS_MBOX_FILE => 3 }; -GetOptions( \%options, 'd', 'r', 'x', 'm', 'e', 'i', 'delete' ); +use constant { TS_IMAP => 0, TS_MESSAGE_FILE => 1, TS_XML_FILE => 2, TS_MBOX_FILE => 3, TS_ZIP_FILE => 4 }; +GetOptions( \%options, 'd', 'r', 'x', 'm', 'e', 'i', 'z', 'delete' ); # Evaluate command line options my $source_options = 0; @@ -180,12 +181,17 @@ if (exists $options{i}) { $reports_source = TS_IMAP; } +if (exists $options{z}) { + $source_options++; + $reports_source = TS_ZIP_FILE; +} + if ($source_options > 1) { show_usage(); - die "Only one source option can be used (-i, -x, -m or -e).\n"; + die "Only one source option can be used (-i, -x, -m, -e or -z).\n"; } elsif ($source_options == 0) { show_usage(); - die "Please provide a source option (-i, -x, -m or -e).\n"; + die "Please provide a source option (-i, -x, -m, -e or -z).\n"; } if ($ARGV[0]) { @@ -335,6 +341,14 @@ if ($reports_source == TS_IMAP) { } } while(defined($filecontent)); + } elsif ($reports_source == TS_ZIP_FILE) { + # filecontent is zip file + $filecontent = getXMLFromZip($f); + if (processXML(TS_ZIP_FILE, $filecontent, "xml file <$f>") & 2) { + # processXML return a value with delete bit enabled + unlink($f); + } + $counts++; } elsif (open FILE, $f) { $filecontent = join("", <FILE>); @@ -390,6 +404,7 @@ sub processXML { my $xml; #TS_XML_FILE or TS_MESSAGE_FILE if ($type == TS_MESSAGE_FILE) {$xml = getXMLFromMessage($filecontent);} + elsif ($type == TS_ZIP_FILE) {$xml = $filecontent;} else {$xml = getXMLFromXMLString($filecontent);} # If !$xml, the file/mail is probably not a DMARC report. 
@@ -568,6 +583,73 @@ sub getXMLFromMessage { return $xml; } +################################################################################ + +sub getXMLFromZip { + my $filename = $_[0]; + my $mtype = mimetype($filename); + + if (open FILE, $filename) { + if ($debug) { + print "Filename: $filename, MimeType: $mtype\n"; + } + } + + my $isgzip = 0; + + if(lc $mtype eq "application/zip") { + if ($debug) { + print "This is a ZIP file \n"; + } + } elsif (lc $mtype eq "application/gzip" or lc $mtype eq "application/x-gzip") { + if ($debug) { + print "This is a GZIP file \n"; + } + + $isgzip = 1; + } else { + if ($debug) { + print "This is not an archive file \n"; + } + } + + # If a ZIP has been found, extract XML and parse it. + my $xml; + if(defined($filename)) { + # Open the zip file and process the XML contained inside. + my $unzip = ""; + if($isgzip) { + open(XML, "<:gzip", $filename) + or $unzip = "ungzip"; + } else { + open(XML,"unzip -p " . $filename . " |") + or $unzip = "unzip"; # Will never happen. + + # Sadly unzip -p never failes, but we can check if the + # filehandle points to an empty file and pretend it did + # not open/failed. + if (eof XML) { + $unzip = "unzip"; + close XML; + } + } + + # Read XML if possible (if open) + if ($unzip eq "") { + $xml = getXMLFromXMLString(join("", <XML>)); + if (!$xml) { + print "The XML found in ZIP file (<$filename>) does not seem to be valid XML! "; + } + close XML; + } else { + print "Failed to $unzip ZIP file (<$filename>)! "; + } + } else { + print "Could not find an <$filename>! "; + } + + return $xml; +} ################################################################################ |