Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/techsneeze/dmarcts-report-parser.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTechSneeze <dave@techsneeze.com>2019-06-28 09:17:06 +0300
committerGitHub <noreply@github.com>2019-06-28 09:17:06 +0300
commit841f6903ade2653b4601b56c5a3e8b1fa120f9bb (patch)
treeead0910961eaf6dbfe37e6c2c7845086ecaf3767
parent45c652c8a2e65ca902dc5306b53335ffeff3abe8 (diff)
parent6797fe30b3fe0b9a7d4c1485814bae116e59f007 (diff)
Merge pull request #64 from islander/zip-parser
Add support parse zipped reports from PATH
-rw-r--r--README.md7
-rwxr-xr-xdmarcts-report-parser.pl90
2 files changed, 90 insertions, 7 deletions
diff --git a/README.md b/README.md
index c82314a..b855a68 100644
--- a/README.md
+++ b/README.md
@@ -18,19 +18,19 @@ To install dependencies...
### on Debian:
```
-apt-get install libmail-imapclient-perl libmime-tools-perl libxml-simple-perl \
+apt-get install libfile-mimeinfo-perl libmail-imapclient-perl libmime-tools-perl libxml-simple-perl \
libclass-dbi-mysql-perl libio-socket-inet6-perl libio-socket-ip-perl libperlio-gzip-perl \
libmail-mbox-messageparser-perl unzip
```
### on Fedora (Fedora 23):
```
-sudo dnf install perl-Mail-IMAPClient perl-MIME-tools perl-XML-Simple perl-DBI \
+sudo dnf install perl-File-MimeInfo perl-Mail-IMAPClient perl-MIME-tools perl-XML-Simple perl-DBI \
perl-Socket6 perl-PerlIO-gzip perl-DBD-MySQL unzip
```
### on CentOS (CentOS 7):
```
yum install https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
-yum install perl-Mail-IMAPClient perl-MIME-tools perl-XML-Simple perl-DBI \
+yum install perl-File-MimeInfo perl-Mail-IMAPClient perl-MIME-tools perl-XML-Simple perl-DBI \
perl-Socket6 perl-PerlIO-gzip perl-DBD-MySQL unzip perl-Mail-Mbox-MessageParser
```
@@ -104,6 +104,7 @@ One of the following source options must be provided:
# -m : Read reports from mbox file(s) provided in PATH.
# -e : Read reports from MIME email file(s) provided in PATH.
# -x : Read reports from xml file(s) provided in PATH.
+# -z : Read reports from zip file(s) provided in PATH.
```
The following options are always allowed:
diff --git a/dmarcts-report-parser.pl b/dmarcts-report-parser.pl
index 7015948..818b4b3 100755
--- a/dmarcts-report-parser.pl
+++ b/dmarcts-report-parser.pl
@@ -72,6 +72,7 @@ use Socket;
use Socket6;
use PerlIO::gzip;
use File::Basename ();
+use File::MimeInfo;
use IO::Socket::SSL;
#use IO::Socket::SSL 'debug3';
@@ -153,8 +154,8 @@ if (!defined $imapignoreerror ) {
# Get command line options.
my %options = ();
-use constant { TS_IMAP => 0, TS_MESSAGE_FILE => 1, TS_XML_FILE => 2, TS_MBOX_FILE => 3 };
-GetOptions( \%options, 'd', 'r', 'x', 'm', 'e', 'i', 'delete' );
+use constant { TS_IMAP => 0, TS_MESSAGE_FILE => 1, TS_XML_FILE => 2, TS_MBOX_FILE => 3, TS_ZIP_FILE => 4 };
+GetOptions( \%options, 'd', 'r', 'x', 'm', 'e', 'i', 'z', 'delete' );
# Evaluate command line options
my $source_options = 0;
@@ -180,12 +181,17 @@ if (exists $options{i}) {
$reports_source = TS_IMAP;
}
+if (exists $options{z}) {
+ $source_options++;
+ $reports_source = TS_ZIP_FILE;
+}
+
if ($source_options > 1) {
show_usage();
- die "Only one source option can be used (-i, -x, -m or -e).\n";
+ die "Only one source option can be used (-i, -x, -m, -e or -z).\n";
} elsif ($source_options == 0) {
show_usage();
- die "Please provide a source option (-i, -x, -m or -e).\n";
+ die "Please provide a source option (-i, -x, -m, -e or -z).\n";
}
if ($ARGV[0]) {
@@ -335,6 +341,14 @@ if ($reports_source == TS_IMAP) {
}
} while(defined($filecontent));
+ } elsif ($reports_source == TS_ZIP_FILE) {
+ # filecontent is zip file
+ $filecontent = getXMLFromZip($f);
+ if (processXML(TS_ZIP_FILE, $filecontent, "xml file <$f>") & 2) {
+ # processXML returned a value with the delete bit set
+ unlink($f);
+ }
+ $counts++;
} elsif (open FILE, $f) {
$filecontent = join("", <FILE>);
@@ -390,6 +404,7 @@ sub processXML {
my $xml; #TS_XML_FILE or TS_MESSAGE_FILE
if ($type == TS_MESSAGE_FILE) {$xml = getXMLFromMessage($filecontent);}
+ elsif ($type == TS_ZIP_FILE) {$xml = $filecontent;}
else {$xml = getXMLFromXMLString($filecontent);}
# If !$xml, the file/mail is probably not a DMARC report.
@@ -568,6 +583,73 @@ sub getXMLFromMessage {
return $xml;
}
+################################################################################
+
+# getXMLFromZip(FILENAME)
+#
+# Extracts the report contained in the archive FILENAME and hands its text to
+# getXMLFromXMLString().
+#
+# The archive type is chosen from the MIME type reported by mimetype():
+#   application/gzip, application/x-gzip -> read through the :gzip PerlIO layer
+#   anything else (including application/zip) -> piped through "unzip -p"
+#
+# Returns whatever getXMLFromXMLString() returns for the extracted text, or
+# undef when no filename was given or the archive could not be opened/was
+# empty. Diagnostics are printed to STDOUT (without a trailing newline, as the
+# caller appends further text).
+sub getXMLFromZip {
+    my $filename = $_[0];
+    my $xml;
+
+    # Validate the argument before it is used anywhere below.
+    if (!defined($filename)) {
+        print "Could not find a ZIP file: no filename given! ";
+        return $xml;
+    }
+
+    # mimetype() may return undef (e.g. for a missing file); normalise it so
+    # the comparisons and the debug output below never touch an undef value.
+    my $mtype = mimetype($filename);
+    $mtype = "" if !defined($mtype);
+
+    if ($debug) {
+        print "Filename: $filename, MimeType: $mtype\n";
+    }
+
+    my $mtype_lc = lc $mtype;
+    my $isgzip   = 0;
+
+    if ($mtype_lc eq "application/zip") {
+        if ($debug) {
+            print "This is a ZIP file \n";
+        }
+    } elsif ($mtype_lc eq "application/gzip" or $mtype_lc eq "application/x-gzip") {
+        if ($debug) {
+            print "This is a GZIP file \n";
+        }
+
+        $isgzip = 1;
+    } else {
+        # Not recognised as an archive; unzip is still attempted below and
+        # will report the failure.
+        if ($debug) {
+            print "This is not an archive file \n";
+        }
+    }
+
+    # Open the archive. $unzip stays empty on success, otherwise it names the
+    # tool that failed (used in the error message).
+    my $unzip = "";
+    my $fh;
+    if ($isgzip) {
+        open($fh, "<:gzip", $filename)
+            or $unzip = "ungzip";
+    } elsif (open($fh, "-|", "unzip", "-p", $filename)) {
+        # List-form pipe open: the filename is passed to unzip as a single
+        # argument and is never interpreted by a shell.
+        #
+        # unzip -p does not make the open itself fail on a bad archive, but
+        # it then produces no output; treat an empty stream as a failure.
+        if (eof($fh)) {
+            $unzip = "unzip";
+            close $fh;
+        }
+    } else {
+        # unzip could not be started at all.
+        $unzip = "unzip";
+    }
+
+    # Read XML if possible (if open)
+    if ($unzip eq "") {
+        $xml = getXMLFromXMLString(join("", <$fh>));
+        if (!$xml) {
+            print "The XML found in ZIP file (<$filename>) does not seem to be valid XML! ";
+        }
+        close $fh;
+    } else {
+        print "Failed to $unzip ZIP file (<$filename>)! ";
+    }
+
+    return $xml;
+}
################################################################################