From 94b47e466f5fe91b36f38dbb7b56c76ad28c1372 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matth=C3=A4us=20Wander?= Date: Mon, 7 Jun 2021 02:08:35 +0200 Subject: More robust code for printing the XML declaration and the root element. Tested successfully with a report from wp.pl, which breaks two assumptions of the former code: 1) There is no XML declaration. 2) There are no linebreaks that delimit the root element. --- dmarcts-report-viewer-report-data.php | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/dmarcts-report-viewer-report-data.php b/dmarcts-report-viewer-report-data.php index 11e049c..53825ba 100644 --- a/dmarcts-report-viewer-report-data.php +++ b/dmarcts-report-viewer-report-data.php @@ -208,13 +208,19 @@ function formatXML($raw_xml, $reportnumber) { $dom->formatOutput = true; $dom->loadXML($raw_xml); - // These next few lines adding and (as well as the lines adding the closing tag) are are very risky because they assume that the first two lines and the last line of the raw_xml are weel-formed - // Hopefully not too risky as the raw_xml has already gone through the dmarcts-parser routine that looks for bad XML. - // If someone can code a proper way to get those lines, it would be appreciated. - $xml_arr = explode(PHP_EOL,$raw_xml); - $out = $xml_arr[0] . "\n" . $xml_arr[1]; - // Should return first 2 lines of xml: and - $html = "
" . htmlspecialchars($out) . "
"; + // Note that the XML formatter prints expected elements only. + // If the report contains junk (or an unknown extension), it will be omitted from output. + + // Extract the XML declaration from raw_xml, if it matches the regex pattern. + if (preg_match("/<\?xml([^?>]*)\?>/", $raw_xml, $matches)) { + $html .= "
" . htmlspecialchars($matches[0]) . "";
+    }
+
+    // Extract the root element's opening tag from raw_xml.
+    $rootName = $dom->firstChild->localName;
+    if (preg_match("/<". $rootName ."([^>]*)>/", $raw_xml, $matches)) {
+        $html .= "
" . htmlspecialchars($matches[0]) . "";
+    }
 
 	$out = $dom->saveXML($dom->getElementsByTagName("report_metadata")[0]);
 	$out = htmlspecialchars($out);
@@ -239,9 +245,10 @@ function formatXML($raw_xml, $reportnumber) {
 		$i++;
 	}
 
-	$out = $xml_arr[sizeof($xml_arr)-2];
-	$out = htmlspecialchars($out);
-		$html .= "
" . $out . "
"; + // Extract the root element's closing tag from raw_xml. + if (preg_match("/<\/". $rootName .">/", $raw_xml, $matches)) { + $html .= "
" . htmlspecialchars($matches[0]) . "";
+    }
 
 	return $html;
 }
-- 
cgit v1.2.3