From 4eade684bdf868e513178e59e9f16567389789aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matth=C3=A4us=20Wander?= Date: Mon, 7 Jun 2021 00:40:56 +0200 Subject: show in XML report viewer --- dmarcts-report-viewer-report-data.php | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dmarcts-report-viewer-report-data.php b/dmarcts-report-viewer-report-data.php index 5c862cd..11e049c 100644 --- a/dmarcts-report-viewer-report-data.php +++ b/dmarcts-report-viewer-report-data.php @@ -221,6 +221,11 @@ function formatXML($raw_xml, $reportnumber) { $html .= "
" . $out . "
"; + $out = $dom->saveXML($dom->getElementsByTagName("policy_published")[0]); + $out = htmlspecialchars($out); + + $html .= "
" . $out . "
"; + $records = $dom->getElementsByTagName("record"); $i = 0; // $i++; -- cgit v1.2.3 From 94b47e466f5fe91b36f38dbb7b56c76ad28c1372 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matth=C3=A4us=20Wander?= Date: Mon, 7 Jun 2021 02:08:35 +0200 Subject: More robust code for printing the XML declaration and the root element. Tested successfully with report from wp.pl, which breaks two assumption of former code: 1) There is no XML declaration. 2) There are no linebreaks that delimit the root element. --- dmarcts-report-viewer-report-data.php | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/dmarcts-report-viewer-report-data.php b/dmarcts-report-viewer-report-data.php index 11e049c..53825ba 100644 --- a/dmarcts-report-viewer-report-data.php +++ b/dmarcts-report-viewer-report-data.php @@ -208,13 +208,19 @@ function formatXML($raw_xml, $reportnumber) { $dom->formatOutput = true; $dom->loadXML($raw_xml); - // These next few lines adding and (as well as the lines adding the closing tag) are are very risky because they assume that the first two lines and the last line of the raw_xml are weel-formed - // Hopefully not too risky as the raw_xml has already gone through the dmarcts-parser routine that looks for bad XML. - // If someone can code a proper way to get those lines, it would be appreciated. - $xml_arr = explode(PHP_EOL,$raw_xml); - $out = $xml_arr[0] . "\n" . $xml_arr[1]; - // Should return first 2 lines of xml: and - $html = "
" . htmlspecialchars($out) . "
"; + // Note that the XML formatter prints expected elements only. + // If the report contains junk (or an unknown extension), it will be omitted from output. + + // Extract from raw_xml, if it matches the regex pattern. + if (preg_match("/<\?xml([^?>]*)\?>/", $raw_xml, $matches)) { + $html .= "
" . htmlspecialchars($matches[0]) . "";
+    }
+
+    // Extract root  from raw_xml.
+    $rootName = $dom->firstChild->localName;
+    if (preg_match("/<". $rootName ."([^>]*)>/", $raw_xml, $matches)) {
+        $html .= "
" . htmlspecialchars($matches[0]) . "";
+    }
 
 	$out = $dom->saveXML($dom->getElementsByTagName("report_metadata")[0]);
 	$out = htmlspecialchars($out);
@@ -239,9 +245,10 @@ function formatXML($raw_xml, $reportnumber) {
 		$i++;
 	}
 
-	$out = $xml_arr[sizeof($xml_arr)-2];
-	$out = htmlspecialchars($out);
-		$html .= "
" . $out . "
"; + // Extract closing from raw_xml. + if (preg_match("/<\/". $rootName .">/", $raw_xml, $matches)) { + $html .= "
" . htmlspecialchars($matches[0]) . "";
+    }
 
 	return $html;
 }
-- 
cgit v1.2.3


From 5441522c666bce767be9f44be87e87926eb8864f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matth=C3=A4us=20Wander?= 
Date: Mon, 7 Jun 2021 02:22:13 +0200
Subject: regression fix in output html

---
 dmarcts-report-viewer-report-data.php | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/dmarcts-report-viewer-report-data.php b/dmarcts-report-viewer-report-data.php
index 53825ba..2260f48 100644
--- a/dmarcts-report-viewer-report-data.php
+++ b/dmarcts-report-viewer-report-data.php
@@ -213,13 +213,13 @@ function formatXML($raw_xml, $reportnumber) {
 
 	// Extract  from raw_xml, if it matches the regex pattern.
     if (preg_match("/<\?xml([^?>]*)\?>/", $raw_xml, $matches)) {
-        $html .= "
" . htmlspecialchars($matches[0]) . "";
+        $html .= "
" . htmlspecialchars($matches[0]) . "
"; } // Extract root from raw_xml. $rootName = $dom->firstChild->localName; if (preg_match("/<". $rootName ."([^>]*)>/", $raw_xml, $matches)) { - $html .= "
" . htmlspecialchars($matches[0]) . "";
+        $html .= "
" . htmlspecialchars($matches[0]) . "
"; } $out = $dom->saveXML($dom->getElementsByTagName("report_metadata")[0]); @@ -247,7 +247,7 @@ function formatXML($raw_xml, $reportnumber) { // Extract closing from raw_xml. if (preg_match("/<\/". $rootName .">/", $raw_xml, $matches)) { - $html .= "
" . htmlspecialchars($matches[0]) . "";
+        $html .= "
" . htmlspecialchars($matches[0]) . "
"; } return $html; -- cgit v1.2.3 From 622815e12794304a244adfde51bac9548f897b42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matth=C3=A4us=20Wander?= Date: Mon, 7 Jun 2021 22:05:08 +0200 Subject: print XML elements (other than root) fully dynamic without assumptions about structure --- dmarcts-report-viewer-report-data.php | 42 +++++++++++++---------------------- 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/dmarcts-report-viewer-report-data.php b/dmarcts-report-viewer-report-data.php index 2260f48..bb776f7 100644 --- a/dmarcts-report-viewer-report-data.php +++ b/dmarcts-report-viewer-report-data.php @@ -208,45 +208,35 @@ function formatXML($raw_xml, $reportnumber) { $dom->formatOutput = true; $dom->loadXML($raw_xml); - // Note that the XML formatter prints expected elements only. - // If the report contains junk (or an unknown extension), it will be omitted from output. - - // Extract from raw_xml, if it matches the regex pattern. + // Extract and print from raw_xml, if it matches the regex pattern. if (preg_match("/<\?xml([^?>]*)\?>/", $raw_xml, $matches)) { $html .= "
" . htmlspecialchars($matches[0]) . "
"; } - // Extract root from raw_xml. - $rootName = $dom->firstChild->localName; - if (preg_match("/<". $rootName ."([^>]*)>/", $raw_xml, $matches)) { + // Extract and print root from raw_xml. + $root = $dom->firstChild; + if (preg_match("/<". $root->localName ."([^>]*)>/", $raw_xml, $matches)) { $html .= "
" . htmlspecialchars($matches[0]) . "
"; } - $out = $dom->saveXML($dom->getElementsByTagName("report_metadata")[0]); - $out = htmlspecialchars($out); - - $html .= "
" . $out . "
"; + // Print all child nodes + foreach ($root->childNodes as $element) { + $out = $dom->saveXML($element); + $out = htmlspecialchars($out); - $out = $dom->saveXML($dom->getElementsByTagName("policy_published")[0]); - $out = htmlspecialchars($out); + $elementName = $element->localName; - $html .= "
" . $out . "
"; + // If element is a 'record', append database id to unique HTML id + if ($elementName === "record") { + $elementName .= $id_min; + $id_min++; + } - $records = $dom->getElementsByTagName("record"); - $i = 0; - // $i++; - foreach ( $records as $record) { - $out = $dom->saveXML($dom->getElementsByTagName("record")[$i]); - $out = htmlspecialchars($out); - $html .= "
";
-		$html .= $out;
-		$html .= "
"; - $id_min++; - $i++; + $html .= "
" . $out . "
"; } // Extract closing
from raw_xml. - if (preg_match("/<\/". $rootName .">/", $raw_xml, $matches)) { + if (preg_match("/<\/". $root->localName .">/", $raw_xml, $matches)) { $html .= "
" . htmlspecialchars($matches[0]) . "
"; } -- cgit v1.2.3