diff options
author | Matthäus Wander <mail@wander.science> | 2021-06-07 03:08:35 +0300 |
---|---|---|
committer | Matthäus Wander <mail@wander.science> | 2021-06-07 03:08:35 +0300 |
commit | 94b47e466f5fe91b36f38dbb7b56c76ad28c1372 (patch) | |
tree | 898aadafba28b883ae2e209309ad37b050b44507 | |
parent | 4eade684bdf868e513178e59e9f16567389789aa (diff) |
More robust code for printing the XML declaration and the root element.
Tested successfully with a report from wp.pl, which breaks two assumptions of the former code:
1) There is no XML declaration.
2) There are no linebreaks that delimit the root element.
-rw-r--r-- | dmarcts-report-viewer-report-data.php | 27 |
1 files changed, 17 insertions, 10 deletions
diff --git a/dmarcts-report-viewer-report-data.php b/dmarcts-report-viewer-report-data.php index 11e049c..53825ba 100644 --- a/dmarcts-report-viewer-report-data.php +++ b/dmarcts-report-viewer-report-data.php @@ -208,13 +208,19 @@ function formatXML($raw_xml, $reportnumber) { $dom->formatOutput = true; $dom->loadXML($raw_xml); - // These next few lines adding <?xml version=\"1.0\" encoding=\"UTF-8\" > and <feedback> (as well as the lines adding the closing </feedback> tag) are are very risky because they assume that the first two lines and the last line of the raw_xml are weel-formed - // Hopefully not too risky as the raw_xml has already gone through the dmarcts-parser routine that looks for bad XML. - // If someone can code a proper way to get those lines, it would be appreciated. - $xml_arr = explode(PHP_EOL,$raw_xml); - $out = $xml_arr[0] . "\n" . $xml_arr[1]; - // Should return first 2 lines of xml: <?xml version=\"1.0\" encoding=\"UTF-8\"> and <feedback> - $html = "<pre><code class='xml'>" . htmlspecialchars($out) . "</code></pre>"; + // Note that the XML formatter prints expected elements only. + // If the report contains junk (or an unknown extension), it will be omitted from output. + + // Extract <?xml ...> from raw_xml, if it matches the regex pattern. + if (preg_match("/<\?xml([^?>]*)\?>/", $raw_xml, $matches)) { + $html .= "<pre><code class='xml'>" . htmlspecialchars($matches[0]) . "</code</pre>"; + } + + // Extract root <feedback> from raw_xml. + $rootName = $dom->firstChild->localName; + if (preg_match("/<". $rootName ."([^>]*)>/", $raw_xml, $matches)) { + $html .= "<pre><code class='xml'>" . htmlspecialchars($matches[0]) . "</code</pre>"; + } $out = $dom->saveXML($dom->getElementsByTagName("report_metadata")[0]); $out = htmlspecialchars($out); @@ -239,9 +245,10 @@ function formatXML($raw_xml, $reportnumber) { $i++; } - $out = $xml_arr[sizeof($xml_arr)-2]; - $out = htmlspecialchars($out); - $html .= "<pre><code class='xml'>" . $out . 
"</code></pre>"; + // Extract closing </feedback> from raw_xml. + if (preg_match("/<\/". $rootName .">/", $raw_xml, $matches)) { + $html .= "<pre><code class='xml'>" . htmlspecialchars($matches[0]) . "</code</pre>"; + } return $html; } |