From 015bc7a0f22c5b462ba9c373ee8079daee8775dd Mon Sep 17 00:00:00 2001 From: mathieugrimault <30295397+mathieugrimault@users.noreply.github.com> Date: Wed, 19 Dec 2018 17:27:07 +0100 Subject: [PATCH] Improve tolerance of illegal XML characters (#1290) --- .../src/VuFind/RecordDriver/MarcAdvancedTrait.php | 12 ++++++++---- module/VuFind/src/VuFind/XSLT/Processor.php | 5 ++++- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/module/VuFind/src/VuFind/RecordDriver/MarcAdvancedTrait.php b/module/VuFind/src/VuFind/RecordDriver/MarcAdvancedTrait.php index 5d0bd86e54e..a470005933d 100644 --- a/module/VuFind/src/VuFind/RecordDriver/MarcAdvancedTrait.php +++ b/module/VuFind/src/VuFind/RecordDriver/MarcAdvancedTrait.php @@ -853,11 +853,15 @@ trait MarcAdvancedTrait { // Special case for MARC: if ($format == 'marc21') { - $xml = $this->getMarcRecord()->toXML(); - $xml = str_replace( - [chr(27), chr(28), chr(29), chr(30), chr(31), chr(8)], ' ', $xml + $sanitizeXmlRegEx + = '[^\x{0009}\x{000a}\x{000d}\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}]+'; + $xml = simplexml_load_string( + trim( + preg_replace( + "/$sanitizeXmlRegEx/u", ' ', $this->getMarcRecord()->toXML() + ) + ) ); - $xml = simplexml_load_string($xml); if (!$xml || !isset($xml->record)) { return false; } diff --git a/module/VuFind/src/VuFind/XSLT/Processor.php b/module/VuFind/src/VuFind/XSLT/Processor.php index df99053ad84..38ca7469161 100644 --- a/module/VuFind/src/VuFind/XSLT/Processor.php +++ b/module/VuFind/src/VuFind/XSLT/Processor.php @@ -58,7 +58,10 @@ class Processor $xsl = new XSLTProcessor(); $xsl->importStyleSheet($style); $doc = new DOMDocument(); - if ($doc->loadXML($xml)) { + $sanitizeXmlRegEx + = '[^\x{0009}\x{000a}\x{000d}\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}]+'; + $cleanXml = trim(preg_replace("/$sanitizeXmlRegEx/u", ' ', $xml)); + if ($doc->loadXML($cleanXml)) { foreach ($params as $key => $value) { $xsl->setParameter('', $key, $value); } -- GitLab