diff --git a/module/VuFind/src/VuFind/XSLT/Import/VuFind.php b/module/VuFind/src/VuFind/XSLT/Import/VuFind.php
index ac6803e46b33c2af61304103a7c6d5373c1ebc45..74a0fa25a9dc615a8948fc4e34f2df46434ec6ca 100644
--- a/module/VuFind/src/VuFind/XSLT/Import/VuFind.php
+++ b/module/VuFind/src/VuFind/XSLT/Import/VuFind.php
@@ -214,6 +214,35 @@ class VuFind
         return "{$cmd} -o {$output} -x {$input}";
     }
 
+    /**
+     * Strip illegal XML characters from a string.
+     *
+     * Every character outside the XML 1.0 "Char" production (tab, line feed,
+     * carriage return, U+0020-U+D7FF, U+E000-U+FFFD, U+10000-U+10FFFF) is
+     * replaced with a single space. The input is expected to be UTF-8; when
+     * it is not valid UTF-8, only illegal ASCII control bytes are replaced so
+     * that a string is still returned.
+     *
+     * @param string $in String to process
+     *
+     * @return string
+     */
+    public static function stripBadChars($in)
+    {
+        // PCRE reads only two hex digits after a bare \x, so code points above
+        // 0xFF need the \x{...} form together with the "u" (UTF-8) modifier.
+        $badChars = '/[^\\x{9}\\x{A}\\x{D}\\x{20}-\\x{D7FF}\\x{E000}-\\x{FFFD}'
+            . '\\x{10000}-\\x{10FFFF}]/u';
+        $clean = preg_replace($badChars, ' ', $in);
+        if (null === $clean) {
+            // UTF-8 mode rejects malformed input; fall back to a byte-wise pass
+            // that blanks the ASCII control characters XML forbids.
+            $controlBytes = '/[\\x00-\\x08\\x0B\\x0C\\x0E-\\x1F]/';
+            $clean = preg_replace($controlBytes, ' ', $in);
+        }
+        return $clean;
+    }
+
     /**
      * Harvest the contents of a document file (PDF, Word, etc.) using Aperture.
      * This method will only work if Aperture is properly configured in the
@@ -253,8 +282,7 @@ class VuFind
 
         // Send back what we extracted, stripping out any illegal characters that
         // will prevent XML from generating correctly:
-        $badChars = '/[^\\x0009\\x000A\\x000D\\x0020-\\xD7FF\\xE000-\\xFFFD]/';
-        return preg_replace($badChars, ' ', $final);
+        return static::stripBadChars($final);
     }
 
     /**