From 5befd3e4cd2219c5d6aecad02862ba1d97028b44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eoghan=20=C3=93=20Carrag=C3=A1in?= <eoghan.ocarragain@gmail.com> Date: Fri, 11 Dec 2015 11:27:54 -0500 Subject: [PATCH] Make sure spaces in full text URLs are properly encoded - Prevents problems when passing them to Tika/Aperture command-line utilities. --- import/index_scripts/getFulltext.bsh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/import/index_scripts/getFulltext.bsh b/import/index_scripts/getFulltext.bsh index 4c24dacfa3e..6385c8640b4 100644 --- a/import/index_scripts/getFulltext.bsh +++ b/import/index_scripts/getFulltext.bsh @@ -85,8 +85,8 @@ public String getFulltext(Record record, String fieldSpec, String extension) { Iterator fieldsIter = fields.iterator(); if (fields != null) { while(fieldsIter.hasNext()) { - // Get the current string to work on: - String current = fieldsIter.next(); + // Get the current string to work on (and sanitize spaces): + String current = fieldsIter.next().replaceAll(" ", "%20"); // Filter by file extension if (extension == null || current.endsWith(extension)) { // Load the parser output for each tag into a string @@ -257,4 +257,4 @@ public String harvestWithParser(url, settings) { return harvestWithTika(url, settings[1]); } return null; -} \ No newline at end of file +} -- GitLab