diff --git a/module/VuFind/src/VuFind/Search/Base/Results.php b/module/VuFind/src/VuFind/Search/Base/Results.php index f814d04f6c9074f3c1fb905a02161f8ba6553b4a..f5d4075d12fc3168df196e89aacca0abbc44a234 100644 --- a/module/VuFind/src/VuFind/Search/Base/Results.php +++ b/module/VuFind/src/VuFind/Search/Base/Results.php @@ -451,21 +451,25 @@ abstract class Results implements ServiceLocatorAwareInterface // benighted at gmail dot com: http://php.net/manual/en/function.strtok.php $tokens = array(); $token = strtok($input, " \t"); - do { + while ($token !== false) { // find double quoted tokens - if ($token{0}=='"' && substr($token, -1) != '"') { + if (substr($token, 0, 1) == '"' && substr($token, -1) != '"') { $token .= ' '.strtok('"').'"'; } - // find single quoted tokens - if ($token{0}=="'" && substr($token, -1) != "'") { - $token .= ' '.strtok("'")."'"; - } // skip boolean operators if (!in_array($token, $joins)) { $tokens[] = $token; } - } while ($token = strtok(" \t")); + $token = strtok(" \t"); + } + // If the last token ends in a double quote but the input string does not, + // the tokenization process added the quote, which will break spelling + // replacements. We need to strip it back off again: + $last = count($tokens) > 0 ? $tokens[count($tokens) - 1] : null; + if ($last && substr($last, -1) == '"' && substr($input, -1) != '"') { + $tokens[count($tokens) - 1] = substr($last, 0, strlen($last) - 1); + } return $tokens; } diff --git a/module/VuFind/tests/unit-tests/src/Search/Base/ResultsTest.php b/module/VuFind/tests/unit-tests/src/Search/Base/ResultsTest.php index aa3bdd7d3b81e803cb9738bca17ac8d6efdb8cc6..89be58ac202d60bf7d46dd2801db770af0b8edad 100644 --- a/module/VuFind/tests/unit-tests/src/Search/Base/ResultsTest.php +++ b/module/VuFind/tests/unit-tests/src/Search/Base/ResultsTest.php @@ -55,15 +55,19 @@ class ResultsTest extends \VuFindTest\Unit\TestCase $this->assertEquals(array('apples', 'oranges'), $solr->spellingTokens('apples OR oranges')); $this->assertEquals(array('"word"'), $solr->spellingTokens('"word"')); $this->assertEquals(array('"word"', 'second'), $solr->spellingTokens('"word" second')); - $this->assertEquals(array("'word'"), $solr->spellingTokens("'word'")); - $this->assertEquals(array("'word'", 'second'), $solr->spellingTokens("'word' second")); + $this->assertEquals(array(), $solr->spellingTokens('')); + $this->assertEquals(array('0', 'is', 'zero'), $solr->spellingTokens('0 is zero')); + $this->assertEquals(array("'twas", 'successful'), $solr->spellingTokens("'twas successful")); $this->assertEquals(array('word'), $solr->spellingTokens('(word)')); $this->assertEquals(array('word', 'second'), $solr->spellingTokens('(word) second')); $this->assertEquals(array('apples', 'oranges', 'pears'), $solr->spellingTokens('(apples OR oranges) AND pears')); $this->assertEquals(array('two', 'terms'), $solr->spellingTokens("two\tterms")); $this->assertEquals( - array('"two words"', 'single', "'three word phrase'", 'single'), - $solr->spellingTokens('((("two words" OR single) NOT \'three word phrase\') AND single)') + array('"two words"', 'single', '"three word phrase"', 'single'), + $solr->spellingTokens('((("two words" OR single) NOT "three word phrase") AND single)') ); + $this->assertEquals(array('"unfinished phrase'), $solr->spellingTokens('"unfinished phrase')); + $this->assertEquals(array('"'), $solr->spellingTokens('"')); + $this->assertEquals(array('""'), $solr->spellingTokens('""')); } } \ No newline at end of file