Skip to content
Snippets Groups Projects
Commit ffb47725 authored by Demian Katz
Browse files

More spelling tokenization fixes related to VUFIND-737.

parent c8e96103
No related merge requests found
......@@ -451,21 +451,25 @@ abstract class Results implements ServiceLocatorAwareInterface
// benighted at gmail dot com: http://php.net/manual/en/function.strtok.php
$tokens = array();
$token = strtok($input, " \t");
do {
while ($token !== false) {
// find double quoted tokens
if ($token{0}=='"' && substr($token, -1) != '"') {
if (substr($token, 0, 1) == '"' && substr($token, -1) != '"') {
$token .= ' '.strtok('"').'"';
}
// find single quoted tokens
if ($token{0}=="'" && substr($token, -1) != "'") {
$token .= ' '.strtok("'")."'";
}
// skip boolean operators
if (!in_array($token, $joins)) {
$tokens[] = $token;
}
} while ($token = strtok(" \t"));
$token = strtok(" \t");
}
// If the last token ends in a double quote but the input string does not,
// the tokenization process added the quote, which will break spelling
// replacements. We need to strip it back off again:
$last = count($tokens) > 0 ? $tokens[count($tokens) - 1] : null;
if ($last && substr($last, -1) == '"' && substr($input, -1) != '"') {
$tokens[count($tokens) - 1] = substr($last, 0, strlen($last) - 1);
}
return $tokens;
}
......
......@@ -55,15 +55,19 @@ class ResultsTest extends \VuFindTest\Unit\TestCase
$this->assertEquals(array('apples', 'oranges'), $solr->spellingTokens('apples OR oranges'));
$this->assertEquals(array('"word"'), $solr->spellingTokens('"word"'));
$this->assertEquals(array('"word"', 'second'), $solr->spellingTokens('"word" second'));
$this->assertEquals(array("'word'"), $solr->spellingTokens("'word'"));
$this->assertEquals(array("'word'", 'second'), $solr->spellingTokens("'word' second"));
$this->assertEquals(array(), $solr->spellingTokens(''));
$this->assertEquals(array('0', 'is', 'zero'), $solr->spellingTokens('0 is zero'));
$this->assertEquals(array("'twas", 'successful'), $solr->spellingTokens("'twas successful"));
$this->assertEquals(array('word'), $solr->spellingTokens('(word)'));
$this->assertEquals(array('word', 'second'), $solr->spellingTokens('(word) second'));
$this->assertEquals(array('apples', 'oranges', 'pears'), $solr->spellingTokens('(apples OR oranges) AND pears'));
$this->assertEquals(array('two', 'terms'), $solr->spellingTokens("two\tterms"));
$this->assertEquals(
array('"two words"', 'single', "'three word phrase'", 'single'),
$solr->spellingTokens('((("two words" OR single) NOT \'three word phrase\') AND single)')
array('"two words"', 'single', '"three word phrase"', 'single'),
$solr->spellingTokens('((("two words" OR single) NOT "three word phrase") AND single)')
);
$this->assertEquals(array('"unfinished phrase'), $solr->spellingTokens('"unfinished phrase'));
$this->assertEquals(array('"'), $solr->spellingTokens('"'));
$this->assertEquals(array('""'), $solr->spellingTokens('""'));
}
}
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment