Skip to content
Snippets Groups Projects
Commit ffb47725 authored by Demian Katz
Browse files

More spelling tokenization fixes related to VUFIND-737.

parent c8e96103
No related merge requests found
...@@ -451,21 +451,25 @@ abstract class Results implements ServiceLocatorAwareInterface ...@@ -451,21 +451,25 @@ abstract class Results implements ServiceLocatorAwareInterface
// benighted at gmail dot com: http://php.net/manual/en/function.strtok.php // benighted at gmail dot com: http://php.net/manual/en/function.strtok.php
$tokens = array(); $tokens = array();
$token = strtok($input, " \t"); $token = strtok($input, " \t");
do { while ($token !== false) {
// find double quoted tokens // find double quoted tokens
if ($token{0}=='"' && substr($token, -1) != '"') { if (substr($token, 0, 1) == '"' && substr($token, -1) != '"') {
$token .= ' '.strtok('"').'"'; $token .= ' '.strtok('"').'"';
} }
// find single quoted tokens
if ($token{0}=="'" && substr($token, -1) != "'") {
$token .= ' '.strtok("'")."'";
}
// skip boolean operators // skip boolean operators
if (!in_array($token, $joins)) { if (!in_array($token, $joins)) {
$tokens[] = $token; $tokens[] = $token;
} }
} while ($token = strtok(" \t")); $token = strtok(" \t");
}
// If the last token ends in a double quote but the input string does not,
// the tokenization process added the quote, which will break spelling
// replacements. We need to strip it back off again:
$last = count($tokens) > 0 ? $tokens[count($tokens) - 1] : null;
if ($last && substr($last, -1) == '"' && substr($input, -1) != '"') {
$tokens[count($tokens) - 1] = substr($last, 0, strlen($last) - 1);
}
return $tokens; return $tokens;
} }
......
...@@ -55,15 +55,19 @@ class ResultsTest extends \VuFindTest\Unit\TestCase ...@@ -55,15 +55,19 @@ class ResultsTest extends \VuFindTest\Unit\TestCase
$this->assertEquals(array('apples', 'oranges'), $solr->spellingTokens('apples OR oranges')); $this->assertEquals(array('apples', 'oranges'), $solr->spellingTokens('apples OR oranges'));
$this->assertEquals(array('"word"'), $solr->spellingTokens('"word"')); $this->assertEquals(array('"word"'), $solr->spellingTokens('"word"'));
$this->assertEquals(array('"word"', 'second'), $solr->spellingTokens('"word" second')); $this->assertEquals(array('"word"', 'second'), $solr->spellingTokens('"word" second'));
$this->assertEquals(array("'word'"), $solr->spellingTokens("'word'")); $this->assertEquals(array(), $solr->spellingTokens(''));
$this->assertEquals(array("'word'", 'second'), $solr->spellingTokens("'word' second")); $this->assertEquals(array('0', 'is', 'zero'), $solr->spellingTokens('0 is zero'));
$this->assertEquals(array("'twas", 'successful'), $solr->spellingTokens("'twas successful"));
$this->assertEquals(array('word'), $solr->spellingTokens('(word)')); $this->assertEquals(array('word'), $solr->spellingTokens('(word)'));
$this->assertEquals(array('word', 'second'), $solr->spellingTokens('(word) second')); $this->assertEquals(array('word', 'second'), $solr->spellingTokens('(word) second'));
$this->assertEquals(array('apples', 'oranges', 'pears'), $solr->spellingTokens('(apples OR oranges) AND pears')); $this->assertEquals(array('apples', 'oranges', 'pears'), $solr->spellingTokens('(apples OR oranges) AND pears'));
$this->assertEquals(array('two', 'terms'), $solr->spellingTokens("two\tterms")); $this->assertEquals(array('two', 'terms'), $solr->spellingTokens("two\tterms"));
$this->assertEquals( $this->assertEquals(
array('"two words"', 'single', "'three word phrase'", 'single'), array('"two words"', 'single', '"three word phrase"', 'single'),
$solr->spellingTokens('((("two words" OR single) NOT \'three word phrase\') AND single)') $solr->spellingTokens('((("two words" OR single) NOT "three word phrase") AND single)')
); );
$this->assertEquals(array('"unfinished phrase'), $solr->spellingTokens('"unfinished phrase'));
$this->assertEquals(array('"'), $solr->spellingTokens('"'));
$this->assertEquals(array('""'), $solr->spellingTokens('""'));
} }
} }
\ No newline at end of file
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment