Skip to content
Snippets Groups Projects
Commit 43698706 authored by Demian Katz's avatar Demian Katz Committed by Robert Lange
Browse files

Fix replaceTerm() word boundary / normalization bugs. (#1712)

- Resolves VUFIND-1423.
- Includes some additional test cases.
parent 1726ff37
No related merge requests found
...@@ -93,7 +93,15 @@ class Query extends AbstractQuery ...@@ -93,7 +93,15 @@ class Query extends AbstractQuery
*/ */
protected function normalizeText($text) protected function normalizeText($text)
{ {
return strtolower($this->stripDiacritics($text)); // The input to normalizeText may be a Solr query with Boolean operators
// in it; we want to be careful not to turn this into something invalid.
$stripped = $this->stripDiacritics($text);
$booleans = ['AND', 'OR', 'NOT'];
$words = [];
foreach (preg_split('/\s+/', $stripped) as $word) {
$words[] = in_array($word, $booleans) ? $word : strtolower($word);
}
return implode(' ', $words);
} }
/** /**
...@@ -223,18 +231,14 @@ class Query extends AbstractQuery ...@@ -223,18 +231,14 @@ class Query extends AbstractQuery
$queryString = $normalize $queryString = $normalize
? $this->getNormalizedString() : $this->queryString; ? $this->getNormalizedString() : $this->queryString;
// If our "from" pattern contains non-word characters, we can't use word // Try to match within word boundaries to prevent the replacement from
// boundaries for matching. We want to try to use word boundaries when // affecting unexpected parts of the search query; if that fails to change
// possible, however, to avoid the replacement from affecting unexpected // anything, try again with a less restricted regular expression. The fall-
// parts of the search query. // back is needed when $from contains punctuation characters such as commas.
if (!preg_match('/.*[^\w].*/', $from)) { $this->queryString = preg_replace("/\b$from\b/i", $to, $queryString);
$pattern = "/\b$from\b/i"; if ($queryString === $this->queryString) {
} else { $this->queryString = preg_replace("/$from/i", $to, $queryString);
$pattern = "/$from/i";
} }
// Perform the replacement:
$this->queryString = preg_replace($pattern, $to, $queryString);
} }
/** /**
......
...@@ -82,6 +82,41 @@ class QueryTest extends TestCase ...@@ -82,6 +82,41 @@ class QueryTest extends TestCase
$q = new Query('test query we<(ird and/or'); $q = new Query('test query we<(ird and/or');
$q->replaceTerm('and', 'not'); $q->replaceTerm('and', 'not');
$this->assertEquals('test query we<(ird not/or', $q->getString()); $this->assertEquals('test query we<(ird not/or', $q->getString());
$q = new Query('th\bbbt');
$q->replaceTerm('th\bbbt', 'that');
$this->assertEquals('that', $q->getString());
}
/**
* Test replacing a term containing punctuation; this exercises a special case
* in the code.
*
* @return void
*/
public function testReplacePunctuatedTerm()
{
$q = new Query('this, that');
$q->replaceTerm('this,', 'the other,');
$this->assertEquals('the other, that', $q->getString());
}
/**
* Test multiple replacements -- this simulates the scenario discussed in the
* VUFIND-1423 JIRA ticket.
*
* @return void
*/
public function testMultipleReplacements()
{
$q = new Query("color code");
$q->replaceTerm('color code', '((color code) OR (color codes))', true);
$this->assertEquals('((color code) OR (color codes))', $q->getString());
$q->replaceTerm('color code', '((color code) OR (color coded))', true);
$this->assertEquals(
'((((color code) OR (color coded))) OR (color codes))',
$q->getString()
);
} }
/** /**
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment