Skip to content
Snippets Groups Projects
Commit 0ef58557 authored by Demian Katz's avatar Demian Katz Committed by GitHub
Browse files

Handle question marks consistently in basic and advanced queries. (#873)

- Also fixes bug where query builder had side effect of modifying incoming Query object.
- Resolves VUFIND-1217
parent 80b6ba96
No related merge requests found
......@@ -130,16 +130,15 @@ class QueryBuilder implements QueryBuilderInterface
}
if ($query instanceof QueryGroup) {
$query = $this->reduceQueryGroup($query);
$finalQuery = $this->reduceQueryGroup($query);
} else {
$query->setString(
$this->getLuceneHelper()->normalizeSearchString($query->getString())
);
// Clone the query to avoid modifying the original user-visible query
$finalQuery = clone($query);
$finalQuery->setString($this->getNormalizedQueryString($query));
}
$string = $finalQuery->getString() ?: '*:*';
$string = $query->getString() ?: '*:*';
if ($handler = $this->getSearchHandler($query->getHandler(), $string)) {
if ($handler = $this->getSearchHandler($finalQuery->getHandler(), $string)) {
if (!$handler->hasExtendedDismax()
&& $this->getLuceneHelper()->containsAdvancedLuceneSyntax($string)
) {
......@@ -155,13 +154,6 @@ class QueryBuilder implements QueryBuilderInterface
}
}
} else if ($handler->hasDismax()) {
// If we're using extended dismax, we'll miss out on the question
// mark fix in createAdvancedInnerSearchString(), so we should
// apply it here. If other query munges arise that are valuable
// to both dismax and edismax, we should add a wrapper function
// around them and call it from here instead of this one very
// specific check.
$string = $this->fixTrailingQuestionMarks($string);
$params->set('qf', implode(' ', $handler->getDismaxFields()));
$params->set('qt', $handler->getDismaxHandler());
foreach ($handler->getDismaxParams() as $param) {
......@@ -329,8 +321,7 @@ class QueryBuilder implements QueryBuilderInterface
);
}
} else {
$searchString = $this->getLuceneHelper()
->normalizeSearchString($component->getString());
$searchString = $this->getNormalizedQueryString($component);
$searchHandler = $this->getSearchHandler(
$component->getHandler(),
$searchString
......@@ -378,7 +369,9 @@ class QueryBuilder implements QueryBuilderInterface
*/
protected function fixTrailingQuestionMarks($string)
{
$multiword = preg_match('/[^\s]\s+[^\s]/', $string);
// Treat colon and whitespace as word separators -- in either case, we
// should add parentheses for accuracy.
$multiword = preg_match('/[^\s][\s:]+[^\s]/', $string);
$callback = function ($matches) use ($multiword) {
// Make sure all question marks are properly escaped (first unescape
// any that are already escaped to prevent double-escapes, then escape
......@@ -394,11 +387,27 @@ class QueryBuilder implements QueryBuilderInterface
// Use a lookahead to skip matches found within quoted phrases.
$lookahead = '(?=(?:[^\"]*+\"[^\"]*+\")*+[^\"]*+$)';
$string = preg_replace_callback(
'/([^\s]+\?)(\s|$)' . $lookahead . '/', $callback, $string
'/([^\s:()]+\?)(\s|$)' . $lookahead . '/', $callback, $string
);
return rtrim($string);
}
/**
* Given a Query object, return a fully normalized version of the query string.
*
* @param Query $query Query object
*
* @return string
*/
protected function getNormalizedQueryString($query)
{
return $this->fixTrailingQuestionMarks(
$this->getLuceneHelper()->normalizeSearchString(
$query->getString()
)
);
}
/**
* Return advanced inner search string based on input and handler.
*
......@@ -423,9 +432,6 @@ class QueryBuilder implements QueryBuilderInterface
return $string;
}
// Account for trailing question marks:
$string = $this->fixTrailingQuestionMarks($string);
return $handler
? $handler->createAdvancedQueryString($string, false) : $string;
}
......
......@@ -123,6 +123,12 @@ class QueryBuilderTest extends \VuFindTest\Unit\TestCase
['start t?his that', 'start t?his that', []],
// multiple ? terms:
['start? this?', '((start?) OR (start\?)) ((this?) OR (this\?))', []],
// ? term in field-specific context:
['xyzzy:this?', 'xyzzy:((this?) OR (this\?))', []],
// ? term in field-specific context w/ extra term:
['xyzzy:(this? that)', 'xyzzy:(((this?) OR (this\?)) that)', []],
// Multiple fields, one w/ ? term:
['foo:this? OR bar:tha?t', 'foo:((this?) OR (this\?)) OR bar:tha?t', []],
// repeating ? term:
['this? that? this?', '((this?) OR (this\?)) ((that?) OR (that\?)) ((this?) OR (this\?))', []],
// ? terms inside quoted phrase (basic flag set to indicate that
......@@ -132,6 +138,64 @@ class QueryBuilderTest extends \VuFindTest\Unit\TestCase
// @codingStandardsIgnoreEnd
}
/**
* Run a test case through a basic query.
*
* @param QueryBuilder $qb Query builder
* @param string $handler Search handler: dismax|edismax|standard
* @param array $test Test to run
*
* @return void
*/
protected function runBasicQuestionTest($qb, $handler, $test)
{
list($input, $output, $flags) = $test;
if ($handler === 'standard'
|| ($handler === 'dismax' && empty($flags['basic']))
) {
// We expect an extra set of parentheses to be added, unless the
// string contains a colon, in which case some processing will be
// skipped due to field-specific query behavior.
$basicOutput = strstr($output, ':') ? $output : '(' . $output . ')';
} else {
$basicOutput = $output;
}
$q = new Query($input, 'test');
$before = $q->getString();
$response = $qb->build($q);
// Make sure the query builder had no side effects on the query object:
$this->assertEquals($before, $q->getString());
$processedQ = $response->get('q');
$this->assertEquals($basicOutput, $processedQ[0]);
}
/**
* Run a test case through an advanced query.
*
* @param QueryBuilder $qb Query builder
* @param string $handler Search handler: dismax|edismax|standard
* @param array $test Test to run
*
* @return void
*/
protected function runAdvancedQuestionTest($qb, $handler, $test)
{
list($input, $output, $flags) = $test;
if ($handler === 'standard'
|| ($handler === 'dismax' && empty($flags['basic']))
) {
$advOutput = '((' . $output . '))';
} else {
$mm = $handler == 'dismax' ? '100%' : '0%';
$advOutput = "((_query_:\"{!$handler qf=\\\"foo\\\" mm=\\'$mm\\'}"
. addslashes($output) . '"))';
}
$advancedQ = new QueryGroup('AND', [new Query($input, 'test')]);
$advResponse = $qb->build($advancedQ);
$advProcessedQ = $advResponse->get('q');
$this->assertEquals($advOutput, $advProcessedQ[0]);
}
/**
* Run the standard suite of question mark tests, accounting for differences
* between stanard Lucene, basic Dismax and eDismax handlers.
......@@ -147,16 +211,8 @@ class QueryBuilderTest extends \VuFindTest\Unit\TestCase
$tests = $this->getQuestionTests();
$qb = new QueryBuilder($builderParams);
foreach ($tests as $test) {
list($input, $output, $flags) = $test;
if ($handler === 'standard'
|| ($handler === 'dismax' && empty($flags['basic']))
) {
$output = '(' . $output . ')';
}
$q = new Query($input, 'test');
$response = $qb->build($q);
$processedQ = $response->get('q');
$this->assertEquals($output, $processedQ[0]);
$this->runBasicQuestionTest($qb, $handler, $test);
$this->runAdvancedQuestionTest($qb, $handler, $test);
}
}
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment