diff --git a/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php b/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php index 0c9d8e95019b4bc65feb530ae0a59f7c332d30e0..3367b23e789a8fc3fc1bb2e66385d1e23c2773ab 100644 --- a/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php +++ b/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php @@ -130,16 +130,15 @@ class QueryBuilder implements QueryBuilderInterface } if ($query instanceof QueryGroup) { - $query = $this->reduceQueryGroup($query); + $finalQuery = $this->reduceQueryGroup($query); } else { - $query->setString( - $this->getLuceneHelper()->normalizeSearchString($query->getString()) - ); + // Clone the query to avoid modifying the original user-visible query + $finalQuery = clone($query); + $finalQuery->setString($this->getNormalizedQueryString($query)); } + $string = $finalQuery->getString() ?: '*:*'; - $string = $query->getString() ?: '*:*'; - - if ($handler = $this->getSearchHandler($query->getHandler(), $string)) { + if ($handler = $this->getSearchHandler($finalQuery->getHandler(), $string)) { if (!$handler->hasExtendedDismax() && $this->getLuceneHelper()->containsAdvancedLuceneSyntax($string) ) { @@ -155,13 +154,6 @@ class QueryBuilder implements QueryBuilderInterface } } } else if ($handler->hasDismax()) { - // If we're using extended dismax, we'll miss out on the question - // mark fix in createAdvancedInnerSearchString(), so we should - // apply it here. If other query munges arise that are valuable - // to both dismax and edismax, we should add a wrapper function - // around them and call it from here instead of this one very - // specific check. - $string = $this->fixTrailingQuestionMarks($string); $params->set('qf', implode(' ', $handler->getDismaxFields())); $params->set('qt', $handler->getDismaxHandler()); foreach ($handler->getDismaxParams() as $param) { @@ -329,8 +321,7 @@ class QueryBuilder implements QueryBuilderInterface ); } } else { - $searchString = $this->getLuceneHelper() - ->normalizeSearchString($component->getString()); + $searchString = $this->getNormalizedQueryString($component); $searchHandler = $this->getSearchHandler( $component->getHandler(), $searchString @@ -378,7 +369,9 @@ class QueryBuilder implements QueryBuilderInterface */ protected function fixTrailingQuestionMarks($string) { - $multiword = preg_match('/[^\s]\s+[^\s]/', $string); + // Treat colon and whitespace as word separators -- in either case, we + // should add parentheses for accuracy. + $multiword = preg_match('/[^\s][\s:]+[^\s]/', $string); $callback = function ($matches) use ($multiword) { // Make sure all question marks are properly escaped (first unescape // any that are already escaped to prevent double-escapes, then escape @@ -394,11 +387,27 @@ class QueryBuilder implements QueryBuilderInterface // Use a lookahead to skip matches found within quoted phrases. $lookahead = '(?=(?:[^\"]*+\"[^\"]*+\")*+[^\"]*+$)'; $string = preg_replace_callback( - '/([^\s]+\?)(\s|$)' . $lookahead . '/', $callback, $string + '/([^\s:()]+\?)(\s|$)' . $lookahead . '/', $callback, $string ); return rtrim($string); } + /** + * Given a Query object, return a fully normalized version of the query string. + * + * @param Query $query Query object + * + * @return string + */ + protected function getNormalizedQueryString($query) + { + return $this->fixTrailingQuestionMarks( + $this->getLuceneHelper()->normalizeSearchString( + $query->getString() + ) + ); + } + /** * Return advanced inner search string based on input and handler. * @@ -423,9 +432,6 @@ class QueryBuilder implements QueryBuilderInterface return $string; } - // Account for trailing question marks: - $string = $this->fixTrailingQuestionMarks($string); - return $handler ? $handler->createAdvancedQueryString($string, false) : $string; } diff --git a/module/VuFindSearch/tests/unit-tests/src/VuFindTest/Backend/Solr/QueryBuilderTest.php b/module/VuFindSearch/tests/unit-tests/src/VuFindTest/Backend/Solr/QueryBuilderTest.php index 23fda8d3177452bd3a5634a465918ae9fe4a314f..d9560012a96de0a688e430d37829b227bd7a5415 100644 --- a/module/VuFindSearch/tests/unit-tests/src/VuFindTest/Backend/Solr/QueryBuilderTest.php +++ b/module/VuFindSearch/tests/unit-tests/src/VuFindTest/Backend/Solr/QueryBuilderTest.php @@ -123,6 +123,12 @@ class QueryBuilderTest extends \VuFindTest\Unit\TestCase ['start t?his that', 'start t?his that', []], // multiple ? terms: ['start? this?', '((start?) OR (start\?)) ((this?) OR (this\?))', []], + // ? term in field-specific context: + ['xyzzy:this?', 'xyzzy:((this?) OR (this\?))', []], + // ? term in field-specific context w/ extra term: + ['xyzzy:(this? that)', 'xyzzy:(((this?) OR (this\?)) that)', []], + // Multiple fields, one w/ ? term: + ['foo:this? OR bar:tha?t', 'foo:((this?) OR (this\?)) OR bar:tha?t', []], // repeating ? term: ['this? that? this?', '((this?) OR (this\?)) ((that?) OR (that\?)) ((this?) OR (this\?))', []], // ? terms inside quoted phrase (basic flag set to indicate that @@ -132,6 +138,64 @@ class QueryBuilderTest extends \VuFindTest\Unit\TestCase // @codingStandardsIgnoreEnd } + /** + * Run a test case through a basic query. + * + * @param QueryBuilder $qb Query builder + * @param string $handler Search handler: dismax|edismax|standard + * @param array $test Test to run + * + * @return void + */ + protected function runBasicQuestionTest($qb, $handler, $test) + { + list($input, $output, $flags) = $test; + if ($handler === 'standard' + || ($handler === 'dismax' && empty($flags['basic'])) + ) { + // We expect an extra set of parentheses to be added, unless the + // string contains a colon, in which case some processing will be + // skipped due to field-specific query behavior. + $basicOutput = strstr($output, ':') ? $output : '(' . $output . ')'; + } else { + $basicOutput = $output; + } + $q = new Query($input, 'test'); + $before = $q->getString(); + $response = $qb->build($q); + // Make sure the query builder had no side effects on the query object: + $this->assertEquals($before, $q->getString()); + $processedQ = $response->get('q'); + $this->assertEquals($basicOutput, $processedQ[0]); + } + + /** + * Run a test case through an advanced query. + * + * @param QueryBuilder $qb Query builder + * @param string $handler Search handler: dismax|edismax|standard + * @param array $test Test to run + * + * @return void + */ + protected function runAdvancedQuestionTest($qb, $handler, $test) + { + list($input, $output, $flags) = $test; + if ($handler === 'standard' + || ($handler === 'dismax' && empty($flags['basic'])) + ) { + $advOutput = '((' . $output . '))'; + } else { + $mm = $handler == 'dismax' ? '100%' : '0%'; + $advOutput = "((_query_:\"{!$handler qf=\\\"foo\\\" mm=\\'$mm\\'}" + . addslashes($output) . '"))'; + } + $advancedQ = new QueryGroup('AND', [new Query($input, 'test')]); + $advResponse = $qb->build($advancedQ); + $advProcessedQ = $advResponse->get('q'); + $this->assertEquals($advOutput, $advProcessedQ[0]); + } + /** * Run the standard suite of question mark tests, accounting for differences * between stanard Lucene, basic Dismax and eDismax handlers. @@ -147,16 +211,8 @@ class QueryBuilderTest extends \VuFindTest\Unit\TestCase $tests = $this->getQuestionTests(); $qb = new QueryBuilder($builderParams); foreach ($tests as $test) { - list($input, $output, $flags) = $test; - if ($handler === 'standard' - || ($handler === 'dismax' && empty($flags['basic'])) - ) { - $output = '(' . $output . ')'; - } - $q = new Query($input, 'test'); - $response = $qb->build($q); - $processedQ = $response->get('q'); - $this->assertEquals($output, $processedQ[0]); + $this->runBasicQuestionTest($qb, $handler, $test); + $this->runAdvancedQuestionTest($qb, $handler, $test); } }