From d1768637a4d8239199b0f37ff8cf8096d417dc69 Mon Sep 17 00:00:00 2001 From: Ere Maijala <ere.maijala@helsinki.fi> Date: Tue, 20 Sep 2016 15:49:09 +0300 Subject: [PATCH] Improved question mark handling (#798) - Handling words with trailing question marks in the middle of the search string just like those at the end were previously handled. --- .../Backend/Solr/QueryBuilder.php | 22 ++++++++---- .../Backend/Solr/QueryBuilderTest.php | 36 ++++++++++++------- 2 files changed, 40 insertions(+), 18 deletions(-) diff --git a/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php b/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php index 22b59b17840..0c9d8e95019 100644 --- a/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php +++ b/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php @@ -378,15 +378,25 @@ class QueryBuilder implements QueryBuilderInterface */ protected function fixTrailingQuestionMarks($string) { - if (substr($string, -1) == '?' && substr($string, -2) != '\?') { + $multiword = preg_match('/[^\s]\s+[^\s]/', $string); + $callback = function ($matches) use ($multiword) { // Make sure all question marks are properly escaped (first unescape // any that are already escaped to prevent double-escapes, then escape // all of them): - $strippedQuery - = str_replace('?', '\?', str_replace('\?', '?', $string)); - $string = "({$string}) OR (" . $strippedQuery . ")"; - } - return $string; + $s = $matches[1]; + $escaped = str_replace('?', '\?', str_replace('\?', '?', $s)); + $s = "($s) OR ($escaped)"; + if ($multiword) { + $s = "($s) "; + } + return $s; + }; + // Use a lookahead to skip matches found within quoted phrases. + $lookahead = '(?=(?:[^\"]*+\"[^\"]*+\")*+[^\"]*+$)'; + $string = preg_replace_callback( + '/([^\s]+\?)(\s|$)' . $lookahead . '/', $callback, $string + ); + return rtrim($string); } /** diff --git a/module/VuFindSearch/tests/unit-tests/src/VuFindTest/Backend/Solr/QueryBuilderTest.php b/module/VuFindSearch/tests/unit-tests/src/VuFindTest/Backend/Solr/QueryBuilderTest.php index 2ea5b62f115..f966c234a59 100644 --- a/module/VuFindSearch/tests/unit-tests/src/VuFindTest/Backend/Solr/QueryBuilderTest.php +++ b/module/VuFindSearch/tests/unit-tests/src/VuFindTest/Backend/Solr/QueryBuilderTest.php @@ -102,19 +102,35 @@ class QueryBuilderTest extends \VuFindTest\Unit\TestCase } /** - * Test generation with a query handler + * Return array of [test query, expected result] arrays. * - * @return void + * @return array */ - public function testQueryHandler() + protected function getQuestionTests() { - // Set up an array of expected inputs and outputs: // @codingStandardsIgnoreStart - $tests = [ - ['this?', '((this?) OR (this\?))'],// trailing question mark + return [ + ['this?', '(this?) OR (this\?)'], // trailing question mark + ['this? that', '((this?) OR (this\?)) that'], // question mark after first word + ['start this? that', 'start ((this?) OR (this\?)) that'], // question mark after the middle word + ['start AND this? AND that', 'start AND ((this?) OR (this\?)) AND that'], // question mark with boolean operators + ['start t?his that', 'start t?his that'], // question mark as a wildcard in the middle of a word + ['start? this?', '((start?) OR (start\?)) ((this?) OR (this\?))'], // multiple ? terms + ['this? that? this?', '((this?) OR (this\?)) ((that?) OR (that\?)) ((this?) OR (this\?))'], // repeating ? term + ['"this? that?"', '"this? that?"'], // ? terms inside quoted phrase ]; // @codingStandardsIgnoreEnd + } + /** + * Test generation with a query handler + * + * @return void + */ + public function testQueryHandler() + { + // Set up an array of expected inputs and outputs: + $tests = $this->getQuestionTests(); $qb = new QueryBuilder( [ 'test' => [] @@ -125,7 +141,7 @@ class QueryBuilderTest extends \VuFindTest\Unit\TestCase $q = new Query($input, 'test'); $response = $qb->build($q); $processedQ = $response->get('q'); - $this->assertEquals($output, $processedQ[0]); + $this->assertEquals('(' . $output . ')', $processedQ[0]); } } @@ -137,11 +153,7 @@ class QueryBuilderTest extends \VuFindTest\Unit\TestCase public function testQueryHandlerWithEdismax() { // Set up an array of expected inputs and outputs: - // @codingStandardsIgnoreStart - $tests = [ - ['this?', '(this?) OR (this\?)'],// trailing question mark - ]; - // @codingStandardsIgnoreEnd + $tests = $this->getQuestionTests(); $qb = new QueryBuilder( [ -- GitLab