From c511f85a769e21294fcb5f8b47d528a1568a72d8 Mon Sep 17 00:00:00 2001 From: Hajo Seng <hajo.seng@sub.uni-hamburg.de> Date: Thu, 12 Sep 2019 18:04:21 +0200 Subject: [PATCH] Custom Dismax Munge (#1419) --- config/vufind/searchspecs.yaml | 14 ++- .../Backend/Solr/QueryBuilder.php | 1 + .../Backend/Solr/SearchHandler.php | 98 ++++++++++++++----- .../Backend/Solr/SearchHandlerTest.php | 27 ++++- 4 files changed, 112 insertions(+), 28 deletions(-) diff --git a/config/vufind/searchspecs.yaml b/config/vufind/searchspecs.yaml index 23a64e6fe96..a3fd4a6ead7 100644 --- a/config/vufind/searchspecs.yaml +++ b/config/vufind/searchspecs.yaml @@ -4,7 +4,8 @@ # Format is: # searchType: # # CustomMunge is an optional section to define custom pre-processing of -# # user input. See below for details of munge actions. +# # user input when Dismax does not apply. See below for details of +# # munge actions. # CustomMunge: # MungeName1: # - [action1, actionParams] @@ -12,6 +13,15 @@ # - [action3, actionParams] # MungeName2: # - [action1, actionParams] +# # While CustomMunge only applies to Lucene search, DismaxMunge is a +# # counterpart that applies to Dismax and eDismax queries. DismaxMunge +# # contains a chain of custom munge operations as described below. Note +# # that using this function is NOT RECOMMENDED if you can accomplish your +# # goals more elegantly through Solr schema adjustments, but it may be +# # useful in some situations, such as when you are connecting to a Solr +# # instance that is beyond your direct control. Here is an example stanza: +# DismaxMunge: +# - [preg_replace, '/^.*$/', '"$0"'] # # DismaxFields is optional and defines the fields sent to the Dismax handler # # when we are able to use it. QueryFields will be used for advanced # # searches that Dismax cannot support. 
QueryFields is always used if no @@ -114,7 +124,7 @@ # #----------------------------------------------------------------------------------- # -# Munge types are based on the original Solr.php code, and consist of: +# Munge types are string manipulation rules, and consist of: # # onephrase: eliminate all quotes and do it as a single phrase. # testing "one two" diff --git a/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php b/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php index 89105257391..cfaee9c6b32 100644 --- a/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php +++ b/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php @@ -144,6 +144,7 @@ class QueryBuilder implements QueryBuilderInterface $highlight = !empty($this->fieldsToHighlight); if ($handler = $this->getSearchHandler($finalQuery->getHandler(), $string)) { + $string = $handler->preprocessQueryString($string); if (!$handler->hasExtendedDismax() && $this->getLuceneHelper()->containsAdvancedLuceneSyntax($string) ) { diff --git a/module/VuFindSearch/src/VuFindSearch/Backend/Solr/SearchHandler.php b/module/VuFindSearch/src/VuFindSearch/Backend/Solr/SearchHandler.php index 84b2da6bbe2..83559813047 100644 --- a/module/VuFindSearch/src/VuFindSearch/Backend/Solr/SearchHandler.php +++ b/module/VuFindSearch/src/VuFindSearch/Backend/Solr/SearchHandler.php @@ -53,7 +53,7 @@ class SearchHandler */ protected static $configKeys = [ 'CustomMunge', 'DismaxFields', 'DismaxHandler', 'QueryFields', - 'DismaxParams', 'FilterQuery' + 'DismaxParams', 'FilterQuery', 'DismaxMunge' ]; /** @@ -125,6 +125,22 @@ class SearchHandler return $this->createQueryString($search, false); } + /** + * Apply standard pre-processing to the query string. 
+ * + * @param string $search Search string + * + * @return string + */ + public function preprocessQueryString($search) + { + // Apply Dismax munging, if required: + if ($this->hasDismax()) { + return $this->dismaxMunge($search); + } + return $search; + } + /** * Return an advanced query string for specified search string. * @@ -381,34 +397,64 @@ class SearchHandler foreach ($this->specs['CustomMunge'] as $mungeName => $mungeOps) { $mungeValues[$mungeName] = $search; foreach ($mungeOps as $operation) { - switch ($operation[0]) { - case 'append': - $mungeValues[$mungeName] .= $operation[1]; - break; - case 'lowercase': - $mungeValues[$mungeName] = strtolower($mungeValues[$mungeName]); - break; - case 'preg_replace': - $mungeValues[$mungeName] = preg_replace( - $operation[1], $operation[2], $mungeValues[$mungeName] - ); - break; - case 'ucfirst': - $mungeValues[$mungeName] = ucfirst($mungeValues[$mungeName]); - break; - case 'uppercase': - $mungeValues[$mungeName] = strtoupper($mungeValues[$mungeName]); - break; - default: - throw new \InvalidArgumentException( - sprintf('Unknown munge operation: %s', $operation[0]) - ); - } + $mungeValues[$mungeName] + = $this->customMunge($mungeValues[$mungeName], $operation); } } return $mungeValues; } + /** + * Apply custom search string munging to a Dismax query. + * + * @param string $search searchstring + * + * @return string + */ + protected function dismaxMunge($search) + { + foreach ($this->specs['DismaxMunge'] as $operation) { + $search = $this->customMunge($search, $operation); + } + return $search; + } + + /** + * Apply a munge operation to a search string. 
+ * + * @param string $string string to munge + * @param array $operation munge operation + * + * @return string + */ + protected function customMunge($string, $operation) + { + switch ($operation[0]) { + case 'append': + $string .= $operation[1]; + break; + case 'lowercase': + $string = strtolower($string); + break; + case 'preg_replace': + $string = preg_replace( + $operation[1], $operation[2], $string + ); + break; + case 'ucfirst': + $string = ucfirst($string); + break; + case 'uppercase': + $string = strtoupper($string); + break; + default: + throw new \InvalidArgumentException( + sprintf('Unknown munge operation: %s', $operation[0]) + ); + } + return $string; + } + /** * Return query string for specified search string. * @@ -426,7 +472,9 @@ class SearchHandler // Extended Dismax available), let's build a Dismax subquery to avoid // some of the ugly side effects of our Lucene query generation logic. if (($this->hasExtendedDismax() || !$advanced) && $this->hasDismax()) { - $query = $this->dismaxSubquery($search); + $query = $this->dismaxSubquery( + $this->dismaxMunge($search) + ); } else { $mungeRules = $this->mungeRules(); // Do not munge w/o rules diff --git a/module/VuFindSearch/tests/unit-tests/src/VuFindTest/Backend/Solr/SearchHandlerTest.php b/module/VuFindSearch/tests/unit-tests/src/VuFindTest/Backend/Solr/SearchHandlerTest.php index 206ffc7e6cc..143d5eb82a5 100644 --- a/module/VuFindSearch/tests/unit-tests/src/VuFindTest/Backend/Solr/SearchHandlerTest.php +++ b/module/VuFindSearch/tests/unit-tests/src/VuFindTest/Backend/Solr/SearchHandlerTest.php @@ -75,7 +75,7 @@ class SearchHandlerTest extends TestCase { $spec = ['DismaxParams' => [['foo', 'bar'], ['mm', '100%']], 'DismaxFields' => ['field1', 'field2']]; $hndl = new SearchHandler($spec); - $defaults = ['CustomMunge' => [], 'DismaxHandler' => 'dismax', 'QueryFields' => [], 'FilterQuery' => []]; + $defaults = ['CustomMunge' => [], 'DismaxHandler' => 'dismax', 'QueryFields' => [], 'FilterQuery' => [], 
'DismaxMunge' => []]; $this->assertEquals($spec + $defaults, $hndl->toArray()); } @@ -131,4 +131,29 @@ class SearchHandlerTest extends TestCase $hndl->createSimpleQueryString('abc"123*') ); } + + /** + * Test dismax munge rules. + * + * @return void + */ + public function testPreprocessQueryString() + { + // fake munge rules based on a simplified version of default searchspecs.yaml + $spec = [ + 'DismaxMunge' => [ + ['uppercase'], + ['preg_replace', '/[ "]/', ""], + ['preg_replace', '/\*+$/', ""] + ], + 'DismaxFields' => ['callnumber'], + 'DismaxHandler' => 'dismax' + ]; + + $hndl = new SearchHandler($spec); + $this->assertEquals( + 'ABC123', + $hndl->preprocessQueryString('abc"123*') + ); + } } -- GitLab