From c511f85a769e21294fcb5f8b47d528a1568a72d8 Mon Sep 17 00:00:00 2001
From: Hajo Seng <hajo.seng@sub.uni-hamburg.de>
Date: Thu, 12 Sep 2019 18:04:21 +0200
Subject: [PATCH] Custom Dismax Munge (#1419)

---
 config/vufind/searchspecs.yaml                | 14 ++-
 .../Backend/Solr/QueryBuilder.php             |  1 +
 .../Backend/Solr/SearchHandler.php            | 98 ++++++++++++++-----
 .../Backend/Solr/SearchHandlerTest.php        | 27 ++++-
 4 files changed, 112 insertions(+), 28 deletions(-)

diff --git a/config/vufind/searchspecs.yaml b/config/vufind/searchspecs.yaml
index 23a64e6fe96..a3fd4a6ead7 100644
--- a/config/vufind/searchspecs.yaml
+++ b/config/vufind/searchspecs.yaml
@@ -4,7 +4,8 @@
 # Format is:
 #  searchType:
 #    # CustomMunge is an optional section to define custom pre-processing of
-#    #     user input.  See below for details of munge actions.
+#    #     user input when Dismax does not apply.  See below for details of
+#    #     munge actions.
 #    CustomMunge:
 #      MungeName1:
 #        - [action1, actionParams]
@@ -12,6 +13,15 @@
 #        - [action3, actionParams]
 #      MungeName2:
 #        - [action1, actionParams]
+#    # While CustomMunge only applies to Lucene search, DismaxMunge is a
+#    #     counterpart that applies to Dismax and eDismax queries. DismaxMunge
+#    #     contains a chain of custom munge operations as described below. Note
+#    #     that using this function is NOT RECOMMENDED if you can accomplish your
+#    #     goals more elegantly through Solr schema adjustments, but it may be
+#    #     useful in some situations, such as when you are connecting to a Solr
+#    #     instance that is beyond your direct control. Here is an example stanza:
+#    DismaxMunge:
+#      - [preg_replace, '/^.*$/', '"$1"']
 #    # DismaxFields is optional and defines the fields sent to the Dismax handler
 #    #     when we are able to use it.  QueryFields will be used for advanced
 #    #     searches that Dismax cannot support.  QueryFields is always used if no
@@ -114,7 +124,7 @@
 #
 #-----------------------------------------------------------------------------------
 #
-# Munge types are based on the original Solr.php code, and consist of:
+# Munge types are string manipulation rules, and consist of:
 #
 # onephrase: eliminate all quotes and do it as a single phrase.
 #   testing "one two"
diff --git a/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php b/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php
index 89105257391..cfaee9c6b32 100644
--- a/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php
+++ b/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php
@@ -144,6 +144,7 @@ class QueryBuilder implements QueryBuilderInterface
         $highlight = !empty($this->fieldsToHighlight);
 
         if ($handler = $this->getSearchHandler($finalQuery->getHandler(), $string)) {
+            $string = $handler->preprocessQueryString($string);
             if (!$handler->hasExtendedDismax()
                 && $this->getLuceneHelper()->containsAdvancedLuceneSyntax($string)
             ) {
diff --git a/module/VuFindSearch/src/VuFindSearch/Backend/Solr/SearchHandler.php b/module/VuFindSearch/src/VuFindSearch/Backend/Solr/SearchHandler.php
index 84b2da6bbe2..83559813047 100644
--- a/module/VuFindSearch/src/VuFindSearch/Backend/Solr/SearchHandler.php
+++ b/module/VuFindSearch/src/VuFindSearch/Backend/Solr/SearchHandler.php
@@ -53,7 +53,7 @@ class SearchHandler
      */
     protected static $configKeys = [
         'CustomMunge', 'DismaxFields', 'DismaxHandler', 'QueryFields',
-        'DismaxParams', 'FilterQuery'
+        'DismaxParams', 'FilterQuery', 'DismaxMunge'
     ];
 
     /**
@@ -125,6 +125,22 @@ class SearchHandler
         return $this->createQueryString($search, false);
     }
 
+    /**
+     * Apply standard pre-processing to the query string.
+     *
+     * @param string $search Search string
+     *
+     * @return string
+     */
+    public function preprocessQueryString($search)
+    {
+        // Apply Dismax munging, if required:
+        if ($this->hasDismax()) {
+            return $this->dismaxMunge($search);
+        }
+        return $search;
+    }
+
     /**
      * Return an advanced query string for specified search string.
      *
@@ -381,34 +397,64 @@ class SearchHandler
         foreach ($this->specs['CustomMunge'] as $mungeName => $mungeOps) {
             $mungeValues[$mungeName] = $search;
             foreach ($mungeOps as $operation) {
-                switch ($operation[0]) {
-                case 'append':
-                    $mungeValues[$mungeName] .= $operation[1];
-                    break;
-                case 'lowercase':
-                    $mungeValues[$mungeName] = strtolower($mungeValues[$mungeName]);
-                    break;
-                case 'preg_replace':
-                    $mungeValues[$mungeName] = preg_replace(
-                        $operation[1], $operation[2], $mungeValues[$mungeName]
-                    );
-                    break;
-                case 'ucfirst':
-                    $mungeValues[$mungeName] = ucfirst($mungeValues[$mungeName]);
-                    break;
-                case 'uppercase':
-                    $mungeValues[$mungeName] = strtoupper($mungeValues[$mungeName]);
-                    break;
-                default:
-                    throw new \InvalidArgumentException(
-                        sprintf('Unknown munge operation: %s', $operation[0])
-                    );
-                }
+                $mungeValues[$mungeName]
+                    = $this->customMunge($mungeValues[$mungeName], $operation);
             }
         }
         return $mungeValues;
     }
 
+    /**
+     * Apply custom search string munging to a Dismax query.
+     *
+     * @param string $search searchstring
+     *
+     * @return string
+     */
+    protected function dismaxMunge($search)
+    {
+        foreach ($this->specs['DismaxMunge'] as $operation) {
+            $search = $this->customMunge($search, $operation);
+        }
+        return $search;
+    }
+
+    /**
+     * Apply a munge operation to a search string.
+     *
+     * @param string $string    string to munge
+     * @param array  $operation munge operation
+     *
+     * @return string
+     */
+    protected function customMunge($string, $operation)
+    {
+        switch ($operation[0]) {
+        case 'append':
+            $string .= $operation[1];
+            break;
+        case 'lowercase':
+            $string = strtolower($string);
+            break;
+        case 'preg_replace':
+            $string = preg_replace(
+                $operation[1], $operation[2], $string
+            );
+            break;
+        case 'ucfirst':
+            $string = ucfirst($string);
+            break;
+        case 'uppercase':
+            $string = strtoupper($string);
+            break;
+        default:
+            throw new \InvalidArgumentException(
+                sprintf('Unknown munge operation: %s', $operation[0])
+            );
+        }
+        return $string;
+    }
+
     /**
      * Return query string for specified search string.
      *
@@ -426,7 +472,9 @@ class SearchHandler
         // Extended Dismax available), let's build a Dismax subquery to avoid
         // some of the ugly side effects of our Lucene query generation logic.
         if (($this->hasExtendedDismax() || !$advanced) && $this->hasDismax()) {
-            $query = $this->dismaxSubquery($search);
+            $query = $this->dismaxSubquery(
+                $this->dismaxMunge($search)
+            );
         } else {
             $mungeRules  = $this->mungeRules();
             // Do not munge w/o rules
diff --git a/module/VuFindSearch/tests/unit-tests/src/VuFindTest/Backend/Solr/SearchHandlerTest.php b/module/VuFindSearch/tests/unit-tests/src/VuFindTest/Backend/Solr/SearchHandlerTest.php
index 206ffc7e6cc..143d5eb82a5 100644
--- a/module/VuFindSearch/tests/unit-tests/src/VuFindTest/Backend/Solr/SearchHandlerTest.php
+++ b/module/VuFindSearch/tests/unit-tests/src/VuFindTest/Backend/Solr/SearchHandlerTest.php
@@ -75,7 +75,7 @@ class SearchHandlerTest extends TestCase
     {
         $spec = ['DismaxParams' => [['foo', 'bar'], ['mm', '100%']], 'DismaxFields' => ['field1', 'field2']];
         $hndl = new SearchHandler($spec);
-        $defaults = ['CustomMunge' => [], 'DismaxHandler' => 'dismax', 'QueryFields' => [], 'FilterQuery' => []];
+        $defaults = ['CustomMunge' => [], 'DismaxHandler' => 'dismax', 'QueryFields' => [], 'FilterQuery' => [], 'DismaxMunge' => []];
         $this->assertEquals($spec + $defaults, $hndl->toArray());
     }
 
@@ -131,4 +131,29 @@ class SearchHandlerTest extends TestCase
             $hndl->createSimpleQueryString('abc"123*')
         );
     }
+
+    /**
+     * Test dismax munge rules.
+     *
+     * @return void
+     */
+    public function testPreprocessQueryString()
+    {
+        // fake munge rules based on a simplified version of default searchspecs.yaml
+        $spec = [
+            'DismaxMunge' => [
+                ['uppercase'],
+                ['preg_replace', '/[ "]/', ""],
+                ['preg_replace', '/\*+$/', ""]
+            ],
+            'DismaxFields' => ['callnumber'],
+            'DismaxHandler' => 'dismax'
+        ];
+
+        $hndl = new SearchHandler($spec);
+        $this->assertEquals(
+            'ABC123',
+            $hndl->preprocessQueryString('abc"123*')
+        );
+    }
 }
-- 
GitLab