From 835f253ac73be050126e68243533c4e12c7bd3b6 Mon Sep 17 00:00:00 2001
From: Dorian Merz <merz@ub.uni-leipzig.de>
Date: Thu, 23 Jul 2020 14:34:16 +0200
Subject: [PATCH] refs #17528 [master] refactor MungerInjection

* can now apply more than one type of pre munge
* works for other query handlers now
* TODO: adapt running instances
---
 local/config/vufind/searches.ini              | 15 ++++--
 .../MungerInjectionDelegatorFactory.php       | 53 ++++++++++++++-----
 2 files changed, 49 insertions(+), 19 deletions(-)

diff --git a/local/config/vufind/searches.ini b/local/config/vufind/searches.ini
index f6d6e5cdecf..1755366b266 100644
--- a/local/config/vufind/searches.ini
+++ b/local/config/vufind/searches.ini
@@ -98,11 +98,6 @@ retain_filters_by_default = true
 ;default_filters[] = "institution:MyInstitution"
 ;default_filters[] = "(format:Book AND institution:MyInstitution)"
 
-; the escaped_colon_searches is used by a listener on the search-pre event
-; registered by the MungerInjectionFactory. This listener masks colons in the query string with a backslash
-; whenever the search handler is one of the following
-escaped_colon_searches[] = "Signatur"
-
 [Cache]
 ; This controls whether the parsed searchspecs.yaml file will be stored to
 ; improve search performance; legal options are APC (use APC cache), File (store
@@ -677,3 +672,13 @@ height = 320
 ;params = "qf=title,title_short,callnumber-label,topic,language,author,publishDate mintf=1 mindf=1";
 ; This setting can be used to limit the maximum number of suggestions. Default is 5.
 ;count = 5
+
+; PreMunge is used by a listener on the search-pre event that is registered by the
+; MungerInjectionDelegatorFactory. For each search handler configured here, the listener
+; rewrites the query string via PHP's preg_replace. Both keys are required per handler:
+; Handler[pattern] = regex pattern including delimiters (e.g. slashes at beginning and end)
+; Handler[replace] = replacement string
+; Example: the Signatur rule below escapes whitespace that directly follows a colon.
+[PreMunge]
+Signatur[pattern] = '/(?<=\:)\s/'
+Signatur[replace] = '\ '
\ No newline at end of file
diff --git a/module/finc/src/finc/Service/MungerInjectionDelegatorFactory.php b/module/finc/src/finc/Service/MungerInjectionDelegatorFactory.php
index 1d65c7b2af5..9bc29e20a83 100644
--- a/module/finc/src/finc/Service/MungerInjectionDelegatorFactory.php
+++ b/module/finc/src/finc/Service/MungerInjectionDelegatorFactory.php
@@ -56,9 +56,9 @@ class MungerInjectionDelegatorFactory implements DelegatorFactoryInterface
     protected $instance;
 
     /**
-     * @var array names of search handlers for which colons should be escaped
+     * @var array pre-munge rules keyed by search handler name, each holding 'pattern' and 'replace'
      */
-    protected $searches_to_escape;
+    protected $preMungerConfig;
 
     /**
      * @var array shard configuration to register in all queries
@@ -86,16 +86,14 @@ class MungerInjectionDelegatorFactory implements DelegatorFactoryInterface
         $searchConfig = $container->get('VuFind\Config')->get('searches');
         $e = $instance->getEventManager()->getSharedManager();
 
-        $handlers = $searchConfig->General->escaped_colon_searches;
-        if (!empty($handlers)) {
-            $this->searches_to_escape = $handlers->toArray();
+        if ($this->validateMungerConfig($searchConfig)) {
             $e->attach(
                 'VuFindSearch',
                 'pre',
                 function (EventInterface $event) {
                     $params = $event->getParams();
                     if (isset($params['query'])) {
-                        $params['query'] = $this->escapeColons($params['query']);
+                        $params['query'] = $this->preMunge($params['query']);
                     }
                 }
             );
@@ -120,26 +118,53 @@ class MungerInjectionDelegatorFactory implements DelegatorFactoryInterface
      *
      * @return mixed
      */
-    private function escapeColons($queryOrGroup)
+    private function preMunge($queryOrGroup)
     {
         if ($queryOrGroup instanceof QueryGroup) {
             $handler = $queryOrGroup->getReducedHandler();
-            if (is_null($handler) || in_array($handler, $this->searches_to_escape)) {
+            if (is_null($handler) || isset($this->preMungerConfig[$handler])) {
                 foreach ($queryOrGroup->getQueries() as $query) {
-                    $this->escapeColons($query);
+                    $this->preMunge($query);
                 }
             }
-        } elseif (in_array($queryOrGroup->getHandler(), $this->searches_to_escape)) {
+        } elseif ($mungerConfig = $this->preMungerConfig[$queryOrGroup->getHandler()] ?? null) {
             $queryOrGroup->setString(
-                // mask whitespaces that follow a colon
-                // that avoids the removal of that very colon via
-                // \VuFindSearch\Backend\Solr\LuceneSyntaxHelper::normalizeColons
-                preg_replace('/(?<=\:)\s/', '\ ', $queryOrGroup->getString())
+                // rewrite with the handler's configured rule (NOTE(review): preg_replace returns null on PCRE errors)
+                preg_replace($mungerConfig['pattern'], $mungerConfig['replace'], $queryOrGroup->getString())
             );
         }
         return $queryOrGroup;
     }
 
+    /**
+     * Validate the PreMunge rules (dry-running each regex) and store them in $this->preMungerConfig.
+     * @param Config $searchConfig searches configuration holding the PreMunge section
+     * @return bool true if rules were stored, false if no section is configured
+     * @throws \ConfigurationException on an invalid rule (NOTE(review): confirm this global class exists)
+     */
+    protected function validateMungerConfig($searchConfig)
+    {
+        if (empty($searchConfig->PreMunge)) {
+            return false;
+        }
+        $preMungerConfig = $searchConfig->PreMunge->toArray();
+        foreach ($preMungerConfig as $handler => $rule) {
+            if (!isset($rule['pattern'], $rule['replace'])) {
+                throw new \ConfigurationException("PreMunge configuration for $handler is invalid");
+            }
+            try {
+                $error = null === preg_replace($rule['pattern'], $rule['replace'], '') ? 'PCRE error code ' . preg_last_error() : null;
+            } catch (\ErrorException $e) {
+                $error = $e->getMessage();
+            }
+            if ($error !== null) {
+                throw new \ConfigurationException("PreMunge configuration for $handler is invalid. Regex error: ".'"'.$error.'"');
+            }
+        }
+        $this->preMungerConfig = $preMungerConfig;
+        return true;
+    }
+
     /**
      * Event Listener on Search/Pre that registers all configured shards for every
      * search request
-- 
GitLab