From 52eaab95392ec1c70cbff0c77e68db6ec49fbfdc Mon Sep 17 00:00:00 2001
From: Dorian Merz <merz@ub.uni-leipzig.de>
Date: Thu, 17 Jun 2021 08:24:40 +0200
Subject: [PATCH] refs #19761 [finc] override lucene syntax detection via
 search-pre listener
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* set non-lucene-syntax handling configurable on a per-SearchType basis

co-authored by: André Lahmann <lahmann@ub.uni-leipzig.de>
* added some more comments to searches.ini
---
 local/config/vufind/searches.ini              |  9 +++
 .../Service/LuceneSyntaxHelperOverride.php    | 63 +++++++++++++++++++
 .../MungerInjectionDelegatorFactory.php       | 55 ++++++++++++++++
 3 files changed, 127 insertions(+)
 create mode 100644 module/finc/src/finc/Service/LuceneSyntaxHelperOverride.php

diff --git a/local/config/vufind/searches.ini b/local/config/vufind/searches.ini
index 1dbec8eddd3..82440759a23 100644
--- a/local/config/vufind/searches.ini
+++ b/local/config/vufind/searches.ini
@@ -98,6 +98,15 @@ retain_filters_by_default = true
 ;default_filters[] = "institution:MyInstitution"
 ;default_filters[] = "(format:Book AND institution:MyInstitution)"
 
+; Avoid query rebuilding in case the user injects a query in Lucene-like syntax.
+; Provide an array of Search Types for which the Lucene query syntax detection
+; MUST NOT be applied.
+; Note: this does not work for Search Types used in advanced search, because
+;       the override would have to be set either for all Search Types used in
+;       advanced search or for none at all. Disabling advanced Lucene detection
+;       for only a part of a search does not work!
+;override_advanced_lucene_detection[] = Signatur;
+
 [Cache]
 ; This controls whether the parsed searchspecs.yaml file will be stored to
 ; improve search performance; legal options are APC (use APC cache), File (store
diff --git a/module/finc/src/finc/Service/LuceneSyntaxHelperOverride.php b/module/finc/src/finc/Service/LuceneSyntaxHelperOverride.php
new file mode 100644
index 00000000000..0629e85c8cf
--- /dev/null
+++ b/module/finc/src/finc/Service/LuceneSyntaxHelperOverride.php
@@ -0,0 +1,63 @@
+<?php
+/**
+ * LuceneSyntaxHelper Override
+ *
+ * A LuceneSyntaxHelper subclass whose advanced Lucene syntax detection always
+ * returns false, so that matching queries are not rebuilt.
+ *
+ * PHP version 7
+ *
+ * Copyright (C) Leipzig University Library 2021.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ * @category VuFind
+ * @package  Finc/Service
+ * @author   Dorian Merz <merz@ub.uni-leipzig.de>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     https://vufind.org Main Page
+ */
+namespace finc\Service;
+
+/**
+ * LuceneSyntaxHelper override that never reports advanced Lucene syntax
+ *
+ * @category VuFind
+ * @package  Finc/Service
+ * @author   Dorian Merz <merz@ub.uni-leipzig.de>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     https://vufind.org Main Page
+ */
+class LuceneSyntaxHelperOverride extends \VuFindSearch\Backend\Solr\LuceneSyntaxHelper
+{
+    public function __construct(\VuFindSearch\Backend\Solr\LuceneSyntaxHelper $parentHelper)
+    {
+        parent::__construct( // reuse the given helper's case sensitivity settings
+            $parentHelper->caseSensitiveBooleans,
+            $parentHelper->caseSensitiveRanges
+        );
+    }
+
+    /**
+     * Always reports that the search string contains no advanced Lucene syntax.
+     * Overridden to avoid query rebuilding.
+     *
+     * @param string $searchString Search string (ignored by this override)
+     * @return false
+     */
+    public function containsAdvancedLuceneSyntax($searchString)
+    {
+        return false;
+    }
+}
diff --git a/module/finc/src/finc/Service/MungerInjectionDelegatorFactory.php b/module/finc/src/finc/Service/MungerInjectionDelegatorFactory.php
index 9bc29e20a83..a7e35943f00 100644
--- a/module/finc/src/finc/Service/MungerInjectionDelegatorFactory.php
+++ b/module/finc/src/finc/Service/MungerInjectionDelegatorFactory.php
@@ -65,6 +65,11 @@ class MungerInjectionDelegatorFactory implements DelegatorFactoryInterface
      */
     protected $shards_to_register;
 
+    /**
+     * @var array search types (as array keys) to be excluded from Lucene query syntax detection
+     */
+    protected $override_advanced_lucene_detection;
+
     /**
      * Creates a delegator of VuFind/Search to register several listeners.
      *
@@ -106,6 +111,12 @@ class MungerInjectionDelegatorFactory implements DelegatorFactoryInterface
             $this->shards_to_register = $shards;
             $e->attach('VuFindSearch', 'pre', [$this, 'registerShards']);
         }
+        if (isset($searchConfig->General->override_advanced_lucene_detection)) {
+            $this->override_advanced_lucene_detection = array_flip($searchConfig->General->override_advanced_lucene_detection->toArray());
+            if (!empty($this->override_advanced_lucene_detection)) {
+                $e->attach('VuFindSearch', 'pre', [$this, 'overrideLuceneSyntaxHelper']);
+            }
+        }
         return $instance;
     }
 
@@ -180,4 +191,48 @@ class MungerInjectionDelegatorFactory implements DelegatorFactoryInterface
             $params->set('shards', implode(',', $this->shards_to_register));
         }
     }
+
+    public function overrideLuceneSyntaxHelper(EventInterface $event)
+    {
+        $params = $event->getParams(); // params of the VuFindSearch 'pre' event
+        if (
+            $params['context'] == 'search'
+            && isset($params['query'])
+            && $this->isLuceneReducible($params['query']) // uses a configured search type
+        ) {
+            $builder = $event->getTarget()->getQueryBuilder();
+            $currentHelper = $builder->getLuceneHelper();
+            $newHelper = new LuceneSyntaxHelperOverride($currentHelper);
+            $builder->setLuceneHelper($newHelper); // NOTE: not restored in this method
+        }
+    }
+
+
+    /**
+     * Tells whether a query uses a handler listed in $override_advanced_lucene_detection.
+     * A QueryGroup is searched recursively, but only if its reduced handler is null or listed.
+     * cf. \VuFindSearch\Backend\Solr\LuceneSyntaxHelper::containsAdvancedLuceneSyntax
+     *
+     * @param Query|QueryGroup $queryOrGroup query or query group to inspect
+     *
+     * @return bool true if a listed handler was found, false otherwise
+     */
+    private function isLuceneReducible($queryOrGroup)
+    {
+        if ($queryOrGroup instanceof QueryGroup) {
+            $handler = $queryOrGroup->getReducedHandler();
+            if (is_null($handler) || isset($this->override_advanced_lucene_detection[$handler])) {
+                foreach ($queryOrGroup->getQueries() as $query) {
+                    // cycle through all recursive subqueries, if any of these matches
+                    // the whole query should not be checked for lucene syntax
+                    if ($this->isLuceneReducible($query)) {
+                        return true;
+                    }
+                }
+            }
+        } elseif (isset($this->override_advanced_lucene_detection[$queryOrGroup->getHandler()])) {
+            return true;
+        }
+        return false;
+    }
 }
-- 
GitLab