From 835f253ac73be050126e68243533c4e12c7bd3b6 Mon Sep 17 00:00:00 2001 From: Dorian Merz <merz@ub.uni-leipzig.de> Date: Thu, 23 Jul 2020 14:34:16 +0200 Subject: [PATCH] refs #17528 [master] refactor MungerInjection * can now apply more than one type of pre munge * works for other query handlers now * TODO: adapt running instances --- local/config/vufind/searches.ini | 15 ++++-- .../MungerInjectionDelegatorFactory.php | 53 ++++++++++++++----- 2 files changed, 49 insertions(+), 19 deletions(-) diff --git a/local/config/vufind/searches.ini b/local/config/vufind/searches.ini index f6d6e5cdecf..1755366b266 100644 --- a/local/config/vufind/searches.ini +++ b/local/config/vufind/searches.ini @@ -98,11 +98,6 @@ retain_filters_by_default = true ;default_filters[] = "institution:MyInstitution" ;default_filters[] = "(format:Book AND institution:MyInstitution)" -; the escaped_colon_searches is used by a listener on the search-pre event -; registered by the MungerInjectionFactory. This listener masks colons in the query string with a backslash -; whenever the search handler is one of the following -escaped_colon_searches[] = "Signatur" - [Cache] ; This controls whether the parsed searchspecs.yaml file will be stored to ; improve search performance; legal options are APC (use APC cache), File (store @@ -677,3 +672,13 @@ height = 320 ;params = "qf=title,title_short,callnumber-label,topic,language,author,publishDate mintf=1 mindf=1"; ; This setting can be used to limit the maximum number of suggestions. Default is 5. ;count = 5 + +; PreMunge is used by a listener on the search-pre event +; registered by the MungerInjectionDelegatorFactory. This listener applies +; the regex patterns as configured for the search handlers +; use the following configuration to define pattern and replacement as used in @see preg_replace +; Handler[pattern] = regex_pattern with delimiters (e.g. slashes at beginning and end) +; Handler[replace] = replacement +[PreMunge] +Signatur[pattern] = '/(?<=\:)\s/' +Signatur[replace] = '\ ' \ No newline at end of file diff --git a/module/finc/src/finc/Service/MungerInjectionDelegatorFactory.php b/module/finc/src/finc/Service/MungerInjectionDelegatorFactory.php index 1d65c7b2af5..9bc29e20a83 100644 --- a/module/finc/src/finc/Service/MungerInjectionDelegatorFactory.php +++ b/module/finc/src/finc/Service/MungerInjectionDelegatorFactory.php @@ -56,9 +56,9 @@ class MungerInjectionDelegatorFactory implements DelegatorFactoryInterface protected $instance; /** - * @var array names of search handlers for which colons should be escaped + * @var array configuration for pre-search munging */ - protected $searches_to_escape; + protected $preMungerConfig; /** * @var array shard configuration to register in all queries @@ -86,16 +86,14 @@ class MungerInjectionDelegatorFactory implements DelegatorFactoryInterface $searchConfig = $container->get('VuFind\Config')->get('searches'); $e = $instance->getEventManager()->getSharedManager(); - $handlers = $searchConfig->General->escaped_colon_searches; - if (!empty($handlers)) { - $this->searches_to_escape = $handlers->toArray(); + if ($this->validateMungerConfig($searchConfig)) { $e->attach( 'VuFindSearch', 'pre', function (EventInterface $event) { $params = $event->getParams(); if (isset($params['query'])) { - $params['query'] = $this->escapeColons($params['query']); + $params['query'] = $this->preMunge($params['query']); } } ); @@ -120,26 +118,53 @@ class MungerInjectionDelegatorFactory implements DelegatorFactoryInterface * * @return mixed */ - private function escapeColons($queryOrGroup) + private function preMunge($queryOrGroup) { if ($queryOrGroup instanceof QueryGroup) { $handler = $queryOrGroup->getReducedHandler(); - if (is_null($handler) || in_array($handler, $this->searches_to_escape)) { + if (is_null($handler) || isset($this->preMungerConfig[$handler])) { foreach ($queryOrGroup->getQueries() as $query) { - $this->escapeColons($query); + $this->preMunge($query); } } - } elseif (in_array($queryOrGroup->getHandler(), $this->searches_to_escape)) { + } elseif ($mungerConfig = $this->preMungerConfig[$queryOrGroup->getHandler()] ?? null) { $queryOrGroup->setString( - // mask whitespaces that follow a colon - // that avoids the removal of that very colon via - // \VuFindSearch\Backend\Solr\LuceneSyntaxHelper::normalizeColons - preg_replace('/(?<=\:)\s/', '\ ', $queryOrGroup->getString()) + // apply preg_replace as provided by config + preg_replace($mungerConfig['pattern'], $mungerConfig['replace'], $queryOrGroup->getString()) ); } return $queryOrGroup; } + /** + * @param Config $searchConfig + */ + protected function validateMungerConfig($searchConfig) { + + $config = $searchConfig->PreMunge; + if (empty($config)) { + // an empty configuration is always valid + return null; + } + $preMungerConfig = $config->toArray(); + foreach ($preMungerConfig as $handler => $handlerConfig) { + if (!isset($handlerConfig['pattern']) + || + !isset($handlerConfig['replace']) + ) { + throw new \ConfigurationException("PreMunge configuration for $handler is invalid"); + } + try { + // try preg_replace so the regex engine tells us about more errors + preg_replace($handlerConfig['pattern'], $handlerConfig['replace'], ''); + } catch (\ErrorException $e) { + throw new \ConfigurationException("PreMunge configuration for $handler is invalid. Regex error: ".'"'.$e->getMessage().'"'); + } + } + $this->preMungerConfig = $preMungerConfig; + return true; + } + /** * Event Listener on Search/Pre that registers all configured shards for every * search request -- GitLab