From c3b649b1acf0976a19734f818cb4e80c95fbbbaa Mon Sep 17 00:00:00 2001 From: Dorian Merz <merz@ub.uni-leipzig.de> Date: Tue, 22 Jan 2019 10:50:23 +0100 Subject: [PATCH] refs #14353 - search for callnumber w/ colon * introduces MungerInjectionFactory * registers several listeners on Search Pre event ** escapes colons in queries for configured handlers (e.g. Signatur) ** registers all configured shards for every query --- local/config/vufind/searches.ini | 10 ++ module/finc/config/module.config.php | 10 +- .../finc/Service/MungerInjectionFactory.php | 109 ++++++++++++++++++ 3 files changed, 128 insertions(+), 1 deletion(-) create mode 100644 module/finc/src/finc/Service/MungerInjectionFactory.php diff --git a/local/config/vufind/searches.ini b/local/config/vufind/searches.ini index ab20a8fc4da..f6d6e5cdecf 100644 --- a/local/config/vufind/searches.ini +++ b/local/config/vufind/searches.ini @@ -98,6 +98,11 @@ retain_filters_by_default = true ;default_filters[] = "institution:MyInstitution" ;default_filters[] = "(format:Book AND institution:MyInstitution)" +; the escaped_colon_searches is used by a listener on the search-pre event +; registered by the MungerInjectionFactory. 
This listener backslash-escapes the whitespace that follows a colon in the query string +; whenever the search handler is one of the following +escaped_colon_searches[] = "Signatur" + [Cache] ; This controls whether the parsed searchspecs.yaml file will be stored to ; improve search performance; legal options are APC (use APC cache), File (store @@ -554,6 +559,11 @@ topic = "Subjects" ; specified preferences using checkboxes (default if commented out = all shards): ;defaultChecked[] = "Library Catalog" ;defaultChecked[] = "Website" +; The following line defines shards (comma-separated shard names) that shall NOT be added automatically to Solr requests; +; all other shards present in the [IndexShards] section will be added to any Solr query that does not already specify shards +; via a listener from the MungerInjectionFactory +;on_user_search_only = "finc-live" + ; Fields must be stripped if you have a field in your main index which is missing ; from any index includable by shards. This section can be ignored if you are diff --git a/module/finc/config/module.config.php b/module/finc/config/module.config.php index b6b91aa270e..2f5bcbbc70e 100644 --- a/module/finc/config/module.config.php +++ b/module/finc/config/module.config.php @@ -13,7 +13,15 @@ $config = [ 'VuFind\Export' => 'finc\Service\Factory::getExport', 'VuFind\SessionManager' => 'finc\Session\ManagerFactory', 'VuFind\CookieManager' => 'finc\Service\Factory::getCookieManager' - ] + ], + 'invokables' => [ + 'mungerinjectionfactory' => 'finc\Service\MungerInjectionFactory' + ], + 'delegators' => [ + 'VuFind\Search' => [ + 'mungerinjectionfactory' + ], + ], ], 'controllers' => [ 'factories' => [ diff --git a/module/finc/src/finc/Service/MungerInjectionFactory.php b/module/finc/src/finc/Service/MungerInjectionFactory.php new file mode 100644 index 00000000000..b0a4460f826 --- /dev/null +++ b/module/finc/src/finc/Service/MungerInjectionFactory.php @@ -0,0 +1,109 @@ +<?php + +namespace finc\Service; + +use VuFindSearch\Query\Query, + VuFindSearch\Query\QueryGroup; +use Zend\Config\Config; +use 
Zend\ServiceManager\DelegatorFactoryInterface,
+    Zend\ServiceManager\ServiceLocatorInterface;
+use Zend\EventManager\EventInterface;
+use VuFindSearch\Service as SearchService;
+
+/**
+ * Class MungerInjectionFactory
+ *
+ * A delegator factory that registers listeners on the "pre" event triggered
+ * by the VuFind\Search service:
+ *  - one that backslash-escapes whitespace following a colon in queries whose
+ *    search handler is listed in [General] escaped_colon_searches, and
+ *  - one that adds the configured [IndexShards] to requests that do not
+ *    specify any shards themselves.
+ *
+ * @package finc\Service
+ */
+class MungerInjectionFactory implements DelegatorFactoryInterface
+{
+    /**
+     * Not assigned anywhere in this class; kept for backward compatibility
+     * with possible subclasses.
+     *
+     * @var SearchService
+     */
+    protected $instance;
+
+    /**
+     * @var array names of search handlers for which queries should be escaped
+     */
+    protected $searches_to_escape = [];
+
+    /**
+     * @var array shard configuration (name => value as found in [IndexShards])
+     *            to register in all queries
+     */
+    protected $shards_to_register = [];
+
+    /**
+     * Creates a delegator of VuFind\Search to register several listeners.
+     *
+     * The service produced by $callback is returned unchanged; only listeners
+     * are attached to the shared event manager of its event manager.
+     *
+     * @param ServiceLocatorInterface $serviceLocator used to fetch the "searches" configuration
+     * @param string                  $name           canonical service name (unused)
+     * @param string                  $requestedName  requested service name (unused)
+     * @param callable                $callback       creates the original service instance
+     * @return mixed the instance created by $callback
+     */
+    public function createDelegatorWithName(ServiceLocatorInterface $serviceLocator, $name, $requestedName, $callback)
+    {
+        $instance = call_user_func($callback);
+        $searchConfig = $serviceLocator->get('VuFind\Config')->get('searches');
+        $sharedEvents = $instance->getEventManager()->getSharedManager();
+
+        // isset() guards against missing sections/keys, which would otherwise
+        // end in a method call on null.
+        $handlers = $this->configValueToList(
+            isset($searchConfig->General->escaped_colon_searches)
+                ? $searchConfig->General->escaped_colon_searches
+                : null
+        );
+        if (!empty($handlers)) {
+            $this->searches_to_escape = $handlers;
+            $sharedEvents->attach('VuFind\Search', 'pre', [$this, 'escapeColonsInQuery']);
+        }
+
+        $shards = isset($searchConfig->IndexShards)
+            ? $searchConfig->IndexShards->toArray()
+            : [];
+        $excludedShards = $this->configValueToList(
+            isset($searchConfig->ShardPreferences->on_user_search_only)
+                ? $searchConfig->ShardPreferences->on_user_search_only
+                : null
+        );
+        if (!empty($excludedShards)) {
+            // shards are keyed by their name, hence the flip of the name list
+            $shards = array_diff_key($shards, array_flip($excludedShards));
+        }
+        if (!empty($shards)) {
+            $this->shards_to_register = $shards;
+            $sharedEvents->attach('VuFind\Search', 'pre', [$this, 'registerShards']);
+        }
+
+        return $instance;
+    }
+
+    /**
+     * Normalizes a configuration value to a list of non-empty, trimmed strings.
+     *
+     * Accepts a config object offering toArray() (ini "key[] = value" form) or
+     * a comma-separated string (ini "key = a,b" form). Anything else yields
+     * an empty list.
+     *
+     * @param mixed $value raw configuration value
+     * @return array
+     */
+    private function configValueToList($value)
+    {
+        if (is_object($value) && method_exists($value, 'toArray')) {
+            $value = $value->toArray();
+        } elseif (is_string($value)) {
+            $value = explode(',', $value);
+        } else {
+            return [];
+        }
+
+        $list = [];
+        foreach ($value as $entry) {
+            if (!is_scalar($entry)) {
+                continue;
+            }
+            $entry = trim((string)$entry);
+            if ($entry !== '') {
+                $list[] = $entry;
+            }
+        }
+        return $list;
+    }
+
+    /**
+     * Event listener on Search/Pre that escapes the query found in the event
+     * parameter "query", if there is one.
+     *
+     * @param EventInterface $event
+     */
+    public function escapeColonsInQuery(EventInterface $event)
+    {
+        $query = $event->getParam('query');
+        if (null !== $query) {
+            // Query objects are modified in place, no need to write them back
+            $this->escapeColons($query);
+        }
+    }
+
+    /**
+     * Backslash-escapes whitespace that directly follows a colon in a Query,
+     * or recursively in the queries of a QueryGroup.
+     *
+     * Only queries whose handler is listed in $searches_to_escape are changed.
+     * A QueryGroup is descended into when its reduced handler is null or is
+     * one of the listed handlers.
+     *
+     * Original author's intent: prevent such queries from being interpreted
+     * as advanced queries in Lucene syntax,
+     * cf. \VuFindSearch\Backend\Solr\LuceneSyntaxHelper::containsAdvancedLuceneSyntax
+     *
+     * @param Query|QueryGroup $queryOrGroup
+     * @return mixed the object passed in (modified in place)
+     */
+    private function escapeColons($queryOrGroup)
+    {
+        if ($queryOrGroup instanceof QueryGroup) {
+            $handler = $queryOrGroup->getReducedHandler();
+            if (null === $handler || in_array($handler, $this->searches_to_escape, true)) {
+                foreach ($queryOrGroup->getQueries() as $query) {
+                    $this->escapeColons($query);
+                }
+            }
+        } elseif ($queryOrGroup instanceof Query
+            && in_array($queryOrGroup->getHandler(), $this->searches_to_escape, true)
+        ) {
+            // mask whitespaces that follow a colon
+            // that avoids the removal of that very colon via
+            // \VuFindSearch\Backend\Solr\LuceneSyntaxHelper::normalizeColons
+            $escaped = preg_replace('/(?<=\:)\s/', '\ ', $queryOrGroup->getString());
+            // preg_replace() returns null on error; keep the original string then
+            if (null !== $escaped) {
+                $queryOrGroup->setString($escaped);
+            }
+        }
+        return $queryOrGroup;
+    }
+
+    /**
+     * Event listener on Search/Pre that registers all configured shards for
+     * every search request that does not already carry a "shards" parameter.
+     *
+     * NOTE(review): the listener does not look at the event's backend, so it
+     * presumably also fires for non-Solr backends - confirm this is intended.
+     *
+     * @param EventInterface $event
+     */
+    public function registerShards(EventInterface $event)
+    {
+        $params = $event->getParam('params');
+        if (!is_object($params)) {
+            // no parameter container present, nothing to register shards on
+            return;
+        }
+        $currentShards = $params->get('shards');
+        if (empty($currentShards)) {
+            $params->set('shards', implode(',', $this->shards_to_register));
+        }
+    }
+}
\ No newline at end of file
-- 
GitLab