From 41db58c76cf8e2d8c82bb63a22d4e0fbf449557a Mon Sep 17 00:00:00 2001 From: Ere Maijala <ere.maijala@helsinki.fi> Date: Mon, 6 Jan 2014 12:26:29 -0500 Subject: [PATCH] Added support for "exact" searches - Exact search = search terms in quotes using different search specs. --- config/vufind/searchspecs.yaml | 52 +++++++++++++++++++ .../Backend/Solr/QueryBuilder.php | 48 +++++++++++++---- 2 files changed, 91 insertions(+), 9 deletions(-) diff --git a/config/vufind/searchspecs.yaml b/config/vufind/searchspecs.yaml index 5f5f86fbd41..e9733937d27 100644 --- a/config/vufind/searchspecs.yaml +++ b/config/vufind/searchspecs.yaml @@ -51,6 +51,12 @@ # # for an example. This is applied whether we use DismaxFields or # # QueryFields. # FilterQuery: (optional Lucene filter query) +# ExactSettings: +# DismaxFields: ... +# QueryFields: ... +# # All the same settings as above, but for exact searches, i.e. search terms +# # enclosed in quotes. Allows different fields or weights for exact +# # searches. See below for commented-out examples. # # ...etc. # @@ -208,6 +214,14 @@ Subject: era: - [and, 100] - [or, ~] +# ExactSettings: +# DismaxFields: +# - topic_unstemmed^150 +# QueryFields: +# topic_unstemmed: +# - [onephrase, 350] +# - [and, 150] +# - [or, ~] # This field definition is a compromise that supports both journal-level and # article-level data.
The disadvantage is that hits in article titles will @@ -253,6 +267,14 @@ JournalTitle: - [onephrase, 50] - [and , ~] FilterQuery: "format:Journal OR format:Article" +# ExactSettings: +# DismaxFields: +# - title_full_unstemmed^450 +# QueryFields: +# title_full_unstemmed: +# - [onephrase, 450] +# - [and, 400] +# FilterQuery: "format:Journal OR format:Article" Title: DismaxFields: @@ -288,6 +310,13 @@ Title: series2: - [onephrase, 50] - [and , ~] +# ExactSettings: +# DismaxFields: +# - title_full_unstemmed^450 +# QueryFields: +# title_full_unstemmed: +# - [onephrase, 450] +# - [and, 400] Series: DismaxFields: @@ -382,6 +411,29 @@ AllFields: - [onephrase, ~] issn: - [onephrase, ~] +# ExactSettings: +# DismaxFields: +# - title_full_unstemmed^600 +# - topic_unstemmed^550 +# - allfields_unstemmed^10 +# - fulltext_unstemmed^10 +# - isbn +# - issn +# QueryFields: +# title_full_unstemmed: +# - [onephrase, 600] +# - [and, 500] +# topic_unstemmed: +# - [onephrase, 550] +# - [and, 500] +# allfields_unstemmed: +# - [or, 10] +# fulltext_unstemmed: +# - [or, 10] +# isbn: +# - [onephrase, ~] +# issn: +# - [onephrase, ~] # These are advanced searches that never use Dismax: id: diff --git a/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php b/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php index 2571bdc92db..cc821c5dfc9 100644 --- a/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php +++ b/module/VuFindSearch/src/VuFindSearch/Backend/Solr/QueryBuilder.php @@ -61,7 +61,14 @@ class QueryBuilder implements QueryBuilderInterface * * @var array */ - protected $specs; + protected $specs = array(); + + /** + * Search specs for exact searches. + * + * @var array + */ + protected $exactSpecs = array(); /** * Should we create the hl.q parameter when appropriate?
@@ -128,7 +135,7 @@ class QueryBuilder implements QueryBuilderInterface } $string = $query->getString() ?: '*:*'; - $handler = $this->getSearchHandler($query->getHandler()); + $handler = $this->getSearchHandler($query->getHandler(), $string); if (!($handler && $handler->hasExtendedDismax()) && $this->getLuceneHelper()->containsAdvancedLuceneSyntax($string) @@ -204,6 +211,12 @@ class QueryBuilder implements QueryBuilderInterface public function setSpecs(array $specs) { foreach ($specs as $handler => $spec) { + if (isset($spec['ExactSettings'])) { + $this->exactSpecs[strtolower($handler)] = new SearchHandler( + $spec['ExactSettings'], $this->defaultDismaxHandler + ); + unset($spec['ExactSettings']); + } $this->specs[strtolower($handler)] = new SearchHandler($spec, $this->defaultDismaxHandler); } @@ -239,18 +252,32 @@ class QueryBuilder implements QueryBuilderInterface /** * Return named search handler. * - * @param string $handler Search handler name + * @param string $handler Search handler name + * @param string $searchString Search query * * @return SearchHandler|null */ - protected function getSearchHandler($handler) + protected function getSearchHandler($handler, $searchString) { $handler = $handler ? strtolower($handler) : $handler; - if ($handler && isset($this->specs[$handler])) { - return $this->specs[$handler]; - } else { - return null; + if ($handler) { + // Since we will rarely have exactSpecs set, it is less expensive + // to check for a handler first before doing multiple string + // operations to determine eligibility for exact handling. + if (isset($this->exactSpecs[$handler])) { + $searchString = isset($searchString) ? 
trim($searchString) : ''; + if (strlen($searchString) > 1 + && substr($searchString, 0, 1) == '"' + && substr($searchString, -1, 1) == '"' + ) { + return $this->exactSpecs[$handler]; + } + } + if (isset($this->specs[$handler])) { + return $this->specs[$handler]; + } } + return null; } /** @@ -292,7 +319,10 @@ class QueryBuilder implements QueryBuilderInterface } else { $searchString = $this->getLuceneHelper() ->normalizeSearchString($component->getString()); - $searchHandler = $this->getSearchHandler($component->getHandler()); + $searchHandler = $this->getSearchHandler( + $component->getHandler(), + $searchString + ); if ($searchHandler) { $searchString = $this->createSearchString($searchString, $searchHandler); -- GitLab