From 476eeb08db361d8f30c5245828d33c58f30fd2ee Mon Sep 17 00:00:00 2001 From: Demian Katz <demian.katz@villanova.edu> Date: Mon, 11 Jan 2021 14:05:15 -0500 Subject: [PATCH] Upgrade to Solr 7.7.3. (#1793) --- build.xml | 2 +- config/vufind/searches.ini | 5 +- solr/vufind/authority/conf/solrconfig.xml | 566 +++++++++++++++++ solr/vufind/biblio/conf/solrconfig.xml | 722 ++++++++++++++++++++++ solr/vufind/reserves/conf/solrconfig.xml | 570 +++++++++++++++++ solr/vufind/website/conf/solrconfig.xml | 659 ++++++++++++++++++++ 6 files changed, 2522 insertions(+), 2 deletions(-) create mode 100644 solr/vufind/authority/conf/solrconfig.xml create mode 100644 solr/vufind/biblio/conf/solrconfig.xml create mode 100644 solr/vufind/reserves/conf/solrconfig.xml create mode 100644 solr/vufind/website/conf/solrconfig.xml diff --git a/build.xml b/build.xml index f646a3d044e..bce7ff961f1 100644 --- a/build.xml +++ b/build.xml @@ -28,7 +28,7 @@ <property name="selenium_browser" value="firefox" /> <property name="snooze_multiplier" value="1" /><!-- can be used to slow down tests (selenium only) --> <property name="solr_startup_sleep" value="0" /> - <property name="solr_version" value="7.4.0" /> + <property name="solr_version" value="7.7.3" /> <property name="skip_phpdoc" value="false" /> <property name="phpdoc_version" value="3.0.0-alpha.4" /> diff --git a/config/vufind/searches.ini b/config/vufind/searches.ini index fe51f11395d..65d8446857f 100644 --- a/config/vufind/searches.ini +++ b/config/vufind/searches.ini @@ -577,7 +577,10 @@ container_title = "Journal Title" ; This section allows sharding to be used to pull in content from additional Solr ; servers. All servers used in sharding must contain the same index fields needed ; to satisfy queries sent to them AND they must all include different ID numbers! -; Leave this commented out to disable sharding. +; Note that additional Solr configuration may also be required; see the "Configuring +; the ShardHandlerFactory" section of this document for details: +; https://lucene.apache.org/solr/guide/7_7/distributed-requests.html +; Leave this section commented out to disable sharding. ; To use sharding, simply fill in lines using the format: ; [display name of shard] = [URL of shard (without http://)] ;[IndexShards] diff --git a/solr/vufind/authority/conf/solrconfig.xml b/solr/vufind/authority/conf/solrconfig.xml new file mode 100644 index 00000000000..7e9d9d67d50 --- /dev/null +++ b/solr/vufind/authority/conf/solrconfig.xml @@ -0,0 +1,566 @@ +<?xml version="1.0" encoding="UTF-8" ?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<config> + <!-- Set this to 'false' if you want solr to continue working after it has + encountered an severe configuration error. In a production environment, + you may want solr to keep working even if one handler is mis-configured. + + You may also set this to false using by setting the system property: + -Dsolr.abortOnConfigurationError=false + --> + <abortOnConfigurationError>${solr.abortOnConfigurationError:false}</abortOnConfigurationError> + + <!-- Controls what version of Lucene various components of Solr + adhere to. Generally, you want to use the latest version to + get all bug fixes and improvements. It is highly recommended + that you fully re-index after changing this setting as it can + affect both how text is indexed and queried. + --> + <luceneMatchVersion>7.7.3</luceneMatchVersion> + + <!-- Use the classic schema style by default for VuFind --> + <schemaFactory class="ClassicIndexSchemaFactory"/> + + <!-- Used to specify an alternate directory to hold all index data + other than the default ./data under the Solr home. + If replication is in use, this should match the replication configuration. --> + <dataDir>${solr.solr.home:./solr}/authority</dataDir> + + + <indexConfig> + + <useCompoundFile>false</useCompoundFile> + + + <!-- ramBufferSizeMB sets the amount of RAM that may be used by Lucene + indexing for buffering added documents and deletions before they are + flushed to the Directory. + maxBufferedDocs sets a limit on the number of documents buffered + before flushing. + If both ramBufferSizeMB and maxBufferedDocs is set, then + Lucene will flush based on whichever limit is hit first. --> + <ramBufferSizeMB>100</ramBufferSizeMB> + <!-- <maxBufferedDocs>1000</maxBufferedDocs> --> + + + <!-- Commit Deletion Policy + + Custom deletion policies can be specified here. The class must + implement org.apache.lucene.index.IndexDeletionPolicy. + + http://lucene.apache.org/java/3_5_0/api/core/org/apache/lucene/index/IndexDeletionPolicy.html + + The default Solr IndexDeletionPolicy implementation supports + deleting index commit points on number of commits, age of + commit point and optimized status. + + The latest commit point should always be preserved regardless + of the criteria. + --> + <deletionPolicy class="solr.SolrDeletionPolicy"> + <!-- Keep only optimized commit points --> + <str name="keepOptimizedOnly">false</str> + <!-- The maximum number of commit points to be kept --> + <str name="maxCommitsToKeep">1</str> + <!-- + Delete all commit points once they have reached the given age. + Supports DateMathParser syntax e.g. + + <str name="maxCommitAge">30MINUTES</str> + <str name="maxCommitAge">1DAY</str> + --> + </deletionPolicy> + + </indexConfig> + + <!-- Enables JMX if and only if an existing MBeanServer is found, use + this if you want to configure JMX through JVM parameters. Remove + this to disable exposing Solr configuration and statistics to JMX. + + If you want to connect to a particular server, specify the agentId + e.g. <jmx agentId="myAgent" /> + + If you want to start a new MBeanServer, specify the serviceUrl + e.g <jmx serviceurl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr" /> + + For more details see http://wiki.apache.org/solr/SolrJmx + --> + <jmx /> + + <!-- the default high-performance update handler --> + <updateHandler class="solr.DirectUpdateHandler2"> + + + <!-- Enables a transaction log, used for real-time get, durability, and + and solr cloud replica recovery. The log can grow as big as + uncommitted changes to the index, so use of a hard autoCommit + is recommended (see below). + "dir" - the target directory for transaction logs, defaults to the + solr data directory. --> + <updateLog> + <str name="dir">${solr.ulog.dir:}</str> + </updateLog> + + <!-- A prefix of "solr." for class names is an alias that + causes solr to search appropriate packages, including + org.apache.solr.(search|update|request|core|analysis) + --> + + <!-- Perform a <commit/> automatically under certain conditions: + maxDocs - number of updates since last commit is greater than this + maxTime - oldest uncommited update (in ms) is this long ago + --> + <autoCommit> + <maxTime>15000</maxTime> + <openSearcher>false</openSearcher> + </autoCommit> + + <!-- The RunExecutableListener executes an external command. + exe - the name of the executable to run + dir - dir to use as the current working directory. default="." + wait - the calling thread waits until the executable returns. default="true" + args - the arguments to pass to the program. default=nothing + env - environment variables to set. default=nothing + --> + <!-- A postCommit event is fired after every commit or optimize command + <listener event="postCommit" class="solr.RunExecutableListener"> + <str name="exe">solr/bin/snapshooter</str> + <str name="dir">.</str> + <bool name="wait">true</bool> + <arr name="args"> <str>arg1</str> <str>arg2</str> </arr> + <arr name="env"> <str>MYVAR=val1</str> </arr> + </listener> + --> + <!-- A postOptimize event is fired only after every optimize command, useful + in conjunction with index distribution to only distribute optimized indices + <listener event="postOptimize" class="solr.RunExecutableListener"> + <str name="exe">snapshooter</str> + <str name="dir">solr/bin</str> + <bool name="wait">true</bool> + </listener> + --> + + </updateHandler> + + + <query> + <!-- Maximum number of clauses in a boolean query... can affect + range or prefix queries that expand to big boolean + queries. An exception is thrown if exceeded. --> + <maxBooleanClauses>1024</maxBooleanClauses> + + + <!-- Cache used by SolrIndexSearcher for filters (DocSets), + unordered sets of *all* documents that match a query. + When a new searcher is opened, its caches may be prepopulated + or "autowarmed" using data from caches in the old searcher. + autowarmCount is the number of items to prepopulate. For LRUCache, + the autowarmed items will be the most recently accessed items. + Parameters: + class - the SolrCache implementation (currently only LRUCache) + size - the maximum number of entries in the cache + initialSize - the initial capacity (number of entries) of + the cache. (seel java.util.HashMap) + autowarmCount - the number of entries to prepopulate from + and old cache. + --> + + <!-- VuFind uses the default Solr settings for filterCache, which should produce a high + hit rate. You can carefully adjust the filterCache and check the hit ratio in + admin>plugins>stats --> + <filterCache + class="solr.FastLRUCache" + size="512" + initialSize="512" + autowarmCount="256"/> + + <!-- queryResultCache caches results of searches - ordered lists of + document ids (DocList) based on a query, a sort, and the range + of documents requested. --> + <queryResultCache + class="solr.LRUCache" + size="512" + initialSize="512" + autowarmCount="256"/> + + <!-- documentCache caches Lucene Document objects (the stored fields for each document). + Since Lucene internal document ids are transient, this cache will not be autowarmed. --> + <documentCache + class="solr.LRUCache" + size="16384" + initialSize="16384"/> + + <!-- If true, stored fields that are not requested will be loaded lazily. + + This can result in a significant speed improvement if the usual case is to + not load all stored fields, especially if the skipped fields are large compressed + text fields. + --> + <enableLazyFieldLoading>false</enableLazyFieldLoading> + + <!-- Example of a generic cache. These caches may be accessed by name + through SolrIndexSearcher.getCache(),cacheLookup(), and cacheInsert(). + The purpose is to enable easy caching of user/application level data. + The regenerator argument should be specified as an implementation + of solr.search.CacheRegenerator if autowarming is desired. --> + <!-- + <cache name="myUserCache" + class="solr.LRUCache" + size="4096" + initialSize="1024" + autowarmCount="1024" + regenerator="org.mycompany.mypackage.MyRegenerator" + /> + --> + + <!-- An optimization that attempts to use a filter to satisfy a search. + If the requested sort does not include score, then the filterCache + will be checked for a filter matching the query. If found, the filter + will be used as the source of document ids, and then the sort will be + applied to that. + <useFilterForSortedQuery>true</useFilterForSortedQuery> + --> + + <!-- An optimization for use with the queryResultCache. When a search + is requested, a superset of the requested number of document ids + are collected. For example, if a search for a particular query + requests matching documents 10 through 19, and queryWindowSize is 50, + then documents 0 through 49 will be collected and cached. Any further + requests in that range can be satisfied via the cache. --> + <queryResultWindowSize>50</queryResultWindowSize> + + <!-- Maximum number of documents to cache for any entry in the + queryResultCache. --> + <queryResultMaxDocsCached>200</queryResultMaxDocsCached> + + <!-- This entry enables an int hash representation for filters (DocSets) + when the number of items in the set is less than maxSize. For smaller + sets, this representation is more memory efficient, more efficient to + iterate over, and faster to take intersections. --> + <HashDocSet maxSize="3000" loadFactor="0.75"/> + + <!-- a newSearcher event is fired whenever a new searcher is being prepared + and there is a current searcher handling requests (aka registered). --> + <!-- QuerySenderListener takes an array of NamedList and executes a + local query request for each NamedList in sequence. --> + <!-- + <listener event="newSearcher" class="solr.QuerySenderListener"> + <arr name="queries"> + <lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> </lst> + <lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst> + </arr> + </listener> + --> + + <!-- a firstSearcher event is fired whenever a new searcher is being + prepared but there is no current registered searcher to handle + requests or to gain autowarming data from. --> + <!-- + <listener event="firstSearcher" class="solr.QuerySenderListener"> + <arr name="queries"> + <lst> <str name="q">fast_warm</str> <str name="start">0</str> <str name="rows">10</str> </lst> + </arr> + </listener> + --> + + <!-- If a search request comes in and there is no current registered searcher, + then immediately register the still warming searcher and use it. If + "false" then all requests will block until the first searcher is done + warming. --> + <useColdSearcher>false</useColdSearcher> + + <!-- Maximum number of searchers that may be warming in the background + concurrently. An error is returned if this limit is exceeded. Recommend + 1-2 for read-only slaves, higher for masters w/o cache warming. --> + <maxWarmingSearchers>4</maxWarmingSearchers> + + </query> + + <!-- + Let the dispatch filter handler /select?qt=XXX + handleSelect=true will use consistent error handling for /select and /update + handleSelect=false will use solr1.1 style error formatting + --> + <requestDispatcher handleSelect="true" > + <!--Make sure your system has some authentication before enabling remote streaming! --> + <requestParsers enableRemoteStreaming="false" + multipartUploadLimitInKB="2048" + formdataUploadLimitInKB="2048"/> + + <!-- Set HTTP caching related parameters (for proxy caches and clients). + + To get the behaviour of Solr 1.2 (ie: no caching related headers) + use the never304="true" option and do not specify a value for + <cacheControl> + --> + <!-- <httpCaching never304="true"> --> + <httpCaching lastModifiedFrom="openTime" + etagSeed="Solr"> + <!-- lastModFrom="openTime" is the default, the Last-Modified value + (and validation against If-Modified-Since requests) will all be + relative to when the current Searcher was opened. + You can change it to lastModFrom="dirLastMod" if you want the + value to exactly correspond to when the physical index was last + modified. + + etagSeed="..." is an option you can change to force the ETag + header (and validation against If-None-Match requests) to be + different even if the index has not changed (ie: when making + significant changes to your config file) + + lastModifiedFrom and etagSeed are both ignored if you use the + never304="true" option. + --> + <!-- If you include a <cacheControl> directive, it will be used to + generate a Cache-Control header, as well as an Expires header + if the value contains "max-age=" + + By default, no Cache-Control header is generated. + + You can use the <cacheControl> option even if you have set + never304="true" + --> + <!-- <cacheControl>max-age=30, public</cacheControl> --> + </httpCaching> + </requestDispatcher> + + + <!-- requestHandler plugins... incoming queries will be dispatched to the + correct handler based on the path or the qt (query type) param. + Names starting with a '/' are accessed with the a path equal to the + registered name. Names without a leading '/' are accessed with: + http://host/app/select?qt=name + If no qt is defined, the requestHandler that declares default="true" + will be used. + --> + <requestHandler name="standard" class="solr.SearchHandler" default="true"> + <!-- default values for query parameters may optionally be defined here + <lst name="defaults"> + <int name="rows">10</int> + <str name="fl">*</str> + <str name="version">2.1</str> + <lst> + --> + <lst name="defaults"> + <str name="echoParams">explicit</str> + <str name="q.op">AND</str> + <str name="df">allfields</str> + </lst> + </requestHandler> + + <!-- the following two handlers will be used for eligible dismax searches defined + in searchspecs.yaml. The edismax handler will be used most of the time, unless + a specific configuration tells VuFind to use traditional dismax instead. You + can use these handler definitions to set global Dismax settings (e.g. mm / bf). + If you need different settings for different types of searches (e.g. Title vs. + Author), you can also configure individual settings in the searchspecs.yaml + file. + --> + <requestHandler name="dismax" class="solr.SearchHandler"> + <lst name="defaults"> + <str name="defType">dismax</str> + <str name="echoParams">explicit</str> + <str name="q.op">AND</str> + <str name="df">allfields</str> + </lst> + </requestHandler> + + <requestHandler name="edismax" class="solr.SearchHandler"> + <lst name="defaults"> + <str name="defType">edismax</str> + <str name="echoParams">explicit</str> + <str name="lowercaseOperators">false</str> + <str name="q.op">AND</str> + <str name="df">allfields</str> + </lst> + </requestHandler> + + <!-- Search component for extracting terms (useful for sitemap generation) --> + <searchComponent name="terms" class="solr.TermsComponent"> + </searchComponent> + + <!-- + + Search components are registered to SolrCore and used by Search Handlers + + By default, the following components are available: + + <searchComponent name="query" class="org.apache.solr.handler.component.QueryComponent" /> + <searchComponent name="facet" class="org.apache.solr.handler.component.FacetComponent" /> + <searchComponent name="mlt" class="org.apache.solr.handler.component.MoreLikeThisComponent" /> + <searchComponent name="highlight" class="org.apache.solr.handler.component.HighlightComponent" /> + <searchComponent name="stats" class="org.apache.solr.handler.component.StatsComponent" /> + <searchComponent name="debug" class="org.apache.solr.handler.component.DebugComponent" /> + + If you register a searchComponent to one of the standard names, that will be used instead. + + --> + + <!-- Request handler to extract terms (useful for sitemap generation) --> + <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy"> + <arr name="components"> + <str>terms</str> + </arr> + </requestHandler> + + <!-- Field Analysis Request Handler + + RequestHandler that provides much the same functionality as + analysis.jsp. Provides the ability to specify multiple field + types and field names in the same request and outputs + index-time and query-time analysis for each of them. + + Request parameters are: + analysis.fieldname - field name whose analyzers are to be used + + analysis.fieldtype - field type whose analyzers are to be used + analysis.fieldvalue - text for index-time analysis + q (or analysis.q) - text for query time analysis + analysis.showmatch (true|false) - When set to true and when + query analysis is performed, the produced tokens of the + field value analysis will be marked as "matched" for every + token that is produces by the query analysis + --> + <requestHandler name="/analysis/field" + startup="lazy" + class="solr.FieldAnalysisRequestHandler" /> + + + <!-- Document Analysis Handler + + http://wiki.apache.org/solr/AnalysisRequestHandler + + An analysis handler that provides a breakdown of the analysis + process of provided documents. This handler expects a (single) + content stream with the following format: + + <docs> + <doc> + <field name="id">1</field> + <field name="name">The Name</field> + <field name="text">The Text Value</field> + </doc> + <doc>...</doc> + <doc>...</doc> + ... + </docs> + + Note: Each document must contain a field which serves as the + unique key. This key is used in the returned response to associate + an analysis breakdown to the analyzed document. + + Like the FieldAnalysisRequestHandler, this handler also supports + query analysis by sending either an "analysis.query" or "q" + request parameter that holds the query text to be analyzed. It + also supports the "analysis.showmatch" parameter which when set to + true, all field tokens that match the query tokens will be marked + as a "match". + --> + <requestHandler name="/analysis/document" + class="solr.DocumentAnalysisRequestHandler" + startup="lazy" /> + + + <!-- ping/healthcheck --> + <requestHandler name="/admin/ping" class="PingRequestHandler"> + <lst name="defaults"> + <str name="qt">standard</str> + <str name="q">solrpingquery</str> + <str name="echoParams">all</str> + </lst> + </requestHandler> + + <!-- Echo the request contents back to the client --> + <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" > + <lst name="defaults"> + <str name="echoParams">explicit</str> <!-- for all params (including the default etc) use: 'all' --> + <str name="echoHandler">true</str> + </lst> + </requestHandler> + + <searchComponent class="solr.HighlightComponent" name="highlight"> + <highlighting> + <!-- Configure the standard fragmenter --> + <!-- This could most likely be commented out in the "default" case --> + <fragmenter name="gap" class="org.apache.solr.highlight.GapFragmenter" default="true"> + <lst name="defaults"> + <int name="hl.fragsize">100</int> + </lst> + </fragmenter> + + <!-- A regular-expression-based fragmenter (f.i., for sentence extraction) --> + <fragmenter name="regex" class="org.apache.solr.highlight.RegexFragmenter"> + <lst name="defaults"> + <!-- slightly smaller fragsizes work better because of slop --> + <int name="hl.fragsize">70</int> + <!-- allow 50% slop on fragment sizes --> + <float name="hl.regex.slop">0.5</float> + <!-- a basic sentence pattern --> + <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str> + </lst> + </fragmenter> + + <!-- Configure the standard formatter --> + <formatter name="html" class="org.apache.solr.highlight.HtmlFormatter" default="true"> + <lst name="defaults"> + <str name="hl.simple.pre"><![CDATA[<em>]]></str> + <str name="hl.simple.post"><![CDATA[</em>]]></str> + </lst> + </formatter> + </highlighting> + </searchComponent> + + <!-- queryResponseWriter plugins... query responses will be written using the + writer specified by the 'wt' request parameter matching the name of a registered + writer. + The "default" writer is the default and will be used if 'wt' is not specified + in the request. XMLResponseWriter will be used if nothing is specified here. + The json, python, and ruby writers are also available by default. + + <queryResponseWriter name="xml" class="org.apache.solr.request.XMLResponseWriter" default="true"/> + <queryResponseWriter name="json" class="org.apache.solr.request.JSONResponseWriter"/> + <queryResponseWriter name="python" class="org.apache.solr.request.PythonResponseWriter"/> + <queryResponseWriter name="ruby" class="org.apache.solr.request.RubyResponseWriter"/> + <queryResponseWriter name="php" class="org.apache.solr.request.PHPResponseWriter"/> + <queryResponseWriter name="phps" class="org.apache.solr.request.PHPSerializedResponseWriter"/> + + <queryResponseWriter name="custom" class="com.example.MyResponseWriter"/> + --> + + <!-- XSLT response writer transforms the XML output by any xslt file found + in Solr's conf/xslt directory. Changes to xslt files are checked for + every xsltCacheLifetimeSeconds. + --> + <queryResponseWriter name="xslt" class="org.apache.solr.response.XSLTResponseWriter"> + <int name="xsltCacheLifetimeSeconds">5</int> + </queryResponseWriter> + + <!-- Mitigate CVE-2017-12629 --> + <queryParser name="xmlparser" class="solr.ExtendedDismaxQParserPlugin"/> + + <!-- config for the admin interface --> + <admin> + <defaultQuery>shakespeare</defaultQuery> + + <!-- configure a healthcheck file for servers behind a loadbalancer + <healthcheck type="file">server-enabled</healthcheck> + --> + </admin> + +</config> diff --git a/solr/vufind/biblio/conf/solrconfig.xml b/solr/vufind/biblio/conf/solrconfig.xml new file mode 100644 index 00000000000..63333e141c7 --- /dev/null +++ b/solr/vufind/biblio/conf/solrconfig.xml @@ -0,0 +1,722 @@ +<?xml version="1.0" encoding="UTF-8" ?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<config> + <!-- Set this to 'false' if you want solr to continue working after it has + encountered an severe configuration error. In a production environment, + you may want solr to keep working even if one handler is mis-configured. + + You may also set this to false using by setting the system property: + -Dsolr.abortOnConfigurationError=false + --> + <abortOnConfigurationError>${solr.abortOnConfigurationError:false}</abortOnConfigurationError> + + <!-- Controls what version of Lucene various components of Solr + adhere to. Generally, you want to use the latest version to + get all bug fixes and improvements. It is highly recommended + that you fully re-index after changing this setting as it can + affect both how text is indexed and queried. + --> + <luceneMatchVersion>7.7.3</luceneMatchVersion> + + <!-- Use the classic schema style by default for VuFind --> + <schemaFactory class="ClassicIndexSchemaFactory"/> + + <!-- Used to specify an alternate directory to hold all index data + other than the default ./data under the Solr home. + If replication is in use, this should match the replication configuration. --> + <dataDir>${solr.solr.home:./solr}/biblio</dataDir> + + <lib dir="../../vendor/contrib/analysis-extras/lib" regex=".*\.jar" /> + <lib dir="../../vendor/contrib/analysis-extras/lucene-libs" regex=".*\.jar" /> + <lib dir="../jars" regex=".*\.jar" /> + + <indexConfig> + + <useCompoundFile>false</useCompoundFile> + + + <!-- ramBufferSizeMB sets the amount of RAM that may be used by Lucene + indexing for buffering added documents and deletions before they are + flushed to the Directory. + maxBufferedDocs sets a limit on the number of documents buffered + before flushing. + If both ramBufferSizeMB and maxBufferedDocs is set, then + Lucene will flush based on whichever limit is hit first. --> + <ramBufferSizeMB>100</ramBufferSizeMB> + <!-- <maxBufferedDocs>1000</maxBufferedDocs> --> + + + <!-- Commit Deletion Policy + + Custom deletion policies can be specified here. The class must + implement org.apache.lucene.index.IndexDeletionPolicy. + + http://lucene.apache.org/java/3_5_0/api/core/org/apache/lucene/index/IndexDeletionPolicy.html + + The default Solr IndexDeletionPolicy implementation supports + deleting index commit points on number of commits, age of + commit point and optimized status. + + The latest commit point should always be preserved regardless + of the criteria. + --> + <deletionPolicy class="solr.SolrDeletionPolicy"> + <!-- Keep only optimized commit points --> + <str name="keepOptimizedOnly">false</str> + <!-- The maximum number of commit points to be kept --> + <str name="maxCommitsToKeep">1</str> + <!-- + Delete all commit points once they have reached the given age. + Supports DateMathParser syntax e.g. + + <str name="maxCommitAge">30MINUTES</str> + <str name="maxCommitAge">1DAY</str> + --> + </deletionPolicy> + + </indexConfig> + + <!-- Enables JMX if and only if an existing MBeanServer is found, use + this if you want to configure JMX through JVM parameters. Remove + this to disable exposing Solr configuration and statistics to JMX. + + If you want to connect to a particular server, specify the agentId + e.g. <jmx agentId="myAgent" /> + + If you want to start a new MBeanServer, specify the serviceUrl + e.g <jmx serviceurl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr" /> + + For more details see http://wiki.apache.org/solr/SolrJmx + --> + <jmx /> + + <!-- the default high-performance update handler --> + <updateHandler class="solr.DirectUpdateHandler2"> + + + <!-- Enables a transaction log, used for real-time get, durability, and + and solr cloud replica recovery. The log can grow as big as + uncommitted changes to the index, so use of a hard autoCommit + is recommended (see below). + "dir" - the target directory for transaction logs, defaults to the + solr data directory. --> + <updateLog> + <str name="dir">${solr.ulog.dir:}</str> + </updateLog> + + <!-- A prefix of "solr." for class names is an alias that + causes solr to search appropriate packages, including + org.apache.solr.(search|update|request|core|analysis) + --> + + <!-- Perform a <commit/> automatically under certain conditions: + maxDocs - number of updates since last commit is greater than this + maxTime - oldest uncommited update (in ms) is this long ago + --> + <autoCommit> + <maxTime>15000</maxTime> + <openSearcher>false</openSearcher> + </autoCommit> + + <!-- The RunExecutableListener executes an external command. + exe - the name of the executable to run + dir - dir to use as the current working directory. default="." + wait - the calling thread waits until the executable returns. default="true" + args - the arguments to pass to the program. default=nothing + env - environment variables to set. default=nothing + --> + <!-- A postCommit event is fired after every commit or optimize command + <listener event="postCommit" class="solr.RunExecutableListener"> + <str name="exe">solr/bin/snapshooter</str> + <str name="dir">.</str> + <bool name="wait">true</bool> + <arr name="args"> <str>arg1</str> <str>arg2</str> </arr> + <arr name="env"> <str>MYVAR=val1</str> </arr> + </listener> + --> + <!-- A postOptimize event is fired only after every optimize command, useful + in conjunction with index distribution to only distribute optimized indices + <listener event="postOptimize" class="solr.RunExecutableListener"> + <str name="exe">snapshooter</str> + <str name="dir">solr/bin</str> + <bool name="wait">true</bool> + </listener> + --> + + </updateHandler> + + + <query> + <!-- Maximum number of clauses in a boolean query... can affect + range or prefix queries that expand to big boolean + queries. An exception is thrown if exceeded. --> + <maxBooleanClauses>1024</maxBooleanClauses> + + + <!-- Cache used by SolrIndexSearcher for filters (DocSets), + unordered sets of *all* documents that match a query. + When a new searcher is opened, its caches may be prepopulated + or "autowarmed" using data from caches in the old searcher. + autowarmCount is the number of items to prepopulate. For LRUCache, + the autowarmed items will be the most recently accessed items. + Parameters: + class - the SolrCache implementation (currently only LRUCache) + size - the maximum number of entries in the cache + initialSize - the initial capacity (number of entries) of + the cache. (seel java.util.HashMap) + autowarmCount - the number of entries to prepopulate from + and old cache. + --> + + <!-- VuFind uses the default Solr settings for filterCache, which should produce a high + hit rate. You can carefully adjust the filterCache and check the hit ratio in + admin>plugins>stats --> + <filterCache + class="solr.FastLRUCache" + size="512" + initialSize="512" + autowarmCount="256"/> + + <!-- queryResultCache caches results of searches - ordered lists of + document ids (DocList) based on a query, a sort, and the range + of documents requested. --> + <queryResultCache + class="solr.LRUCache" + size="512" + initialSize="512" + autowarmCount="256"/> + + <!-- documentCache caches Lucene Document objects (the stored fields for each document). + Since Lucene internal document ids are transient, this cache will not be autowarmed. --> + <documentCache + class="solr.LRUCache" + size="16384" + initialSize="16384"/> + + <!-- If true, stored fields that are not requested will be loaded lazily. + + This can result in a significant speed improvement if the usual case is to + not load all stored fields, especially if the skipped fields are large compressed + text fields. + --> + <enableLazyFieldLoading>true</enableLazyFieldLoading> + + <!-- Example of a generic cache. These caches may be accessed by name + through SolrIndexSearcher.getCache(),cacheLookup(), and cacheInsert(). + The purpose is to enable easy caching of user/application level data. + The regenerator argument should be specified as an implementation + of solr.search.CacheRegenerator if autowarming is desired. --> + <!-- + <cache name="myUserCache" + class="solr.LRUCache" + size="4096" + initialSize="1024" + autowarmCount="1024" + regenerator="org.mycompany.mypackage.MyRegenerator" + /> + --> + + <!-- An optimization that attempts to use a filter to satisfy a search. + If the requested sort does not include score, then the filterCache + will be checked for a filter matching the query. If found, the filter + will be used as the source of document ids, and then the sort will be + applied to that. + <useFilterForSortedQuery>true</useFilterForSortedQuery> + --> + + <!-- An optimization for use with the queryResultCache. When a search + is requested, a superset of the requested number of document ids + are collected. For example, if a search for a particular query + requests matching documents 10 through 19, and queryWindowSize is 50, + then documents 0 through 49 will be collected and cached. Any further + requests in that range can be satisfied via the cache. --> + <queryResultWindowSize>50</queryResultWindowSize> + + <!-- Maximum number of documents to cache for any entry in the + queryResultCache. --> + <queryResultMaxDocsCached>200</queryResultMaxDocsCached> + + <!-- This entry enables an int hash representation for filters (DocSets) + when the number of items in the set is less than maxSize. For smaller + sets, this representation is more memory efficient, more efficient to + iterate over, and faster to take intersections. --> + <HashDocSet maxSize="3000" loadFactor="0.75"/> + + <!-- a newSearcher event is fired whenever a new searcher is being prepared + and there is a current searcher handling requests (aka registered). --> + <!-- QuerySenderListener takes an array of NamedList and executes a + local query request for each NamedList in sequence. --> + <listener event="newSearcher" class="solr.QuerySenderListener"> + <arr name="queries"> + <lst> + <str name="q">science art business engineering history</str> + <str name="start">0</str> + <str name="rows">10</str> + </lst> + </arr> + </listener> + + <!-- a firstSearcher event is fired whenever a new searcher is being + prepared but there is no current registered searcher to handle + requests or to gain autowarming data from. --> + <listener event="firstSearcher" class="solr.QuerySenderListener"> + <arr name="queries"> + <lst> + <str name="q">science art business engineering history</str> + <str name="facet.field">format</str> + <str name="fq">format:book</str> + </lst> + </arr> + </listener> + + <!-- If a search request comes in and there is no current registered searcher, + then immediately register the still warming searcher and use it. If + "false" then all requests will block until the first searcher is done + warming. --> + <useColdSearcher>false</useColdSearcher> + + <!-- Maximum number of searchers that may be warming in the background + concurrently. An error is returned if this limit is exceeded. Recommend + 1-2 for read-only slaves, higher for masters w/o cache warming. --> + <maxWarmingSearchers>2</maxWarmingSearchers> + + </query> + + <!-- + Let the dispatch filter handler /select?qt=XXX + handleSelect=true will use consistent error handling for /select and /update + handleSelect=false will use solr1.1 style error formatting + --> + <requestDispatcher handleSelect="true" > + <!--Make sure your system has some authentication before enabling remote streaming! --> + <requestParsers enableRemoteStreaming="true" + multipartUploadLimitInKB="2048000" + formdataUploadLimitInKB="2048"/> + + <!-- Set HTTP caching related parameters (for proxy caches and clients). + + To get the behaviour of Solr 1.2 (ie: no caching related headers) + use the never304="true" option and do not specify a value for + <cacheControl> + --> + <!-- <httpCaching never304="true"> --> + <httpCaching lastModifiedFrom="openTime" + etagSeed="Solr"> + <!-- lastModFrom="openTime" is the default, the Last-Modified value + (and validation against If-Modified-Since requests) will all be + relative to when the current Searcher was opened. + You can change it to lastModFrom="dirLastMod" if you want the + value to exactly correspond to when the physical index was last + modified. + + etagSeed="..." is an option you can change to force the ETag + header (and validation against If-None-Match requests) to be + different even if the index has not changed (ie: when making + significant changes to your config file) + + lastModifiedFrom and etagSeed are both ignored if you use the + never304="true" option. + --> + <!-- If you include a <cacheControl> directive, it will be used to + generate a Cache-Control header, as well as an Expires header + if the value contains "max-age=" + + By default, no Cache-Control header is generated. + + You can use the <cacheControl> option even if you have set + never304="true" + --> + <!-- <cacheControl>max-age=30, public</cacheControl> --> + </httpCaching> + </requestDispatcher> + + + <!-- requestHandler plugins... incoming queries will be dispatched to the + correct handler based on the path or the qt (query type) param. + Names starting with a '/' are accessed with the a path equal to the + registered name. Names without a leading '/' are accessed with: + http://host/app/select?qt=name + If no qt is defined, the requestHandler that declares default="true" + will be used. + --> + <requestHandler name="standard" class="solr.SearchHandler" default="true"> + <!-- default values for query parameters --> + <lst name="defaults"> + <str name="echoParams">explicit</str> + <!-- + <int name="rows">10</int> + <str name="fl">*</str> + <str name="version">2.1</str> + --> + <str name="spellcheck.extendedResults">true</str> + <str name="spellcheck.onlyMorePopular">true</str> + <str name="spellcheck.count">20</str> + <str name="q.op">AND</str> + <str name="df">allfields</str> + </lst> + <arr name="last-components"> + <str>spellcheck</str> + </arr> + </requestHandler> + + <!-- the following two handlers will be used for eligible dismax searches defined + in searchspecs.yaml. The edismax handler will be used most of the time, unless + a specific configuration tells VuFind to use traditional dismax instead. You + can use these handler definitions to set global Dismax settings (e.g. mm / bf). + If you need different settings for different types of searches (e.g. Title vs. + Author), you can also configure individual settings in the searchspecs.yaml + file. + --> + <requestHandler name="dismax" class="solr.SearchHandler"> + <lst name="defaults"> + <str name="defType">dismax</str> + <!-- str name="fl">a*,b*,c*,d*,e*,first_indexed,format,fullrecord,g*,h*,i*,l*,o*,p*,r*,s*,t*,u*,*_date*,*_isn*,*_str*,*_txt*</str --> + <str name="echoParams">explicit</str> + <str name="spellcheck.extendedResults">true</str> + <str name="spellcheck.onlyMorePopular">true</str> + <str name="spellcheck.count">20</str> + <str name="q.op">AND</str> + <str name="df">allfields</str> + </lst> + <arr name="last-components"> + <str>spellcheck</str> + </arr> + </requestHandler> + + <requestHandler name="edismax" class="solr.SearchHandler"> + <lst name="defaults"> + <str name="defType">edismax</str> + <!-- str name="fl">a*,b*,c*,d*,e*,first_indexed,format,fullrecord,g*,h*,i*,l*,o*,p*,r*,s*,t*,u*,*_date*,*_isn*,*_str*,*_txt*</str --> + <str name="echoParams">explicit</str> + <str name="spellcheck.extendedResults">true</str> + <str name="spellcheck.onlyMorePopular">true</str> + <str name="spellcheck.count">20</str> + <str name="lowercaseOperators">false</str> + <str name="q.op">AND</str> + <str name="df">allfields</str> + </lst> + <arr name="last-components"> + <str>spellcheck</str> + </arr> + </requestHandler> + + <requestHandler name="morelikethis" class="solr.MoreLikeThisHandler"> + <lst name="defaults"> + <str name="mlt.fl">title,title_short,callnumber-label,topic,language,author,publishDate</str> + <str name="mlt.qf"> + title^75 + title_short^100 + callnumber-label^400 + topic^300 + language^30 + author^75 + publishDate + </str> + <int name="mlt.mintf">1</int> + <int name="mlt.mindf">1</int> + <str name="mlt.boost">true</str> + <int name="mlt.count">5</int> + <int name="rows">5</int> + </lst> + </requestHandler> + + <requestHandler name="/browse" class="org.vufind.solr.handler.BrowseRequestHandler"> + <!-- These definitions should match the field names used in the authority index. --> + <str name="preferredHeadingField">heading</str> + <str name="useInsteadHeadingField">use_for</str> + <str name="seeAlsoHeadingField">see_also</str> + <str name="scopeNoteField">scope_note</str> + + + <str name="sources">topic,author,title,lcc,dewey,hierarchy</str> + + <lst name="topic"> + <str name="DBpath">${solr.solr.home:./solr}/alphabetical_browse/topic_browse.db</str> + <str name="field">topic_browse</str> + </lst> + + <lst name="author"> + <str name="DBpath">${solr.solr.home:./solr}/alphabetical_browse/author_browse.db</str> + <str name="field">author_browse</str> + </lst> + + <lst name="title"> + <str name="DBpath">${solr.solr.home:./solr}/alphabetical_browse/title_browse.db</str> + <str name="field">title_fullStr</str> + </lst> + + <lst name="lcc"> + <str name="DBpath">${solr.solr.home:./solr}/alphabetical_browse/lcc_browse.db</str> + <str name="field">callnumber-raw</str> + <str name="normalizer">org.vufind.util.LCCallNormalizer</str> + </lst> + + <lst name="dewey"> + <str name="DBpath">${solr.solr.home:./solr}/alphabetical_browse/dewey_browse.db</str> + <str name="field">dewey-raw</str> + <str name="ignoreDiacritics">yes</str> + <str name="normalizer">org.vufind.util.DeweyCallNormalizer</str> + </lst> + + <lst name="hierarchy"> + <str name="DBpath">${solr.solr.home:./solr}/alphabetical_browse/hierarchy_browse.db</str> + <str name="field">hierarchy_browse</str> + </lst> + </requestHandler> + + <searchComponent name="spellcheck" class="org.apache.solr.handler.component.SpellCheckComponent"> + <lst name="spellchecker"> + <str name="name">default</str> + <str name="field">spellingShingle</str> + <str name="accuracy">0.75</str> + <str name="spellcheckIndexDir">./spellShingle</str> + <str name="queryAnalyzerFieldType">textSpellShingle</str> + <str name="buildOnOptimize">true</str> + </lst> + <lst name="spellchecker"> + <str name="name">basicSpell</str> + <str name="field">spelling</str> + <str name="accuracy">0.75</str> + <str name="spellcheckIndexDir">./spellchecker</str> + <str name="queryAnalyzerFieldType">textSpell</str> + <str name="buildOnOptimize">true</str> + </lst> + </searchComponent> + <queryConverter name="queryConverter" class="org.apache.solr.spelling.SpellingQueryConverter"/> + + <!-- Search component for extracting terms (useful for sitemap generation) --> + <searchComponent name="terms" class="solr.TermsComponent"> + </searchComponent> + + <!-- + + Search components are registered to SolrCore and used by Search Handlers + + By default, the following components are available: + + <searchComponent name="query" class="org.apache.solr.handler.component.QueryComponent" /> + <searchComponent name="facet" class="org.apache.solr.handler.component.FacetComponent" /> + <searchComponent name="mlt" class="org.apache.solr.handler.component.MoreLikeThisComponent" /> + <searchComponent name="highlight" class="org.apache.solr.handler.component.HighlightComponent" /> + <searchComponent name="stats" class="org.apache.solr.handler.component.StatsComponent" /> + <searchComponent name="debug" class="org.apache.solr.handler.component.DebugComponent" /> + + If you register a searchComponent to one of the standard names, that will be used instead. + + --> + + <requestHandler name="/search" class="org.apache.solr.handler.component.SearchHandler"> + <lst name="defaults"> + <str name="echoParams">explicit</str> + </lst> + <!-- + By default, this will register the following components: + + <arr name="components"> + <str>query</str> + <str>facet</str> + <str>mlt</str> + <str>highlight</str> + <str>debug</str> + </arr> + + To insert handlers before or after the 'standard' components, use: + + <arr name="first-components"> + <str>first</str> + </arr> + + <arr name="last-components"> + <str>last</str> + </arr> + + --> + <arr name="last-components"> + <str>spellcheck</str> + <str>elevator</str> + </arr> + </requestHandler> + + <!-- Request handler to extract terms (useful for sitemap generation) --> + <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy"> + <arr name="components"> + <str>terms</str> + </arr> + </requestHandler> + + <searchComponent name="elevator" class="org.apache.solr.handler.component.QueryElevationComponent" > + <!-- pick a fieldType to analyze queries --> + <str name="queryFieldType">string</str> + <str name="config-file">elevate.xml</str> + </searchComponent> + + <requestHandler name="/elevate" class="org.apache.solr.handler.component.SearchHandler" startup="lazy"> + <lst name="defaults"> + <str name="echoParams">explicit</str> + </lst> + <arr name="last-components"> + <str>elevator</str> + </arr> + </requestHandler> + + <!-- Field Analysis Request Handler + + RequestHandler that provides much the same functionality as + analysis.jsp. Provides the ability to specify multiple field + types and field names in the same request and outputs + index-time and query-time analysis for each of them. + + Request parameters are: + analysis.fieldname - field name whose analyzers are to be used + + analysis.fieldtype - field type whose analyzers are to be used + analysis.fieldvalue - text for index-time analysis + q (or analysis.q) - text for query time analysis + analysis.showmatch (true|false) - When set to true and when + query analysis is performed, the produced tokens of the + field value analysis will be marked as "matched" for every + token that is produces by the query analysis + --> + <requestHandler name="/analysis/field" + startup="lazy" + class="solr.FieldAnalysisRequestHandler" /> + + + <!-- Document Analysis Handler + + http://wiki.apache.org/solr/AnalysisRequestHandler + + An analysis handler that provides a breakdown of the analysis + process of provided documents. This handler expects a (single) + content stream with the following format: + + <docs> + <doc> + <field name="id">1</field> + <field name="name">The Name</field> + <field name="text">The Text Value</field> + </doc> + <doc>...</doc> + <doc>...</doc> + ... + </docs> + + Note: Each document must contain a field which serves as the + unique key. This key is used in the returned response to associate + an analysis breakdown to the analyzed document. + + Like the FieldAnalysisRequestHandler, this handler also supports + query analysis by sending either an "analysis.query" or "q" + request parameter that holds the query text to be analyzed. It + also supports the "analysis.showmatch" parameter which when set to + true, all field tokens that match the query tokens will be marked + as a "match". + --> + <requestHandler name="/analysis/document" + class="solr.DocumentAnalysisRequestHandler" + startup="lazy" /> + + + <!-- ping/healthcheck --> + <requestHandler name="/admin/ping" class="PingRequestHandler"> + <lst name="defaults"> + <str name="qt">standard</str> + <str name="q">solrpingquery</str> + <str name="echoParams">all</str> + </lst> + </requestHandler> + + <!-- Echo the request contents back to the client --> + <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" > + <lst name="defaults"> + <str name="echoParams">explicit</str> <!-- for all params (including the default etc) use: 'all' --> + <str name="echoHandler">true</str> + </lst> + </requestHandler> + + <searchComponent class="solr.HighlightComponent" name="highlight"> + <highlighting> + <!-- Configure the standard fragmenter --> + <!-- This could most likely be commented out in the "default" case --> + <fragmenter name="gap" class="org.apache.solr.highlight.GapFragmenter" default="true"> + <lst name="defaults"> + <int name="hl.fragsize">100</int> + </lst> + </fragmenter> + + <!-- A regular-expression-based fragmenter (f.i., for sentence extraction) --> + <fragmenter name="regex" class="org.apache.solr.highlight.RegexFragmenter"> + <lst name="defaults"> + <!-- slightly smaller fragsizes work better because of slop --> + <int name="hl.fragsize">70</int> + <!-- allow 50% slop on fragment sizes --> + <float name="hl.regex.slop">0.5</float> + <!-- a basic sentence pattern --> + <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str> + </lst> + </fragmenter> + + <!-- Configure the standard formatter --> + <formatter name="html" class="org.apache.solr.highlight.HtmlFormatter" default="true"> + <lst name="defaults"> + <str name="hl.simple.pre"><![CDATA[<em>]]></str> + <str name="hl.simple.post"><![CDATA[</em>]]></str> + </lst> + </formatter> + </highlighting> + </searchComponent> + + <!-- queryResponseWriter plugins... query responses will be written using the + writer specified by the 'wt' request parameter matching the name of a registered + writer. + The "default" writer is the default and will be used if 'wt' is not specified + in the request. XMLResponseWriter will be used if nothing is specified here. + The json, python, and ruby writers are also available by default. + + <queryResponseWriter name="xml" class="org.apache.solr.request.XMLResponseWriter" default="true"/> + <queryResponseWriter name="json" class="org.apache.solr.request.JSONResponseWriter"/> + <queryResponseWriter name="python" class="org.apache.solr.request.PythonResponseWriter"/> + <queryResponseWriter name="ruby" class="org.apache.solr.request.RubyResponseWriter"/> + <queryResponseWriter name="php" class="org.apache.solr.request.PHPResponseWriter"/> + <queryResponseWriter name="phps" class="org.apache.solr.request.PHPSerializedResponseWriter"/> + + <queryResponseWriter name="custom" class="com.example.MyResponseWriter"/> + --> + + <!-- XSLT response writer transforms the XML output by any xslt file found + in Solr's conf/xslt directory. Changes to xslt files are checked for + every xsltCacheLifetimeSeconds. + --> + <queryResponseWriter name="xslt" class="org.apache.solr.response.XSLTResponseWriter"> + <int name="xsltCacheLifetimeSeconds">5</int> + </queryResponseWriter> + + <!-- Mitigate CVE-2017-12629 --> + <queryParser name="xmlparser" class="solr.ExtendedDismaxQParserPlugin"/> + + <!-- config for the admin interface --> + <admin> + <defaultQuery>shakespeare</defaultQuery> + + <!-- configure a healthcheck file for servers behind a loadbalancer + <healthcheck type="file">server-enabled</healthcheck> + --> + </admin> + +</config> diff --git a/solr/vufind/reserves/conf/solrconfig.xml b/solr/vufind/reserves/conf/solrconfig.xml new file mode 100644 index 00000000000..876146f4a4f --- /dev/null +++ b/solr/vufind/reserves/conf/solrconfig.xml @@ -0,0 +1,570 @@ +<?xml version="1.0" encoding="UTF-8" ?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<config> + <!-- Set this to 'false' if you want solr to continue working after it has + encountered an severe configuration error. In a production environment, + you may want solr to keep working even if one handler is mis-configured. + + You may also set this to false using by setting the system property: + -Dsolr.abortOnConfigurationError=false + --> + <abortOnConfigurationError>${solr.abortOnConfigurationError:false}</abortOnConfigurationError> + + <!-- Controls what version of Lucene various components of Solr + adhere to. Generally, you want to use the latest version to + get all bug fixes and improvements. It is highly recommended + that you fully re-index after changing this setting as it can + affect both how text is indexed and queried. + --> + <luceneMatchVersion>7.7.3</luceneMatchVersion> + + <!-- Use the classic schema style by default for VuFind --> + <schemaFactory class="ClassicIndexSchemaFactory"/> + + <!-- Used to specify an alternate directory to hold all index data + other than the default ./data under the Solr home. + If replication is in use, this should match the replication configuration. --> + <dataDir>${solr.solr.home:./solr}/reserves</dataDir> + + + <indexConfig> + + <useCompoundFile>false</useCompoundFile> + + + <!-- ramBufferSizeMB sets the amount of RAM that may be used by Lucene + indexing for buffering added documents and deletions before they are + flushed to the Directory. + maxBufferedDocs sets a limit on the number of documents buffered + before flushing. + If both ramBufferSizeMB and maxBufferedDocs is set, then + Lucene will flush based on whichever limit is hit first. --> + <ramBufferSizeMB>100</ramBufferSizeMB> + <!-- <maxBufferedDocs>1000</maxBufferedDocs> --> + + + <!-- Commit Deletion Policy + + Custom deletion policies can be specified here. The class must + implement org.apache.lucene.index.IndexDeletionPolicy. + + http://lucene.apache.org/java/3_5_0/api/core/org/apache/lucene/index/IndexDeletionPolicy.html + + The default Solr IndexDeletionPolicy implementation supports + deleting index commit points on number of commits, age of + commit point and optimized status. + + The latest commit point should always be preserved regardless + of the criteria. + --> + <deletionPolicy class="solr.SolrDeletionPolicy"> + <!-- Keep only optimized commit points --> + <str name="keepOptimizedOnly">false</str> + <!-- The maximum number of commit points to be kept --> + <str name="maxCommitsToKeep">1</str> + <!-- + Delete all commit points once they have reached the given age. + Supports DateMathParser syntax e.g. + + <str name="maxCommitAge">30MINUTES</str> + <str name="maxCommitAge">1DAY</str> + --> + </deletionPolicy> + + </indexConfig> + + <!-- Enables JMX if and only if an existing MBeanServer is found, use + this if you want to configure JMX through JVM parameters. Remove + this to disable exposing Solr configuration and statistics to JMX. + + If you want to connect to a particular server, specify the agentId + e.g. <jmx agentId="myAgent" /> + + If you want to start a new MBeanServer, specify the serviceUrl + e.g <jmx serviceurl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr" /> + + For more details see http://wiki.apache.org/solr/SolrJmx + --> + <jmx /> + + <!-- the default high-performance update handler --> + <updateHandler class="solr.DirectUpdateHandler2"> + + + <!-- Enables a transaction log, used for real-time get, durability, and + and solr cloud replica recovery. The log can grow as big as + uncommitted changes to the index, so use of a hard autoCommit + is recommended (see below). + "dir" - the target directory for transaction logs, defaults to the + solr data directory. --> + <updateLog> + <str name="dir">${solr.ulog.dir:}</str> + </updateLog> + + <!-- A prefix of "solr." for class names is an alias that + causes solr to search appropriate packages, including + org.apache.solr.(search|update|request|core|analysis) + --> + + <!-- Perform a <commit/> automatically under certain conditions: + maxDocs - number of updates since last commit is greater than this + maxTime - oldest uncommited update (in ms) is this long ago + --> + <autoCommit> + <maxTime>15000</maxTime> + <openSearcher>false</openSearcher> + </autoCommit> + + <!-- The RunExecutableListener executes an external command. + exe - the name of the executable to run + dir - dir to use as the current working directory. default="." + wait - the calling thread waits until the executable returns. default="true" + args - the arguments to pass to the program. default=nothing + env - environment variables to set. default=nothing + --> + <!-- A postCommit event is fired after every commit or optimize command + <listener event="postCommit" class="solr.RunExecutableListener"> + <str name="exe">solr/bin/snapshooter</str> + <str name="dir">.</str> + <bool name="wait">true</bool> + <arr name="args"> <str>arg1</str> <str>arg2</str> </arr> + <arr name="env"> <str>MYVAR=val1</str> </arr> + </listener> + --> + <!-- A postOptimize event is fired only after every optimize command, useful + in conjunction with index distribution to only distribute optimized indices + <listener event="postOptimize" class="solr.RunExecutableListener"> + <str name="exe">snapshooter</str> + <str name="dir">solr/bin</str> + <bool name="wait">true</bool> + </listener> + --> + + </updateHandler> + + + <query> + <!-- Maximum number of clauses in a boolean query... can affect + range or prefix queries that expand to big boolean + queries. An exception is thrown if exceeded. --> + <maxBooleanClauses>1024</maxBooleanClauses> + + + <!-- Cache used by SolrIndexSearcher for filters (DocSets), + unordered sets of *all* documents that match a query. + When a new searcher is opened, its caches may be prepopulated + or "autowarmed" using data from caches in the old searcher. + autowarmCount is the number of items to prepopulate. For LRUCache, + the autowarmed items will be the most recently accessed items. + Parameters: + class - the SolrCache implementation (currently only LRUCache) + size - the maximum number of entries in the cache + initialSize - the initial capacity (number of entries) of + the cache. (seel java.util.HashMap) + autowarmCount - the number of entries to prepopulate from + and old cache. + --> + + <!-- VuFind uses the default Solr settings for filterCache, which should produce a high + hit rate. You can carefully adjust the filterCache and check the hit ratio in + admin>plugins>stats --> + <filterCache + class="solr.FastLRUCache" + size="512" + initialSize="512" + autowarmCount="256"/> + + <!-- queryResultCache caches results of searches - ordered lists of + document ids (DocList) based on a query, a sort, and the range + of documents requested. --> + <queryResultCache + class="solr.LRUCache" + size="512" + initialSize="512" + autowarmCount="256"/> + + <!-- documentCache caches Lucene Document objects (the stored fields for each document). + Since Lucene internal document ids are transient, this cache will not be autowarmed. --> + <documentCache + class="solr.LRUCache" + size="16384" + initialSize="16384"/> + + <!-- If true, stored fields that are not requested will be loaded lazily. + + This can result in a significant speed improvement if the usual case is to + not load all stored fields, especially if the skipped fields are large compressed + text fields. + --> + <enableLazyFieldLoading>false</enableLazyFieldLoading> + + <!-- Example of a generic cache. These caches may be accessed by name + through SolrIndexSearcher.getCache(),cacheLookup(), and cacheInsert(). + The purpose is to enable easy caching of user/application level data. + The regenerator argument should be specified as an implementation + of solr.search.CacheRegenerator if autowarming is desired. --> + <!-- + <cache name="myUserCache" + class="solr.LRUCache" + size="4096" + initialSize="1024" + autowarmCount="1024" + regenerator="org.mycompany.mypackage.MyRegenerator" + /> + --> + + <!-- An optimization that attempts to use a filter to satisfy a search. + If the requested sort does not include score, then the filterCache + will be checked for a filter matching the query. If found, the filter + will be used as the source of document ids, and then the sort will be + applied to that. + <useFilterForSortedQuery>true</useFilterForSortedQuery> + --> + + <!-- An optimization for use with the queryResultCache. When a search + is requested, a superset of the requested number of document ids + are collected. For example, if a search for a particular query + requests matching documents 10 through 19, and queryWindowSize is 50, + then documents 0 through 49 will be collected and cached. Any further + requests in that range can be satisfied via the cache. --> + <queryResultWindowSize>50</queryResultWindowSize> + + <!-- Maximum number of documents to cache for any entry in the + queryResultCache. --> + <queryResultMaxDocsCached>200</queryResultMaxDocsCached> + + <!-- This entry enables an int hash representation for filters (DocSets) + when the number of items in the set is less than maxSize. For smaller + sets, this representation is more memory efficient, more efficient to + iterate over, and faster to take intersections. --> + <HashDocSet maxSize="3000" loadFactor="0.75"/> + + <!-- a newSearcher event is fired whenever a new searcher is being prepared + and there is a current searcher handling requests (aka registered). --> + <!-- QuerySenderListener takes an array of NamedList and executes a + local query request for each NamedList in sequence. --> + <!-- + <listener event="newSearcher" class="solr.QuerySenderListener"> + <arr name="queries"> + <lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> </lst> + <lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst> + </arr> + </listener> + --> + + <!-- a firstSearcher event is fired whenever a new searcher is being + prepared but there is no current registered searcher to handle + requests or to gain autowarming data from. --> + <!-- + <listener event="firstSearcher" class="solr.QuerySenderListener"> + <arr name="queries"> + <lst> <str name="q">fast_warm</str> <str name="start">0</str> <str name="rows">10</str> </lst> + </arr> + </listener> + --> + + <!-- If a search request comes in and there is no current registered searcher, + then immediately register the still warming searcher and use it. If + "false" then all requests will block until the first searcher is done + warming. --> + <useColdSearcher>false</useColdSearcher> + + <!-- Maximum number of searchers that may be warming in the background + concurrently. An error is returned if this limit is exceeded. Recommend + 1-2 for read-only slaves, higher for masters w/o cache warming. --> + <maxWarmingSearchers>4</maxWarmingSearchers> + + </query> + + <!-- + Let the dispatch filter handler /select?qt=XXX + handleSelect=true will use consistent error handling for /select and /update + handleSelect=false will use solr1.1 style error formatting + --> + <requestDispatcher handleSelect="true" > + <!--Make sure your system has some authentication before enabling remote streaming! --> + <requestParsers enableRemoteStreaming="false" + multipartUploadLimitInKB="2048" + formdataUploadLimitInKB="2048"/> + + <!-- Set HTTP caching related parameters (for proxy caches and clients). + + To get the behaviour of Solr 1.2 (ie: no caching related headers) + use the never304="true" option and do not specify a value for + <cacheControl> + --> + <!-- <httpCaching never304="true"> --> + <httpCaching lastModifiedFrom="openTime" + etagSeed="Solr"> + <!-- lastModFrom="openTime" is the default, the Last-Modified value + (and validation against If-Modified-Since requests) will all be + relative to when the current Searcher was opened. + You can change it to lastModFrom="dirLastMod" if you want the + value to exactly correspond to when the physical index was last + modified. + + etagSeed="..." is an option you can change to force the ETag + header (and validation against If-None-Match requests) to be + different even if the index has not changed (ie: when making + significant changes to your config file) + + lastModifiedFrom and etagSeed are both ignored if you use the + never304="true" option. + --> + <!-- If you include a <cacheControl> directive, it will be used to + generate a Cache-Control header, as well as an Expires header + if the value contains "max-age=" + + By default, no Cache-Control header is generated. + + You can use the <cacheControl> option even if you have set + never304="true" + --> + <!-- <cacheControl>max-age=30, public</cacheControl> --> + </httpCaching> + </requestDispatcher> + + + <!-- requestHandler plugins... incoming queries will be dispatched to the + correct handler based on the path or the qt (query type) param. + Names starting with a '/' are accessed with the a path equal to the + registered name. Names without a leading '/' are accessed with: + http://host/app/select?qt=name + If no qt is defined, the requestHandler that declares default="true" + will be used. + --> + <requestHandler name="standard" class="solr.SearchHandler" default="true"> + <!-- default values for query parameters may optionally be defined here + <lst name="defaults"> + <int name="rows">10</int> + <str name="fl">*</str> + <str name="version">2.1</str> + <lst> + --> + <lst name="defaults"> + <str name="echoParams">explicit</str> + <str name="q.op">AND</str> + <str name="df">course</str> + </lst> + </requestHandler> + + <!-- the following two handlers will be used for eligible dismax searches defined + in searchspecs.yaml. The edismax handler will be used most of the time, unless + a specific configuration tells VuFind to use traditional dismax instead. You + can use these handler definitions to set global Dismax settings (e.g. mm / bf). + If you need different settings for different types of searches (e.g. Title vs. + Author), you can also configure individual settings in the searchspecs.yaml + file. + --> + <requestHandler name="dismax" class="solr.SearchHandler"> + <lst name="defaults"> + <str name="defType">dismax</str> + <str name="echoParams">explicit</str> + <str name="q.op">AND</str> + <str name="df">course</str> + </lst> + </requestHandler> + + <requestHandler name="edismax" class="solr.SearchHandler"> + <lst name="defaults"> + <str name="defType">edismax</str> + <str name="echoParams">explicit</str> + <str name="lowercaseOperators">false</str> + <str name="q.op">AND</str> + <str name="df">course</str> + </lst> + </requestHandler> + + <!-- Search component for extracting terms (useful for sitemap generation) --> + <searchComponent name="terms" class="solr.TermsComponent"> + </searchComponent> + + <!-- + + Search components are registered to SolrCore and used by Search Handlers + + By default, the following components are available: + + <searchComponent name="query" class="org.apache.solr.handler.component.QueryComponent" /> + <searchComponent name="facet" class="org.apache.solr.handler.component.FacetComponent" /> + <searchComponent name="mlt" class="org.apache.solr.handler.component.MoreLikeThisComponent" /> + <searchComponent name="highlight" class="org.apache.solr.handler.component.HighlightComponent" /> + <searchComponent name="stats" class="org.apache.solr.handler.component.StatsComponent" /> + <searchComponent name="debug" class="org.apache.solr.handler.component.DebugComponent" /> + + If you register a searchComponent to one of the standard names, that will be used instead. + + --> + + <!-- Request handler to extract terms (useful for sitemap generation) --> + <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy"> + <arr name="components"> + <str>terms</str> + </arr> + </requestHandler> + + <!-- Field Analysis Request Handler + + RequestHandler that provides much the same functionality as + analysis.jsp. Provides the ability to specify multiple field + types and field names in the same request and outputs + index-time and query-time analysis for each of them. + + Request parameters are: + analysis.fieldname - field name whose analyzers are to be used + + analysis.fieldtype - field type whose analyzers are to be used + analysis.fieldvalue - text for index-time analysis + q (or analysis.q) - text for query time analysis + analysis.showmatch (true|false) - When set to true and when + query analysis is performed, the produced tokens of the + field value analysis will be marked as "matched" for every + token that is produces by the query analysis + --> + <requestHandler name="/analysis/field" + startup="lazy" + class="solr.FieldAnalysisRequestHandler" /> + + + <!-- Document Analysis Handler + + http://wiki.apache.org/solr/AnalysisRequestHandler + + An analysis handler that provides a breakdown of the analysis + process of provided documents. This handler expects a (single) + content stream with the following format: + + <docs> + <doc> + <field name="id">1</field> + <field name="name">The Name</field> + <field name="text">The Text Value</field> + </doc> + <doc>...</doc> + <doc>...</doc> + ... + </docs> + + Note: Each document must contain a field which serves as the + unique key. This key is used in the returned response to associate + an analysis breakdown to the analyzed document. + + Like the FieldAnalysisRequestHandler, this handler also supports + query analysis by sending either an "analysis.query" or "q" + request parameter that holds the query text to be analyzed. It + also supports the "analysis.showmatch" parameter which when set to + true, all field tokens that match the query tokens will be marked + as a "match". + --> + <requestHandler name="/analysis/document" + class="solr.DocumentAnalysisRequestHandler" + startup="lazy" /> + + + <!-- CSV update handler, loaded on demand --> + <requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy" /> + + + <!-- ping/healthcheck --> + <requestHandler name="/admin/ping" class="PingRequestHandler"> + <lst name="defaults"> + <str name="qt">standard</str> + <str name="q">solrpingquery</str> + <str name="echoParams">all</str> + </lst> + </requestHandler> + + <!-- Echo the request contents back to the client --> + <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" > + <lst name="defaults"> + <str name="echoParams">explicit</str> <!-- for all params (including the default etc) use: 'all' --> + <str name="echoHandler">true</str> + </lst> + </requestHandler> + + <searchComponent class="solr.HighlightComponent" name="highlight"> + <highlighting> + <!-- Configure the standard fragmenter --> + <!-- This could most likely be commented out in the "default" case --> + <fragmenter name="gap" class="org.apache.solr.highlight.GapFragmenter" default="true"> + <lst name="defaults"> + <int name="hl.fragsize">100</int> + </lst> + </fragmenter> + + <!-- A regular-expression-based fragmenter (f.i., for sentence extraction) --> + <fragmenter name="regex" class="org.apache.solr.highlight.RegexFragmenter"> + <lst name="defaults"> + <!-- slightly smaller fragsizes work better because of slop --> + <int name="hl.fragsize">70</int> + <!-- allow 50% slop on fragment sizes --> + <float name="hl.regex.slop">0.5</float> + <!-- a basic sentence pattern --> + <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str> + </lst> + </fragmenter> + + <!-- Configure the standard formatter --> + <formatter name="html" class="org.apache.solr.highlight.HtmlFormatter" default="true"> + <lst name="defaults"> + <str name="hl.simple.pre"><![CDATA[<em>]]></str> + <str name="hl.simple.post"><![CDATA[</em>]]></str> + </lst> + </formatter> + </highlighting> + </searchComponent> + + <!-- queryResponseWriter plugins... query responses will be written using the + writer specified by the 'wt' request parameter matching the name of a registered + writer. + The "default" writer is the default and will be used if 'wt' is not specified + in the request. XMLResponseWriter will be used if nothing is specified here. + The json, python, and ruby writers are also available by default. + + <queryResponseWriter name="xml" class="org.apache.solr.request.XMLResponseWriter" default="true"/> + <queryResponseWriter name="json" class="org.apache.solr.request.JSONResponseWriter"/> + <queryResponseWriter name="python" class="org.apache.solr.request.PythonResponseWriter"/> + <queryResponseWriter name="ruby" class="org.apache.solr.request.RubyResponseWriter"/> + <queryResponseWriter name="php" class="org.apache.solr.request.PHPResponseWriter"/> + <queryResponseWriter name="phps" class="org.apache.solr.request.PHPSerializedResponseWriter"/> + + <queryResponseWriter name="custom" class="com.example.MyResponseWriter"/> + --> + + <!-- XSLT response writer transforms the XML output by any xslt file found + in Solr's conf/xslt directory. Changes to xslt files are checked for + every xsltCacheLifetimeSeconds. + --> + <queryResponseWriter name="xslt" class="org.apache.solr.response.XSLTResponseWriter"> + <int name="xsltCacheLifetimeSeconds">5</int> + </queryResponseWriter> + + <!-- Mitigate CVE-2017-12629 --> + <queryParser name="xmlparser" class="solr.ExtendedDismaxQParserPlugin"/> + + <!-- config for the admin interface --> + <admin> + <defaultQuery>shakespeare</defaultQuery> + + <!-- configure a healthcheck file for servers behind a loadbalancer + <healthcheck type="file">server-enabled</healthcheck> + --> + </admin> + +</config> diff --git a/solr/vufind/website/conf/solrconfig.xml b/solr/vufind/website/conf/solrconfig.xml new file mode 100644 index 00000000000..e063ce113e0 --- /dev/null +++ b/solr/vufind/website/conf/solrconfig.xml @@ -0,0 +1,659 @@ +<?xml version="1.0" encoding="UTF-8" ?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + +<config> + <!-- Set this to 'false' if you want solr to continue working after it has + encountered an severe configuration error. In a production environment, + you may want solr to keep working even if one handler is mis-configured. + + You may also set this to false using by setting the system property: + -Dsolr.abortOnConfigurationError=false + --> + <abortOnConfigurationError>${solr.abortOnConfigurationError:false}</abortOnConfigurationError> + + <!-- Controls what version of Lucene various components of Solr + adhere to. Generally, you want to use the latest version to + get all bug fixes and improvements. It is highly recommended + that you fully re-index after changing this setting as it can + affect both how text is indexed and queried. + --> + <luceneMatchVersion>7.7.3</luceneMatchVersion> + + <!-- Use the classic schema style by default for VuFind --> + <schemaFactory class="ClassicIndexSchemaFactory"/> + + <!-- Used to specify an alternate directory to hold all index data + other than the default ./data under the Solr home. + If replication is in use, this should match the replication configuration. --> + <dataDir>${solr.solr.home:./solr}/website</dataDir> + + + <indexConfig> + + <useCompoundFile>false</useCompoundFile> + + + <!-- ramBufferSizeMB sets the amount of RAM that may be used by Lucene + indexing for buffering added documents and deletions before they are + flushed to the Directory. + maxBufferedDocs sets a limit on the number of documents buffered + before flushing. + If both ramBufferSizeMB and maxBufferedDocs is set, then + Lucene will flush based on whichever limit is hit first. --> + <ramBufferSizeMB>100</ramBufferSizeMB> + <!-- <maxBufferedDocs>1000</maxBufferedDocs> --> + + + <!-- Commit Deletion Policy + + Custom deletion policies can be specified here. The class must + implement org.apache.lucene.index.IndexDeletionPolicy. + + http://lucene.apache.org/java/3_5_0/api/core/org/apache/lucene/index/IndexDeletionPolicy.html + + The default Solr IndexDeletionPolicy implementation supports + deleting index commit points on number of commits, age of + commit point and optimized status. + + The latest commit point should always be preserved regardless + of the criteria. + --> + <deletionPolicy class="solr.SolrDeletionPolicy"> + <!-- Keep only optimized commit points --> + <str name="keepOptimizedOnly">false</str> + <!-- The maximum number of commit points to be kept --> + <str name="maxCommitsToKeep">1</str> + <!-- + Delete all commit points once they have reached the given age. + Supports DateMathParser syntax e.g. + + <str name="maxCommitAge">30MINUTES</str> + <str name="maxCommitAge">1DAY</str> + --> + </deletionPolicy> + + </indexConfig> + + <!-- Enables JMX if and only if an existing MBeanServer is found, use + this if you want to configure JMX through JVM parameters. Remove + this to disable exposing Solr configuration and statistics to JMX. + + If you want to connect to a particular server, specify the agentId + e.g. <jmx agentId="myAgent" /> + + If you want to start a new MBeanServer, specify the serviceUrl + e.g <jmx serviceurl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr" /> + + For more details see http://wiki.apache.org/solr/SolrJmx + --> + <jmx /> + + <!-- the default high-performance update handler --> + <updateHandler class="solr.DirectUpdateHandler2"> + + + <!-- Enables a transaction log, used for real-time get, durability, and + and solr cloud replica recovery. The log can grow as big as + uncommitted changes to the index, so use of a hard autoCommit + is recommended (see below). + "dir" - the target directory for transaction logs, defaults to the + solr data directory. --> + <updateLog> + <str name="dir">${solr.ulog.dir:}</str> + </updateLog> + + <!-- A prefix of "solr." for class names is an alias that + causes solr to search appropriate packages, including + org.apache.solr.(search|update|request|core|analysis) + --> + + <!-- Perform a <commit/> automatically under certain conditions: + maxDocs - number of updates since last commit is greater than this + maxTime - oldest uncommited update (in ms) is this long ago + --> + <autoCommit> + <maxTime>15000</maxTime> + <openSearcher>false</openSearcher> + </autoCommit> + + <!-- The RunExecutableListener executes an external command. + exe - the name of the executable to run + dir - dir to use as the current working directory. default="." + wait - the calling thread waits until the executable returns. default="true" + args - the arguments to pass to the program. default=nothing + env - environment variables to set. default=nothing + --> + <!-- A postCommit event is fired after every commit or optimize command + <listener event="postCommit" class="solr.RunExecutableListener"> + <str name="exe">solr/bin/snapshooter</str> + <str name="dir">.</str> + <bool name="wait">true</bool> + <arr name="args"> <str>arg1</str> <str>arg2</str> </arr> + <arr name="env"> <str>MYVAR=val1</str> </arr> + </listener> + --> + <!-- A postOptimize event is fired only after every optimize command, useful + in conjunction with index distribution to only distribute optimized indices + <listener event="postOptimize" class="solr.RunExecutableListener"> + <str name="exe">snapshooter</str> + <str name="dir">solr/bin</str> + <bool name="wait">true</bool> + </listener> + --> + + </updateHandler> + + + <query> + <!-- Maximum number of clauses in a boolean query... can affect + range or prefix queries that expand to big boolean + queries. An exception is thrown if exceeded. --> + <maxBooleanClauses>1024</maxBooleanClauses> + + + <!-- Cache used by SolrIndexSearcher for filters (DocSets), + unordered sets of *all* documents that match a query. + When a new searcher is opened, its caches may be prepopulated + or "autowarmed" using data from caches in the old searcher. + autowarmCount is the number of items to prepopulate. For LRUCache, + the autowarmed items will be the most recently accessed items. + Parameters: + class - the SolrCache implementation (currently only LRUCache) + size - the maximum number of entries in the cache + initialSize - the initial capacity (number of entries) of + the cache. (seel java.util.HashMap) + autowarmCount - the number of entries to prepopulate from + and old cache. + --> + + <!-- VuFind uses the default Solr settings for filterCache, which should produce a high + hit rate. You can carefully adjust the filterCache and check the hit ratio in + admin>plugins>stats --> + <filterCache + class="solr.FastLRUCache" + size="512" + initialSize="512" + autowarmCount="256"/> + + <!-- queryResultCache caches results of searches - ordered lists of + document ids (DocList) based on a query, a sort, and the range + of documents requested. --> + <queryResultCache + class="solr.LRUCache" + size="512" + initialSize="512" + autowarmCount="256"/> + + <!-- documentCache caches Lucene Document objects (the stored fields for each document). + Since Lucene internal document ids are transient, this cache will not be autowarmed. --> + <documentCache + class="solr.LRUCache" + size="16384" + initialSize="16384"/> + + <!-- If true, stored fields that are not requested will be loaded lazily. + + This can result in a significant speed improvement if the usual case is to + not load all stored fields, especially if the skipped fields are large compressed + text fields. + --> + <enableLazyFieldLoading>true</enableLazyFieldLoading> + + <!-- Example of a generic cache. These caches may be accessed by name + through SolrIndexSearcher.getCache(),cacheLookup(), and cacheInsert(). + The purpose is to enable easy caching of user/application level data. + The regenerator argument should be specified as an implementation + of solr.search.CacheRegenerator if autowarming is desired. --> + <!-- + <cache name="myUserCache" + class="solr.LRUCache" + size="4096" + initialSize="1024" + autowarmCount="1024" + regenerator="org.mycompany.mypackage.MyRegenerator" + /> + --> + + <!-- An optimization that attempts to use a filter to satisfy a search. + If the requested sort does not include score, then the filterCache + will be checked for a filter matching the query. If found, the filter + will be used as the source of document ids, and then the sort will be + applied to that. + <useFilterForSortedQuery>true</useFilterForSortedQuery> + --> + + <!-- An optimization for use with the queryResultCache. When a search + is requested, a superset of the requested number of document ids + are collected. For example, if a search for a particular query + requests matching documents 10 through 19, and queryWindowSize is 50, + then documents 0 through 49 will be collected and cached. Any further + requests in that range can be satisfied via the cache. --> + <queryResultWindowSize>50</queryResultWindowSize> + + <!-- Maximum number of documents to cache for any entry in the + queryResultCache. --> + <queryResultMaxDocsCached>200</queryResultMaxDocsCached> + + <!-- This entry enables an int hash representation for filters (DocSets) + when the number of items in the set is less than maxSize. For smaller + sets, this representation is more memory efficient, more efficient to + iterate over, and faster to take intersections. --> + <HashDocSet maxSize="3000" loadFactor="0.75"/> + + <!-- a newSearcher event is fired whenever a new searcher is being prepared + and there is a current searcher handling requests (aka registered). --> + <!-- QuerySenderListener takes an array of NamedList and executes a + local query request for each NamedList in sequence. --> + <listener event="newSearcher" class="solr.QuerySenderListener"> + <arr name="queries"> + <lst> + <str name="q">science art business engineering history</str> + <str name="start">0</str> + <str name="rows">10</str> + </lst> + </arr> + </listener> + + <!-- a firstSearcher event is fired whenever a new searcher is being + prepared but there is no current registered searcher to handle + requests or to gain autowarming data from. --> + <listener event="firstSearcher" class="solr.QuerySenderListener"> + <arr name="queries"> + <lst> + <str name="q">science art business engineering history</str> + <!-- + <str name="facet.field">format</str> + <str name="fq">format:book</str> + --> + </lst> + </arr> + </listener> + + <!-- If a search request comes in and there is no current registered searcher, + then immediately register the still warming searcher and use it. If + "false" then all requests will block until the first searcher is done + warming. --> + <useColdSearcher>false</useColdSearcher> + + <!-- Maximum number of searchers that may be warming in the background + concurrently. An error is returned if this limit is exceeded. Recommend + 1-2 for read-only slaves, higher for masters w/o cache warming. --> + <maxWarmingSearchers>2</maxWarmingSearchers> + + </query> + + <!-- + Let the dispatch filter handler /select?qt=XXX + handleSelect=true will use consistent error handling for /select and /update + handleSelect=false will use solr1.1 style error formatting + --> + <requestDispatcher handleSelect="true" > + <!--Make sure your system has some authentication before enabling remote streaming! --> + <requestParsers enableRemoteStreaming="true" + multipartUploadLimitInKB="2048000" + formdataUploadLimitInKB="2048"/> + + <!-- Set HTTP caching related parameters (for proxy caches and clients). + + To get the behaviour of Solr 1.2 (ie: no caching related headers) + use the never304="true" option and do not specify a value for + <cacheControl> + --> + <!-- <httpCaching never304="true"> --> + <httpCaching lastModifiedFrom="openTime" + etagSeed="Solr"> + <!-- lastModFrom="openTime" is the default, the Last-Modified value + (and validation against If-Modified-Since requests) will all be + relative to when the current Searcher was opened. + You can change it to lastModFrom="dirLastMod" if you want the + value to exactly correspond to when the physical index was last + modified. + + etagSeed="..." is an option you can change to force the ETag + header (and validation against If-None-Match requests) to be + different even if the index has not changed (ie: when making + significant changes to your config file) + + lastModifiedFrom and etagSeed are both ignored if you use the + never304="true" option. + --> + <!-- If you include a <cacheControl> directive, it will be used to + generate a Cache-Control header, as well as an Expires header + if the value contains "max-age=" + + By default, no Cache-Control header is generated. + + You can use the <cacheControl> option even if you have set + never304="true" + --> + <!-- <cacheControl>max-age=30, public</cacheControl> --> + </httpCaching> + </requestDispatcher> + + + <!-- requestHandler plugins... incoming queries will be dispatched to the + correct handler based on the path or the qt (query type) param. + Names starting with a '/' are accessed with the a path equal to the + registered name. Names without a leading '/' are accessed with: + http://host/app/select?qt=name + If no qt is defined, the requestHandler that declares default="true" + will be used. + --> + <requestHandler name="standard" class="solr.SearchHandler" default="true"> + <!-- default values for query parameters --> + <lst name="defaults"> + <str name="echoParams">explicit</str> + <!-- + <int name="rows">10</int> + <str name="fl">*</str> + <str name="version">2.1</str> + --> + <str name="spellcheck.extendedResults">true</str> + <str name="spellcheck.onlyMorePopular">true</str> + <str name="spellcheck.count">20</str> + <str name="q.op">AND</str> + <str name="df">fulltext</str> + </lst> + <arr name="last-components"> + <str>spellcheck</str> + </arr> + </requestHandler> + + <!-- the following two handlers will be used for eligible dismax searches defined + in searchspecs.yaml. The edismax handler will be used most of the time, unless + a specific configuration tells VuFind to use traditional dismax instead. You + can use these handler definitions to set global Dismax settings (e.g. mm / bf). + If you need different settings for different types of searches (e.g. Title vs. + Author), you can also configure individual settings in the searchspecs.yaml + file. + --> + <requestHandler name="dismax" class="solr.SearchHandler"> + <lst name="defaults"> + <str name="defType">dismax</str> + <str name="echoParams">explicit</str> + <str name="spellcheck.extendedResults">true</str> + <str name="spellcheck.onlyMorePopular">true</str> + <str name="spellcheck.count">20</str> + <str name="q.op">AND</str> + <str name="df">fulltext</str> + </lst> + <arr name="last-components"> + <str>spellcheck</str> + </arr> + </requestHandler> + + <requestHandler name="edismax" class="solr.SearchHandler"> + <lst name="defaults"> + <str name="defType">edismax</str> + <str name="echoParams">explicit</str> + <str name="spellcheck.extendedResults">true</str> + <str name="spellcheck.onlyMorePopular">true</str> + <str name="spellcheck.count">20</str> + <str name="lowercaseOperators">false</str> + <str name="q.op">AND</str> + <str name="df">fulltext</str> + </lst> + <arr name="last-components"> + <str>spellcheck</str> + </arr> + </requestHandler> + + <searchComponent name="spellcheck" class="org.apache.solr.handler.component.SpellCheckComponent"> + <lst name="spellchecker"> + <str name="name">default</str> + <str name="field">spellingShingle</str> + <str name="accuracy">0.75</str> + <str name="spellcheckIndexDir">./spellShingle</str> + <str name="queryAnalyzerFieldType">textSpellShingle</str> + <str name="buildOnOptimize">true</str> + </lst> + <lst name="spellchecker"> + <str name="name">basicSpell</str> + <str name="field">spelling</str> + <str name="accuracy">0.75</str> + <str name="spellcheckIndexDir">./spellchecker</str> + <str name="queryAnalyzerFieldType">textSpell</str> + <str name="buildOnOptimize">true</str> + </lst> + </searchComponent> + <queryConverter name="queryConverter" class="org.apache.solr.spelling.SpellingQueryConverter"/> + + <!-- Search component for extracting terms (useful for sitemap generation) --> + <searchComponent name="terms" class="solr.TermsComponent"> + </searchComponent> + + <!-- + + Search components are registered to SolrCore and used by Search Handlers + + By default, the following components are available: + + <searchComponent name="query" class="org.apache.solr.handler.component.QueryComponent" /> + <searchComponent name="facet" class="org.apache.solr.handler.component.FacetComponent" /> + <searchComponent name="mlt" class="org.apache.solr.handler.component.MoreLikeThisComponent" /> + <searchComponent name="highlight" class="org.apache.solr.handler.component.HighlightComponent" /> + <searchComponent name="stats" class="org.apache.solr.handler.component.StatsComponent" /> + <searchComponent name="debug" class="org.apache.solr.handler.component.DebugComponent" /> + + If you register a searchComponent to one of the standard names, that will be used instead. + + --> + + <requestHandler name="/search" class="org.apache.solr.handler.component.SearchHandler"> + <lst name="defaults"> + <str name="echoParams">explicit</str> + </lst> + <!-- + By default, this will register the following components: + + <arr name="components"> + <str>query</str> + <str>facet</str> + <str>mlt</str> + <str>highlight</str> + <str>debug</str> + </arr> + + To insert handlers before or after the 'standard' components, use: + + <arr name="first-components"> + <str>first</str> + </arr> + + <arr name="last-components"> + <str>last</str> + </arr> + + --> + <arr name="last-components"> + <str>spellcheck</str> + <str>elevator</str> + </arr> + </requestHandler> + + <!-- Request handler to extract terms (useful for sitemap generation) --> + <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy"> + <arr name="components"> + <str>terms</str> + </arr> + </requestHandler> + + <searchComponent name="elevator" class="org.apache.solr.handler.component.QueryElevationComponent" > + <!-- pick a fieldType to analyze queries --> + <str name="queryFieldType">string</str> + <str name="config-file">elevate.xml</str> + </searchComponent> + + <requestHandler name="/elevate" class="org.apache.solr.handler.component.SearchHandler" startup="lazy"> + <lst name="defaults"> + <str name="echoParams">explicit</str> + </lst> + <arr name="last-components"> + <str>elevator</str> + </arr> + </requestHandler> + + <!-- Field Analysis Request Handler + + RequestHandler that provides much the same functionality as + analysis.jsp. Provides the ability to specify multiple field + types and field names in the same request and outputs + index-time and query-time analysis for each of them. + + Request parameters are: + analysis.fieldname - field name whose analyzers are to be used + + analysis.fieldtype - field type whose analyzers are to be used + analysis.fieldvalue - text for index-time analysis + q (or analysis.q) - text for query time analysis + analysis.showmatch (true|false) - When set to true and when + query analysis is performed, the produced tokens of the + field value analysis will be marked as "matched" for every + token that is produces by the query analysis + --> + <requestHandler name="/analysis/field" + startup="lazy" + class="solr.FieldAnalysisRequestHandler" /> + + + <!-- Document Analysis Handler + + http://wiki.apache.org/solr/AnalysisRequestHandler + + An analysis handler that provides a breakdown of the analysis + process of provided documents. This handler expects a (single) + content stream with the following format: + + <docs> + <doc> + <field name="id">1</field> + <field name="name">The Name</field> + <field name="text">The Text Value</field> + </doc> + <doc>...</doc> + <doc>...</doc> + ... + </docs> + + Note: Each document must contain a field which serves as the + unique key. This key is used in the returned response to associate + an analysis breakdown to the analyzed document. + + Like the FieldAnalysisRequestHandler, this handler also supports + query analysis by sending either an "analysis.query" or "q" + request parameter that holds the query text to be analyzed. It + also supports the "analysis.showmatch" parameter which when set to + true, all field tokens that match the query tokens will be marked + as a "match". + --> + <requestHandler name="/analysis/document" + class="solr.DocumentAnalysisRequestHandler" + startup="lazy" /> + + + <!-- CSV update handler, loaded on demand --> + <requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy" /> + + + <!-- ping/healthcheck --> + <requestHandler name="/admin/ping" class="PingRequestHandler"> + <lst name="defaults"> + <str name="qt">standard</str> + <str name="q">solrpingquery</str> + <str name="echoParams">all</str> + </lst> + </requestHandler> + + <!-- Echo the request contents back to the client --> + <requestHandler name="/debug/dump" class="solr.DumpRequestHandler" > + <lst name="defaults"> + <str name="echoParams">explicit</str> <!-- for all params (including the default etc) use: 'all' --> + <str name="echoHandler">true</str> + </lst> + </requestHandler> + + <searchComponent class="solr.HighlightComponent" name="highlight"> + <highlighting> + <!-- Configure the standard fragmenter --> + <!-- This could most likely be commented out in the "default" case --> + <fragmenter name="gap" class="org.apache.solr.highlight.GapFragmenter" default="true"> + <lst name="defaults"> + <int name="hl.fragsize">100</int> + </lst> + </fragmenter> + + <!-- A regular-expression-based fragmenter (f.i., for sentence extraction) --> + <fragmenter name="regex" class="org.apache.solr.highlight.RegexFragmenter"> + <lst name="defaults"> + <!-- slightly smaller fragsizes work better because of slop --> + <int name="hl.fragsize">70</int> + <!-- allow 50% slop on fragment sizes --> + <float name="hl.regex.slop">0.5</float> + <!-- a basic sentence pattern --> + <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str> + </lst> + </fragmenter> + + <!-- Configure the standard formatter --> + <formatter name="html" class="org.apache.solr.highlight.HtmlFormatter" default="true"> + <lst name="defaults"> + <str name="hl.simple.pre"><![CDATA[<em>]]></str> + <str name="hl.simple.post"><![CDATA[</em>]]></str> + </lst> + </formatter> + </highlighting> + </searchComponent> + + <!-- queryResponseWriter plugins... query responses will be written using the + writer specified by the 'wt' request parameter matching the name of a registered + writer. + The "default" writer is the default and will be used if 'wt' is not specified + in the request. XMLResponseWriter will be used if nothing is specified here. + The json, python, and ruby writers are also available by default. + + <queryResponseWriter name="xml" class="org.apache.solr.request.XMLResponseWriter" default="true"/> + <queryResponseWriter name="json" class="org.apache.solr.request.JSONResponseWriter"/> + <queryResponseWriter name="python" class="org.apache.solr.request.PythonResponseWriter"/> + <queryResponseWriter name="ruby" class="org.apache.solr.request.RubyResponseWriter"/> + <queryResponseWriter name="php" class="org.apache.solr.request.PHPResponseWriter"/> + <queryResponseWriter name="phps" class="org.apache.solr.request.PHPSerializedResponseWriter"/> + + <queryResponseWriter name="custom" class="com.example.MyResponseWriter"/> + --> + + <!-- XSLT response writer transforms the XML output by any xslt file found + in Solr's conf/xslt directory. Changes to xslt files are checked for + every xsltCacheLifetimeSeconds. + --> + <queryResponseWriter name="xslt" class="org.apache.solr.response.XSLTResponseWriter"> + <int name="xsltCacheLifetimeSeconds">5</int> + </queryResponseWriter> + + <!-- Mitigate CVE-2017-12629 --> + <queryParser name="xmlparser" class="solr.ExtendedDismaxQParserPlugin"/> + + <!-- config for the admin interface --> + <admin> + <defaultQuery>shakespeare</defaultQuery> + + <!-- configure a healthcheck file for servers behind a loadbalancer + <healthcheck type="file">server-enabled</healthcheck> + --> + </admin> + +</config> -- GitLab