Hierarchy tree data source: retrieve Solr records in cursorMark batches.

Summary of the change set:
  * config.ini: documents the optional cursor_batch_size setting that
    Factory.php reads from the [Index] section (default 1000).
  * Factory.php: reads the setting and passes it to the Solr tree data source.
  * Solr.php: searchSolr() pages through results with cursorMark and returns a
    plain array of record objects instead of the decoded Solr response.
    - Each batch requests at most the number of records still missing, so the
      returned array never holds more than $rows entries.
    - The loop stops if a response carries no nextCursorMark.
    - A non-positive or non-numeric batch size keeps the default of 1000.

NOTE(review): the indentation of context and removed lines was reconstructed
from a whitespace-collapsed copy of this patch. If a hunk is rejected, apply
with whitespace tolerance (e.g. "patch -l" or "git apply --ignore-whitespace").
The "index" hashes below are those of the original change set.

diff --git a/config/vufind/config.ini b/config/vufind/config.ini
index c3a6cf56e5c5058d25daa7e3d14d5f7ab3377cf6..e590deb7a1009b734f916509dc136bf3ed732442 100644
--- a/config/vufind/config.ini
+++ b/config/vufind/config.ini
@@ -427,6 +427,11 @@ timeout = 30
 ; You can choose dismax for standard Dismax (the default) or edismax for Extended
 ; Dismax, or you can configure your own custom handler in solrconfig.xml.
 default_dismax_handler = dismax
 
+; This is the number of records to retrieve in a batch, e.g. when building a
+; record hierarchy. A higher number results in fewer round-trips but may increase
+; Solr's memory usage. Default is 1000; non-positive values fall back to it.
+;cursor_batch_size = 1000
+
 ; Enable/Disable searching reserves using the "reserves" Solr core. When enabling
 ; this feature, you need to run the util/index_reserves.php script to populate the
diff --git a/module/VuFind/src/VuFind/Hierarchy/TreeDataSource/Factory.php b/module/VuFind/src/VuFind/Hierarchy/TreeDataSource/Factory.php
index 253b7e9943a9ffa734fb6cef4bdd2edb25f92a48..9d635ebf02ed80e62bea1f3cd5fc5cabaabadd29 100644
--- a/module/VuFind/src/VuFind/Hierarchy/TreeDataSource/Factory.php
+++ b/module/VuFind/src/VuFind/Hierarchy/TreeDataSource/Factory.php
@@ -60,13 +60,17 @@ class Factory
         $filters = isset($hierarchyFilters->HierarchyTree->filterQueries)
             ? $hierarchyFilters->HierarchyTree->filterQueries->toArray()
             : [];
+        $config = $sm->getServiceLocator()->get('VuFind\Config')
+            ->get('config');
+        $batchSize = isset($config->Index->cursor_batch_size)
+            ? $config->Index->cursor_batch_size : 1000;
         $solr = $sm->getServiceLocator()->get('VuFind\Search\BackendManager')
             ->get('Solr')->getConnector();
         $formatterManager = $sm->getServiceLocator()
             ->get('VuFind\HierarchyTreeDataFormatterPluginManager');
         return new Solr(
             $solr, $formatterManager, rtrim($cacheDir, '/') . '/hierarchy',
-            $filters
+            $filters, $batchSize
         );
     }
 }
diff --git a/module/VuFind/src/VuFind/Hierarchy/TreeDataSource/Solr.php b/module/VuFind/src/VuFind/Hierarchy/TreeDataSource/Solr.php
index eccc2ad439bf43f2b834c02cc3c21eedb452797e..84d8a4a098455e002b126d69a7cb06f7ece1c06b 100644
--- a/module/VuFind/src/VuFind/Hierarchy/TreeDataSource/Solr.php
+++ b/module/VuFind/src/VuFind/Hierarchy/TreeDataSource/Solr.php
@@ -73,6 +73,13 @@ class Solr extends AbstractBase
      */
     protected $filters = [];
 
+    /**
+     * Record batch size
+     *
+     * @var int
+     */
+    protected $batchSize = 1000;
+
     /**
      * Constructor.
      *
@@ -80,9 +87,10 @@ class Solr extends AbstractBase
      * @param FormatterManager $fm        Formatter manager
      * @param string           $cacheDir  Directory to hold cache results (optional)
      * @param array            $filters   Filters to apply to Solr tree queries
+     * @param int              $batchSize Number of records retrieved in a batch
      */
     public function __construct(Connector $connector, FormatterManager $fm,
-        $cacheDir = null, $filters = []
+        $cacheDir = null, $filters = [], $batchSize = 1000
     ) {
         $this->solrConnector = $connector;
         $this->formatterManager = $fm;
@@ -90,6 +98,11 @@ class Solr extends AbstractBase
             $this->cacheDir = rtrim($cacheDir, '/');
         }
         $this->filters = $filters;
+        // Ignore non-positive sizes and keep the default: a batch of 0 rows
+        // would make every query look empty.
+        if ((int)$batchSize > 0) {
+            $this->batchSize = (int)$batchSize;
+        }
     }
 
     /**
@@ -119,21 +132,50 @@ class Solr extends AbstractBase
      */
     protected function searchSolr($q, $rows = 1073741823)
     {
-        $params = new ParamBag(
-            [
-                'q' => [$q],
-                'fq' => $this->filters,
-                'hl' => ['false'],
-                'fl' => ['title,id,hierarchy_parent_id,hierarchy_top_id,'
-                    . 'is_hierarchy_id,hierarchy_sequence,title_in_hierarchy'],
-                'wt' => ['json'],
-                'json.nl' => ['arrarr'],
-                'rows' => [$rows], // Integer max
-                'start' => [0]
-            ]
-        );
-        $response = $this->solrConnector->search($params);
-        return json_decode($response);
+        $prevCursorMark = '';
+        $cursorMark = '*';
+        $records = [];
+        while ($cursorMark !== $prevCursorMark) {
+            // Only ask for what is still missing so that the total can never
+            // exceed $rows
+            $batchRows = min([$this->batchSize, $rows - count($records)]);
+            $params = new ParamBag(
+                [
+                    'q' => [$q],
+                    'fq' => $this->filters,
+                    'hl' => ['false'],
+                    'spellcheck' => ['false'],
+                    'fl' => ['title,id,hierarchy_parent_id,hierarchy_top_id,'
+                        . 'is_hierarchy_id,hierarchy_sequence,title_in_hierarchy'],
+                    'wt' => ['json'],
+                    'json.nl' => ['arrarr'],
+                    'rows' => [$batchRows],
+                    // Start is always 0 when using cursorMark
+                    'start' => [0],
+                    // Sort is required
+                    'sort' => ['id asc'],
+                    // Override any default timeAllowed since it cannot be used with
+                    // cursorMark
+                    'timeAllowed' => [-1],
+                    'cursorMark' => [$cursorMark]
+                ]
+            );
+            $results = json_decode($this->solrConnector->search($params));
+            if (empty($results->response->docs)) {
+                break;
+            }
+            foreach ($results->response->docs as $doc) {
+                $records[] = $doc;
+            }
+            // Stop once enough records are collected or Solr gave no cursor to
+            // continue from
+            if (count($records) >= $rows || !isset($results->nextCursorMark)) {
+                break;
+            }
+            $prevCursorMark = $cursorMark;
+            $cursorMark = $results->nextCursorMark;
+        }
+        return $records;
     }
 
     /**
@@ -155,12 +197,12 @@ class Solr extends AbstractBase
         }
         $lastId = $id;
 
-        $results = $this->searchSolr('hierarchy_top_id:"' . $id . '"');
-        if ($results->response->numFound < 1) {
+        $records = $this->searchSolr('hierarchy_top_id:"' . $id . '"');
+        if (!$records) {
             return [];
         }
         $map = [$id => []];
-        foreach ($results->response->docs as $current) {
+        foreach ($records as $current) {
             $parents = isset($current->hierarchy_parent_id)
                 ? $current->hierarchy_parent_id : [];
             foreach ($parents as $parentId) {
@@ -193,8 +235,7 @@ class Solr extends AbstractBase
         }
         $lastId = $id;
 
-        $recordResults = $this->searchSolr('id:"' . $id . '"', 1);
-        $record = isset($recordResults->response->docs[0])
-            ? $recordResults->response->docs[0] : false;
+        $records = $this->searchSolr('id:"' . $id . '"', 1);
+        $record = $records ? $records[0] : false;
         return $record;
     }