Skip to content
Snippets Groups Projects
Commit eaad9b67 authored by Ere Maijala, committed by Demian Katz
Browse files

Use cursorMark to fetch records when building hierarchies (#1090)

- Reduces memory pressure on Solr
- Improved speed (at least under some circumstances)
parent ab8e86c6
Branches
Tags
No related merge requests found
...@@ -427,6 +427,11 @@ timeout = 30 ...@@ -427,6 +427,11 @@ timeout = 30
; You can choose dismax for standard Dismax (the default) or edismax for Extended ; You can choose dismax for standard Dismax (the default) or edismax for Extended
; Dismax, or you can configure your own custom handler in solrconfig.xml. ; Dismax, or you can configure your own custom handler in solrconfig.xml.
default_dismax_handler = dismax default_dismax_handler = dismax
; This is the number of records to retrieve in a batch e.g. when building a record
; hierarchy. A higher number results in fewer round-trips but may increase Solr's
; memory usage. Default is 1000.
;cursor_batch_size = 1000
; Enable/Disable searching reserves using the "reserves" Solr core. When enabling ; Enable/Disable searching reserves using the "reserves" Solr core. When enabling
; this feature, you need to run the util/index_reserves.php script to populate the ; this feature, you need to run the util/index_reserves.php script to populate the
......
...@@ -60,13 +60,17 @@ class Factory ...@@ -60,13 +60,17 @@ class Factory
$filters = isset($hierarchyFilters->HierarchyTree->filterQueries) $filters = isset($hierarchyFilters->HierarchyTree->filterQueries)
? $hierarchyFilters->HierarchyTree->filterQueries->toArray() ? $hierarchyFilters->HierarchyTree->filterQueries->toArray()
: []; : [];
$config = $sm->getServiceLocator()->get('VuFind\Config')
->get('config');
$batchSize = isset($config->Index->cursor_batch_size)
? $config->Index->cursor_batch_size : 1000;
$solr = $sm->getServiceLocator()->get('VuFind\Search\BackendManager') $solr = $sm->getServiceLocator()->get('VuFind\Search\BackendManager')
->get('Solr')->getConnector(); ->get('Solr')->getConnector();
$formatterManager = $sm->getServiceLocator() $formatterManager = $sm->getServiceLocator()
->get('VuFind\HierarchyTreeDataFormatterPluginManager'); ->get('VuFind\HierarchyTreeDataFormatterPluginManager');
return new Solr( return new Solr(
$solr, $formatterManager, rtrim($cacheDir, '/') . '/hierarchy', $solr, $formatterManager, rtrim($cacheDir, '/') . '/hierarchy',
$filters $filters, $batchSize
); );
} }
} }
...@@ -73,6 +73,13 @@ class Solr extends AbstractBase ...@@ -73,6 +73,13 @@ class Solr extends AbstractBase
*/ */
protected $filters = []; protected $filters = [];
/**
* Record batch size
*
* @var int
*/
protected $batchSize = 1000;
/** /**
* Constructor. * Constructor.
* *
...@@ -80,9 +87,10 @@ class Solr extends AbstractBase ...@@ -80,9 +87,10 @@ class Solr extends AbstractBase
* @param FormatterManager $fm Formatter manager * @param FormatterManager $fm Formatter manager
* @param string $cacheDir Directory to hold cache results (optional) * @param string $cacheDir Directory to hold cache results (optional)
* @param array $filters Filters to apply to Solr tree queries * @param array $filters Filters to apply to Solr tree queries
* @param int $batchSize Number of records retrieved in a batch
*/ */
public function __construct(Connector $connector, FormatterManager $fm, public function __construct(Connector $connector, FormatterManager $fm,
$cacheDir = null, $filters = [] $cacheDir = null, $filters = [], $batchSize = 1000
) { ) {
$this->solrConnector = $connector; $this->solrConnector = $connector;
$this->formatterManager = $fm; $this->formatterManager = $fm;
...@@ -90,6 +98,7 @@ class Solr extends AbstractBase ...@@ -90,6 +98,7 @@ class Solr extends AbstractBase
$this->cacheDir = rtrim($cacheDir, '/'); $this->cacheDir = rtrim($cacheDir, '/');
} }
$this->filters = $filters; $this->filters = $filters;
$this->batchSize = $batchSize;
} }
/** /**
...@@ -119,21 +128,43 @@ class Solr extends AbstractBase ...@@ -119,21 +128,43 @@ class Solr extends AbstractBase
*/ */
protected function searchSolr($q, $rows = 1073741823)
{
    // Fetch matching records in batches of at most $this->batchSize using
    // Solr's cursorMark paging, and return them as a flat array of decoded
    // document objects (empty array when nothing matches). At most $rows
    // records are returned.
    $records = [];
    $prevCursorMark = '';
    $cursorMark = '*';
    // The loop ends when Solr hands back the same cursor mark it was given,
    // when a batch comes back empty, or when enough records were collected.
    while ($cursorMark !== $prevCursorMark) {
        $params = new ParamBag(
            [
                'q' => [$q],
                'fq' => $this->filters,
                'hl' => ['false'],
                'spellcheck' => ['false'],
                'fl' => ['title,id,hierarchy_parent_id,hierarchy_top_id,'
                    . 'is_hierarchy_id,hierarchy_sequence,title_in_hierarchy'],
                'wt' => ['json'],
                'json.nl' => ['arrarr'],
                'rows' => [min($this->batchSize, $rows)],
                // Start is always 0 when using cursorMark
                'start' => [0],
                // Sort is required
                'sort' => ['id asc'],
                // Override any default timeAllowed since it cannot be used with
                // cursorMark
                'timeAllowed' => -1,
                'cursorMark' => $cursorMark
            ]
        );
        $results = json_decode($this->solrConnector->search($params));
        // Also covers an undecodable response: empty() tolerates a null
        // $results without raising a notice.
        if (empty($results->response->docs)) {
            break;
        }
        $records = array_merge($records, $results->response->docs);
        if (count($records) >= $rows) {
            break;
        }
        // Without a next cursor mark there is no way to continue paging;
        // stop instead of sending an undefined cursor back to Solr.
        if (!isset($results->nextCursorMark)) {
            break;
        }
        $prevCursorMark = $cursorMark;
        $cursorMark = $results->nextCursorMark;
    }
    // The final batch may overshoot when $rows is not a multiple of the
    // batch size; never return more than the caller asked for.
    return array_slice($records, 0, $rows);
}
/** /**
...@@ -155,12 +186,12 @@ class Solr extends AbstractBase ...@@ -155,12 +186,12 @@ class Solr extends AbstractBase
} }
$lastId = $id; $lastId = $id;
$results = $this->searchSolr('hierarchy_top_id:"' . $id . '"'); $records = $this->searchSolr('hierarchy_top_id:"' . $id . '"');
if ($results->response->numFound < 1) { if (!$records) {
return []; return [];
} }
$map = [$id => []]; $map = [$id => []];
foreach ($results->response->docs as $current) { foreach ($records as $current) {
$parents = isset($current->hierarchy_parent_id) $parents = isset($current->hierarchy_parent_id)
? $current->hierarchy_parent_id : []; ? $current->hierarchy_parent_id : [];
foreach ($parents as $parentId) { foreach ($parents as $parentId) {
...@@ -193,9 +224,8 @@ class Solr extends AbstractBase ...@@ -193,9 +224,8 @@ class Solr extends AbstractBase
} }
$lastId = $id; $lastId = $id;
$recordResults = $this->searchSolr('id:"' . $id . '"', 1); $records = $this->searchSolr('id:"' . $id . '"', 1);
$record = isset($recordResults->response->docs[0]) $record = $records ? $records[0] : false;
? $recordResults->response->docs[0] : false;
return $record; return $record;
} }
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment