From b19e665a8671c6bb719bc3a9067566bebd6ace66 Mon Sep 17 00:00:00 2001 From: Ere Maijala <ere.maijala@helsinki.fi> Date: Mon, 4 Feb 2019 21:54:10 +0200 Subject: [PATCH] Improvements for the OAI-PMH Server. (#1237) - Use cursorMark instead of deep paging with Solr - Make it possible to override the Server class - Ignore records that cannot be returned in the requested format --- module/VuFind/config/module.config.php | 2 + .../src/VuFind/Controller/OaiController.php | 9 +-- module/VuFind/src/VuFind/OAI/Server.php | 80 ++++++++++++------- module/VuFind/src/VuFind/OAI/Server/Auth.php | 10 +-- .../VuFind/src/VuFind/OAI/ServerFactory.php | 70 ++++++++++++++++ .../VuFind/src/VuFind/Search/Solr/Results.php | 43 ++++++++++ .../Solr/Response/Json/RecordCollection.php | 10 +++ 7 files changed, 180 insertions(+), 44 deletions(-) create mode 100644 module/VuFind/src/VuFind/OAI/ServerFactory.php diff --git a/module/VuFind/config/module.config.php b/module/VuFind/config/module.config.php index 6143ee81d03..2020b2179a2 100644 --- a/module/VuFind/config/module.config.php +++ b/module/VuFind/config/module.config.php @@ -361,6 +361,8 @@ $config = [ 'VuFind\Log\Logger' => 'VuFind\Log\LoggerFactory', 'VuFind\Mailer\Mailer' => 'VuFind\Mailer\Factory', 'VuFind\Net\IpAddressUtils' => 'Zend\ServiceManager\Factory\InvokableFactory', + 'VuFind\OAI\Server' => 'VuFind\OAI\ServerFactory', + 'VuFind\OAI\Server\Auth' => 'VuFind\OAI\ServerFactory', 'VuFind\QRCode\Loader' => 'VuFind\QRCode\LoaderFactory', 'VuFind\Recommend\PluginManager' => 'VuFind\ServiceManager\AbstractPluginManagerFactory', 'VuFind\Record\Cache' => 'VuFind\Record\CacheFactory', diff --git a/module/VuFind/src/VuFind/Controller/OaiController.php b/module/VuFind/src/VuFind/Controller/OaiController.php index 23df0700cde..50d1873fb0e 100644 --- a/module/VuFind/src/VuFind/Controller/OaiController.php +++ b/module/VuFind/src/VuFind/Controller/OaiController.php @@ -27,7 +27,6 @@ */ namespace VuFind\Controller; -use VuFind\Search\Results\PluginManager as ResultsManager; use VuFindApi\Formatter\RecordFormatter; /** @@ -102,12 +101,8 @@ class OaiController extends AbstractBase $this->getRequest()->getQuery()->toArray(), $this->getRequest()->getPost()->toArray() ); - $server = new $serverClass( - $this->serviceLocator->get(ResultsManager::class), - $this->serviceLocator->get(\VuFind\Record\Loader::class), - $this->serviceLocator->get(\VuFind\Db\Table\PluginManager::class), - $config, $baseURL, $params - ); + $server = $this->serviceLocator->get($serverClass); + $server->init($config, $baseURL, $params); $server->setRecordLinkHelper( $this->getViewRenderer()->plugin('recordLink') ); diff --git a/module/VuFind/src/VuFind/OAI/Server.php b/module/VuFind/src/VuFind/OAI/Server.php index 7b60a4a6735..e84dac9bd60 100644 --- a/module/VuFind/src/VuFind/OAI/Server.php +++ b/module/VuFind/src/VuFind/OAI/Server.php @@ -5,6 +5,7 @@ * PHP version 7 * * Copyright (C) Villanova University 2010. + * Copyright (C) The National Library of Finland 2018. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2, @@ -22,6 +23,7 @@ * @category VuFind * @package OAI_Server * @author Demian Katz <demian.katz@villanova.edu> + * @author Ere Maijala <ere.maijala@helsinki.fi> * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License * @link https://vufind.org/wiki/development Wiki */ @@ -40,6 +42,7 @@ use VuFindApi\Formatter\RecordFormatter; * @category VuFind * @package OAI_Server * @author Demian Katz <demian.katz@villanova.edu> + * @author Ere Maijala <ere.maijala@helsinki.fi> * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License * @link https://vufind.org/wiki/development Wiki */ @@ -178,7 +181,7 @@ class Server */ protected $defaultQuery = ''; - /** + /* * Record formatter * * @var RecordFormatter @@ -200,19 +203,27 @@ class Server * retrieving records * @param \VuFind\Record\Loader $loader Record loader * @param \VuFind\Db\Table\PluginManager $tables Table manager - * @param \Zend\Config\Config $config VuFind configuration - * @param string $baseURL The base URL for the OAI - * server - * @param array $params The incoming OAI-PMH - * parameters (i.e. $_GET) */ public function __construct(\VuFind\Search\Results\PluginManager $results, - \VuFind\Record\Loader $loader, \VuFind\Db\Table\PluginManager $tables, - \Zend\Config\Config $config, $baseURL, $params + \VuFind\Record\Loader $loader, \VuFind\Db\Table\PluginManager $tables ) { $this->resultsManager = $results; $this->recordLoader = $loader; $this->tableManager = $tables; + } + + /** + * Initialize settings + * + * @param \Zend\Config\Config $config VuFind configuration + * @param string $baseURL The base URL for the OAI server + * @param array $params The incoming OAI-PMH parameters (i.e. + * $_GET) + * + * @return void + */ + public function init(\Zend\Config\Config $config, $baseURL, $params) + { $this->baseURL = $baseURL; $parts = parse_url($baseURL); $this->baseHostURL = $parts['scheme'] . '://' . $parts['host']; @@ -727,30 +738,36 @@ class Server } } - // Figure out how many Solr records we need to display (and where to start): - $solrOffset = ($currentCursor >= $deletedCount) - ? $currentCursor - $deletedCount : 0; - $solrLimit = ($params['cursor'] + $this->pageSize) - $currentCursor; + // Figure out how many non-deleted records we need to display: + $recordLimit = ($params['cursor'] + $this->pageSize) - $currentCursor; + $cursorMark = $params['cursorMark'] ?? ''; // Get non-deleted records from the Solr index: $set = $params['set'] ?? ''; $result = $this->listRecordsGetNonDeleted( - $from, $until, $solrOffset, $solrLimit, $set + $from, + $until, + $cursorMark, + $recordLimit, + $set ); $nonDeletedCount = $result->getResultTotal(); $format = $params['metadataPrefix']; foreach ($result->getResults() as $doc) { - if (!$this->attachNonDeleted($xml, $doc, $format, $headersOnly, $set)) { - $this->unexpectedError('Cannot load document'); - } + $this->attachNonDeleted($xml, $doc, $format, $headersOnly, $set); $currentCursor++; } + $nextCursorMark = $result->getCursorMark(); // If our cursor didn't reach the last record, we need a resumption token! $listSize = $deletedCount + $nonDeletedCount; - if ($listSize > $currentCursor) { - $this->saveResumptionToken($xml, $params, $currentCursor, $listSize); - } elseif ($solrOffset > 0) { + if ($listSize > $currentCursor + && ('' === $cursorMark || $nextCursorMark !== $cursorMark) + ) { + $this->saveResumptionToken( + $xml, $params, $currentCursor, $listSize, $nextCursorMark + ); + } elseif ($params['cursor'] > 0) { // If we reached the end of the list but there is more than one page, we // still need to display an empty <resumptionToken> tag: $token = $xml->addChild('resumptionToken'); @@ -863,15 +880,15 @@ class Server /** * Get an array of information on non-deleted records in the specified range. * - * @param int $from Start date. - * @param int $until End date. - * @param int $offset First record to obtain in full detail. - * @param int $limit Max number of full records to return. - * @param string $set Set to limit to (empty string for none). + * @param int $from Start date. + * @param int $until End date. + * @param string $cursorMark cursorMark for the position in the full result list. + * @param int $limit Max number of full records to return. + * @param string $set Set to limit to (empty string for none). * * @return \VuFind\Search\Base\Results Search result object. */ - protected function listRecordsGetNonDeleted($from, $until, $offset, $limit, + protected function listRecordsGetNonDeleted($from, $until, $cursorMark, $limit, $set = '' ) { // Set up search parameters: @@ -880,7 +897,7 @@ class Server $params->setLimit($limit); $params->getOptions()->disableHighlighting(); $params->getOptions()->spellcheckEnabled(false); - $params->setSort('last_indexed asc', true); + $params->setSort('last_indexed asc, id asc', true); // Construct a range query based on last indexed time: $params->setOverrideQuery( @@ -906,7 +923,8 @@ class Server } // Perform a Solr search: - $results->overrideStartRecord($offset + 1); + $results->overrideStartRecord(1); + $results->setCursorMark($cursorMark); // Return our results: return $results; @@ -1146,15 +1164,19 @@ class Server * @param int $currentCursor Current cursor position in search * results. * @param int $listSize Total size of search results. + * @param string $cursorMark cursorMark for the position in the full + * results list. * * @return void */ - protected function saveResumptionToken($xml, $params, $currentCursor, $listSize) - { + protected function saveResumptionToken($xml, $params, $currentCursor, $listSize, + $cursorMark + ) { // Save the old cursor position before overwriting it for storage in the // database! $oldCursor = $params['cursor']; $params['cursor'] = $currentCursor; + $params['cursorMark'] = $cursorMark; // Save everything to the database: $search = $this->tableManager->get('OaiResumption'); diff --git a/module/VuFind/src/VuFind/OAI/Server/Auth.php b/module/VuFind/src/VuFind/OAI/Server/Auth.php index 4211191e688..cc2cbb6a30c 100644 --- a/module/VuFind/src/VuFind/OAI/Server/Auth.php +++ b/module/VuFind/src/VuFind/OAI/Server/Auth.php @@ -49,17 +49,11 @@ class Auth extends Base * retrieving records * @param \VuFind\Record\Loader $loader Record loader * @param \VuFind\Db\Table\PluginManager $tables Table manager - * @param \Zend\Config\Config $config VuFind configuration - * @param string $baseURL The base URL for the OAI - * server - * @param array $params The incoming OAI-PMH - * parameters (i.e. $_GET) */ public function __construct(\VuFind\Search\Results\PluginManager $results, - \VuFind\Record\Loader $loader, \VuFind\Db\Table\PluginManager $tables, - \Zend\Config\Config $config, $baseURL, $params + \VuFind\Record\Loader $loader, \VuFind\Db\Table\PluginManager $tables ) { - parent::__construct($results, $loader, $tables, $config, $baseURL, $params); + parent::__construct($results, $loader, $tables); $this->core = 'authority'; $this->searchClassId = 'SolrAuth'; } diff --git a/module/VuFind/src/VuFind/OAI/ServerFactory.php b/module/VuFind/src/VuFind/OAI/ServerFactory.php new file mode 100644 index 00000000000..4d04b6cfa1e --- /dev/null +++ b/module/VuFind/src/VuFind/OAI/ServerFactory.php @@ -0,0 +1,70 @@ +<?php +/** + * OAI Server factory. + * + * PHP version 7 + * + * Copyright (C) Villanova University 2018. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * @category VuFind + * @package OAI_Server + * @author Demian Katz <demian.katz@villanova.edu> + * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License + * @link https://vufind.org/wiki/development Wiki + */ +namespace VuFind\OAI; + +use Interop\Container\ContainerInterface; +use Zend\ServiceManager\Factory\FactoryInterface; + +/** + * OAI Server factory. + * + * @category VuFind + * @package OAI_Server + * @author Demian Katz <demian.katz@villanova.edu> + * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License + * @link https://vufind.org/wiki/development Wiki + */ +class ServerFactory implements FactoryInterface +{ + /** + * Create an object + * + * @param ContainerInterface $container Service manager + * @param string $requestedName Service being created + * @param null|array $options Extra options (optional) + * + * @return object + * + * @throws ServiceNotFoundException if unable to resolve the service. + * @throws ServiceNotCreatedException if an exception is raised when + * creating a service. + * @throws ContainerException if any other error occurs + */ + public function __invoke(ContainerInterface $container, $requestedName, + array $options = null + ) { + if (!empty($options)) { + throw new \Exception('Unexpected options sent to factory.'); + } + return new $requestedName( + $container->get(\VuFind\Search\Results\PluginManager::class), + $container->get(\VuFind\Record\Loader::class), + $container->get(\VuFind\Db\Table\PluginManager::class) + ); + } +} diff --git a/module/VuFind/src/VuFind/Search/Solr/Results.php b/module/VuFind/src/VuFind/Search/Solr/Results.php index 83bdb021061..5d725772acd 100644 --- a/module/VuFind/src/VuFind/Search/Solr/Results.php +++ b/module/VuFind/src/VuFind/Search/Solr/Results.php @@ -71,6 +71,15 @@ class Results extends \VuFind\Search\Base\Results */ protected $spellingProcessor = null; + /** + * CursorMark used for deep paging (e.g. OAI-PMH Server). + * Set to '*' to start paging a request and use the new value returned from the + * search request for the next request. + * + * @var null|string + */ + protected $cursorMark = null; + /** * Get spelling processor. * @@ -96,6 +105,28 @@ class Results extends \VuFind\Search\Base\Results $this->spellingProcessor = $processor; } + /** + * Get cursorMark. + * + * @return null|string + */ + public function getCursorMark() + { + return $this->cursorMark; + } + + /** + * Set cursorMark. + * + * @param null|string $cursorMark New cursor mark + * + * @return void + */ + public function setCursorMark($cursorMark) + { + $this->cursorMark = $cursorMark; + } + /** * Support method for performAndProcessSearch -- perform a search based on the * parameters passed to the object. @@ -109,6 +140,13 @@ class Results extends \VuFind\Search\Base\Results $offset = $this->getStartRecord() - 1; $params = $this->getParams()->getBackendParameters(); $searchService = $this->getSearchService(); + $cursorMark = $this->getCursorMark(); + if (null !== $cursorMark) { + $params->set('cursorMark', '' === $cursorMark ? '*' : $cursorMark); + // Override any default timeAllowed since it cannot be used with + // cursorMark + $params->set('timeAllowed', -1); + } try { $collection = $searchService @@ -137,6 +175,11 @@ class Results extends \VuFind\Search\Base\Results $this->suggestions = $this->getSpellingProcessor() ->getSuggestions($spellcheck, $this->getParams()->getQuery()); + // Update current cursorMark + if (null !== $cursorMark) { + $this->setCursorMark($collection->getCursorMark()); + } + // Construct record drivers for all the items in the response: $this->results = $collection->getRecords(); } diff --git a/module/VuFindSearch/src/VuFindSearch/Backend/Solr/Response/Json/RecordCollection.php b/module/VuFindSearch/src/VuFindSearch/Backend/Solr/Response/Json/RecordCollection.php index 7ab8c7ae4e7..cd08495e0aa 100644 --- a/module/VuFindSearch/src/VuFindSearch/Backend/Solr/Response/Json/RecordCollection.php +++ b/module/VuFindSearch/src/VuFindSearch/Backend/Solr/Response/Json/RecordCollection.php @@ -148,6 +148,16 @@ class RecordCollection extends AbstractRecordCollection return $this->response['highlighting'] ?? []; } + /** + * Get cursorMark. + * + * @return string + */ + public function getCursorMark() + { + return $this->response['nextCursorMark'] ?? ''; + } + /** * Get raw Solr input parameters from the response. * -- GitLab