From b19e665a8671c6bb719bc3a9067566bebd6ace66 Mon Sep 17 00:00:00 2001
From: Ere Maijala <ere.maijala@helsinki.fi>
Date: Mon, 4 Feb 2019 21:54:10 +0200
Subject: [PATCH] Improvements for the OAI-PMH Server. (#1237)

- Use cursorMark instead of deep paging with Solr
- Make it possible to override the Server class
- Ignore records that cannot be returned in the requested format
---
 module/VuFind/config/module.config.php        |  2 +
 .../src/VuFind/Controller/OaiController.php   |  9 +--
 module/VuFind/src/VuFind/OAI/Server.php       | 80 ++++++++++++-------
 module/VuFind/src/VuFind/OAI/Server/Auth.php  | 10 +--
 .../VuFind/src/VuFind/OAI/ServerFactory.php   | 70 ++++++++++++++++
 .../VuFind/src/VuFind/Search/Solr/Results.php | 43 ++++++++++
 .../Solr/Response/Json/RecordCollection.php   | 10 +++
 7 files changed, 180 insertions(+), 44 deletions(-)
 create mode 100644 module/VuFind/src/VuFind/OAI/ServerFactory.php

diff --git a/module/VuFind/config/module.config.php b/module/VuFind/config/module.config.php
index 6143ee81d03..2020b2179a2 100644
--- a/module/VuFind/config/module.config.php
+++ b/module/VuFind/config/module.config.php
@@ -361,6 +361,8 @@ $config = [
             'VuFind\Log\Logger' => 'VuFind\Log\LoggerFactory',
             'VuFind\Mailer\Mailer' => 'VuFind\Mailer\Factory',
             'VuFind\Net\IpAddressUtils' => 'Zend\ServiceManager\Factory\InvokableFactory',
+            'VuFind\OAI\Server' => 'VuFind\OAI\ServerFactory',
+            'VuFind\OAI\Server\Auth' => 'VuFind\OAI\ServerFactory',
             'VuFind\QRCode\Loader' => 'VuFind\QRCode\LoaderFactory',
             'VuFind\Recommend\PluginManager' => 'VuFind\ServiceManager\AbstractPluginManagerFactory',
             'VuFind\Record\Cache' => 'VuFind\Record\CacheFactory',
diff --git a/module/VuFind/src/VuFind/Controller/OaiController.php b/module/VuFind/src/VuFind/Controller/OaiController.php
index 23df0700cde..50d1873fb0e 100644
--- a/module/VuFind/src/VuFind/Controller/OaiController.php
+++ b/module/VuFind/src/VuFind/Controller/OaiController.php
@@ -27,7 +27,6 @@
  */
 namespace VuFind\Controller;
 
-use VuFind\Search\Results\PluginManager as ResultsManager;
 use VuFindApi\Formatter\RecordFormatter;
 
 /**
@@ -102,12 +101,8 @@ class OaiController extends AbstractBase
                 $this->getRequest()->getQuery()->toArray(),
                 $this->getRequest()->getPost()->toArray()
             );
-            $server = new $serverClass(
-                $this->serviceLocator->get(ResultsManager::class),
-                $this->serviceLocator->get(\VuFind\Record\Loader::class),
-                $this->serviceLocator->get(\VuFind\Db\Table\PluginManager::class),
-                $config, $baseURL, $params
-            );
+            $server = $this->serviceLocator->get($serverClass);
+            $server->init($config, $baseURL, $params);
             $server->setRecordLinkHelper(
                 $this->getViewRenderer()->plugin('recordLink')
             );
diff --git a/module/VuFind/src/VuFind/OAI/Server.php b/module/VuFind/src/VuFind/OAI/Server.php
index 7b60a4a6735..e84dac9bd60 100644
--- a/module/VuFind/src/VuFind/OAI/Server.php
+++ b/module/VuFind/src/VuFind/OAI/Server.php
@@ -5,6 +5,7 @@
  * PHP version 7
  *
  * Copyright (C) Villanova University 2010.
+ * Copyright (C) The National Library of Finland 2018.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2,
@@ -22,6 +23,7 @@
  * @category VuFind
  * @package  OAI_Server
  * @author   Demian Katz <demian.katz@villanova.edu>
+ * @author   Ere Maijala <ere.maijala@helsinki.fi>
  * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
  * @link     https://vufind.org/wiki/development Wiki
  */
@@ -40,6 +42,7 @@ use VuFindApi\Formatter\RecordFormatter;
  * @category VuFind
  * @package  OAI_Server
  * @author   Demian Katz <demian.katz@villanova.edu>
+ * @author   Ere Maijala <ere.maijala@helsinki.fi>
  * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
  * @link     https://vufind.org/wiki/development Wiki
  */
@@ -178,7 +181,7 @@ class Server
      */
     protected $defaultQuery = '';
 
-    /**
+    /*
      * Record formatter
      *
      * @var RecordFormatter
@@ -200,19 +203,27 @@ class Server
      * retrieving records
      * @param \VuFind\Record\Loader                $loader  Record loader
      * @param \VuFind\Db\Table\PluginManager       $tables  Table manager
-     * @param \Zend\Config\Config                  $config  VuFind configuration
-     * @param string                               $baseURL The base URL for the OAI
-     * server
-     * @param array                                $params  The incoming OAI-PMH
-     * parameters (i.e. $_GET)
      */
     public function __construct(\VuFind\Search\Results\PluginManager $results,
-        \VuFind\Record\Loader $loader, \VuFind\Db\Table\PluginManager $tables,
-        \Zend\Config\Config $config, $baseURL, $params
+        \VuFind\Record\Loader $loader, \VuFind\Db\Table\PluginManager $tables
     ) {
         $this->resultsManager = $results;
         $this->recordLoader = $loader;
         $this->tableManager = $tables;
+    }
+
+    /**
+     * Initialize settings
+     *
+     * @param \Zend\Config\Config $config  VuFind configuration
+     * @param string              $baseURL The base URL for the OAI server
+     * @param array               $params  The incoming OAI-PMH parameters (i.e.
+     * $_GET)
+     *
+     * @return void
+     */
+    public function init(\Zend\Config\Config $config, $baseURL, $params)
+    {
         $this->baseURL = $baseURL;
         $parts = parse_url($baseURL);
         $this->baseHostURL = $parts['scheme'] . '://' . $parts['host'];
@@ -727,30 +738,36 @@ class Server
             }
         }
 
-        // Figure out how many Solr records we need to display (and where to start):
-        $solrOffset = ($currentCursor >= $deletedCount)
-            ? $currentCursor - $deletedCount : 0;
-        $solrLimit = ($params['cursor'] + $this->pageSize) - $currentCursor;
+        // Figure out how many non-deleted records we need to display:
+        $recordLimit = ($params['cursor'] + $this->pageSize) - $currentCursor;
+        $cursorMark = $params['cursorMark'] ?? '';
 
         // Get non-deleted records from the Solr index:
         $set = $params['set'] ?? '';
         $result = $this->listRecordsGetNonDeleted(
-            $from, $until, $solrOffset, $solrLimit, $set
+            $from,
+            $until,
+            $cursorMark,
+            $recordLimit,
+            $set
         );
         $nonDeletedCount = $result->getResultTotal();
         $format = $params['metadataPrefix'];
         foreach ($result->getResults() as $doc) {
-            if (!$this->attachNonDeleted($xml, $doc, $format, $headersOnly, $set)) {
-                $this->unexpectedError('Cannot load document');
-            }
+            $this->attachNonDeleted($xml, $doc, $format, $headersOnly, $set);
             $currentCursor++;
         }
+        $nextCursorMark = $result->getCursorMark();
 
         // If our cursor didn't reach the last record, we need a resumption token!
         $listSize = $deletedCount + $nonDeletedCount;
-        if ($listSize > $currentCursor) {
-            $this->saveResumptionToken($xml, $params, $currentCursor, $listSize);
-        } elseif ($solrOffset > 0) {
+        if ($listSize > $currentCursor
+            && ('' === $cursorMark || $nextCursorMark !== $cursorMark)
+        ) {
+            $this->saveResumptionToken(
+                $xml, $params, $currentCursor, $listSize, $nextCursorMark
+            );
+        } elseif ($params['cursor'] > 0) {
             // If we reached the end of the list but there is more than one page, we
             // still need to display an empty <resumptionToken> tag:
             $token = $xml->addChild('resumptionToken');
@@ -863,15 +880,15 @@ class Server
     /**
      * Get an array of information on non-deleted records in the specified range.
      *
-     * @param int    $from   Start date.
-     * @param int    $until  End date.
-     * @param int    $offset First record to obtain in full detail.
-     * @param int    $limit  Max number of full records to return.
-     * @param string $set    Set to limit to (empty string for none).
+     * @param int    $from       Start date.
+     * @param int    $until      End date.
+     * @param string $cursorMark cursorMark for the position in the full result list.
+     * @param int    $limit      Max number of full records to return.
+     * @param string $set        Set to limit to (empty string for none).
      *
      * @return \VuFind\Search\Base\Results Search result object.
      */
-    protected function listRecordsGetNonDeleted($from, $until, $offset, $limit,
+    protected function listRecordsGetNonDeleted($from, $until, $cursorMark, $limit,
         $set = ''
     ) {
         // Set up search parameters:
@@ -880,7 +897,7 @@ class Server
         $params->setLimit($limit);
         $params->getOptions()->disableHighlighting();
         $params->getOptions()->spellcheckEnabled(false);
-        $params->setSort('last_indexed asc', true);
+        $params->setSort('last_indexed asc, id asc', true);
 
         // Construct a range query based on last indexed time:
         $params->setOverrideQuery(
@@ -906,7 +923,8 @@ class Server
         }
 
         // Perform a Solr search:
-        $results->overrideStartRecord($offset + 1);
+        $results->overrideStartRecord(1);
+        $results->setCursorMark($cursorMark);
 
         // Return our results:
         return $results;
@@ -1146,15 +1164,19 @@ class Server
      * @param int              $currentCursor Current cursor position in search
      * results.
      * @param int              $listSize      Total size of search results.
+     * @param string           $cursorMark    cursorMark for the position in the full
+     * results list.
      *
      * @return void
      */
-    protected function saveResumptionToken($xml, $params, $currentCursor, $listSize)
-    {
+    protected function saveResumptionToken($xml, $params, $currentCursor, $listSize,
+        $cursorMark
+    ) {
         // Save the old cursor position before overwriting it for storage in the
         // database!
         $oldCursor = $params['cursor'];
         $params['cursor'] = $currentCursor;
+        $params['cursorMark'] = $cursorMark;
 
         // Save everything to the database:
         $search = $this->tableManager->get('OaiResumption');
diff --git a/module/VuFind/src/VuFind/OAI/Server/Auth.php b/module/VuFind/src/VuFind/OAI/Server/Auth.php
index 4211191e688..cc2cbb6a30c 100644
--- a/module/VuFind/src/VuFind/OAI/Server/Auth.php
+++ b/module/VuFind/src/VuFind/OAI/Server/Auth.php
@@ -49,17 +49,11 @@ class Auth extends Base
      * retrieving records
      * @param \VuFind\Record\Loader                $loader  Record loader
      * @param \VuFind\Db\Table\PluginManager       $tables  Table manager
-     * @param \Zend\Config\Config                  $config  VuFind configuration
-     * @param string                               $baseURL The base URL for the OAI
-     * server
-     * @param array                                $params  The incoming OAI-PMH
-     * parameters (i.e. $_GET)
      */
     public function __construct(\VuFind\Search\Results\PluginManager $results,
-        \VuFind\Record\Loader $loader, \VuFind\Db\Table\PluginManager $tables,
-        \Zend\Config\Config $config, $baseURL, $params
+        \VuFind\Record\Loader $loader, \VuFind\Db\Table\PluginManager $tables
     ) {
-        parent::__construct($results, $loader, $tables, $config, $baseURL, $params);
+        parent::__construct($results, $loader, $tables);
         $this->core = 'authority';
         $this->searchClassId = 'SolrAuth';
     }
diff --git a/module/VuFind/src/VuFind/OAI/ServerFactory.php b/module/VuFind/src/VuFind/OAI/ServerFactory.php
new file mode 100644
index 00000000000..4d04b6cfa1e
--- /dev/null
+++ b/module/VuFind/src/VuFind/OAI/ServerFactory.php
@@ -0,0 +1,70 @@
+<?php
+/**
+ * OAI Server factory.
+ *
+ * PHP version 7
+ *
+ * Copyright (C) Villanova University 2018.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ * @category VuFind
+ * @package  OAI_Server
+ * @author   Demian Katz <demian.katz@villanova.edu>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     https://vufind.org/wiki/development Wiki
+ */
+namespace VuFind\OAI;
+
+use Interop\Container\ContainerInterface;
+use Zend\ServiceManager\Factory\FactoryInterface;
+
+/**
+ * OAI Server factory.
+ *
+ * @category VuFind
+ * @package  OAI_Server
+ * @author   Demian Katz <demian.katz@villanova.edu>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     https://vufind.org/wiki/development Wiki
+ */
+class ServerFactory implements FactoryInterface
+{
+    /**
+     * Create an object
+     *
+     * @param ContainerInterface $container     Service manager
+     * @param string             $requestedName Service being created
+     * @param null|array         $options       Extra options (optional)
+     *
+     * @return object
+     *
+     * @throws ServiceNotFoundException if unable to resolve the service.
+     * @throws ServiceNotCreatedException if an exception is raised when
+     * creating a service.
+     * @throws ContainerException if any other error occurs
+     */
+    public function __invoke(ContainerInterface $container, $requestedName,
+        array $options = null
+    ) {
+        if (!empty($options)) {
+            throw new \Exception('Unexpected options sent to factory.');
+        }
+        return new $requestedName(
+            $container->get(\VuFind\Search\Results\PluginManager::class),
+            $container->get(\VuFind\Record\Loader::class),
+            $container->get(\VuFind\Db\Table\PluginManager::class)
+        );
+    }
+}
diff --git a/module/VuFind/src/VuFind/Search/Solr/Results.php b/module/VuFind/src/VuFind/Search/Solr/Results.php
index 83bdb021061..5d725772acd 100644
--- a/module/VuFind/src/VuFind/Search/Solr/Results.php
+++ b/module/VuFind/src/VuFind/Search/Solr/Results.php
@@ -71,6 +71,15 @@ class Results extends \VuFind\Search\Base\Results
      */
     protected $spellingProcessor = null;
 
+    /**
+     * CursorMark used for deep paging (e.g. OAI-PMH Server).
+     * Set to '*' to start paging a request and use the new value returned from the
+     * search request for the next request.
+     *
+     * @var null|string
+     */
+    protected $cursorMark = null;
+
     /**
      * Get spelling processor.
      *
@@ -96,6 +105,28 @@ class Results extends \VuFind\Search\Base\Results
         $this->spellingProcessor = $processor;
     }
 
+    /**
+     * Get cursorMark.
+     *
+     * @return null|string
+     */
+    public function getCursorMark()
+    {
+        return $this->cursorMark;
+    }
+
+    /**
+     * Set cursorMark.
+     *
+     * @param null|string $cursorMark New cursor mark
+     *
+     * @return void
+     */
+    public function setCursorMark($cursorMark)
+    {
+        $this->cursorMark = $cursorMark;
+    }
+
     /**
      * Support method for performAndProcessSearch -- perform a search based on the
      * parameters passed to the object.
@@ -109,6 +140,13 @@ class Results extends \VuFind\Search\Base\Results
         $offset = $this->getStartRecord() - 1;
         $params = $this->getParams()->getBackendParameters();
         $searchService = $this->getSearchService();
+        $cursorMark = $this->getCursorMark();
+        if (null !== $cursorMark) {
+            $params->set('cursorMark', '' === $cursorMark ? '*' : $cursorMark);
+            // Override any default timeAllowed since it cannot be used with
+            // cursorMark
+            $params->set('timeAllowed', -1);
+        }
 
         try {
             $collection = $searchService
@@ -137,6 +175,11 @@ class Results extends \VuFind\Search\Base\Results
         $this->suggestions = $this->getSpellingProcessor()
             ->getSuggestions($spellcheck, $this->getParams()->getQuery());
 
+        // Update current cursorMark
+        if (null !== $cursorMark) {
+            $this->setCursorMark($collection->getCursorMark());
+        }
+
         // Construct record drivers for all the items in the response:
         $this->results = $collection->getRecords();
     }
diff --git a/module/VuFindSearch/src/VuFindSearch/Backend/Solr/Response/Json/RecordCollection.php b/module/VuFindSearch/src/VuFindSearch/Backend/Solr/Response/Json/RecordCollection.php
index 7ab8c7ae4e7..cd08495e0aa 100644
--- a/module/VuFindSearch/src/VuFindSearch/Backend/Solr/Response/Json/RecordCollection.php
+++ b/module/VuFindSearch/src/VuFindSearch/Backend/Solr/Response/Json/RecordCollection.php
@@ -148,6 +148,16 @@ class RecordCollection extends AbstractRecordCollection
         return $this->response['highlighting'] ?? [];
     }
 
+    /**
+     * Get cursorMark.
+     *
+     * @return string
+     */
+    public function getCursorMark()
+    {
+        return $this->response['nextCursorMark'] ?? '';
+    }
+
     /**
      * Get raw Solr input parameters from the response.
      *
-- 
GitLab