From 48bf74357a24b684d8fc47c5630498d816eb49ad Mon Sep 17 00:00:00 2001
From: Ere Maijala <ere.maijala@helsinki.fi>
Date: Thu, 6 Feb 2020 16:36:15 +0200
Subject: [PATCH] Make the cursorMark based sitemap generation use the search
 service. (#1486)

- Includes refactoring to build \VuFind\Sitemap\Generator through the service manager.
---
 .../VuFind/src/VuFind/Sitemap/Generator.php   | 83 ++++++++++++++++---
 .../src/VuFind/Sitemap/GeneratorFactory.php   | 73 ++++++++++++++++
 module/VuFindConsole/config/module.config.php |  1 +
 .../Controller/UtilController.php             |  8 +-
 4 files changed, 148 insertions(+), 17 deletions(-)
 create mode 100644 module/VuFind/src/VuFind/Sitemap/GeneratorFactory.php

diff --git a/module/VuFind/src/VuFind/Sitemap/Generator.php b/module/VuFind/src/VuFind/Sitemap/Generator.php
index 2d2a2bd464c..a4891a4cf92 100644
--- a/module/VuFind/src/VuFind/Sitemap/Generator.php
+++ b/module/VuFind/src/VuFind/Sitemap/Generator.php
@@ -29,7 +29,10 @@ namespace VuFind\Sitemap;
 
 use VuFind\Search\BackendManager;
 use VuFindSearch\Backend\Solr\Backend;
+use VuFindSearch\Backend\Solr\Response\Json\RecordCollectionFactory;
 use VuFindSearch\ParamBag;
+use VuFindSearch\Query\Query;
+use VuFindSearch\Service as SearchService;
 use Zend\Config\Config;
 use Zend\Console\Console;
 
@@ -51,6 +54,13 @@ class Generator
      */
     protected $backendManager;
 
+    /**
+     * Search service.
+     *
+     * @var SearchService
+     */
+    protected $searchService;
+
     /**
      * Base URL for site
      *
@@ -131,14 +141,17 @@ class Generator
     /**
      * Constructor
      *
-     * @param BackendManager $bm      Search backend
+     * @param BackendManager $bm      Search backend manager
+     * @param SearchService  $ss      Search service
      * @param string         $baseUrl VuFind base URL
      * @param Config         $config  Sitemap configuration settings
      */
-    public function __construct(BackendManager $bm, $baseUrl, Config $config)
-    {
+    public function __construct(BackendManager $bm, SearchService $ss, $baseUrl,
+        Config $config
+    ) {
         // Save incoming parameters:
         $this->backendManager = $bm;
+        $this->searchService = $ss;
         $this->baseUrl = $baseUrl;
         $this->config = $config;
         $this->baseSitemapUrl = empty($this->config->SitemapIndex->baseSitemapUrl)
@@ -255,7 +268,7 @@ class Generator
         // Display total elapsed time in verbose mode:
         if ($this->verbose) {
             Console::writeLine(
-                'Elapsed time (in seconds): ' . ($this->getTime() - $startTime)
+                'Elapsed time (in seconds): ' . round($this->getTime() - $startTime)
             );
         }
     }
@@ -285,6 +298,8 @@ class Generator
         $currentOffset = ($this->retrievalMode === 'terms') ? '' : '*';
         $recordCount = 0;
 
+        $this->setupBackend($backend);
+
         while (true) {
             // Get IDs and break out of the loop if we've run out:
             $result = $this->getIdsFromBackend($backend, $currentOffset);
@@ -320,6 +335,49 @@ class Generator
         return $currentPage;
     }
 
+    /**
+     * Run the backend setup routine matching the active retrieval mode.
+     *
+     * @param Backend $backend Search backend
+     *
+     * @return void
+     */
+    protected function setupBackend(Backend $backend)
+    {
+        $method = $this->retrievalMode === 'terms'
+            ? 'setupBackendUsingTerms' : 'setupBackendUsingCursorMark';
+        $this->$method($backend);
+    }
+
+    /**
+     * Set up the backend for terms-based retrieval; currently a no-op.
+     *
+     * @param Backend $backend Search backend (unused)
+     *
+     * @return void
+     */
+    protected function setupBackendUsingTerms(Backend $backend)
+    {
+    }
+
+    /**
+     * Prepare the backend for cursorMark-based retrieval.
+     *
+     * @param Backend $backend Search backend
+     *
+     * @return void
+     */
+    protected function setupBackendUsingCursorMark(Backend $backend)
+    {
+        // Only the ID is needed and speed matters, so use a minimal factory.
+        $buildRecord = function ($data) {
+            return new \VuFindSearch\Response\SimpleRecord($data);
+        };
+        $backend->setRecordCollectionFactory(
+            new RecordCollectionFactory($buildRecord)
+        );
+    }
+
     /**
      * Retrieve a batch of IDs.
      *
@@ -375,7 +433,6 @@ class Generator
         $params = new ParamBag(
             [
                 'q' => '*:*',
-                'fl' => $key,
                 'rows' => $this->countPerPage,
                 'start' => 0, // Always 0 when using a cursorMark
                 'wt' => 'json',
@@ -386,13 +443,17 @@ class Generator
                 'cursorMark' => $cursorMark
             ]
         );
-        $raw = $connector->search($params);
-        $result = json_decode($raw);
-        $ids = [];
-        $nextOffset = $result->nextCursorMark;
-        foreach ($result->response->docs ?? [] as $doc) {
-            $ids[] = $doc->$key;
+        $results = $this->searchService->getIds(
+            $backend->getIdentifier(),
+            new Query('*:*'),
+            0,
+            $this->countPerPage,
+            $params
+        );
+        foreach ($results->getRecords() as $doc) {
+            $ids[] = $doc->get($key);
         }
+        $nextOffset = $results->getCursorMark();
         return compact('ids', 'nextOffset');
     }
 
diff --git a/module/VuFind/src/VuFind/Sitemap/GeneratorFactory.php b/module/VuFind/src/VuFind/Sitemap/GeneratorFactory.php
new file mode 100644
index 00000000000..d558ae57d32
--- /dev/null
+++ b/module/VuFind/src/VuFind/Sitemap/GeneratorFactory.php
@@ -0,0 +1,73 @@
+<?php
+/**
+ * Sitemap Generator factory.
+ *
+ * PHP version 7
+ *
+ * Copyright (C) Villanova University 2019.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ * @category VuFind
+ * @package  Service
+ * @author   Demian Katz <demian.katz@villanova.edu>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     https://vufind.org/wiki/development Wiki
+ */
+namespace VuFind\Sitemap;
+
+use Interop\Container\ContainerInterface;
+use Zend\ServiceManager\Factory\FactoryInterface;
+
+/**
+ * Sitemap Generator factory.
+ *
+ * @category VuFind
+ * @package  Service
+ * @author   Demian Katz <demian.katz@villanova.edu>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     https://vufind.org/wiki/development Wiki
+ */
+class GeneratorFactory implements FactoryInterface
+{
+    /**
+     * Create the requested class with search and config dependencies.
+     *
+     * @param ContainerInterface $container     Service manager
+     * @param string             $requestedName Class name to instantiate
+     * @param null|array         $options       Extra options (must be empty)
+     *
+     * @return object
+     *
+     * @throws \Exception if non-empty options are passed in.
+     * @throws ServiceNotFoundException if unable to resolve the service.
+     * @throws ServiceNotCreatedException if creating the service fails.
+     * @throws ContainerException if any other error occurs
+     */
+    public function __invoke(ContainerInterface $container, $requestedName,
+        array $options = null
+    ) {
+        if (!empty($options)) {
+            throw new \Exception('Unexpected options passed to factory.');
+        }
+        $configLoader = $container->get(\VuFind\Config\PluginManager::class);
+        $config = $configLoader->get('config');
+        return new $requestedName(
+            $container->get(\VuFind\Search\BackendManager::class),
+            $container->get(\VuFindSearch\Service::class),
+            $config->Site->url,
+            $configLoader->get('sitemap')
+        );
+    }
+}
diff --git a/module/VuFindConsole/config/module.config.php b/module/VuFindConsole/config/module.config.php
index b153af319f3..7f694e56608 100644
--- a/module/VuFindConsole/config/module.config.php
+++ b/module/VuFindConsole/config/module.config.php
@@ -42,6 +42,7 @@ $config = [
     ],
     'service_manager' => [
         'factories' => [
+            'VuFind\Sitemap\Generator' => 'VuFind\Sitemap\GeneratorFactory',
             'VuFindConsole\Generator\GeneratorTools' => 'VuFindConsole\Generator\GeneratorToolsFactory',
         ],
     ],
diff --git a/module/VuFindConsole/src/VuFindConsole/Controller/UtilController.php b/module/VuFindConsole/src/VuFindConsole/Controller/UtilController.php
index 94c2bbeb2b3..98db89ef69e 100644
--- a/module/VuFindConsole/src/VuFindConsole/Controller/UtilController.php
+++ b/module/VuFindConsole/src/VuFindConsole/Controller/UtilController.php
@@ -302,12 +302,8 @@ class UtilController extends AbstractBase
         }
 
         // Build sitemap and display appropriate warnings if needed:
-        $configLoader = $this->serviceLocator
-            ->get(\VuFind\Config\PluginManager::class);
-        $generator = new Sitemap(
-            $this->serviceLocator->get(\VuFind\Search\BackendManager::class),
-            $configLoader->get('config')->Site->url, $configLoader->get('sitemap')
-        );
+        $generator = $this->serviceLocator->get(\VuFind\Sitemap\Generator::class);
+        $request = $this->getRequest();
         $generator->setVerbose($request->getParam('verbose', false));
         if ($url = $request->getParam('baseurl', false)) {
             $generator->setBaseUrl($url);
-- 
GitLab