From d7e6b4d32b437ab172971220afb6648cd9c26546 Mon Sep 17 00:00:00 2001
From: Ere Maijala <ere.maijala@helsinki.fi>
Date: Thu, 7 Jul 2016 22:04:07 +0300
Subject: [PATCH] Refactored search/session expiration to delete records in
 batches (#739)

- Avoid locking the tables for too long or creating a massive transaction when the table has accumulated records over a long period.

* Reverted deletion of getExpiredQuery() and added command line parameters for specifying batch size and sleep time.

* Don't sleep after last batch.
---
 module/VuFind/src/VuFind/Db/Table/Search.php  |  59 +++++++++
 module/VuFind/src/VuFind/Db/Table/Session.php |  59 +++++++++
 .../Controller/UtilController.php             | 124 ++++++++++++------
 3 files changed, 205 insertions(+), 37 deletions(-)

diff --git a/module/VuFind/src/VuFind/Db/Table/Search.php b/module/VuFind/src/VuFind/Db/Table/Search.php
index a397492448a..3a4df5cfecd 100644
--- a/module/VuFind/src/VuFind/Db/Table/Search.php
+++ b/module/VuFind/src/VuFind/Db/Table/Search.php
@@ -31,6 +31,7 @@ namespace VuFind\Db\Table;
 use minSO;
 use Zend\Db\Adapter\ParameterContainer;
 use Zend\Db\TableGateway\Feature;
+use Zend\Db\Sql\Expression;
 
 /**
  * Table Definition for search
@@ -131,6 +132,64 @@ class Search extends Gateway
         return $this->select($callback);
     }
 
+    /**
+     * Remove unsaved searches created before the expiration cut-off. Optional
+     * inclusive id bounds allow the rows to be deleted in small batches.
+     *
+     * @param int $daysOld Age in days of an "expired" search.
+     * @param int $idFrom  Lowest id of rows to delete (null = no limit).
+     * @param int $idTo    Highest id of rows to delete (null = no limit).
+     *
+     * @return int Number of rows deleted
+     */
+    public function deleteExpired($daysOld = 2, $idFrom = null, $idTo = null)
+    {
+        // Searches created before this timestamp are considered expired:
+        $cutoff = date('Y-m-d H:i:s', time() - $daysOld * 24 * 60 * 60);
+        $applyFilter = function ($delete) use ($cutoff, $idFrom, $idTo) {
+            $delete->where->lessThan('created', $cutoff);
+            $delete->where->equalTo('saved', 0);
+            if ($idFrom !== null) {
+                $delete->where->greaterThanOrEqualTo('id', $idFrom);
+            }
+            if ($idTo !== null) {
+                $delete->where->lessThanOrEqualTo('id', $idTo);
+            }
+        };
+        return $this->delete($applyFilter);
+    }
+
+    /**
+     * Find the span of ids covered by expired, unsaved searches so that they
+     * can be processed in id-based batches.
+     *
+     * @param int $daysOld Age in days of an "expired" search.
+     *
+     * @return array|bool Array of lowest id and highest id, or false if no
+     * expired records were found
+     */
+    public function getExpiredIdRange($daysOld = 2)
+    {
+        // Searches created before this timestamp are considered expired:
+        $cutoff = date('Y-m-d H:i:s', time() - $daysOld * 24 * 60 * 60);
+        $query = $this->getSql()->select();
+        $query->columns(
+            [
+                // An 'id' column is required for TableGateway:
+                'id' => new Expression('1'),
+                'minId' => new Expression('MIN(id)'),
+                'maxId' => new Expression('MAX(id)'),
+            ]
+        );
+        $query->where->lessThan('created', $cutoff)->equalTo('saved', 0);
+        $row = $this->selectWith($query)->current();
+        // A null minimum is reported as "nothing to expire":
+        if ($row->minId === null) {
+            return false;
+        }
+        return [$row->minId, $row->maxId];
+    }
+
     /**
      * Get a query representing expired searches (this can be passed
      * to select() or delete() for further processing).
diff --git a/module/VuFind/src/VuFind/Db/Table/Session.php b/module/VuFind/src/VuFind/Db/Table/Session.php
index 3c555b30efb..e99ecce21c7 100644
--- a/module/VuFind/src/VuFind/Db/Table/Session.php
+++ b/module/VuFind/src/VuFind/Db/Table/Session.php
@@ -5,6 +5,7 @@
  * PHP version 5
  *
  * Copyright (C) Villanova University 2010.
+ * Copyright (C) The National Library of Finland 2016.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2,
@@ -22,11 +23,13 @@
  * @category VuFind
  * @package  Db_Table
  * @author   Demian Katz <demian.katz@villanova.edu>
+ * @author   Ere Maijala <ere.maijala@helsinki.fi>
  * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
  * @link     https://vufind.org Main Page
  */
 namespace VuFind\Db\Table;
 use VuFind\Exception\SessionExpired as SessionExpiredException;
+use Zend\Db\Sql\Expression;
 
 /**
  * Table Definition for session
@@ -34,6 +37,7 @@ use VuFind\Exception\SessionExpired as SessionExpiredException;
  * @category VuFind
  * @package  Db_Table
  * @author   Demian Katz <demian.katz@villanova.edu>
+ * @author   Ere Maijala <ere.maijala@helsinki.fi>
  * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
  * @link     https://vufind.org Main Site
  */
@@ -139,6 +143,61 @@ class Session extends Gateway
         $this->delete($callback);
     }
 
+    /**
+     * Remove sessions last used before the expiration cut-off. Optional
+     * inclusive id bounds allow the rows to be deleted in small batches.
+     *
+     * @param int $daysOld Age in days of an "expired" session.
+     * @param int $idFrom  Lowest id of rows to delete (null = no limit).
+     * @param int $idTo    Highest id of rows to delete (null = no limit).
+     *
+     * @return int Number of rows deleted
+     */
+    public function deleteExpired($daysOld = 2, $idFrom = null, $idTo = null)
+    {
+        $cutoff = time() - $daysOld * 24 * 60 * 60;
+        $applyFilter = function ($delete) use ($cutoff, $idFrom, $idTo) {
+            $delete->where->lessThan('last_used', $cutoff);
+            if ($idFrom !== null) {
+                $delete->where->greaterThanOrEqualTo('id', $idFrom);
+            }
+            if ($idTo !== null) {
+                $delete->where->lessThanOrEqualTo('id', $idTo);
+            }
+        };
+        return $this->delete($applyFilter);
+    }
+
+    /**
+     * Find the span of ids covered by expired sessions so that they can be
+     * processed in id-based batches.
+     *
+     * @param int $daysOld Age in days of an "expired" session.
+     *
+     * @return array|bool Array of lowest id and highest id, or false if no
+     * expired records were found
+     */
+    public function getExpiredIdRange($daysOld = 2)
+    {
+        // Sessions last used before this Unix timestamp are considered expired:
+        $cutoff = time() - $daysOld * 24 * 60 * 60;
+        $query = $this->getSql()->select();
+        $query->columns(
+            [
+                // An 'id' column is required for TableGateway:
+                'id' => new Expression('1'),
+                'minId' => new Expression('MIN(id)'),
+                'maxId' => new Expression('MAX(id)'),
+            ]
+        );
+        $query->where->lessThan('last_used', $cutoff);
+        $row = $this->selectWith($query)->current();
+        if ($row->minId === null) {
+            return false;
+        }
+        return [$row->minId, $row->maxId];
+    }
+
     /**
      * Get a query representing expired sessions (this can be passed
      * to select() or delete() for further processing).
diff --git a/module/VuFindConsole/src/VuFindConsole/Controller/UtilController.php b/module/VuFindConsole/src/VuFindConsole/Controller/UtilController.php
index 29d013db4dd..2264be86db4 100644
--- a/module/VuFindConsole/src/VuFindConsole/Controller/UtilController.php
+++ b/module/VuFindConsole/src/VuFindConsole/Controller/UtilController.php
@@ -453,6 +453,39 @@ class UtilController extends AbstractBase
         return $this->getSuccessResponse();
     }
 
+    /**
+     * Display help for the search or session expiration actions.
+     *
+     * Lists the optional batch and sleep switches and the age argument together
+     * with their default values.
+     *
+     * @param string $rows Plural name of records to delete (inserted into the
+     * help text)
+     *
+     * @return \Zend\Console\Response
+     */
+    protected function expirationHelp($rows)
+    {
+        // One entry per console line; empty strings produce blank lines.
+        $helpLines = [
+            "Expire old $rows in the database.",
+            '',
+            'Optional parameters: [--batch=size] [--sleep=time] [age]',
+            '',
+            '  batch: number of records to delete in a single batch'
+            . ' (default 1000)',
+            '  sleep: milliseconds to sleep between batches (default 100)',
+            "  age: the age (in days) of $rows to expire (default 2)",
+            '',
+            "By default, $rows more than 2 days old will be removed.",
+        ];
+        foreach ($helpLines as $helpLine) {
+            Console::writeLine($helpLine);
+        }
+        // Showing help is reported through the failure response.
+        return $this->getFailureResponse();
+    }
+
     /**
      * Command-line tool to clear unwanted entries
      * from search history database table.
@@ -464,21 +497,13 @@ class UtilController extends AbstractBase
         $this->consoleOpts->addRules(
             [
                 'h|help' => 'Get help',
+                'batch=i' => 'Batch size',
+                'sleep=i' => 'Sleep interval between batches'
             ]
         );
 
-        if ($this->consoleOpts->getOption('h')
-            || $this->consoleOpts->getOption('help')
-        ) {
-            Console::writeLine('Expire old searches in the database.');
-            Console::writeLine('');
-            Console::writeLine(
-                'Optional parameter: the age (in days) of searches to expire;'
-            );
-            Console::writeLine(
-                'by default, searches more than 2 days old will be removed.'
-            );
-            return $this->getFailureResponse();
+        if ($this->consoleOpts->getOption('h')) {
+            return $this->expirationHelp('searches');
         }
 
         return $this->expire(
@@ -499,21 +524,13 @@ class UtilController extends AbstractBase
         $this->consoleOpts->addRules(
             [
                 'h|help' => 'Get help',
+                'batch=i' => 'Batch size',
+                'sleep=i' => 'Sleep interval between batches'
             ]
         );
 
-        if ($this->consoleOpts->getOption('h')
-            || $this->consoleOpts->getOption('help')
-        ) {
-            Console::writeLine('Expire old sessions in the database.');
-            Console::writeLine('');
-            Console::writeLine(
-                'Optional parameter: the age (in days) of sessions to expire;'
-            );
-            Console::writeLine(
-                'by default, sessions more than 2 days old will be removed.'
-            );
-            return $this->getFailureResponse();
+        if ($this->consoleOpts->getOption('h')) {
+            return $this->expirationHelp('sessions');
         }
 
         return $this->expire(
@@ -651,7 +668,7 @@ class UtilController extends AbstractBase
     /**
      * Abstract delete method.
      *
-     * @param string $table         Table to operate on.
+     * @param string $tableName     Table to operate on.
      * @param string $successString String for reporting success.
      * @param string $failString    String for reporting failure.
      * @param int    $minAge        Minimum age allowed for expiration (also used
@@ -659,7 +676,7 @@ class UtilController extends AbstractBase
      *
      * @return mixed
      */
-    protected function expire($table, $successString, $failString, $minAge = 2)
+    protected function expire($tableName, $successString, $failString, $minAge = 2)
     {
         // Get command-line arguments
         $argv = $this->consoleOpts->getRemainingArgs();
@@ -667,6 +684,11 @@ class UtilController extends AbstractBase
         // Use command line value as expiration age, or default to $minAge.
         $daysOld = isset($argv[0]) ? intval($argv[0]) : $minAge;
 
+        // Command line batch size and sleep time; batch < 1 would stall the loop.
+        $opts = $this->consoleOpts->getArguments();
+        $batchSize = isset($opts['batch']) ? max(1, (int)$opts['batch']) : 1000;
+        $sleepTime = isset($opts['sleep']) ? max(0, (int)$opts['sleep']) : 100;
+
         // Abort if we have an invalid expiration age.
         if ($daysOld < 2) {
             Console::writeLine(
@@ -678,23 +700,51 @@ class UtilController extends AbstractBase
             return $this->getFailureResponse();
         }
 
-        // Delete the expired searches--this cleans up any junk left in the database
-        // from old search histories that were not
-        // caught by the session garbage collector.
-        $search = $this->getTable($table);
-        if (!method_exists($search, 'getExpiredQuery')) {
-            throw new \Exception($table . ' does not support getExpiredQuery()');
+        // Delete the expired rows--this cleans up any junk left in the database
+        // e.g. from old searches or sessions that were not caught by the session
+        // garbage collector.
+        $table = $this->getTable($tableName);
+        if (!method_exists($table, 'getExpiredIdRange')) {
+            throw new \Exception("$tableName does not support getExpiredIdRange()");
         }
-        $query = $search->getExpiredQuery($daysOld);
-        if (($count = count($search->select($query))) == 0) {
-            Console::writeLine($failString);
+        if (!method_exists($table, 'deleteExpired')) {
+            throw new \Exception("$tableName does not support deleteExpired()");
+        }
+
+        $idRange = $table->getExpiredIdRange($daysOld);
+        if (false === $idRange) {
+            $this->timestampedMessage($failString);
             return $this->getSuccessResponse();
         }
-        $search->delete($query);
-        Console::writeLine(str_replace('%%count%%', $count, $successString));
+
+        // Delete records in batches
+        for ($batch = $idRange[0]; $batch <= $idRange[1]; $batch += $batchSize) {
+            $count = $table->deleteExpired(
+                $daysOld, $batch, $batch + $batchSize - 1
+            );
+            $this->timestampedMessage(
+                str_replace('%%count%%', $count, $successString)
+            );
+            // Be nice to others and wait between batches
+            if ($batch + $batchSize <= $idRange[1]) {
+                usleep($sleepTime * 1000);
+            }
+        }
         return $this->getSuccessResponse();
     }
 
+    /**
+     * Write a console line prefixed with the current time in square brackets.
+     *
+     * @param string $msg Text to print after the "[Y-m-d H:i:s] " prefix
+     *
+     * @return void
+     */
+    protected function timestampedMessage($msg)
+    {
+        Console::writeLine(sprintf('[%s] %s', date('Y-m-d H:i:s'), $msg));
+    }
+
     /**
      * Convert hash algorithms
      * Expected parameters: oldmethod:oldkey (or none) newmethod:newkey
-- 
GitLab