From d7e6b4d32b437ab172971220afb6648cd9c26546 Mon Sep 17 00:00:00 2001 From: Ere Maijala <ere.maijala@helsinki.fi> Date: Thu, 7 Jul 2016 22:04:07 +0300 Subject: [PATCH] Refactored search/session expiration to delete records in batches (#739) - Avoid locking the tables for too long or creating a massive transaction in case the table has accumulated records for a longer time. * Reverted deletion of getExpiredQuery() and added command line parameters for specifying batch size and sleep time. * Don't sleep after last batch. --- module/VuFind/src/VuFind/Db/Table/Search.php | 59 +++++++++ module/VuFind/src/VuFind/Db/Table/Session.php | 59 +++++++++ .../Controller/UtilController.php | 124 ++++++++++++------ 3 files changed, 205 insertions(+), 37 deletions(-) diff --git a/module/VuFind/src/VuFind/Db/Table/Search.php b/module/VuFind/src/VuFind/Db/Table/Search.php index a397492448a..3a4df5cfecd 100644 --- a/module/VuFind/src/VuFind/Db/Table/Search.php +++ b/module/VuFind/src/VuFind/Db/Table/Search.php @@ -31,6 +31,7 @@ namespace VuFind\Db\Table; use minSO; use Zend\Db\Adapter\ParameterContainer; use Zend\Db\TableGateway\Feature; +use Zend\Db\Sql\Expression; /** * Table Definition for search @@ -131,6 +132,64 @@ class Search extends Gateway return $this->select($callback); } + /** + * Delete expired searches. Allows setting of 'from' and 'to' ID's so that rows + * can be deleted in small batches. + * + * @param int $daysOld Age in days of an "expired" search. + * @param int $idFrom Lowest id of rows to delete. + * @param int $idTo Highest id of rows to delete. + * + * @return int Number of rows deleted + */ + public function deleteExpired($daysOld = 2, $idFrom = null, $idTo = null) + { + // Determine the expiration date: + $expireDate = date('Y-m-d H:i:s', time() - $daysOld * 24 * 60 * 60); + $callback = function ($select) use ($expireDate, $idFrom, $idTo) { + $where = $select->where->lessThan('created', $expireDate) + ->equalTo('saved', 0); + if (null !== $idFrom) { + $where->and->greaterThanOrEqualTo('id', $idFrom); + } + if (null !== $idTo) { + $where->and->lessThanOrEqualTo('id', $idTo); + } + }; + return $this->delete($callback); + } + + /** + * Get the lowest id and highest id for expired searches. + * + * @param int $daysOld Age in days of an "expired" search. + * + * @return array|bool Array of lowest id and highest id or false if no expired + * records found + */ + public function getExpiredIdRange($daysOld = 2) + { + // Determine the expiration date: + $expireDate = date('Y-m-d H:i:s', time() - $daysOld * 24 * 60 * 60); + $callback = function ($select) use ($expireDate) { + $select->where->lessThan('created', $expireDate)->equalTo('saved', 0); + }; + $select = $this->getSql()->select(); + $select->columns( + [ + 'id' => new Expression('1'), // required for TableGateway + 'minId' => new Expression('MIN(id)'), + 'maxId' => new Expression('MAX(id)'), + ] + ); + $select->where($callback); + $result = $this->selectWith($select)->current(); + if (null === $result->minId) { + return false; + } + return [$result->minId, $result->maxId]; + } + /** * Get a query representing expired searches (this can be passed * to select() or delete() for further processing). diff --git a/module/VuFind/src/VuFind/Db/Table/Session.php b/module/VuFind/src/VuFind/Db/Table/Session.php index 3c555b30efb..e99ecce21c7 100644 --- a/module/VuFind/src/VuFind/Db/Table/Session.php +++ b/module/VuFind/src/VuFind/Db/Table/Session.php @@ -5,6 +5,7 @@ * PHP version 5 * * Copyright (C) Villanova University 2010. + * Copyright (C) The National Library of Finland 2016. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2, @@ -22,11 +23,13 @@ * @category VuFind * @package Db_Table * @author Demian Katz <demian.katz@villanova.edu> + * @author Ere Maijala <ere.maijala@helsinki.fi> * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License * @link https://vufind.org Main Page */ namespace VuFind\Db\Table; use VuFind\Exception\SessionExpired as SessionExpiredException; +use Zend\Db\Sql\Expression; /** * Table Definition for session @@ -34,6 +37,7 @@ use VuFind\Exception\SessionExpired as SessionExpiredException; * @category VuFind * @package Db_Table * @author Demian Katz <demian.katz@villanova.edu> + * @author Ere Maijala <ere.maijala@helsinki.fi> * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License * @link https://vufind.org Main Site */ @@ -139,6 +143,61 @@ class Session extends Gateway $this->delete($callback); } + /** + * Delete expired sessions. Allows setting of 'from' and 'to' ID's so that rows + * can be deleted in small batches. + * + * @param int $daysOld Age in days of an "expired" session. + * @param int $idFrom Lowest id of rows to delete. + * @param int $idTo Highest id of rows to delete. + * + * @return int Number of rows deleted + */ + public function deleteExpired($daysOld = 2, $idFrom = null, $idTo = null) + { + $expireDate = time() - $daysOld * 24 * 60 * 60; + $callback = function ($select) use ($expireDate, $idFrom, $idTo) { + $where = $select->where->lessThan('last_used', $expireDate); + if (null !== $idFrom) { + $where->and->greaterThanOrEqualTo('id', $idFrom); + } + if (null !== $idTo) { + $where->and->lessThanOrEqualTo('id', $idTo); + } + }; + return $this->delete($callback); + } + + /** + * Get the lowest id and highest id for expired sessions. + * + * @param int $daysOld Age in days of an "expired" session. + * + * @return array|bool Array of lowest id and highest id or false if no expired + * records found + */ + public function getExpiredIdRange($daysOld = 2) + { + $expireDate = time() - $daysOld * 24 * 60 * 60; + $callback = function ($select) use ($expireDate) { + $select->where->lessThan('last_used', $expireDate); + }; + $select = $this->getSql()->select(); + $select->columns( + [ + 'id' => new Expression('1'), // required for TableGateway + 'minId' => new Expression('MIN(id)'), + 'maxId' => new Expression('MAX(id)'), + ] + ); + $select->where($callback); + $result = $this->selectWith($select)->current(); + if (null === $result->minId) { + return false; + } + return [$result->minId, $result->maxId]; + } + /** * Get a query representing expired sessions (this can be passed * to select() or delete() for further processing). diff --git a/module/VuFindConsole/src/VuFindConsole/Controller/UtilController.php b/module/VuFindConsole/src/VuFindConsole/Controller/UtilController.php index 29d013db4dd..2264be86db4 100644 --- a/module/VuFindConsole/src/VuFindConsole/Controller/UtilController.php +++ b/module/VuFindConsole/src/VuFindConsole/Controller/UtilController.php @@ -453,6 +453,39 @@ class UtilController extends AbstractBase return $this->getSuccessResponse(); } + /** + * Display help for the search or session expiration actions + * + * @param string $rows Plural name of records to delete + * + * @return \Zend\Console\Response + */ + protected function expirationHelp($rows) + { + Console::writeLine("Expire old $rows in the database."); + Console::writeLine(''); + Console::writeLine( + 'Optional parameters: [--batch=size] [--sleep=time] [age]' + ); + Console::writeLine(''); + Console::writeLine( + ' batch: number of records to delete in a single batch' + . ' (default 1000)' + ); + Console::writeLine( + ' sleep: milliseconds to sleep between batches (default 100)' + ); + + Console::writeLine( + " age: the age (in days) of $rows to expire (default 2)" + ); + Console::writeLine(''); + Console::writeLine( + "By default, $rows more than 2 days old will be removed." + ); + return $this->getFailureResponse(); + } + /** * Command-line tool to clear unwanted entries * from search history database table. @@ -464,21 +497,13 @@ class UtilController extends AbstractBase $this->consoleOpts->addRules( [ 'h|help' => 'Get help', + 'batch=i' => 'Batch size', + 'sleep=i' => 'Sleep interval between batches' ] ); - if ($this->consoleOpts->getOption('h') - || $this->consoleOpts->getOption('help') - ) { - Console::writeLine('Expire old searches in the database.'); - Console::writeLine(''); - Console::writeLine( - 'Optional parameter: the age (in days) of searches to expire;' - ); - Console::writeLine( - 'by default, searches more than 2 days old will be removed.' - ); - return $this->getFailureResponse(); + if ($this->consoleOpts->getOption('h')) { + return $this->expirationHelp('searches'); } return $this->expire( @@ -499,21 +524,13 @@ class UtilController extends AbstractBase $this->consoleOpts->addRules( [ 'h|help' => 'Get help', + 'batch=i' => 'Batch size', + 'sleep=i' => 'Sleep interval between batches' ] ); - if ($this->consoleOpts->getOption('h') - || $this->consoleOpts->getOption('help') - ) { - Console::writeLine('Expire old sessions in the database.'); - Console::writeLine(''); - Console::writeLine( - 'Optional parameter: the age (in days) of sessions to expire;' - ); - Console::writeLine( - 'by default, sessions more than 2 days old will be removed.' - ); - return $this->getFailureResponse(); + if ($this->consoleOpts->getOption('h')) { + return $this->expirationHelp('sessions'); } return $this->expire( @@ -651,7 +668,7 @@ class UtilController extends AbstractBase /** * Abstract delete method. * - * @param string $table Table to operate on. + * @param string $tableName Table to operate on. * @param string $successString String for reporting success. * @param string $failString String for reporting failure. * @param int $minAge Minimum age allowed for expiration (also used @@ -659,7 +676,7 @@ class UtilController extends AbstractBase * * @return mixed */ - protected function expire($table, $successString, $failString, $minAge = 2) + protected function expire($tableName, $successString, $failString, $minAge = 2) { // Get command-line arguments $argv = $this->consoleOpts->getRemainingArgs(); @@ -667,6 +684,11 @@ class UtilController extends AbstractBase // Use command line value as expiration age, or default to $minAge. $daysOld = isset($argv[0]) ? intval($argv[0]) : $minAge; + // Use command line values for batch size and sleep time if specified. + $options = $this->consoleOpts->getArguments(); + $batchSize = isset($options['batch']) ? $options['batch'] : 1000; + $sleepTime = isset($options['sleep']) ? $options['sleep'] : 100; + // Abort if we have an invalid expiration age. if ($daysOld < 2) { Console::writeLine( @@ -678,23 +700,51 @@ class UtilController extends AbstractBase return $this->getFailureResponse(); } - // Delete the expired searches--this cleans up any junk left in the database - // from old search histories that were not - // caught by the session garbage collector. - $search = $this->getTable($table); - if (!method_exists($search, 'getExpiredQuery')) { - throw new \Exception($table . ' does not support getExpiredQuery()'); + // Delete the expired rows--this cleans up any junk left in the database + // e.g. from old searches or sessions that were not caught by the session + // garbage collector. + $table = $this->getTable($tableName); + if (!method_exists($table, 'getExpiredIdRange')) { + throw new \Exception("$tableName does not support getExpiredIdRange()"); } - $query = $search->getExpiredQuery($daysOld); - if (($count = count($search->select($query))) == 0) { - Console::writeLine($failString); + if (!method_exists($table, 'deleteExpired')) { + throw new \Exception("$tableName does not support deleteExpired()"); + } + + $idRange = $table->getExpiredIdRange($daysOld); + if (false === $idRange) { + $this->timestampedMessage($failString); return $this->getSuccessResponse(); } - $search->delete($query); - Console::writeLine(str_replace('%%count%%', $count, $successString)); + + // Delete records in batches + for ($batch = $idRange[0]; $batch <= $idRange[1]; $batch += $batchSize) { + $count = $table->deleteExpired( + $daysOld, $batch, $batch + $batchSize - 1 + ); + $this->timestampedMessage( + str_replace('%%count%%', $count, $successString) + ); + // Be nice to others and wait between batches + if ($batch + $batchSize <= $idRange[1]) { + usleep($sleepTime * 1000); + } + } return $this->getSuccessResponse(); } + /** + * Print a message with a time stamp to the console + * + * @param string $msg Message + * + * @return void + */ + protected function timestampedMessage($msg) + { + Console::writeLine('[' . date('Y-m-d H:i:s') . '] ' . $msg); + } + /** * Convert hash algorithms * Expected parameters: oldmethod:oldkey (or none) newmethod:newkey -- GitLab