From 3c1c2247b7b86790f6cf483e9ae25caf5e6ff2aa Mon Sep 17 00:00:00 2001 From: Demian Katz <demian.katz@villanova.edu> Date: Thu, 19 Jul 2012 13:10:15 -0400 Subject: [PATCH] Finished adding CLI tools (complete except for some missing database models). --- module/VuFind/CLI/config/module.config.php | 1 + .../VuFind/CLI/Controller/UtilController.php | 294 ++++++++++++++++++ util/dedupe.php | 57 ++++ util/deletes.php | 33 ++ util/expire_searches.php | 32 ++ util/index_reserves.php | 32 ++ util/optimize.php | 32 ++ util/sitemap.php | 32 ++ util/suppressed.php | 33 ++ 9 files changed, 546 insertions(+) create mode 100644 module/VuFind/src/VuFind/CLI/Controller/UtilController.php create mode 100644 util/dedupe.php create mode 100644 util/deletes.php create mode 100644 util/expire_searches.php create mode 100644 util/index_reserves.php create mode 100644 util/optimize.php create mode 100644 util/sitemap.php create mode 100644 util/suppressed.php diff --git a/module/VuFind/CLI/config/module.config.php b/module/VuFind/CLI/config/module.config.php index 512418d341d..acbcbe2c41c 100644 --- a/module/VuFind/CLI/config/module.config.php +++ b/module/VuFind/CLI/config/module.config.php @@ -6,6 +6,7 @@ $config = array( 'invokables' => array( 'harvest' => 'VuFind\CLI\Controller\HarvestController', 'import' => 'VuFind\CLI\Controller\ImportController', + 'util' => 'VuFind\CLI\Controller\UtilController', ), ), ); diff --git a/module/VuFind/src/VuFind/CLI/Controller/UtilController.php b/module/VuFind/src/VuFind/CLI/Controller/UtilController.php new file mode 100644 index 00000000000..590f2419b12 --- /dev/null +++ b/module/VuFind/src/VuFind/CLI/Controller/UtilController.php @@ -0,0 +1,294 @@ +<?php +/** + * CLI Controller Module + * + * PHP version 5 + * + * Copyright (C) Villanova University 2010. 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * @category VuFind2
+ * @package Controller
+ * @author Chris Hallberg <challber@villanova.edu>
+ * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link http://vufind.org/wiki/building_a_recommendations_module Wiki
+ */
+namespace VuFind\CLI\Controller;
+use File_MARC, File_MARCXML, VuFind\Connection\Manager as ConnectionManager,
+    VuFind\Sitemap;
+
+/**
+ * This controller handles various command-line tools
+ *
+ * @category VuFind2
+ * @package Controller
+ * @author Chris Hallberg <challber@villanova.edu>
+ * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link http://vufind.org/wiki/building_a_recommendations_module Wiki
+ */
+class UtilController extends AbstractBase
+{
+    /**
+     * Rebuild the SolrReserves index from the ILS course reserves data.
+     *
+     * @return mixed Response from getSuccessResponse()
+     */
+    public function indexreservesAction()
+    {
+        ini_set('memory_limit', '50M');
+        ini_set('max_execution_time', '3600');
+
+        // Setup Solr Connection
+        $solr = ConnectionManager::connectToIndex('SolrReserves');
+
+        // Connect to ILS
+        $catalog = ConnectionManager::connectToCatalog();
+
+        // Get instructors
+        $instructors = $catalog->getInstructors();
+
+        // Get Courses
+        $courses = $catalog->getCourses();
+
+        // Get Departments
+        $departments = $catalog->getDepartments();
+
+        // Get all reserve records
+        $reserves = $catalog->findReserves('', '', '');
+
+        // Only replace the index when every ILS data set came back non-empty:
+        if (!empty($instructors) && !empty($courses) && !empty($departments)
+            && !empty($reserves)
+        ) {
+            // Delete existing records
+            $solr->deleteAll();
+
+            // Build the index
+            $solr->buildIndex($instructors, $courses, $departments, $reserves);
+
+            // Commit and Optimize the Solr Index
+            $solr->commit();
+            $solr->optimize();
+        }
+        return $this->getSuccessResponse();
+    }
+
+    /**
+     * Commit and optimize a Solr index (core optionally named on command line).
+     *
+     * @return mixed Response from getSuccessResponse()
+     */
+    public function optimizeAction()
+    {
+        ini_set('memory_limit', '50M');
+        ini_set('max_execution_time', '3600');
+
+        // Setup Solr Connection -- first CLI param may name the core.
+        $argv = $this->consoleOpts->getRemainingArgs();
+        $solr = ConnectionManager::connectToIndex(
+            null, isset($argv[0]) ? $argv[0] : ''
+        );
+
+        // Commit and Optimize the Solr Index
+        $solr->commit();
+        $solr->optimize();
+        return $this->getSuccessResponse();
+    }
+
+    /**
+     * Generate a Sitemap, echoing any warnings reported by the generator.
+     *
+     * @return mixed Response from getSuccessResponse()
+     */
+    public function sitemapAction()
+    {
+        // Build sitemap and display appropriate warnings if needed:
+        $generator = new Sitemap();
+        $generator->generate();
+        foreach ($generator->getWarnings() as $warning) {
+            echo "$warning\n";
+        }
+        return $this->getSuccessResponse();
+    }
+
+    /**
+     * Command-line tool to batch-delete records from the Solr index.
+     *
+     * @return mixed Response from getSuccessResponse()/getFailureResponse()
+     */
+    public function deletesAction()
+    {
+        // Parse the command line parameters -- see if we are in "flat file" mode,
+        // find out what file we are reading in,
+        // and determine the index we are affecting!
+        $argv = $this->consoleOpts->getRemainingArgs();
+        $filename = isset($argv[0]) ? $argv[0] : null;
+        $mode = isset($argv[1]) ? $argv[1] : 'marc';
+        $index = isset($argv[2]) ? $argv[2] : 'Solr';
+
+        // No filename specified? Give usage guidelines:
+        if ($filename === null || $filename === '') {
+            echo "Delete records from VuFind's index.\n\n",
+                "Usage: deletes.php [filename] [format] [index]\n\n",
+                "[filename] is the file containing records to delete.\n",
+                "[format] is the format of the file",
+                " -- it may be one of the following:\n",
+                "\tflat - flat text format",
+                " (deletes all IDs in newline-delimited file)\n",
+                "\tmarc - binary MARC format",
+                " (delete all record IDs from 001 fields)\n",
+                "\tmarcxml - MARC-XML format",
+                " (delete all record IDs from 001 fields)\n",
+                '"marc" is used by default if no format is specified.' . "\n",
+                "[index] is the index to use (default = Solr)\n";
+            return $this->getFailureResponse();
+        }
+
+        // File doesn't exist?
+        if (!file_exists($filename)) {
+            echo "Cannot find file: {$filename}\n";
+            return $this->getFailureResponse();
+        }
+
+        // Setup Solr Connection
+        $solr = ConnectionManager::connectToIndex($index);
+
+        // Build list of records to delete:
+        $ids = array();
+
+        // Flat file mode:
+        if ($mode == 'flat') {
+            foreach (explode("\n", file_get_contents($filename)) as $id) {
+                $id = trim($id);
+                if ($id !== '') {
+                    $ids[] = $id;
+                }
+            }
+        } else {
+            // MARC file mode... We need to load the MARC record differently if it's
+            // XML or binary:
+            $collection = ($mode == 'marcxml')
+                ? new File_MARCXML($filename) : new File_MARC($filename);
+
+            // Same logic for both formats; records lacking an 001 are skipped:
+            while ($record = $collection->next()) {
+                $idField = $record->getField('001');
+                if ($idField) {
+                    $ids[] = (string)$idField->getData();
+                }
+            }
+        }
+
+        // Delete, Commit and Optimize if necessary:
+        if (!empty($ids)) {
+            $solr->deleteRecords($ids);
+            $solr->commit();
+            $solr->optimize();
+        }
+        return $this->getSuccessResponse();
+    }
+
+    /**
+     * Command-line tool to clear unwanted entries
+     * from search history database table.
+     *
+     * @return mixed Response from getSuccessResponse()/getFailureResponse()
+     */
+    public function expiresearchesAction()
+    {
+        // Get command-line arguments
+        $argv = $this->consoleOpts->getRemainingArgs();
+
+        // Use command line value as expiration age, or default to 2.
+        $daysOld = isset($argv[0]) ? intval($argv[0]) : 2;
+
+        // Abort if we have an invalid expiration age.
+        if ($daysOld < 2) {
+            echo "Expiration age must be at least two days.\n";
+            return $this->getFailureResponse();
+        }
+
+        // Delete the expired searches--this cleans up any junk left in the database
+        // from old search histories that were not
+        // caught by the session garbage collector.
+        // NOTE(review): leading \ forces the global class; confirm model name.
+        $search = new \VuFind_Model_Db_Search();
+        $expired = $search->getExpiredSearches($daysOld);
+        $count = count($expired);
+        if ($count === 0) {
+            echo "No expired searches to delete.\n";
+            return $this->getSuccessResponse();
+        }
+        foreach ($expired as $oldSearch) {
+            $oldSearch->delete();
+        }
+        echo "\n{$count} expired searches deleted.\n";
+        return $this->getSuccessResponse();
+    }
+
+    /**
+     * Command-line tool to delete suppressed records from the index.
+     *
+     * @return mixed Response from getSuccessResponse()/getFailureResponse()
+     */
+    public function suppressedAction()
+    {
+        // Register the 'authorities' option, then pick the matching Solr core
+        $this->consoleOpts->addRules(
+            array(
+                'authorities' =>
+                    'Delete authority records instead of bibliographic records'
+            )
+        );
+        $core = $this->consoleOpts->getOption('authorities')
+            ? 'authority' : 'biblio';
+
+        $solr = ConnectionManager::connectToIndex('Solr', $core);
+
+        // Make ILS Connection
+        try {
+            $catalog = ConnectionManager::connectToCatalog();
+            if ($core == 'authority') {
+                $result = $catalog->getSuppressedAuthorityRecords();
+            } else {
+                $result = $catalog->getSuppressedRecords();
+            }
+        } catch (\Exception $e) {
+            echo "ILS error -- " . $e->getMessage() . "\n";
+            return $this->getFailureResponse();
+        }
+
+        // Validate result:
+        if (!is_array($result)) {
+            echo "Could not obtain suppressed record list from ILS.\n";
+            return $this->getFailureResponse();
+        } else if (empty($result)) {
+            echo "No suppressed records to delete.\n";
+            return $this->getSuccessResponse();
+        }
+
+        // Get Suppressed Records and Delete from index
+        $status = $solr->deleteRecords($result);
+        if ($status) {
+            // Commit and Optimize
+            $solr->commit();
+            $solr->optimize();
+        } else {
+            echo "Delete failed.\n";
+            return $this->getFailureResponse();
+        }
+        return $this->getSuccessResponse();
+    }
+}
diff --git a/util/dedupe.php b/util/dedupe.php
new file mode 100644
index 00000000000..09af7dcae6b
--- /dev/null
+++ b/util/dedupe.php
@@ -0,0 +1,57 @@
+<?php
+/**
+ * Remove duplicate lines from a file -- needed for the Windows version of
+ * the alphabetical browse database generator, since Windows sort does not
+ * support deduplication. Assumed presorted
+ *
+ * PHP version 5
+ *
+ * Copyright (C) Villanova University 2010.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * @category VuFind2
+ * @package Utilities
+ * @author Demian Katz <demian.katz@villanova.edu>
+ * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link http://vufind.org/wiki/alphabetical_heading_browse Wiki
+ */
+if (count($argv) < 2 || $argv[1] == "") {
+    echo "\nPlease specify an input file: ";
+    $argv[1] = chop(fgets(STDIN)); // Read the input
+}
+$in = fopen($argv[1], 'r');
+if (!$in) {
+    die('Could not open input file: '.$argv[1]."\n");
+}
+
+if (count($argv) < 3 || $argv[2] == "") {
+    echo "\nPlease specify an output file: ";
+    $argv[2] = chop(fgets(STDIN)); // Read the input
+}
+$out = fopen($argv[2], 'w');
+if (!$out) {
+    die('Could not open output file: '.$argv[2]."\n");
+}
+
+$last = ''; // most recently read line; only adjacent duplicates are dropped
+while (($tmp = fgets($in)) !== false) { // a bare "0" line is not EOF
+    if ($tmp !== $last) { // strict: "1.0" and "1.00" are distinct lines
+        fputs($out, $tmp);
+    }
+    $last = $tmp;
+}
+
+fclose($in);
+fclose($out);
\ No newline at end of file
diff --git a/util/deletes.php b/util/deletes.php
new file mode 100644
index 00000000000..91b64b43cc0
--- /dev/null
+++ b/util/deletes.php
@@ -0,0 +1,33 @@
+<?php
+/**
+ * Command-line tool to batch-delete records from the Solr index.
+ *
+ * PHP version 5
+ *
+ * Copyright (C) Villanova University 2007.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * @category VuFind2 + * @package Utilities + * @author Andrew S. Nagy <vufind-tech@lists.sourceforge.net> + * @author Demian Katz <demian.katz@villanova.edu> + * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License + * @link http://vufind.org/wiki/automation Wiki + */ + +// Load the Zend framework -- this will automatically trigger the appropriate +// controller action based on directory and file names +define('CLI_DIR', __DIR__); // save directory name of current script +require_once __DIR__ . '/../public/index.php'; diff --git a/util/expire_searches.php b/util/expire_searches.php new file mode 100644 index 00000000000..e08d4443df3 --- /dev/null +++ b/util/expire_searches.php @@ -0,0 +1,32 @@ +<?php +/** + * Command-line tool to clear unwanted entries from search history database table. + * + * PHP version 5 + * + * Copyright (C) Villanova University 2010. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * @category VuFind2 + * @package Utilities + * @author Demian Katz <demian.katz@villanova.edu> + * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License + * @link http://vufind.org/jira/browse/VUFIND-235 JIRA Ticket + */ + +// Load the Zend framework -- this will automatically trigger the appropriate +// controller action based on directory and file names +define('CLI_DIR', __DIR__); // save directory name of current script +require_once __DIR__ . '/../public/index.php'; diff --git a/util/index_reserves.php b/util/index_reserves.php new file mode 100644 index 00000000000..5c10ec20b72 --- /dev/null +++ b/util/index_reserves.php @@ -0,0 +1,32 @@ +<?php +/** + * Command-line tool to index reserves records to the Solr index. + * + * PHP version 5 + * + * Copyright (C) Villanova University 2009. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * @category VuFind2 + * @package Utilities + * @author Tuan Nguyen <tuan@yorku.ca> + * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License + * @link http://vufind.org/wiki Wiki + */ + +// Load the Zend framework -- this will automatically trigger the appropriate +// controller action based on directory and file names +define('CLI_DIR', __DIR__); // save directory name of current script +require_once __DIR__ . '/../public/index.php'; diff --git a/util/optimize.php b/util/optimize.php new file mode 100644 index 00000000000..8965d23e8ea --- /dev/null +++ b/util/optimize.php @@ -0,0 +1,32 @@ +<?php +/** + * Command-line tool to optimize the Solr index. + * + * PHP version 5 + * + * Copyright (C) Villanova University 2009. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * @category VuFind2 + * @package Utilities + * @author Demian Katz <demian.katz@villanova.edu> + * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License + * @link http://vufind.org/wiki/performance#index_optimization Wiki + */ + +// Load the Zend framework -- this will automatically trigger the appropriate +// controller action based on directory and file names +define('CLI_DIR', __DIR__); // save directory name of current script +require_once __DIR__ . '/../public/index.php'; diff --git a/util/sitemap.php b/util/sitemap.php new file mode 100644 index 00000000000..739cc1a7684 --- /dev/null +++ b/util/sitemap.php @@ -0,0 +1,32 @@ +<?php +/** + * Command-line tool to generate sitemaps based on Solr index contents. + * + * PHP version 5 + * + * Copyright (C) Villanova University 2009. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * @category VuFind2 + * @package Utilities + * @author David K. 
Uspal <david.uspal@villanova.edu> + * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License + * @link http://vufind.org/wiki/search_engine_optimization Wiki + */ + +// Load the Zend framework -- this will automatically trigger the appropriate +// controller action based on directory and file names +define('CLI_DIR', __DIR__); // save directory name of current script +require_once __DIR__ . '/../public/index.php'; diff --git a/util/suppressed.php b/util/suppressed.php new file mode 100644 index 00000000000..e12ea30c291 --- /dev/null +++ b/util/suppressed.php @@ -0,0 +1,33 @@ +<?php +/** + * Command-line tool to delete suppressed records from the index. + * + * PHP version 5 + * + * Copyright (C) Villanova University 2007. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * @category VuFind2 + * @package Utilities + * @author Andrew S. Nagy <vufind-tech@lists.sourceforge.net> + * @author Demian Katz <demian.katz@villanova.edu> + * @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License + * @link http://vufind.org/wiki/automation Wiki + */ + +// Load the Zend framework -- this will automatically trigger the appropriate +// controller action based on directory and file names +define('CLI_DIR', __DIR__); // save directory name of current script +require_once __DIR__ . '/../public/index.php'; -- GitLab