From 09112418e1f10e78d2a2ec6f05cc978e219c7fa5 Mon Sep 17 00:00:00 2001
From: Demian Katz <demian.katz@villanova.edu>
Date: Mon, 23 Jul 2012 15:17:07 -0400
Subject: [PATCH] Added Solr Utils class and associated test cases.

---
 module/VuFind/src/VuFind/Solr/Utils.php | 161 ++++++++++++++++++++++++
 module/VuFind/tests/Solr/UtilsTest.php  | 138 ++++++++++++++++++++
 2 files changed, 299 insertions(+)
 create mode 100644 module/VuFind/src/VuFind/Solr/Utils.php
 create mode 100644 module/VuFind/tests/Solr/UtilsTest.php

diff --git a/module/VuFind/src/VuFind/Solr/Utils.php b/module/VuFind/src/VuFind/Solr/Utils.php
new file mode 100644
index 00000000000..86f98df01b4
--- /dev/null
+++ b/module/VuFind/src/VuFind/Solr/Utils.php
@@ -0,0 +1,161 @@
+<?php
+/**
+ * Solr Utility Functions
+ *
+ * PHP version 5
+ *
+ * Copyright (C) Andrew Nagy 2009.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * @category VuFind2
+ * @package  Support_Classes
+ * @author   Demian Katz <demian.katz@villanova.edu>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     http://vufind.org/wiki/system_classes Wiki
+ */
+namespace VuFind\Solr;
+
+/**
+ * Solr Utility Functions
+ *
+ * This class is designed to hold Solr-related support methods that may
+ * be called statically. This allows sharing of some Solr-related logic
+ * between the Solr and Summon classes.
+ *
+ * @category VuFind2
+ * @package  Support_Classes
+ * @author   Demian Katz <demian.katz@villanova.edu>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     http://vufind.org/wiki/system_classes Wiki
+ */
+class Utils
+{
+    // Lookahead shared by the regular expressions below. It succeeds only
+    // when an even number of double quotes remains between the current
+    // position and the end of the string, i.e. when the text being matched
+    // is NOT inside a quoted phrase (assuming the quotes are balanced).
+    protected static $insideQuotes = '(?=(?:[^\"]*+\"[^\"]*+\")*+[^\"]*+$)';
+
+    /**
+     * Capitalize boolean operators in a query string to allow case-insensitivity.
+     *
+     * @param string $query The query to capitalize.
+     *
+     * @return string       The capitalized query.
+     */
+    public static function capitalizeBooleans($query)
+    {
+        // Load the "inside quotes" lookahead so we can use it to prevent
+        // switching case of Boolean reserved words inside quotes, since
+        // that can cause problems in case-sensitive fields when the reserved
+        // words are actually used as search terms.
+        $lookahead = self::$insideQuotes;
+        $regs = array("/\s+AND\s+{$lookahead}/i", "/\s+OR\s+{$lookahead}/i",
+            "/(\s+NOT\s+|^NOT\s+){$lookahead}/i", "/\(NOT\s+{$lookahead}/i");
+        $replace = array(' AND ', ' OR ', ' NOT ', '(NOT ');
+        $result = preg_replace($regs, $replace, $query);
+
+        // preg_replace() returns null when PCRE fails (e.g. backtrack limit
+        // reached); keep the original query instead of silently emptying it.
+        return trim($result === null ? $query : $result);
+    }
+
+    /**
+     * Make ranges case-insensitive in a query string.
+     *
+     * @param string $query The query to update.
+     *
+     * @return string       The query with case-insensitive ranges.
+     */
+    public static function capitalizeRanges($query)
+    {
+        // Load the "inside quotes" lookahead so we can use it to prevent
+        // switching case of ranges inside quotes, since that can cause
+        // problems in case-sensitive fields when the reserved words are
+        // actually used as search terms.
+        $lookahead = self::$insideQuotes;
+        $regs = array("/(\[)([^\]]+)\s+TO\s+([^\]]+)(\]){$lookahead}/i",
+            "/(\{)([^}]+)\s+TO\s+([^}]+)(\}){$lookahead}/i");
+        // Resolve the callback against the called class (late static binding).
+        $callback = array(get_called_class(), 'capitalizeRangesCallback');
+        $result = preg_replace_callback($regs, $callback, $query);
+
+        // preg_replace_callback() returns null when PCRE fails; keep the
+        // original query instead of silently emptying it.
+        return trim($result === null ? $query : $result);
+    }
+
+    /**
+     * Support method for capitalizeRanges -- process a single match found by
+     * preg_replace_callback.
+     *
+     * @param array $in Array of matches.
+     *
+     * @return string   Processed result.
+     */
+    public static function capitalizeRangesCallback($in)
+    {
+        // Extract the relevant parts of the expression:
+        $open = $in[1];         // opening symbol
+        $close = $in[4];        // closing symbol
+        $start = $in[2];        // start of range
+        $end = $in[3];          // end of range
+
+        // Is this a case-sensitive range? Use strict comparison: with "!="
+        // PHP compares numeric-looking strings such as "1e5" and "1E5" as
+        // numbers, so they would wrongly be treated as case-insensitive.
+        if (strtoupper($start) !== strtolower($start)
+            || strtoupper($end) !== strtolower($end)
+        ) {
+            // Build a lowercase version of the range:
+            $lower = $open . trim(strtolower($start)) . ' TO ' .
+                trim(strtolower($end)) . $close;
+            // Build an uppercase version of the range:
+            $upper = $open . trim(strtoupper($start)) . ' TO ' .
+                trim(strtoupper($end)) . $close;
+
+            // Special case: don't create illegal timestamps!
+            $timestamp = '/[0-9]{4}-[0-9]{2}-[0-9]{2}t[0-9]{2}:[0-9]{2}:[0-9]{2}z/i';
+            if (preg_match($timestamp, $start) || preg_match($timestamp, $end)) {
+                return $upper;
+            }
+
+            // Accept results matching either range:
+            return '(' . $lower . ' OR ' . $upper . ')';
+        } else {
+            // Simpler case -- case insensitive (probably numeric) range:
+            return $open . trim($start) . ' TO ' . trim($end) . $close;
+        }
+    }
+
+    /**
+     * Parse "from" and "to" values out of a range query (or return false if the
+     * query is not a range). Only square-bracket ranges using an uppercase "TO"
+     * are recognized.
+     *
+     * @param string $query Solr query to parse.
+     *
+     * @return array|bool   Array with 'from' and 'to' values extracted from range
+     * or false if the provided query is not a range.
+     */
+    public static function parseRange($query)
+    {
+        $regEx = '/\[([^\]]+)\s+TO\s+([^\]]+)\]/';
+        if (!preg_match($regEx, $query, $matches)) {
+            return false;
+        }
+        return array('from' => trim($matches[1]), 'to' => trim($matches[2]));
+    }
+}
diff --git a/module/VuFind/tests/Solr/UtilsTest.php b/module/VuFind/tests/Solr/UtilsTest.php
new file mode 100644
index 00000000000..49ca801f911
--- /dev/null
+++ b/module/VuFind/tests/Solr/UtilsTest.php
@@ -0,0 +1,138 @@
+<?php
+/**
+ * Solr Utils Test Class
+ *
+ * PHP version 5
+ *
+ * Copyright (C) Villanova University 2010.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * @category VuFind2
+ * @package  Tests
+ * @author   Demian Katz <demian.katz@villanova.edu>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     http://vufind.org/wiki/unit_tests Wiki
+ */
+namespace VuFind\Tests\Solr;
+use VuFind\Solr\Utils;
+
+/**
+ * Solr Utils Test Class
+ *
+ * @category VuFind2
+ * @package  Tests
+ * @author   Demian Katz <demian.katz@villanova.edu>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     http://vufind.org/wiki/unit_tests Wiki
+ */
+class UtilsTest extends \PHPUnit_Framework_TestCase
+{
+    /**
+     * Test capitalizeBooleans functionality.
+     *
+     * @return void
+     */
+    public function testCapitalizeBooleans()
+    {
+        // Set up an array of expected inputs and outputs:
+        // @codingStandardsIgnoreStart
+        $tests = array(
+            array('this not that', 'this NOT that'),        // capitalize not
+            array('this and that', 'this AND that'),        // capitalize and
+            array('this or that', 'this OR that'),          // capitalize or
+            array('apples and oranges (not that)', 'apples AND oranges (NOT that)'),
+            array('"this not that"', '"this not that"'),    // do not capitalize inside quotes
+            array('"this and that"', '"this and that"'),    // do not capitalize inside quotes
+            array('"this or that"', '"this or that"'),      // do not capitalize inside quotes
+            array('"apples and oranges (not that)"', '"apples and oranges (not that)"'),
+            array('this AND that', 'this AND that'),        // don't mess up existing caps
+            array('and and and', 'and AND and'),
+            array('andornot noted andy oranges', 'andornot noted andy oranges'),
+            array('(this or that) and (apples not oranges)', '(this OR that) AND (apples NOT oranges)'),
+            array('this aNd that', 'this AND that'),        // strange capitalization of AND
+            array('this nOt that', 'this NOT that')         // strange capitalization of NOT
+        );
+        // @codingStandardsIgnoreEnd
+
+        // Test all the operations (expected value first, then actual):
+        foreach ($tests as $current) {
+            $this->assertEquals(
+                $current[1], Utils::capitalizeBooleans($current[0])
+            );
+        }
+    }
+
+    /**
+     * Test capitalizeRanges functionality.
+     *
+     * @return void
+     */
+    public function testCapitalizeRanges()
+    {
+        // Set up an array of expected inputs and outputs:
+        // @codingStandardsIgnoreStart
+        $tests = array(
+            array('"{a to b}"', '"{a to b}"'),              // don't capitalize inside quotes
+            array('"[a to b]"', '"[a to b]"'),
+            array('[a to b]', '([a TO b] OR [A TO B])'),    // expand alphabetic cases
+            array('[a TO b]', '([a TO b] OR [A TO B])'),
+            array('[a To b]', '([a TO b] OR [A TO B])'),
+            array('[a tO b]', '([a TO b] OR [A TO B])'),
+            array('{a to b}', '({a TO b} OR {A TO B})'),
+            array('{a TO b}', '({a TO b} OR {A TO B})'),
+            array('{a To b}', '({a TO b} OR {A TO B})'),
+            array('{a tO b}', '({a TO b} OR {A TO B})'),
+            array('[1900 to 1910]', '[1900 TO 1910]'),      // don't expand numeric cases
+            array('[1900 TO 1910]', '[1900 TO 1910]'),
+            array('{1900 to 1910}', '{1900 TO 1910}'),
+            array('{1900 TO 1910}', '{1900 TO 1910}'),
+            array('[a    to    b]', '([a TO b] OR [A TO B])'),      // handle extra spaces
+            array('[1e5 to 1e6]', '([1e5 TO 1e6] OR [1E5 TO 1E6])'),    // exponent-style values differ by case
+            // special case for timestamps:
+            array('[1900-01-01t00:00:00z to 1900-12-31t23:59:59z]', '[1900-01-01T00:00:00Z TO 1900-12-31T23:59:59Z]'),
+            array('{1900-01-01T00:00:00Z TO 1900-12-31T23:59:59Z}', '{1900-01-01T00:00:00Z TO 1900-12-31T23:59:59Z}')
+        );
+        // @codingStandardsIgnoreEnd
+
+        // Test all the operations (expected value first, then actual):
+        foreach ($tests as $current) {
+            $this->assertEquals(
+                $current[1], Utils::capitalizeRanges($current[0])
+            );
+        }
+    }
+
+    /**
+     * Test parseRange functionality.
+     *
+     * @return void
+     */
+    public function testParseRange()
+    {
+        // basic range test:
+        $result = Utils::parseRange("[1 TO 100]");
+        $this->assertEquals('1', $result['from']);
+        $this->assertEquals('100', $result['to']);
+
+        // test whitespace handling:
+        $result = Utils::parseRange("[1      TO     100]");
+        $this->assertEquals('1', $result['from']);
+        $this->assertEquals('100', $result['to']);
+
+        // test invalid ranges:
+        $this->assertFalse(Utils::parseRange('1 TO 100'));
+        $this->assertFalse(Utils::parseRange('[not a range to me]'));
+    }
+}
\ No newline at end of file
-- 
GitLab