Skip to content
Snippets Groups Projects
Commit 09112418 authored by Demian Katz's avatar Demian Katz
Browse files

Added Solr Utils class and associated test cases.

parent 4d58569d
No related merge requests found
<?php
/**
* Solr Utility Functions
*
* PHP version 5
*
* Copyright (C) Andrew Nagy 2009.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* @category VuFind2
* @package Support_Classes
* @author Demian Katz <demian.katz@villanova.edu>
* @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License
* @link http://vufind.org/wiki/system_classes Wiki
*/
namespace VuFind\Solr;
/**
* Solr Utility Functions
*
* This class is designed to hold Solr-related support methods that may
* be called statically. This allows sharing of some Solr-related logic
* between the Solr and Summon classes.
*
* @category VuFind2
* @package Support_Classes
* @author Demian Katz <demian.katz@villanova.edu>
* @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License
* @link http://vufind.org/wiki/system_classes Wiki
*/
class Utils
{
    /**
     * Regex lookahead fragment shared by the capitalization methods.
     *
     * It succeeds only when an even number of double quotes remains between
     * the current position and the end of the string, i.e. when the text just
     * matched is NOT inside a quoted phrase. Patterns append it so that quoted
     * text is left untouched.
     *
     * @var string
     */
    protected static $insideQuotes = '(?=(?:[^\"]*+\"[^\"]*+\")*+[^\"]*+$)';

    /**
     * Capitalize boolean operators in a query string to allow case-insensitivity.
     *
     * AND, OR and NOT are upper-cased (and the whitespace around them is
     * normalized to single spaces) unless they appear inside double quotes.
     *
     * @param string $query The query to capitalize.
     *
     * @return string The capitalized query.
     */
    public static function capitalizeBooleans($query)
    {
        // Use the shared lookahead to avoid switching the case of Boolean
        // reserved words inside quotes, since that can cause problems in
        // case-sensitive fields when the reserved words are actually used
        // as search terms.
        $lookahead = self::$insideQuotes;
        $regs = array(
            "/\s+AND\s+{$lookahead}/i",
            "/\s+OR\s+{$lookahead}/i",
            "/(\s+NOT\s+|^NOT\s+){$lookahead}/i",
            "/\(NOT\s+{$lookahead}/i"
        );
        $replace = array(' AND ', ' OR ', ' NOT ', '(NOT ');
        $result = preg_replace($regs, $replace, $query);

        // preg_replace() returns null on a PCRE error; fall back to the
        // untouched query rather than silently returning an empty string.
        return trim($result === null ? $query : $result);
    }

    /**
     * Make ranges case-insensitive in a query string.
     *
     * Every "[x TO y]" / "{x TO y}" range found outside of double quotes is
     * rewritten by capitalizeRangesCallback().
     *
     * @param string $query The query to update.
     *
     * @return string The query with case-insensitive ranges.
     */
    public static function capitalizeRanges($query)
    {
        // Use the shared lookahead to avoid rewriting ranges inside quotes,
        // since that can cause problems in case-sensitive fields when the
        // reserved words are actually used as search terms.
        $lookahead = self::$insideQuotes;
        $regs = array(
            "/(\[)([^\]]+)\s+TO\s+([^\]]+)(\]){$lookahead}/i",
            "/(\{)([^}]+)\s+TO\s+([^}]+)(\}){$lookahead}/i"
        );
        $callback = array(get_called_class(), 'capitalizeRangesCallback');
        $result = preg_replace_callback($regs, $callback, $query);

        // preg_replace_callback() returns null on a PCRE error; fall back to
        // the untouched query rather than silently returning an empty string.
        return trim($result === null ? $query : $result);
    }

    /**
     * Support method for capitalizeRanges -- process a single match found by
     * preg_replace_callback.
     *
     * @param array $in Array of matches (1 = opening symbol, 2 = start of range,
     * 3 = end of range, 4 = closing symbol).
     *
     * @return string Processed result.
     */
    public static function capitalizeRangesCallback($in)
    {
        // Extract the relevant parts of the expression:
        $open = $in[1];     // opening symbol
        $close = $in[4];    // closing symbol
        $start = $in[2];    // start of range
        $end = $in[3];      // end of range

        // Is this a case-sensitive range? Strict comparison is required here:
        // with loose comparison PHP compares two numeric strings as numbers,
        // so e.g. "1E3" and "1e3" would be considered equal and the range
        // would wrongly be treated as case-insensitive.
        $caseSensitive = strtoupper($start) !== strtolower($start)
            || strtoupper($end) !== strtolower($end);

        if (!$caseSensitive) {
            // Simpler case -- case insensitive (probably numeric) range:
            return $open . trim($start) . ' TO ' . trim($end) . $close;
        }

        // Build a lowercase version of the range:
        $lower = $open . trim(strtolower($start)) . ' TO ' .
            trim(strtolower($end)) . $close;
        // Build an uppercase version of the range:
        $upper = $open . trim(strtoupper($start)) . ' TO ' .
            trim(strtoupper($end)) . $close;

        // Special case: don't create illegal timestamps! Only the uppercase
        // form is returned when either end of the range looks like a timestamp.
        $timestamp = '/[0-9]{4}-[0-9]{2}-[0-9]{2}t[0-9]{2}:[0-9]{2}:[0-9]{2}z/i';
        if (preg_match($timestamp, $start) || preg_match($timestamp, $end)) {
            return $upper;
        }

        // Accept results matching either range:
        return '(' . $lower . ' OR ' . $upper . ')';
    }

    /**
     * Parse "from" and "to" values out of a range query (or return false if the
     * query is not a range).
     *
     * Only square-bracket ranges with an upper-case "TO" are recognized.
     *
     * @param string $query Solr query to parse.
     *
     * @return array|bool Array with 'from' and 'to' values extracted from range
     * or false if the provided query is not a range.
     */
    public static function parseRange($query)
    {
        $regEx = '/\[([^\]]+)\s+TO\s+([^\]]+)\]/';
        if (!preg_match($regEx, $query, $matches)) {
            return false;
        }
        return array('from' => trim($matches[1]), 'to' => trim($matches[2]));
    }
}
<?php
/**
* Solr Utils Test Class
*
* PHP version 5
*
* Copyright (C) Villanova University 2010.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* @category VuFind2
* @package Tests
* @author Demian Katz <demian.katz@villanova.edu>
* @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License
* @link http://vufind.org/wiki/unit_tests Wiki
*/
namespace VuFind\Tests\Solr;
use VuFind\Solr\Utils;
/**
* Solr Utils Test Class
*
* @category VuFind2
* @package Tests
* @author Demian Katz <demian.katz@villanova.edu>
* @license http://opensource.org/licenses/gpl-2.0.php GNU General Public License
* @link http://vufind.org/wiki/unit_tests Wiki
*/
class UtilsTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Test capitalizeBooleans functionality.
     *
     * @return void
     */
    public function testCapitalizeBooleans()
    {
        // Set up an array of [input, expected output] pairs:
        // @codingStandardsIgnoreStart
        $tests = array(
            array('this not that', 'this NOT that'),        // capitalize not
            array('this and that', 'this AND that'),        // capitalize and
            array('this or that', 'this OR that'),          // capitalize or
            array('apples and oranges (not that)', 'apples AND oranges (NOT that)'),
            array('"this not that"', '"this not that"'),    // do not capitalize inside quotes
            array('"this and that"', '"this and that"'),    // do not capitalize inside quotes
            array('"this or that"', '"this or that"'),      // do not capitalize inside quotes
            array('"apples and oranges (not that)"', '"apples and oranges (not that)"'),
            array('this AND that', 'this AND that'),        // don't mess up existing caps
            array('and and and', 'and AND and'),
            array('andornot noted andy oranges', 'andornot noted andy oranges'),
            array('(this or that) and (apples not oranges)', '(this OR that) AND (apples NOT oranges)'),
            array('this aNd that', 'this AND that'),        // strange capitalization of AND
            array('this nOt that', 'this NOT that')         // strange capitalization of NOT
        );
        // @codingStandardsIgnoreEnd

        // Test all the operations. assertEquals() takes the expected value
        // first; the message identifies which input failed inside the loop.
        foreach ($tests as $current) {
            list($input, $expected) = $current;
            $this->assertEquals(
                $expected, Utils::capitalizeBooleans($input),
                "Unexpected result for input: {$input}"
            );
        }
    }

    /**
     * Test capitalizeRanges functionality.
     *
     * @return void
     */
    public function testCapitalizeRanges()
    {
        // Set up an array of [input, expected output] pairs:
        // @codingStandardsIgnoreStart
        $tests = array(
            array('"{a to b}"', '"{a to b}"'),              // don't capitalize inside quotes
            array('"[a to b]"', '"[a to b]"'),
            array('[a to b]', '([a TO b] OR [A TO B])'),    // expand alphabetic cases
            array('[a TO b]', '([a TO b] OR [A TO B])'),
            array('[a To b]', '([a TO b] OR [A TO B])'),
            array('[a tO b]', '([a TO b] OR [A TO B])'),
            array('{a to b}', '({a TO b} OR {A TO B})'),
            array('{a TO b}', '({a TO b} OR {A TO B})'),
            array('{a To b}', '({a TO b} OR {A TO B})'),
            array('{a tO b}', '({a TO b} OR {A TO B})'),
            array('[1900 to 1910]', '[1900 TO 1910]'),      // don't expand numeric cases
            array('[1900 TO 1910]', '[1900 TO 1910]'),
            array('{1900 to 1910}', '{1900 TO 1910}'),
            array('{1900 TO 1910}', '{1900 TO 1910}'),
            array('[a      to      b]', '([a TO b] OR [A TO B])'),  // handle extra spaces
            // special case for timestamps:
            array('[1900-01-01t00:00:00z to 1900-12-31t23:59:59z]', '[1900-01-01T00:00:00Z TO 1900-12-31T23:59:59Z]'),
            array('{1900-01-01T00:00:00Z TO 1900-12-31T23:59:59Z}', '{1900-01-01T00:00:00Z TO 1900-12-31T23:59:59Z}')
        );
        // @codingStandardsIgnoreEnd

        // Test all the operations. assertEquals() takes the expected value
        // first; the message identifies which input failed inside the loop.
        foreach ($tests as $current) {
            list($input, $expected) = $current;
            $this->assertEquals(
                $expected, Utils::capitalizeRanges($input),
                "Unexpected result for input: {$input}"
            );
        }
    }

    /**
     * Test parseRange functionality.
     *
     * @return void
     */
    public function testParseRange()
    {
        // basic range test:
        $result = Utils::parseRange("[1 TO 100]");
        $this->assertEquals('1', $result['from']);
        $this->assertEquals('100', $result['to']);

        // test whitespace handling (runs of spaces around the TO keyword):
        $result = Utils::parseRange("[1      TO     100]");
        $this->assertEquals('1', $result['from']);
        $this->assertEquals('100', $result['to']);

        // test invalid ranges (no brackets; lowercase "to" is not accepted):
        $this->assertFalse(Utils::parseRange('1 TO 100'));
        $this->assertFalse(Utils::parseRange('[not a range to me]'));
    }
}
\ No newline at end of file
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment