From 09112418e1f10e78d2a2ec6f05cc978e219c7fa5 Mon Sep 17 00:00:00 2001
From: Demian Katz <demian.katz@villanova.edu>
Date: Mon, 23 Jul 2012 15:17:07 -0400
Subject: [PATCH] Added Solr Utils class and associated test cases.

---
 module/VuFind/src/VuFind/Solr/Utils.php | 147 ++++++++++++++++++++++++
 module/VuFind/tests/Solr/UtilsTest.php  | 137 ++++++++++++++++++++++
 2 files changed, 284 insertions(+)
 create mode 100644 module/VuFind/src/VuFind/Solr/Utils.php
 create mode 100644 module/VuFind/tests/Solr/UtilsTest.php

diff --git a/module/VuFind/src/VuFind/Solr/Utils.php b/module/VuFind/src/VuFind/Solr/Utils.php
new file mode 100644
index 00000000000..86f98df01b4
--- /dev/null
+++ b/module/VuFind/src/VuFind/Solr/Utils.php
@@ -0,0 +1,147 @@
+<?php
+/**
+ * Solr Utility Functions
+ *
+ * PHP version 5
+ *
+ * Copyright (C) Andrew Nagy 2009.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * @category VuFind2
+ * @package  Support_Classes
+ * @author   Demian Katz <demian.katz@villanova.edu>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     http://vufind.org/wiki/system_classes Wiki
+ */
+namespace VuFind\Solr;
+
+/**
+ * Solr Utility Functions
+ *
+ * This class is designed to hold Solr-related support methods that may
+ * be called statically.  This allows sharing of some Solr-related logic
+ * between the Solr and Summon classes.
+ *
+ * @category VuFind2
+ * @package  Support_Classes
+ * @author   Demian Katz <demian.katz@villanova.edu>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     http://vufind.org/wiki/system_classes Wiki
+ */
+class Utils
+{
+    // This lookahead only matches when we are NOT inside quotes (an even
+    // number of quotes follows); it may be shared by multiple methods.
+    protected static $insideQuotes = '(?=(?:[^\"]*+\"[^\"]*+\")*+[^\"]*+$)';
+
+    /**
+     * Capitalize boolean operators in a query string to allow case-insensitivity.
+     *
+     * @param string $query The query to capitalize.
+     *
+     * @return string       The capitalized query.
+     */
+    public static function capitalizeBooleans($query)
+    {
+        // Load the quote-detecting lookahead so we can use it to prevent
+        // switching case of Boolean reserved words inside quotes, since
+        // that can cause problems in case-sensitive fields when the reserved
+        // words are actually used as search terms.
+        $lookahead = self::$insideQuotes;
+        $regs = array("/\s+AND\s+{$lookahead}/i", "/\s+OR\s+{$lookahead}/i",
+                "/(\s+NOT\s+|^NOT\s+){$lookahead}/i", "/\(NOT\s+{$lookahead}/i");
+        $replace = array(' AND ', ' OR ', ' NOT ', '(NOT ');
+        return trim(preg_replace($regs, $replace, $query));
+    }
+
+    /**
+     * Make ranges case-insensitive in a query string.
+     *
+     * @param string $query The query to update.
+     *
+     * @return string       The query with case-insensitive ranges.
+     */
+    public static function capitalizeRanges($query)
+    {
+        // Load the quote-detecting lookahead so we can use it to prevent
+        // switching case of ranges inside quotes, since that can cause
+        // problems in case-sensitive fields when the reserved words are
+        // actually used as search terms.
+        $lookahead = self::$insideQuotes;
+        $regs = array("/(\[)([^\]]+)\s+TO\s+([^\]]+)(\]){$lookahead}/i",
+            "/(\{)([^}]+)\s+TO\s+([^}]+)(\}){$lookahead}/i");
+        $callback = array(get_called_class(), 'capitalizeRangesCallback');
+        return trim(preg_replace_callback($regs, $callback, $query));
+    }
+
+    /**
+     * Support method for capitalizeRanges -- process a single match found by
+     * preg_replace_callback.
+     *
+     * @param array $in Array of matches.
+     *
+     * @return string   Processed result.
+     */
+    public static function capitalizeRangesCallback($in)
+    {
+        // Extract the relevant parts of the expression:
+        $open = $in[1];         // opening symbol
+        $close = $in[4];        // closing symbol
+        $start = $in[2];        // start of range
+        $end = $in[3];          // end of range
+
+        // Is this a case-sensitive range?  Compare strictly: a loose != treats
+        // numeric strings such as "1e3" and "1E3" as equal numbers.
+        if (strtoupper($start) !== strtolower($start)
+            || strtoupper($end) !== strtolower($end)
+        ) {
+            // Build a lowercase version of the range:
+            $lower = $open . trim(strtolower($start)) . ' TO ' .
+                trim(strtolower($end)) . $close;
+            // Build an uppercase version of the range:
+            $upper = $open . trim(strtoupper($start)) . ' TO ' .
+                trim(strtoupper($end)) . $close;
+
+            // Special case: don't create illegal timestamps!
+            $timestamp = '/[0-9]{4}-[0-9]{2}-[0-9]{2}t[0-9]{2}:[0-9]{2}:[0-9]{2}z/i';
+            if (preg_match($timestamp, $start) || preg_match($timestamp, $end)) {
+                return $upper;
+            }
+
+            // Accept results matching either range:
+            return '(' . $lower . ' OR ' . $upper . ')';
+        }
+        // Simpler case -- case insensitive (probably numeric) range:
+        return $open . trim($start) . ' TO ' . trim($end) . $close;
+    }
+
+    /**
+     * Parse "from" and "to" values out of a bracketed [x TO y] range query
+     * (TO must be uppercase), or return false if the query is not a range.
+     *
+     * @param string $query Solr query to parse.
+     *
+     * @return array|bool   Array with 'from' and 'to' values extracted from range
+     * or false if the provided query is not a range.
+     */
+    public static function parseRange($query)
+    {
+        $regEx = '/\[([^\]]+)\s+TO\s+([^\]]+)\]/';
+        if (!preg_match($regEx, $query, $matches)) {
+            return false;
+        }
+        return array('from' => trim($matches[1]), 'to' => trim($matches[2]));
+    }
+}
diff --git a/module/VuFind/tests/Solr/UtilsTest.php b/module/VuFind/tests/Solr/UtilsTest.php
new file mode 100644
index 00000000000..49ca801f911
--- /dev/null
+++ b/module/VuFind/tests/Solr/UtilsTest.php
@@ -0,0 +1,137 @@
+<?php
+/**
+ * Solr Utils Test Class
+ *
+ * PHP version 5
+ *
+ * Copyright (C) Villanova University 2010.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * @category VuFind2
+ * @package  Tests
+ * @author   Demian Katz <demian.katz@villanova.edu>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     http://vufind.org/wiki/unit_tests Wiki
+ */
+namespace VuFind\Tests\Solr;
+use VuFind\Solr\Utils;
+
+/**
+ * Solr Utils Test Class
+ *
+ * @category VuFind2
+ * @package  Tests
+ * @author   Demian Katz <demian.katz@villanova.edu>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     http://vufind.org/wiki/unit_tests Wiki
+ */
+class UtilsTest extends \PHPUnit_Framework_TestCase
+{
+    /**
+     * Test capitalizeBooleans functionality.
+     *
+     * @return void
+     */
+    public function testCapitalizeBooleans()
+    {
+        // Set up an array of expected inputs and outputs:
+        // @codingStandardsIgnoreStart
+        $tests = array(
+            array('this not that', 'this NOT that'),        // capitalize not
+            array('this and that', 'this AND that'),        // capitalize and
+            array('this or that', 'this OR that'),          // capitalize or
+            array('apples and oranges (not that)', 'apples AND oranges (NOT that)'),
+            array('"this not that"', '"this not that"'),    // do not capitalize inside quotes
+            array('"this and that"', '"this and that"'),    // do not capitalize inside quotes
+            array('"this or that"', '"this or that"'),      // do not capitalize inside quotes
+            array('"apples and oranges (not that)"', '"apples and oranges (not that)"'),
+            array('this AND that', 'this AND that'),        // don't mess up existing caps
+            array('and and and', 'and AND and'),
+            array('andornot noted andy oranges', 'andornot noted andy oranges'),
+            array('(this or that) and (apples not oranges)', '(this OR that) AND (apples NOT oranges)'),
+            array('this aNd that', 'this AND that'),        // strange capitalization of AND
+            array('this nOt that', 'this NOT that')         // strange capitalization of NOT
+        );
+        // @codingStandardsIgnoreEnd
+
+        // Test all the operations (expected value first, actual second):
+        foreach ($tests as $current) {
+            $this->assertEquals(
+                $current[1], Utils::capitalizeBooleans($current[0])
+            );
+        }
+    }
+
+    /**
+     * Test capitalizeRanges functionality.
+     *
+     * @return void
+     */
+    public function testCapitalizeRanges()
+    {
+        // Set up an array of expected inputs and outputs:
+        // @codingStandardsIgnoreStart
+        $tests = array(
+            array('"{a to b}"', '"{a to b}"'),              // don't capitalize inside quotes
+            array('"[a to b]"', '"[a to b]"'),
+            array('[a to b]', '([a TO b] OR [A TO B])'),    // expand alphabetic cases
+            array('[a TO b]', '([a TO b] OR [A TO B])'),
+            array('[a To b]', '([a TO b] OR [A TO B])'),
+            array('[a tO b]', '([a TO b] OR [A TO B])'),
+            array('{a to b}', '({a TO b} OR {A TO B})'),
+            array('{a TO b}', '({a TO b} OR {A TO B})'),
+            array('{a To b}', '({a TO b} OR {A TO B})'),
+            array('{a tO b}', '({a TO b} OR {A TO B})'),
+            array('[1900 to 1910]', '[1900 TO 1910]'),      // don't expand numeric cases
+            array('[1900 TO 1910]', '[1900 TO 1910]'),
+            array('{1900 to 1910}', '{1900 TO 1910}'),
+            array('{1900 TO 1910}', '{1900 TO 1910}'),
+            array('[a      to      b]', '([a TO b] OR [A TO B])'),   // handle extra spaces
+            // special case for timestamps:
+            array('[1900-01-01t00:00:00z to 1900-12-31t23:59:59z]', '[1900-01-01T00:00:00Z TO 1900-12-31T23:59:59Z]'),
+            array('{1900-01-01T00:00:00Z       TO   1900-12-31T23:59:59Z}', '{1900-01-01T00:00:00Z TO 1900-12-31T23:59:59Z}')
+        );
+        // @codingStandardsIgnoreEnd
+
+        // Test all the operations (expected value first, actual second):
+        foreach ($tests as $current) {
+            $this->assertEquals(
+                $current[1], Utils::capitalizeRanges($current[0])
+            );
+        }
+    }
+
+    /**
+     * Test parseRange functionality.
+     *
+     * @return void
+     */
+    public function testParseRange()
+    {
+        // basic range test:
+        $result = Utils::parseRange("[1 TO 100]");
+        $this->assertEquals('1', $result['from']);
+        $this->assertEquals('100', $result['to']);
+
+        // test whitespace handling:
+        $result = Utils::parseRange("[1      TO     100]");
+        $this->assertEquals('1', $result['from']);
+        $this->assertEquals('100', $result['to']);
+
+        // test invalid ranges:
+        $this->assertFalse(Utils::parseRange('1 TO 100'));
+        $this->assertFalse(Utils::parseRange('[not a range to me]'));
+    }
+}
\ No newline at end of file
-- 
GitLab