From 4bfd8094e7a2c85ee951945c3d193596d10fb3f0 Mon Sep 17 00:00:00 2001 From: Demian Katz <demian.katz@villanova.edu> Date: Mon, 2 Jul 2012 14:41:25 -0400 Subject: [PATCH] Began porting WorldCat functionality (untested). --- module/VuFind/src/VuFind/Connection/SRU.php | 333 +++++++++++++ .../VuFind/src/VuFind/Connection/WorldCat.php | 215 ++++++++ .../src/VuFind/Connection/WorldCatUtils.php | 466 ++++++++++++++++++ module/VuFind/src/VuFind/XSLT/Processor.php | 62 +++ 4 files changed, 1076 insertions(+) create mode 100644 module/VuFind/src/VuFind/Connection/SRU.php create mode 100644 module/VuFind/src/VuFind/Connection/WorldCat.php create mode 100644 module/VuFind/src/VuFind/Connection/WorldCatUtils.php create mode 100644 module/VuFind/src/VuFind/XSLT/Processor.php diff --git a/module/VuFind/src/VuFind/Connection/SRU.php b/module/VuFind/src/VuFind/Connection/SRU.php new file mode 100644 index 00000000000..c8030f2f6e8 --- /dev/null +++ b/module/VuFind/src/VuFind/Connection/SRU.php @@ -0,0 +1,333 @@ +<?php +/** + * SRU Search Interface + * + * PHP version 5 + * + * Copyright (C) Andrew Nagy 2008. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * @category VuFind2 + * @package Support_Classes + * @author Andrew S. 
Nagy <vufind-tech@lists.sourceforge.net>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     http://vufind.org/wiki/system_classes#searching Wiki
+ */
+namespace VuFind\Connection;
+use VuFind\Http\Client as HttpClient, VuFind\Log\Logger,
+    VuFind\XSLT\Processor as XSLTProcessor;
+
+/**
+ * SRU Search Interface
+ *
+ * @category VuFind2
+ * @package  Support_Classes
+ * @author   Andrew S. Nagy <vufind-tech@lists.sourceforge.net>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     http://vufind.org/wiki/system_classes#searching Wiki
+ */
+class SRU
+{
+    /**
+     * Logger object for debug info (or false for no debugging).
+     *
+     * @var object|bool
+     */
+    protected $logger;
+
+    /**
+     * Controls the return type of process(): true returns the transformed XML
+     * as a string, false parses it into a SimpleXMLElement.
+     *
+     * @var bool
+     */
+    protected $raw = false;
+
+    /**
+     * The HTTP client used for REST transactions
+     *
+     * @var HttpClient
+     */
+    protected $client;
+
+    /**
+     * The host to connect to
+     *
+     * @var string
+     */
+    protected $host;
+
+    /**
+     * The version to specify in the URL
+     *
+     * @var string
+     */
+    protected $sruVersion = '1.1';
+
+    /**
+     * Constructor
+     *
+     * Stores the endpoint URL and sets up the HTTP client and debug logger.
+     *
+     * @param string $host The base URL of the SRU endpoint
+     */
+    public function __construct($host)
+    {
+        // Initialize properties needed for HTTP connection:
+        $this->host = $host;
+        $this->client = new HttpClient();
+
+        // Don't waste time generating debug messages if nobody is listening:
+        $this->logger = Logger::debugNeeded() ? Logger::getInstance() : false;
+    }
+
+    /**
+     * Build a CQL query string from search parameters.
+     *
+     * Each entry of $search may carry 'lookfor' (the search terms), 'field'
+     * (the search type; unknown or missing values are treated as 'all') and
+     * 'bool' (the operator used to join this clause to the previous one;
+     * 'AND' is assumed when it is missing).  Entries with empty search terms,
+     * and fields that have no CQL mapping yet, contribute nothing.
+     *
+     * @param array $search An array of search parameters
+     *
+     * @return string The CQL query (empty string if nothing was searchable)
+     */
+    public function buildQuery($search)
+    {
+        $query = '';
+        foreach ($search as $params) {
+            if (!isset($params['lookfor']) || $params['lookfor'] == '') {
+                continue;
+            }
+            $field = isset($params['field']) ? $params['field'] : 'all';
+            $clause = $this->buildClause($field, $params['lookfor']);
+            if ($clause === '') {
+                // Unmapped field: skip it entirely so that we never emit a
+                // boolean operator with nothing on one side of it.
+                continue;
+            }
+            if ($query !== '') {
+                $bool = isset($params['bool']) ? $params['bool'] : 'AND';
+                $query .= ' ' . $bool . ' ';
+            }
+            $query .= $clause;
+        }
+
+        return $query;
+    }
+
+    /**
+     * Support method for buildQuery() -- translate a single field/terms pair
+     * into a CQL clause.
+     *
+     * @param string $field   Search type (title, id, author, year, all...)
+     * @param string $lookfor Search terms
+     *
+     * @return string CQL clause (empty string for fields with no mapping)
+     */
+    protected function buildClause($field, $lookfor)
+    {
+        switch ($field) {
+        case 'title':
+            return 'dc.title="' . $lookfor . '" OR dc.title=' . $lookfor;
+        case 'id':
+            return 'rec.id=' . $lookfor;
+        case 'author':
+            // Split the input into quoted phrases and bare words; the words
+            // inside a quoted phrase are ANDed together.
+            preg_match_all('/"[^"]*"|[^ ]+/', $lookfor, $wordList);
+            $author = array();
+            foreach ($wordList[0] as $phrase) {
+                if (substr($phrase, 0, 1) == '"') {
+                    $arr = explode(
+                        ' ', substr($phrase, 1, strlen($phrase) - 2)
+                    );
+                    $author[] = implode(' AND ', $arr);
+                } else {
+                    $author[] = $phrase;
+                }
+            }
+            $author = implode(' ', $author);
+            // Note the space after OR; without it the two halves run together
+            // into the invalid token "ORdc.creator".
+            return 'dc.creator any "' . $author . '" OR '
+                . 'dc.creator any ' . $author;
+        case 'year':
+            return 'dc.date=' . $lookfor;
+        case 'callnumber':
+        case 'publisher':
+        case 'series':
+        case 'language':
+        case 'toc':
+        case 'topic':
+        case 'geo':
+        case 'era':
+        case 'genre':
+        case 'subject':
+        case 'isn':
+            // Recognized search types that have no CQL mapping yet.
+            return '';
+        case 'all':
+        default:
+            return 'dc.title="' . $lookfor . '" OR dc.title=' .
+                $lookfor . ' OR dc.creator="' .
+                $lookfor . '" OR dc.creator=' .
+                $lookfor . ' OR dc.subject="' .
+                $lookfor . '" OR dc.subject=' .
+                $lookfor . ' OR dc.description=' .
+                $lookfor . ' OR dc.date=' . $lookfor;
+        }
+    }
+
+    /**
+     * Get records similar to one record
+     *
+     * @param array  $record An associative array of the record data; the title
+     * is read from $record['245']['a']
+     * @param string $id     The record id (excluded from the results)
+     * @param int    $max    The maximum records to return; Default is 5
+     *
+     * @throws \Exception
+     * @return string|\SimpleXMLElement Processed response (see process())
+     */
+    public function getMoreLikeThis($record, $id, $max = 5)
+    {
+        // More Like This Query
+        $query = 'title="' . $record['245']['a'] . '" ' .
+            "NOT rec.id=$id";
+
+        // Query String Parameters
+        $options = array('operation' => 'searchRetrieve',
+                         'query' => $query,
+                         'maximumRecords' => $max,
+                         'startRecord' => 1,
+                         'recordSchema' => 'marcxml');
+
+        if ($this->logger) {
+            $this->logger->debug('More Like This Query: ' . print_r($query, true));
+        }
+
+        return $this->call('GET', $options);
+    }
+
+    /**
+     * Scan
+     *
+     * @param string $clause   The CQL clause specifying the start point
+     * @param int    $pos      The position of the start point in the response
+     * @param int    $maxTerms The maximum number of terms to return
+     *
+     * @throws \Exception
+     * @return string Unprocessed response body
+     */
+    public function scan($clause, $pos = null, $maxTerms = null)
+    {
+        $options = array('operation' => 'scan',
+                         'scanClause' => $clause);
+        if (!is_null($pos)) {
+            $options['responsePosition'] = $pos;
+        }
+        if (!is_null($maxTerms)) {
+            $options['maximumTerms'] = $maxTerms;
+        }
+
+        return $this->call('GET', $options, false);
+    }
+
+    /**
+     * Search
+     *
+     * @param string $query   The search query
+     * @param string $start   The record to start with (a false-equivalent
+     * value is replaced by 1)
+     * @param string $limit   The amount of records to return
+     * @param string $sortBy  The value to be used by for sorting
+     * @param string $schema  Record schema to use in results list
+     * @param bool   $process Process the response (true) or return the
+     * unprocessed response body (false)
+     *
+     * @throws \Exception
+     * @return string|\SimpleXMLElement
+     */
+    public function search($query, $start = 1, $limit = null, $sortBy = null,
+        $schema = 'marcxml', $process = true
+    ) {
+        if ($this->logger) {
+            $this->logger->debug('Query: ' . print_r($query, true));
+        }
+
+        // Query String Parameters
+        $options = array('operation' => 'searchRetrieve',
+                         'query' => $query,
+                         'startRecord' => ($start) ? $start : 1,
+                         'recordSchema' => $schema);
+        if (!is_null($limit)) {
+            $options['maximumRecords'] = $limit;
+        }
+        if (!is_null($sortBy)) {
+            $options['sortKeys'] = $sortBy;
+        }
+
+        return $this->call('GET', $options, $process);
+    }
+
+    /**
+     * Check for HTTP errors in a response.
+     *
+     * @param object $result The HTTP response to check.
+     *
+     * @throws \Exception
+     * @return void
+     */
+    public function checkForHttpError($result)
+    {
+        if (!$result->isSuccess()) {
+            // Zend Framework 2 responses expose getStatusCode() while Zend
+            // Framework 1 responses expose getStatus(); use whichever exists.
+            // NOTE(review): confirm which response class the HTTP client
+            // returns and drop the fallback once that is settled.
+            $status = method_exists($result, 'getStatusCode')
+                ? $result->getStatusCode() : $result->getStatus();
+            // Leading backslash: without it the name would resolve to
+            // VuFind\Connection\Exception, which is not defined here.
+            throw new \Exception('HTTP error ' . $status);
+        }
+    }
+
+    /**
+     * Submit REST Request
+     *
+     * @param string $method  HTTP Method to use: GET or POST
+     * @param array  $params  An array of parameters for the request; array
+     * values are sent as repeated parameters
+     * @param bool   $process Should we process the response (see process())?
+     *
+     * @throws \Exception
+     * @return string|\SimpleXMLElement The response from the SRU server
+     */
+    protected function call($method = 'GET', $params = null, $process = true)
+    {
+        // Always start with the version so that the query string is defined
+        // even when no other parameters were supplied:
+        $query = array('version=' . $this->sruVersion);
+        if ($params) {
+            foreach ($params as $function => $value) {
+                if (is_array($value)) {
+                    foreach ($value as $additional) {
+                        $query[] = $function . '=' . urlencode($additional);
+                    }
+                } else {
+                    $query[] = $function . '=' . urlencode($value);
+                }
+            }
+        }
+        $url = $this->host . '?' . implode('&', $query);
+
+        if ($this->logger) {
+            $this->logger->debug('Connect: ' . print_r($url, true));
+        }
+
+        // Send Request
+        $this->client->resetParameters();
+        $this->client->setUri($url);
+        $result = $this->client->setMethod($method)->send();
+        $this->checkForHttpError($result);
+
+        // Return processed or unprocessed response, as appropriate:
+        return $process ? $this->process($result->getBody()) : $result->getBody();
+    }
+
+    /**
+     * Process an SRU response.  The response is transformed with the
+     * sru-convert.xsl stylesheet and then returned either as a string or as a
+     * SimpleXMLElement, based on the contents of the class' raw property.
+     *
+     * @param string $result SRU response
+     *
+     * @throws \Exception
+     * @return string|\SimpleXMLElement
+     */
+    protected function process($result)
+    {
+        if (substr($result, 0, 5) != '<?xml') {
+            throw new \Exception('Cannot Load Results');
+        }
+
+        // Send back either the raw XML or a SimpleXML object, as requested:
+        $result = XSLTProcessor::process('sru-convert.xsl', $result);
+        return $this->raw ? $result : simplexml_load_string($result);
+    }
+}
diff --git a/module/VuFind/src/VuFind/Connection/WorldCat.php b/module/VuFind/src/VuFind/Connection/WorldCat.php
new file mode 100644
index 00000000000..80c3b6a0639
--- /dev/null
+++ b/module/VuFind/src/VuFind/Connection/WorldCat.php
@@ -0,0 +1,215 @@
+<?php
+/**
+ * Class for accessing OCLC WorldCat search API
+ *
+ * PHP version 5
+ *
+ * Copyright (C) Andrew Nagy 2008.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * @category VuFind2
+ * @package  Support_Classes
+ * @author   Andrew S. Nagy <vufind-tech@lists.sourceforge.net>
+ * @author   Demian Katz <demian.katz@villanova.edu>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     http://vufind.org/wiki/system_classes Wiki
+ */
+namespace VuFind\Connection;
+use VuFind\Config\Reader as ConfigReader;
+
+/**
+ * WorldCat SRU Search Interface
+ *
+ * @category VuFind2
+ * @package  Support_Classes
+ * @author   Andrew S.
Nagy <vufind-tech@lists.sourceforge.net>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     http://vufind.org/wiki/system_classes Wiki
+ */
+class WorldCat extends SRU
+{
+    /**
+     * WorldCat API key (null if not configured)
+     *
+     * @var string
+     */
+    protected $wskey;
+
+    /**
+     * OCLC symbol(s) used to restrict searches (null if not configured)
+     *
+     * @var mixed
+     */
+    protected $limitCodes;
+
+    /**
+     * Constructor
+     *
+     * Points the SRU client at the WorldCat search endpoint and reads the API
+     * key and limit codes from the [WorldCat] section of the configuration.
+     */
+    public function __construct()
+    {
+        parent::__construct(
+            'http://www.worldcat.org/webservices/catalog/search/sru'
+        );
+        $config = ConfigReader::getConfig();
+        $this->wskey = isset($config->WorldCat->apiKey)
+            ? $config->WorldCat->apiKey : null;
+        $this->limitCodes = isset($config->WorldCat->LimitCodes)
+            ? $config->WorldCat->LimitCodes : null;
+    }
+
+    /**
+     * Support method for getHoldings() and getRecord() -- POST to a WorldCat
+     * content URI and return the response body.
+     *
+     * @param string $uri Full URI (including query string) to request
+     *
+     * @throws \Exception
+     * @return string Response body
+     */
+    protected function fetchContent($uri)
+    {
+        $this->client->resetParameters();
+        $this->client->setUri($uri);
+        if ($this->logger) {
+            $this->logger->debug('Connect: ' . $uri);
+        }
+        // Use the same setMethod()/send() client calls as SRU::call() so that
+        // every request in this class hierarchy goes through one client API.
+        $result = $this->client->setMethod('POST')->send();
+        $this->checkForHttpError($result);
+
+        return $result->getBody();
+    }
+
+    /**
+     * Get holdings information for the specified record.
+     *
+     * @param string $id Record to obtain holdings for.
+     *
+     * @throws \Exception
+     * @return \SimpleXMLElement
+     */
+    public function getHoldings($id)
+    {
+        $uri = "http://www.worldcat.org/webservices/catalog/content/libraries/{$id}";
+        $uri .= "?wskey={$this->wskey}&servicelevel=full";
+
+        return simplexml_load_string($this->fetchContent($uri));
+    }
+
+    /**
+     * Retrieve a specific record.
+     *
+     * @param string $id Record ID to retrieve
+     *
+     * @throws \Exception
+     * @return string    Response body (MARC XML)
+     */
+    public function getRecord($id)
+    {
+        $uri = 'http://www.worldcat.org/webservices/catalog/content/' . $id;
+        $uri .= "?wskey={$this->wskey}&servicelevel=full";
+
+        return $this->fetchContent($uri);
+    }
+
+    /**
+     * Search
+     *
+     * NOTE(review): this signature differs from SRU::search(), so the two are
+     * not interchangeable; callers must know which class they are using.
+     *
+     * @param string $query    The search query
+     * @param string $oclcCode An OCLC code to exclude from results
+     * @param int    $page     The page of records to start with
+     * @param int    $limit    The number of records to return per page
+     * @param string $sort     The value to be used by for sorting (defaults to
+     * 'relevance' when empty)
+     *
+     * @throws \Exception
+     * @return \SimpleXMLElement Parsed search response
+     */
+    public function search($query, $oclcCode = null, $page = 1, $limit = 10,
+        $sort = null
+    ) {
+        // Exclude current library from results
+        if ($oclcCode) {
+            $query .= ' not srw.li all "' . $oclcCode . '"';
+        }
+
+        // SRU record positions are 1-based, so page 1 starts at record 1,
+        // page 2 at record $limit + 1, and so on.
+        $start = (($page - 1) * $limit) + 1;
+
+        // Submit query
+        $params = array('query' => $query,
+                        'startRecord' => $start,
+                        'maximumRecords' => $limit,
+                        'sortKeys' => empty($sort) ? 'relevance' : $sort,
+                        'servicelevel' => 'full',
+                        'wskey' => $this->wskey);
+
+        // Establish a limitation on searching by OCLC Codes
+        if (!empty($this->limitCodes)) {
+            $params['oclcsymbol'] = $this->limitCodes;
+        }
+
+        return simplexml_load_string($this->call('POST', $params, false));
+    }
+
+    /**
+     * Build Query string from search parameters
+     *
+     * Entries containing a 'group' key are treated as advanced search groups:
+     * each group member is built recursively as a basic search, members are
+     * joined with the 'bool' of the group's first member, and groups whose
+     * first member has bool 'NOT' are collected as exclusions.  Entries
+     * containing 'lookfor' are treated as basic searches against the index
+     * named in 'field' or 'index' (default 'srw.kw'); a colon-separated index
+     * list is searched with OR.
+     *
+     * @param array $search An array of search parameters
+     *
+     * @return string       The query (empty string if nothing was searchable)
+     */
+    public function buildQuery($search)
+    {
+        // Start from an empty string so that every later append is
+        // well-defined, even when $search is not an array:
+        $query = '';
+        $groups = array();
+        $excludes = array();
+        if (is_array($search)) {
+            foreach ($search as $params) {
+                // Advanced Search
+                if (isset($params['group'])) {
+                    $thisGroup = array();
+                    // Process each search group
+                    foreach ($params['group'] as $group) {
+                        // Build this group individually as a basic search
+                        $thisGroup[] = $this->buildQuery(array($group));
+                    }
+                    // Is this an exclusion (NOT) group or a normal group?
+                    if ($params['group'][0]['bool'] == 'NOT') {
+                        $excludes[] = join(" OR ", $thisGroup);
+                    } else {
+                        $groups[]
+                            = join(" ".$params['group'][0]['bool']." ", $thisGroup);
+                    }
+                }
+
+                // Basic Search
+                if (isset($params['lookfor']) && $params['lookfor'] != '') {
+                    // Clean and validate input -- note that index may be in a
+                    // different field depending on whether this is a basic or
+                    // advanced search.
+                    $lookfor = str_replace('"', '', $params['lookfor']);
+                    if (isset($params['field'])) {
+                        $index = $params['field'];
+                    } else if (isset($params['index'])) {
+                        $index = $params['index'];
+                    } else {
+                        $index = 'srw.kw';
+                    }
+
+                    // The index may contain multiple parts -- we want to search all
+                    // listed index fields:
+                    $index = explode(':', $index);
+                    $clauses = array();
+                    foreach ($index as $currentIndex) {
+                        $clauses[] = "{$currentIndex} all \"{$lookfor}\"";
+                    }
+                    $query .= '(' . implode(' OR ', $clauses) . ')';
+                }
+            }
+        }
+
+        // Put our advanced search together
+        if (count($groups) > 0) {
+            // Fall back to AND when the caller did not say how to join groups:
+            $join = isset($search[0]['join']) ? $search[0]['join'] : 'AND';
+            $query = "(" . join(") " . $join . " (", $groups) . ")";
+        }
+        // and concatenate exclusion after that
+        if (count($excludes) > 0) {
+            $query .= " NOT ((" . join(") OR (", $excludes) . "))";
+        }
+
+        return $query;
+    }
+}
diff --git a/module/VuFind/src/VuFind/Connection/WorldCatUtils.php b/module/VuFind/src/VuFind/Connection/WorldCatUtils.php
new file mode 100644
index 00000000000..d0e9142627e
--- /dev/null
+++ b/module/VuFind/src/VuFind/Connection/WorldCatUtils.php
@@ -0,0 +1,466 @@
+<?php
+/**
+ * World Cat Utilities
+ *
+ * PHP version 5
+ *
+ * Copyright (C) Villanova University 2010.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * @category VuFind2
+ * @package  Support_Classes
+ * @author   Demian Katz <demian.katz@villanova.edu>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     http://vufind.org/wiki/system_classes Wiki
+ */
+namespace VuFind\Connection;
+use VuFind\Config\Reader as ConfigReader, VuFind\Log\Logger,
+    VuFind\XSLT\Processor as XSLTProcessor;
+
+/**
+ * World Cat Utilities
+ *
+ * Class for accessing helpful WorldCat APIs.
+ *
+ * @category VuFind2
+ * @package  Support_Classes
+ * @author   Demian Katz <demian.katz@villanova.edu>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     http://vufind.org/wiki/system_classes Wiki
+ */
+class WorldCatUtils
+{
+    /**
+     * Get the WorldCat ID from the config file.  The value is looked up once
+     * and then cached for the rest of the request.
+     *
+     * @return string|bool Configured ID, or false if none is set
+     */
+    protected function getWorldCatId()
+    {
+        static $wcId = null;
+        if (is_null($wcId)) {
+            $config = ConfigReader::getConfig();
+            $wcId = isset($config->WorldCat->id)
+                ? $config->WorldCat->id : false;
+        }
+        return $wcId;
+    }
+
+    /**
+     * Support method for getXISBN() and getXOCLCNUM() -- read a CSV resource
+     * and return the first column of every row.
+     *
+     * @param string $url URL of the CSV resource
+     *
+     * @return array First-column values (empty if the URL cannot be opened)
+     */
+    protected function fetchFirstCsvColumn($url)
+    {
+        $values = array();
+        // Failure to open is deliberately silent: callers treat an
+        // unreachable service as "no related items".
+        if ($fp = @fopen($url, "r")) {
+            while (($data = fgetcsv($fp, 1000, ",")) !== false) {
+                $values[] = isset($data[0]) ? $data[0] : '';
+            }
+            // Release the stream instead of leaving it open until shutdown:
+            fclose($fp);
+        }
+        return $values;
+    }
+
+    /**
+     * Retrieve results from the index using the XISBN service.
+     *
+     * @param string|array $isbn ISBN of main record (if an array is passed,
+     * only its first element is used)
+     *
+     * @return array       ISBNs for related items (may be empty).
+     */
+    public function getXISBN($isbn)
+    {
+        // Build URL
+        $url = 'http://xisbn.worldcat.org/webservices/xid/isbn/' .
+                urlencode(is_array($isbn) ? $isbn[0] : $isbn) .
+               '?method=getEditions&format=csv';
+        if ($wcId = $this->getWorldCatId()) {
+            $url .= '&ai=' . urlencode($wcId);
+        }
+
+        // Print Debug code
+        Logger::getInstance()->debug("XISBN: $url");
+
+        // Fetch results
+        $isbns = array();
+        foreach ($this->fetchFirstCsvColumn($url) as $raw) {
+            // Filter out non-ISBN characters and validate the length of
+            // whatever is left behind; this will prevent us from treating
+            // error messages like "invalidId" or "overlimit" as ISBNs.
+            $candidate = preg_replace('/[^0-9xX]/', '', $raw);
+            if (strlen($candidate) >= 10) {
+                $isbns[] = $candidate;
+            }
+        }
+
+        return $isbns;
+    }
+
+    /**
+     * Retrieve results from the index using the XOCLCNUM service.
+     *
+     * @param string|array $oclc OCLC number of main record (if an array is
+     * passed, only its first element is used)
+     *
+     * @return array       OCLC numbers for related items (may be empty).
+     */
+    public function getXOCLCNUM($oclc)
+    {
+        // Build URL
+        $url = 'http://xisbn.worldcat.org/webservices/xid/oclcnum/' .
+                urlencode(is_array($oclc) ? $oclc[0] : $oclc) .
+               '?method=getEditions&format=csv';
+        if ($wcId = $this->getWorldCatId()) {
+            $url .= '&ai=' . urlencode($wcId);
+        }
+
+        // Print Debug code
+        Logger::getInstance()->debug("XOCLCNUM: $url");
+
+        // Fetch results
+        $results = array();
+        foreach ($this->fetchFirstCsvColumn($url) as $raw) {
+            // Filter out non-numeric characters; anything that ends up empty
+            // (for example error messages like "invalidId" or "overlimit") is
+            // not an OCLC number and is skipped.
+            $current = preg_replace('/[^0-9]/', '', $raw);
+            if (empty($current)) {
+                continue;
+            }
+            $results[] = $current;
+        }
+
+        return $results;
+    }
+
+    /**
+     * Retrieve results from the index using the XISSN service.
+     *
+     * @param string|array $issn ISSN of main record (if an array is passed,
+     * only its first element is used)
+     *
+     * @return array       ISSNs for related items (may be empty).
+     */
+    public function getXISSN($issn)
+    {
+        // Build URL
+        $url = 'http://xissn.worldcat.org/webservices/xid/issn/' .
+                urlencode(is_array($issn) ? $issn[0] : $issn) .
+               '?method=getEditions&format=xml';
+        if ($wcId = $this->getWorldCatId()) {
+            $url .= '&ai=' . urlencode($wcId);
+        }
+
+        // Print Debug code
+        Logger::getInstance()->debug("XISSN: $url");
+
+        // Fetch results
+        $issns = array();
+        $xml = @file_get_contents($url);
+        if (!empty($xml)) {
+            $data = simplexml_load_string($xml);
+            if (!empty($data) && isset($data->group->issn)
+                && count($data->group->issn) > 0
+            ) {
+                foreach ($data->group->issn as $related) {
+                    $issns[] = (string)$related;
+                }
+            }
+        }
+
+        return $issns;
+    }
+
+    /**
+     * Support function for getIdentitiesQuery(); is the provided name component
+     * worth considering as a first or last name?
+     *
+     * @param string $current Name chunk to examine.
+     *
+     * @return boolean        Should we use this as a name?
+     */
+    protected function isUsefulNameChunk($current)
+    {
+        // Some common prefixes and suffixes that we do not want to treat as first
+        // or last names:
+        static $badChunks = array('jr', 'sr', 'ii', 'iii', 'iv', 'v', 'vi', 'vii',
+            'viii', 'ix', 'x', 'junior', 'senior', 'esq', 'mr', 'mrs', 'miss', 'dr');
+
+        // Clean up the input string:
+        $current = str_replace('.', '', strtolower($current));
+
+        // We don't want to use empty, numeric or known bad strings!
+        if (empty($current) || is_numeric($current)
+            || in_array($current, $badChunks)
+        ) {
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Support function for getRelatedIdentities() -- parse a name into a query
+     * for WorldCat Identities.
+     *
+     * @param string $name Name to parse.
+     *
+     * @return mixed       False if useless string; Identities query otherwise.
+     */
+    protected function getIdentitiesQuery($name)
+    {
+        // Clean up user query and try to find name components within it:
+        $name = trim(str_replace(array('"', ',', '-'), ' ', $name));
+        $parts = explode(' ', $name);
+        $first = $last = '';
+        foreach ($parts as $current) {
+            $current = trim($current);
+            // Do we want to store this chunk?
+            if ($this->isUsefulNameChunk($current)) {
+                // Is the first name empty?  If so, save this there.
+                if (empty($first)) {
+                    $first = $current;
+                } else if (strlen($current) > 2 || empty($last)) {
+                    // If this isn't the first name, we always want to save it as the
+                    // last name UNLESS it's an initial, in which case we'll only
+                    // save it if we don't already have something better!
+                    $last = $current;
+                }
+            }
+        }
+
+        // Fail if we found no useful name components; otherwise, build up the query
+        // based on whether we found a first name only or both first and last names:
+        if (empty($first) && empty($last)) {
+            return false;
+        } else if (empty($last)) {
+            return "local.Name=\"{$first}\"";
+        } else {
+            return "local.Name=\"{$last}\" and local.Name=\"{$first}\"";
+        }
+    }
+
+    /**
+     * Support method for getRelatedIdentities() -- extract subject headings from
+     * the current node of the Identities API response.  Only headings whose
+     * 'tag' attribute is 650 are kept.
+     *
+     * @param object $current Current response node (the caller passes a
+     * SimpleXMLElement).
+     *
+     * @return array          Extracted subject headings.
+     */
+    protected function processIdentitiesSubjects($current)
+    {
+        // Normalize subjects array if it has only a single entry:
+        $subjects = isset($current->fastHeadings->fast) ?
+            $current->fastHeadings->fast : null;
+        if (isset($subjects->tag)) {
+            $subjects = array($subjects);
+        }
+
+        // Collect subjects for current name:
+        $retVal = array();
+        if (!is_null($subjects) && count($subjects) > 0) {
+            foreach ($subjects as $currentSubject) {
+                if ($currentSubject['tag'] == '650') {
+                    $text = (string)$currentSubject;
+                    if (!empty($text)) {
+                        // Double dash will cause problems with Solr searches, so
+                        // represent subject heading subdivisions differently:
+                        $retVal[] = str_replace('--', ': ', $text);
+                    }
+                }
+            }
+        }
+
+        return $retVal;
+    }
+
+    /**
+     * Given a name string, get related identities.  Inspired by Eric Lease
+     * Morgan's Name Finder demo (http://zoia.library.nd.edu/sandbox/name-finder/).
+     * Return value is an associative array where key = author name and value =
+     * subjects used in that author's works.
+     *
+     * @param string $name       Name to search for (any format).
+     * @param int    $maxRecords Max # of records to read from API (more = slower).
+     *
+     * @return mixed             False on error, otherwise array of related names.
+     */
+    public function getRelatedIdentities($name, $maxRecords = 10)
+    {
+        // Build the WorldCat Identities API query:
+        $query = $this->getIdentitiesQuery($name);
+        if (!$query) {
+            return false;
+        }
+
+        // Get the API response:
+        $url = "http://worldcat.org/identities/search/PersonalIdentities" .
+            "?query=" . urlencode($query) .
+            "&version=1.1" .
+            "&operation=searchRetrieve" .
+            "&recordSchema=info%3Asrw%2Fschema%2F1%2FIdentities" .
+            "&maximumRecords=" . intval($maxRecords) .
+            "&startRecord=1" .
+            "&resultSetTTL=300" .
+            "&recordPacking=xml" .
+            "&recordXPath=" .
+            "&sortKeys=holdingscount";
+        $xml = @file_get_contents($url);
+
+        // Give up if the request failed; there is nothing to parse:
+        if (empty($xml)) {
+            return false;
+        }
+
+        // Translate XML to object:
+        $data = simplexml_load_string($xml);
+
+        // Give up if expected data is missing:
+        if (!isset($data->records->record)) {
+            return false;
+        }
+
+        // Loop through data and collect names and related subjects:
+        $output = array();
+        foreach ($data->records->record as $current) {
+            // Build current name string:
+            $current = isset($current->recordData->Identity->nameInfo) ?
+                $current->recordData->Identity->nameInfo : null;
+            if (isset($current['type']) && $current['type'] == 'personal'
+                && !empty($current->rawName->suba)
+            ) {
+                $currentName = $current->rawName->suba .
+                    (isset($current->rawName->subd) ?
+                        ', ' . $current->rawName->subd : '');
+
+                // Get subject list for current identity; if the current name is a
+                // duplicate of a previous name, merge the subjects together:
+                $subjects = $this->processIdentitiesSubjects($current);
+                $output[$currentName] = isset($output[$currentName])
+                    ? array_unique(array_merge($output[$currentName], $subjects))
+                    : $subjects;
+            }
+        }
+
+        return $output;
+    }
+
+    /**
+     * Given a subject term, get related (broader/narrower/alternate) terms.
+     * Loosely adapted from Eric Lease Morgan's Term Finder demo (see
+     * http://zoia.library.nd.edu/sandbox/term-finder/).  Note that this is
+     * intended as a fairly fuzzy search -- $term need not be an exact subject
+     * heading; this function will return best guess matches in the 'exact'
+     * key, possible broader terms in the 'broader' key and possible narrower
+     * terms in the 'narrower' key of the return array.
+     *
+     * @param string $term       Term to get related terms for.
+     * @param string $vocabulary Vocabulary to search (default = LCSH; see OCLC docs
+     * for other options).
+     * @param int    $maxRecords Max # of records to read from API (more = slower).
+     *
+     * @return mixed             False on error, otherwise array of related terms,
+     * keyed by category.
+     */
+    public function getRelatedTerms($term, $vocabulary = 'lcsh', $maxRecords = 10)
+    {
+        // Strip quotes from incoming term:
+        $term = str_replace('"', '', $term);
+
+        // Build the request URL:
+        $url = "http://tspilot.oclc.org/" . urlencode($vocabulary) . "/?" .
+            // Search for the user-supplied term in both preferred and alternative
+            // fields!
+            "query=oclcts.preferredTerm+%3D+%22" . urlencode($term) .
+                "%22+OR+oclcts.alternativeTerms+%3D+%22" . urlencode($term) . "%22" .
+            "&version=1.1" .
+            "&operation=searchRetrieve" .
+            "&recordSchema=info%3Asrw%2Fschema%2F1%2Fmarcxml-v1.1" .
+            "&maximumRecords=" . intval($maxRecords) .
+            "&startRecord=1" .
+            "&resultSetTTL=300" .
+            "&recordPacking=xml" .
+            "&recordXPath=" .
+            "&sortKeys=recordcount";
+
+        // Get the API response; give up right away if the request failed:
+        $data = @file_get_contents($url);
+        if (empty($data)) {
+            return false;
+        }
+
+        // Extract plain MARCXML from the WorldCat response:
+        $marcxml = XSLTProcessor::process('wcterms-marcxml.xsl', $data);
+
+        // Try to parse the MARCXML into a File_MARC object; if this fails,
+        // we probably have bad MARCXML, which may indicate an API failure
+        // or an empty record set.  Just give up if this happens!  (The leading
+        // backslashes are required: File_MARC lives in the global namespace.)
+        try {
+            $marc = new \File_MARCXML($marcxml, \File_MARCXML::SOURCE_STRING);
+        } catch (\File_MARC_Exception $e) {
+            return false;
+        }
+
+        // Initialize arrays:
+        $exact = array();
+        $broader = array();
+        $narrower = array();
+
+        while ($record = $marc->next()) {
+            // Get exact terms:
+            $actual = $record->getField('150');
+            if ($actual) {
+                $main = $actual->getSubfield('a');
+                if ($main) {
+                    // Some versions of File_MARCXML seem to have trouble returning
+                    // strings properly (giving back XML objects instead); let's
+                    // cast to string to be sure we get what we expect!
+                    $main = (string)$main->getData();
+
+                    // Add subdivisions:
+                    $subdivisions = $actual->getSubfields('x');
+                    if ($subdivisions) {
+                        foreach ($subdivisions as $current) {
+                            $main .= ', ' . (string)$current->getData();
+                        }
+                    }
+
+                    // Only save the actual term if it is not a subset of the
+                    // requested term.
+                    if (!stristr($term, $main)) {
+                        $exact[] = $main;
+                    }
+                }
+            }
+
+            // Get broader/narrower terms:
+            $related = $record->getFields('550');
+            foreach ($related as $current) {
+                $type = $current->getSubfield('w');
+                $value = $current->getSubfield('a');
+                if ($type && $value) {
+                    $type = (string)$type->getData();
+                    $value = (string)$value->getData();
+                    if ($type == 'g') {
+                        // Don't save exact matches to the user-entered term:
+                        if (strcasecmp($term, $value) != 0) {
+                            $broader[] = $value;
+                        }
+                    } else if ($type == 'h') {
+                        // Don't save exact matches to the user-entered term:
+                        if (strcasecmp($term, $value) != 0) {
+                            $narrower[] = $value;
+                        }
+                    }
+                }
+            }
+        }
+
+        // Send back everything we found, sorted and filtered for uniqueness; note
+        // that we do NOT sort FAST results since they support relevance ranking.
+        // As of this writing, other vocabularies do not support relevance.
+        if ($vocabulary !== 'fast') {
+            natcasesort($exact);
+            natcasesort($broader);
+            natcasesort($narrower);
+        }
+        return array(
+            'exact' => array_unique($exact),
+            'broader' => array_unique($broader),
+            'narrower' => array_unique($narrower)
+        );
+    }
+}
\ No newline at end of file
diff --git a/module/VuFind/src/VuFind/XSLT/Processor.php b/module/VuFind/src/VuFind/XSLT/Processor.php
new file mode 100644
index 00000000000..3023e29c434
--- /dev/null
+++ b/module/VuFind/src/VuFind/XSLT/Processor.php
@@ -0,0 +1,62 @@
+<?php
+/**
+ * VuFind XSLT wrapper
+ *
+ * PHP version 5
+ *
+ * Copyright (C) Villanova University 2010.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * @category VuFind2
+ * @package  Support_Classes
+ * @author   Demian Katz <demian.katz@villanova.edu>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     http://vufind.org/wiki/ Wiki
+ */
+namespace VuFind\XSLT;
+
+/**
+ * VuFind XSLT wrapper
+ *
+ * @category VuFind2
+ * @package  Support_Classes
+ * @author   Demian Katz <demian.katz@villanova.edu>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     http://vufind.org/wiki/ Wiki
+ */
+class Processor
+{
+    /**
+     * Perform an XSLT transformation and return the results.
+     *
+     * @param string $xslt Name of stylesheet (relative to the xsl directory
+     * under APPLICATION_PATH)
+     * @param string $xml  XML to transform with stylesheet
+     *
+     * @return string      Transformed XML (empty string if the input is empty,
+     * the input cannot be parsed, or the stylesheet cannot be loaded)
+     */
+    public static function process($xslt, $xml)
+    {
+        // Nothing to transform (callers may hand us the false/empty result of
+        // a failed download); bail out before touching the XML parser.
+        if (empty($xml)) {
+            return '';
+        }
+
+        // DOMDocument and XSLTProcessor are global classes, so they need a
+        // leading backslash inside this namespace; unqualified names would
+        // resolve to VuFind\XSLT\DOMDocument and VuFind\XSLT\XSLTProcessor.
+        $style = new \DOMDocument();
+        // TODO: support local overrides
+        if (!$style->load(APPLICATION_PATH . '/xsl/' . $xslt)) {
+            return '';
+        }
+        $xsl = new \XSLTProcessor();
+        $xsl->importStyleSheet($style);
+        $doc = new \DOMDocument();
+        if ($doc->loadXML($xml)) {
+            return $xsl->transformToXML($doc);
+        }
+        return '';
+    }
+}
--
GitLab