diff --git a/import/archivesspace.properties b/import/archivesspace.properties
new file mode 100644
index 0000000000000000000000000000000000000000..108e6f6b6fbc90b1861a7dd4db735b54b8151f18
--- /dev/null
+++ b/import/archivesspace.properties
@@ -0,0 +1,30 @@
+; XSLT Import Settings for ArchivesSpace
+[General]
+; REQUIRED: Name of XSLT file to apply. Path is relative to the import/xsl directory
+; of the VuFind installation (or your local override directory).
+xslt = archivesspace.xsl
+; OPTIONAL: PHP function(s) to register for use within XSLT file. You may repeat
+; this line to register multiple PHP functions.
+;php_function[] = str_replace
+; OPTIONAL: PHP class filled with public static functions for use by the XSLT file.
+; The class must live within a PHP namespace. You may specify a fully-qualified
+; name; if you do not include a namespace, the class will automatically be assumed
+; to live in the \VuFind\XSLT\Import namespace.
+custom_class[] = VuFind
+; OPTIONAL: If true, all custom_class settings above will be passed to the XSLT with
+; their namespaces stripped off; for example, \VuFind\XSLT\Import\VuFind would be
+; treated as \VuFind in XSLT files. This allows more compact syntax within XSLT
+; files, but it can lead to name conflicts if used carelessly. If set to false, you
+; must use fully-qualified names in your XSLT. The false setting is recommended, but
+; the current default is "true" for compatibility with legacy configurations.
+truncate_custom_class = true
+
+; XSLT parameters -- any key/value pairs set here will be passed as parameters to
+; the XSLT file, allowing local values to be set without modifying XSLT code.
+[Parameters]
+institution = "My University"
+collection = "Archives"
+; By default, any URL beginning with "http" will be indexed; however, you can
+; specify a more narrow prefix here if you wish to filter to a particular subset
+; of URLs indexed into VuFind.
+;urlPrefix = "http://hdl.handle.net"
diff --git a/import/xsl/archivesspace.xsl b/import/xsl/archivesspace.xsl
new file mode 100644
index 0000000000000000000000000000000000000000..61a700856979152d77524c9896bce9ab3857dd8f
--- /dev/null
+++ b/import/xsl/archivesspace.xsl
@@ -0,0 +1,149 @@
+<!-- available fields are defined in solr/biblio/conf/schema.xml -->
+<xsl:stylesheet version="1.0"
+    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+    xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/"
+    xmlns:dc="http://purl.org/dc/elements/1.1/"
+    xmlns:php="http://php.net/xsl"
+    xmlns:xlink="http://www.w3.org/2001/XMLSchema-instance">
+    <xsl:output method="xml" indent="yes" encoding="utf-8"/>
+    <xsl:param name="institution">My University</xsl:param>
+    <xsl:param name="collection">Archives</xsl:param>
+    <xsl:param name="urlPrefix">http</xsl:param>
+    <xsl:template match="oai_dc:dc">
+        <add>
+            <doc>
+                <!-- ID -->
+                <!-- Important: This relies on an <identifier> tag being injected by the OAI-PMH harvester. -->
+                <field name="id">
+                    <xsl:value-of select="//identifier"/>
+                </field>
+
+                <!-- RECORD FORMAT -->
+                <field name="record_format">archivesspace</field>
+
+                <!-- FULLRECORD -->
+                <!-- disabled for now; records are so large that they cause memory problems!
+                <field name="fullrecord">
+                    <xsl:copy-of select="php:function('VuFind::xmlAsText', //oai_dc:dc)"/>
+                </field>
+                -->
+
+                <!-- ALLFIELDS -->
+                <field name="allfields">
+                    <xsl:value-of select="normalize-space(string(//oai_dc:dc))"/>
+                </field>
+
+                <!-- INSTITUTION -->
+                <field name="institution">
+                    <xsl:value-of select="$institution" />
+                </field>
+
+                <!-- COLLECTION -->
+                <field name="collection">
+                    <xsl:value-of select="$collection" />
+                </field>
+
+                <!-- LANGUAGE -->
+                <xsl:if test="//dc:language">
+                    <xsl:for-each select="//dc:language">
+                        <xsl:if test="string-length() > 0">
+                            <field name="language">
+                                <xsl:value-of select="php:function('VuFind::mapString', normalize-space(string(.)), 'language_map.properties')"/>
+                            </field>
+                        </xsl:if>
+                    </xsl:for-each>
+                </xsl:if>
+
+                <!-- SUBJECT -->
+                <xsl:if test="//dc:subject">
+                    <xsl:for-each select="//dc:subject">
+                        <xsl:if test="string-length() > 0">
+                            <field name="topic">
+                                <xsl:value-of select="normalize-space()"/>
+                            </field>
+                        </xsl:if>
+                    </xsl:for-each>
+                </xsl:if>
+
+                <!-- DESCRIPTION -->
+                <xsl:if test="//dc:description">
+                    <field name="description">
+                        <xsl:value-of select="//dc:description" />
+                    </field>
+                </xsl:if>
+
+                <!-- ADVISOR / CONTRIBUTOR -->
+                <xsl:if test="//dc:contributor[normalize-space()]">
+                    <field name="author2">
+                        <xsl:value-of select="//dc:contributor[normalize-space()]" />
+                    </field>
+                </xsl:if>
+
+                <!-- FORMAT / TYPE -->
+                <field name="format">Archival Material</field>
+                <xsl:if test="//dc:type">
+                    <field name="format">
+                        <xsl:value-of select="//dc:type" />
+                    </field>
+                </xsl:if>
+
+                <!-- AUTHOR -->
+                <xsl:if test="//dc:creator[normalize-space()]">
+                    <xsl:for-each select="//dc:creator[normalize-space()]">
+                        <field name="author">
+                            <xsl:value-of select="normalize-space()"/>
+                        </field>
+                        <!-- blank creators are skipped, so position()=1 is the first usable author (used for sorting) -->
+                        <xsl:if test="position()=1">
+                            <field name="author_sort">
+                                <xsl:value-of select="normalize-space()"/>
+                            </field>
+                        </xsl:if>
+                    </xsl:for-each>
+                </xsl:if>
+
+                <!-- TITLE -->
+                <xsl:if test="//dc:title[normalize-space()]">
+                    <field name="title">
+                        <xsl:value-of select="//dc:title[normalize-space()]"/>
+                    </field>
+                    <field name="title_short">
+                        <xsl:value-of select="//dc:title[normalize-space()]"/>
+                    </field>
+                    <field name="title_full">
+                        <xsl:value-of select="//dc:title[normalize-space()]"/>
+                    </field>
+                    <field name="title_sort">
+                        <xsl:value-of select="php:function('VuFind::stripArticles', string(//dc:title[normalize-space()]))"/>
+                    </field>
+                </xsl:if>
+
+                <!-- PUBLISHER -->
+                <xsl:if test="//dc:publisher[normalize-space()]">
+                    <field name="publisher">
+                        <xsl:value-of select="php:function('VuFind::implode', ', ', //dc:publisher[normalize-space()])"/>
+                    </field>
+                </xsl:if>
+
+                <!-- PUBLISHDATE -->
+                <xsl:if test="//dc:date[normalize-space()]">
+                    <field name="publishDate">
+                        <xsl:value-of select="php:function('VuFind::extractBestDateOrRange', //dc:date[normalize-space()])"/>
+                    </field>
+                    <field name="publishDateSort">
+                        <xsl:value-of select="php:function('VuFind::extractEarliestYear', //dc:date[normalize-space()])"/>
+                    </field>
+                </xsl:if>
+
+                <!-- URL -->
+                <xsl:for-each select="//dc:identifier">
+                    <xsl:if test="substring(., 1, string-length($urlPrefix)) = $urlPrefix">
+                        <field name="url">
+                            <xsl:value-of select="." />
+                        </field>
+                    </xsl:if>
+                </xsl:for-each>
+            </doc>
+        </add>
+    </xsl:template>
+</xsl:stylesheet>
diff --git a/languages/en.ini b/languages/en.ini
index 377f405c01cf0296e6d0350156403ec1dd25dc54..7bd03830f6788b259050880f9738b981fca561a8 100644
--- a/languages/en.ini
+++ b/languages/en.ini
@@ -69,6 +69,7 @@ and = "and"
 anonymous_tags = "Anonymous Tags"
 APA Citation = "APA Citation"
 applied_filter = "Applied Filter"
+Archival Material = "Archival Material"
 Article = "Article"
 Ask a Librarian = "Ask a Librarian"
 Associated country = "Associated country"
diff --git a/module/VuFind/src/VuFind/RecordDriver/PluginManager.php b/module/VuFind/src/VuFind/RecordDriver/PluginManager.php
index e505c64c23f672bad39ceef148dfae597e76807d..5733befed4b7372c0553fba0fd94e62a37ad0638 100644
--- a/module/VuFind/src/VuFind/RecordDriver/PluginManager.php
+++ b/module/VuFind/src/VuFind/RecordDriver/PluginManager.php
@@ -54,6 +54,7 @@ class PluginManager extends \VuFind\ServiceManager\AbstractPluginManager
         'pazpar2' => Pazpar2::class,
         'primo' => Primo::class,
         'search2default' => Search2Default::class,
+        'solrarchivesspace' => SolrArchivesSpace::class,
         'solrauth' => SolrAuthMarc::class, // legacy name
         'solrauthdefault' => SolrAuthDefault::class,
         'solrauthmarc' => SolrAuthMarc::class,
@@ -91,6 +92,7 @@ class PluginManager extends \VuFind\ServiceManager\AbstractPluginManager
         Pazpar2::class => NameBasedConfigFactory::class,
         Primo::class => NameBasedConfigFactory::class,
         Search2Default::class => SolrDefaultFactory::class,
+        SolrArchivesSpace::class => SolrDefaultFactory::class,
         SolrAuthDefault::class => SolrDefaultWithoutSearchServiceFactory::class,
         SolrAuthMarc::class => SolrDefaultWithoutSearchServiceFactory::class,
         SolrDefault::class => SolrDefaultFactory::class,
diff --git a/module/VuFind/src/VuFind/RecordDriver/SolrArchivesSpace.php b/module/VuFind/src/VuFind/RecordDriver/SolrArchivesSpace.php
new file mode 100644
index 0000000000000000000000000000000000000000..fa019d4fe376b0e5e14cc864cbf6d0687961c576
--- /dev/null
+++ b/module/VuFind/src/VuFind/RecordDriver/SolrArchivesSpace.php
@@ -0,0 +1,65 @@
+<?php
+/**
+ * Model for ArchivesSpace records in Solr.
+ *
+ * PHP version 7
+ *
+ * Copyright (C) Villanova University 2020.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ * @category VuFind
+ * @package  RecordDrivers
+ * @author   Demian Katz <demian.katz@villanova.edu>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     https://vufind.org/wiki/development:plugins:record_drivers Wiki
+ */
+namespace VuFind\RecordDriver;
+
+/**
+ * Model for ArchivesSpace records in Solr.
+ *
+ * @category VuFind
+ * @package  RecordDrivers
+ * @author   Demian Katz <demian.katz@villanova.edu>
+ * @license  http://opensource.org/licenses/gpl-2.0.php GNU General Public License
+ * @link     https://vufind.org/wiki/development:plugins:record_drivers Wiki
+ */
+class SolrArchivesSpace extends SolrDefault
+{
+    /**
+     * Return an array of associative URL arrays with one or more of the following
+     * keys:
+     *
+     * <ul>
+     *   <li>desc: URL description text to display (optional)</li>
+     *   <li>url: fully-formed URL (required if 'route' is absent)</li>
+     *   <li>route: VuFind route to build URL with (required if 'url' is absent)</li>
+     *   <li>routeParams: Parameters for route (optional)</li>
+     *   <li>queryString: Query params to append after building route (optional)</li>
+     * </ul>
+     *
+     * @return array
+     */
+    public function getURLs()
+    {
+        $urls = parent::getURLs();
+        // We're going to assume that if there is a link, it's always pointing at
+        // the finding aid in the ArchivesSpace front end.
+        if (isset($urls[0])) {
+            $urls[0]['desc'] = $this->translate('Finding Aid');
+        }
+        return $urls;
+    }
+}
diff --git a/module/VuFind/src/VuFind/XSLT/Import/VuFind.php b/module/VuFind/src/VuFind/XSLT/Import/VuFind.php
index 626c459b7deed25f72d88978932b51050527add9..704e2e7d3f84deadbc9a6d7f1495ed0cabcf6f04 100644
--- a/module/VuFind/src/VuFind/XSLT/Import/VuFind.php
+++ b/module/VuFind/src/VuFind/XSLT/Import/VuFind.php
@@ -450,4 +450,76 @@ class VuFind
         }
         return $dom;
     }
+
+    /**
+     * Proxy the implode PHP function for use in XSL transformation.
+     *
+     * @param string $glue   Glue string
+     * @param array  $pieces DOM elements to join together.
+     *
+     * @return string
+     */
+    public static function implode($glue, $pieces)
+    {
+        $mapper = function ($dom) {
+            return trim($dom->textContent);
+        };
+        return implode($glue, array_map($mapper, $pieces));
+    }
+
+    /**
+     * Try to find the best single year or date range in a set of DOM elements.
+     * Best is defined as the first value to consist of only YYYY or YYYY-ZZZZ,
+     * with no other text. If no "best" match is found, the first value is used.
+     * An empty input set yields an empty string.
+     *
+     * @param array $input DOM elements to search.
+     *
+     * @return string
+     */
+    public static function extractBestDateOrRange($input)
+    {
+        foreach ($input as $current) {
+            if (preg_match('/^\d{4}(-\d{4})?$/', $current->textContent)) {
+                return $current->textContent;
+            }
+        }
+        // No "best" match; fall back to the first value. reset() returns false
+        // for an empty set, so guard against dereferencing a non-object.
+        $first = reset($input);
+        return $first === false ? '' : $first->textContent;
+    }
+
+    /**
+     * Try to find a four-digit year in a set of DOM elements.
+     *
+     * @param array $input DOM elements to search.
+     *
+     * @return string
+     */
+    public static function extractEarliestYear($input)
+    {
+        $goodMatch = $adequateMatch = '';
+        foreach ($input as $current) {
+            // Best match -- a four-digit string starting with 1 or 2
+            preg_match_all('/[12]\d{3}/', $current->textContent, $matches);
+            foreach ($matches[0] as $match) {
+                if (empty($goodMatch) || $goodMatch > $match) {
+                    $goodMatch = $match;
+                }
+            }
+            // Next best match -- any string of four or fewer digits.
+            for ($length = 4; $length > 0; $length--) {
+                preg_match_all(
+                    '/\d{' . $length . '}/', $current->textContent, $matches
+                );
+                foreach ($matches[0] as $match) {
+                    if (strlen($match) > strlen($adequateMatch)) {
+                        $adequateMatch = $match;
+                    }
+                }
+            }
+        }
+        return empty($goodMatch) ? $adequateMatch : $goodMatch;
+    }
 }
diff --git a/module/VuFind/tests/unit-tests/src/VuFindTest/XSLT/Import/VuFindTest.php b/module/VuFind/tests/unit-tests/src/VuFindTest/XSLT/Import/VuFindTest.php
index c2f3b3a6a51c1fa41ff288806e907c9fbb843244..19cedf0dc070ea779bee9f97093d550ac04861db 100644
--- a/module/VuFind/tests/unit-tests/src/VuFindTest/XSLT/Import/VuFindTest.php
+++ b/module/VuFind/tests/unit-tests/src/VuFindTest/XSLT/Import/VuFindTest.php
@@ -158,4 +158,68 @@ class VuFindTest extends \VuFindTest\Unit\DbTestCase
             $expected, simplexml_import_dom(VuFind::explode(',', 'a,b'))->asXml()
         );
     }
+
+    /**
+     * Test the implode helper.
+     *
+     * @return void
+     */
+    public function testImplode()
+    {
+        $domify = function ($input) {
+            return new \DOMElement('foo', $input);
+        };
+        $this->assertEquals(
+            'a.b.c',
+            VuFind::implode('.', array_map($domify, ['a', 'b', 'c']))
+        );
+    }
+
+    /**
+     * Test the extractBestDateOrRange helper.
+     *
+     * @return void
+     */
+    public function testExtractBestDateOrRange()
+    {
+        $data = [
+            '1990' => ['foo', 'bar', '1990'],
+            '1990-1991' => ['foo', '1990-1991', '1992'],
+            'foo' => ['foo', 'bar', 'baz'],
+            '' => [],
+        ];
+        $domify = function ($input) {
+            return new \DOMElement('foo', $input);
+        };
+        foreach ($data as $output => $input) {
+            $this->assertEquals(
+                $output, VuFind::extractBestDateOrRange(
+                    array_map($domify, $input)
+                )
+            );
+        }
+    }
+
+    /**
+     * Test the extractEarliestYear helper.
+     *
+     * @return void
+     */
+    public function testExtractEarliestYear()
+    {
+        $data = [
+            'October 9, 1990 (approx)' => '1990',
+            'the year 0' => '0',
+            'published 1927-1929' => '1927',
+            '2005-1999' => '1999',
+            'there is no year to be found here' => '',
+        ];
+        foreach ($data as $input => $output) {
+            $this->assertEquals(
+                $output, VuFind::extractEarliestYear(
+                    [new \DOMElement('foo', $input)]
+                )
+            );
+        }
+    }
 }