From 3fada5ce39206178031ec55aacb5d2bbc8707ebc Mon Sep 17 00:00:00 2001
From: Ere Maijala <ere.maijala@helsinki.fi>
Date: Fri, 6 Jan 2017 22:29:11 +0200
Subject: [PATCH] Added support for extended subject heading information (#879)

- Information regarding subject type and source can now be returned and exposed through the search API.
---
 config/vufind/SearchApiRecordFields.yaml      | 13 +++-
 module/VuFind/src/VuFind/RecordDriver/EIT.php | 12 +++-
 .../VuFind/src/VuFind/RecordDriver/Primo.php  | 15 +++-
 .../src/VuFind/RecordDriver/SolrDefault.php   | 14 +++-
 .../src/VuFind/RecordDriver/SolrMarc.php      | 71 ++++++++++++++++---
 .../VuFind/src/VuFind/RecordDriver/Summon.php | 53 ++++++++------
 .../VuFindTest/RecordDriver/SolrMarcTest.php  | 27 +++++++
 .../VuFindApi/Formatter/RecordFormatter.php   | 15 ++++
 .../Formatter/RecordFormatterTest.php         | 14 ++--
 themes/root/templates/searchapi/swagger.phtml | 61 ++++++++++------
 10 files changed, 232 insertions(+), 63 deletions(-)

diff --git a/config/vufind/SearchApiRecordFields.yaml b/config/vufind/SearchApiRecordFields.yaml
index 5d239b76201..4f67a719b64 100644
--- a/config/vufind/SearchApiRecordFields.yaml
+++ b/config/vufind/SearchApiRecordFields.yaml
@@ -321,10 +321,19 @@ shortTitle:
 subjects:
   vufind.method: getAllSubjectHeadings
   vufind.default: true
-  description: Subject headings
+  description: >
+    Subject headings as an array from the least specific to the most specific
   type: array
   items:
-    type: string
+    type: array
+    items:
+      type: string
+subjectsExtended:
+  vufind.method: "Formatter::getExtendedSubjectHeadings"
+  description: Subject headings with type and source information
+  type: array
+  items:
+    $ref: '#/definitions/Subject'
 subTitle:
   vufind.method: getSubTitle
   description: Subtitle
diff --git a/module/VuFind/src/VuFind/RecordDriver/EIT.php b/module/VuFind/src/VuFind/RecordDriver/EIT.php
index 009d4d53f3b..64b471f8692 100644
--- a/module/VuFind/src/VuFind/RecordDriver/EIT.php
+++ b/module/VuFind/src/VuFind/RecordDriver/EIT.php
@@ -83,9 +83,15 @@ class EIT extends SolrDefault
      * returned as an array of chunks, increasing from least specific to most
      * specific.
      *
+     * @param bool $extended Whether to return a keyed array with the following
+     * keys:
+     * - heading: the actual subject heading chunks
+     * - type: heading type
+     * - source: source vocabulary
+     *
      * @return array
      */
-    public function getAllSubjectHeadings()
+    public function getAllSubjectHeadings($extended = false)
     {
         $su = isset($this->controlInfo['artinfo']['su'])
             ? $this->controlInfo['artinfo']['su'] : [];
@@ -94,7 +100,9 @@ class EIT extends SolrDefault
         // format, so we'll just send each value as a single chunk.
         $retval = [];
         foreach ($su as $s) {
-            $retval[] = [$s];
+            $retval[] = $extended
+                ? ['heading' => [$s], 'type' => '', 'source' => '']
+                : [$s];
         }
         return $retval;
     }
diff --git a/module/VuFind/src/VuFind/RecordDriver/Primo.php b/module/VuFind/src/VuFind/RecordDriver/Primo.php
index 3641155241c..fee58726efc 100644
--- a/module/VuFind/src/VuFind/RecordDriver/Primo.php
+++ b/module/VuFind/src/VuFind/RecordDriver/Primo.php
@@ -92,14 +92,23 @@ class Primo extends SolrDefault
      * Get an array of all subject headings associated with the record
      * (may be empty).
      *
+     * @param bool $extended Whether to return a keyed array with the following
+     * keys:
+     * - heading: the actual subject heading chunks
+     * - type: heading type
+     * - source: source vocabulary
+     *
      * @return array
      */
-    public function getAllSubjectHeadings()
+    public function getAllSubjectHeadings($extended = false)
     {
         $base = isset($this->fields['subjects'])
             ? $this->fields['subjects'] : [];
-        $callback = function ($str) {
-            return array_map('trim', explode(' -- ', $str));
+        $callback = function ($str) use ($extended) {
+            $s = array_map('trim', explode(' -- ', $str));
+            return $extended
+                ? ['heading' => $s, 'type' => '', 'source' => '']
+                : $s;
         };
         return array_map($callback, $base);
     }
diff --git a/module/VuFind/src/VuFind/RecordDriver/SolrDefault.php b/module/VuFind/src/VuFind/RecordDriver/SolrDefault.php
index 85ceedb4e01..e9ffad89c05 100644
--- a/module/VuFind/src/VuFind/RecordDriver/SolrDefault.php
+++ b/module/VuFind/src/VuFind/RecordDriver/SolrDefault.php
@@ -199,9 +199,15 @@ class SolrDefault extends AbstractBase
      * returned as an array of chunks, increasing from least specific to most
      * specific.
      *
+     * @param bool $extended Whether to return a keyed array with the following
+     * keys:
+     * - heading: the actual subject heading chunks
+     * - type: heading type
+     * - source: source vocabulary
+     *
      * @return array
      */
-    public function getAllSubjectHeadings()
+    public function getAllSubjectHeadings($extended = false)
     {
         $headings = [];
         foreach (['topic', 'geographic', 'genre', 'era'] as $field) {
@@ -213,8 +219,10 @@ class SolrDefault extends AbstractBase
         // The Solr index doesn't currently store subject headings in a broken-down
         // format, so we'll just send each value as a single chunk.  Other record
         // drivers (i.e. MARC) can offer this data in a more granular format.
-        $callback = function ($i) {
-            return [$i];
+        $callback = function ($i) use ($extended) {
+            return $extended
+                ? ['heading' => [$i], 'type' => '', 'source' => '']
+                : [$i];
         };
         return array_map($callback, array_unique($headings));
     }
diff --git a/module/VuFind/src/VuFind/RecordDriver/SolrMarc.php b/module/VuFind/src/VuFind/RecordDriver/SolrMarc.php
index d16f7440294..1e9d6995179 100644
--- a/module/VuFind/src/VuFind/RecordDriver/SolrMarc.php
+++ b/module/VuFind/src/VuFind/RecordDriver/SolrMarc.php
@@ -53,6 +53,42 @@ class SolrMarc extends SolrDefault
      */
     protected $lazyMarcRecord = null;
 
+    /**
+     * Fields that may contain subject headings, and their descriptions
+     *
+     * @var array
+     */
+    protected $subjectFields = [
+        '600' => 'personal name',
+        '610' => 'corporate name',
+        '611' => 'meeting name',
+        '630' => 'uniform title',
+        '648' => 'chronological',
+        '650' => 'topic',
+        '651' => 'geographic',
+        '653' => '',
+        '655' => 'genre/form',
+        '656' => 'occupation'
+    ];
+
+    /**
+     * Mappings from subject source indicators (2nd indicator of subject fields in
+     * MARC 21) to their codes.
+     *
+     * @var  array
+     * @link https://www.loc.gov/marc/bibliographic/bd6xx.html     Subject field docs
+     * @link https://www.loc.gov/standards/sourcelist/subject.html Code list
+     */
+    protected $subjectSources = [
+        '0' => 'lcsh',
+        '1' => 'lcshac',
+        '2' => 'mesh',
+        '3' => 'nal',
+        '4' => 'unknown',
+        '5' => 'cash',
+        '6' => 'rvm'
+    ];
+
     /**
      * Get access restriction notes for the record.
      *
@@ -68,20 +104,21 @@ class SolrMarc extends SolrDefault
      * returned as an array of chunks, increasing from least specific to most
      * specific.
      *
+     * @param bool $extended Whether to return a keyed array with the following
+     * keys:
+     * - heading: the actual subject heading chunks
+     * - type: heading type
+     * - source: source vocabulary
+     *
      * @return array
      */
-    public function getAllSubjectHeadings()
+    public function getAllSubjectHeadings($extended = false)
     {
-        // These are the fields that may contain subject headings:
-        $fields = [
-            '600', '610', '611', '630', '648', '650', '651', '653', '655', '656'
-        ];
-
         // This is all the collected data:
         $retval = [];
 
         // Try each MARC field one at a time:
-        foreach ($fields as $field) {
+        foreach ($this->subjectFields as $field => $fieldType) {
             // Do we have any results for the current field?  If not, try the next.
             $results = $this->getMarcRecord()->getFields($field);
             if (!$results) {
@@ -105,7 +142,25 @@ class SolrMarc extends SolrDefault
                     }
                     // If we found at least one chunk, add a heading to our result:
                     if (!empty($current)) {
-                        $retval[] = $current;
+                        if ($extended) {
+                            $sourceIndicator = $result->getIndicator(2);
+                            $source = '';
+                            if (isset($this->subjectSources[$sourceIndicator])) {
+                                $source = $this->subjectSources[$sourceIndicator];
+                            } else {
+                                $source = $result->getSubfield('2');
+                                if ($source) {
+                                    $source = $source->getData();
+                                }
+                            }
+                            $retval[] = [
+                                'heading' => $current,
+                                'type' => $fieldType,
+                                'source' => $source ?: ''
+                            ];
+                        } else {
+                            $retval[] = $current;
+                        }
                     }
                 }
             }
diff --git a/module/VuFind/src/VuFind/RecordDriver/Summon.php b/module/VuFind/src/VuFind/RecordDriver/Summon.php
index e229322991f..79b49428cca 100644
--- a/module/VuFind/src/VuFind/RecordDriver/Summon.php
+++ b/module/VuFind/src/VuFind/RecordDriver/Summon.php
@@ -38,6 +38,18 @@ namespace VuFind\RecordDriver;
  */
 class Summon extends SolrDefault
 {
+    /**
+     * Fields that may contain subject headings, and their descriptions
+     *
+     * @var array
+     */
+    protected $subjectFields = [
+        'SubjectTerms' => 'topic',
+        'TemporalSubjectTerms' => 'chronological',
+        'GeographicLocations' => 'geographic',
+        'Keywords' => 'keyword',
+    ];
+
     /**
      * Date converter
      *
@@ -50,32 +62,31 @@ class Summon extends SolrDefault
      * returned as an array of chunks, increasing from least specific to most
      * specific.
      *
+     * @param bool $extended Whether to return a keyed array with the following
+     * keys:
+     * - heading: the actual subject heading chunks
+     * - type: heading type
+     * - source: source vocabulary
+     *
      * @return array
      */
-    public function getAllSubjectHeadings()
+    public function getAllSubjectHeadings($extended = false)
     {
         $retval = [];
-        $topic = isset($this->fields['SubjectTerms']) ?
-            $this->fields['SubjectTerms'] : [];
-        $temporal = isset($this->fields['TemporalSubjectTerms']) ?
-            $this->fields['TemporalSubjectTerms'] : [];
-        $geo = isset($this->fields['GeographicLocations']) ?
-            $this->fields['GeographicLocations'] : [];
-        $key = isset($this->fields['Keywords']) ?
-            $this->fields['Keywords'] : [];
 
-        $retval = [];
-        foreach ($topic as $t) {
-            $retval[] = [trim($t)];
-        }
-        foreach ($temporal as $t) {
-            $retval[] = [trim($t)];
-        }
-        foreach ($geo as $g) {
-            $retval[] = [trim($g)];
-        }
-        foreach ($key as $k) {
-            $retval[] = [trim($k)];
+        foreach ($this->subjectFields as $field => $fieldType) {
+            if (!isset($this->fields[$field])) {
+                continue;
+            }
+            foreach ($this->fields[$field] as $topic) {
+                $topic = trim($topic);
+                $retval[] = $extended
+                    ? [
+                        'heading' => [$topic],
+                        'type' => $fieldType,
+                        'source' => ''
+                    ] : [$topic];
+            }
         }
         return $retval;
     }
diff --git a/module/VuFind/tests/unit-tests/src/VuFindTest/RecordDriver/SolrMarcTest.php b/module/VuFind/tests/unit-tests/src/VuFindTest/RecordDriver/SolrMarcTest.php
index 24875095075..458895121e3 100644
--- a/module/VuFind/tests/unit-tests/src/VuFindTest/RecordDriver/SolrMarcTest.php
+++ b/module/VuFind/tests/unit-tests/src/VuFindTest/RecordDriver/SolrMarcTest.php
@@ -93,6 +93,33 @@ class SolrMarcTest extends \VuFindTest\Unit\TestCase
         $this->assertEquals('2, pt. 1.', $series[0]['number']);
     }
 
+    /**
+     * Test regular and extended subject heading support.
+     *
+     * @return void
+     */
+    public function testSubjectHeadings()
+    {
+        $config = new \Zend\Config\Config([]);
+        $record = new \VuFind\RecordDriver\SolrMarc($config);
+        $fixture = $this->loadRecordFixture('testbug1.json');
+        $record->setRawData($fixture['response']['docs'][0]);
+        $this->assertEquals(
+            [['Matematica', 'Periodici.']],
+            $record->getAllSubjectHeadings()
+        );
+        $this->assertEquals(
+            [
+                [
+                    'heading' => ['Matematica', 'Periodici.'],
+                    'type' => '',
+                    'source' => ''
+                ],
+            ],
+            $record->getAllSubjectHeadings(true)
+        );
+    }
+
     /**
      * Load a fixture file.
      *
diff --git a/module/VuFindApi/src/VuFindApi/Formatter/RecordFormatter.php b/module/VuFindApi/src/VuFindApi/Formatter/RecordFormatter.php
index 53d88466157..063259d0028 100644
--- a/module/VuFindApi/src/VuFindApi/Formatter/RecordFormatter.php
+++ b/module/VuFindApi/src/VuFindApi/Formatter/RecordFormatter.php
@@ -85,6 +85,21 @@ class RecordFormatter extends BaseFormatter
         return $result ? $result : null;
     }
 
+    /**
+     * Get extended subject headings
+     *
+     * @param \VuFind\RecordDriver\SolrDefault $record Record driver
+     *
+     * @return array|null
+     */
+    protected function getExtendedSubjectHeadings($record)
+    {
+        $result = $record->getAllSubjectHeadings(true);
+        // Make sure that the record driver returned the additional information and
+        // return data only if it did
+        return $result && isset($result[0]['heading']) ? $result : null;
+    }
+
     /**
      * Get full record for a record as XML
      *
diff --git a/module/VuFindApi/tests/unit-tests/src/VuFindTest/Formatter/RecordFormatterTest.php b/module/VuFindApi/tests/unit-tests/src/VuFindTest/Formatter/RecordFormatterTest.php
index 5f186bf3adc..61f14068408 100644
--- a/module/VuFindApi/tests/unit-tests/src/VuFindTest/Formatter/RecordFormatterTest.php
+++ b/module/VuFindApi/tests/unit-tests/src/VuFindTest/Formatter/RecordFormatterTest.php
@@ -63,7 +63,10 @@ class RecordFormatterTest extends \VuFindTest\Unit\TestCase
             'fullRecord' => ['vufind.method' => 'Formatter::getFullRecord'],
             'rawData' => ['vufind.method' => 'Formatter::getRawData'],
             'buildings' => ['vufind.method' => 'getBuilding'],
-            'recordPage' => ['vufind.method' => 'Formatter::getRecordPage']
+            'recordPage' => ['vufind.method' => 'Formatter::getRecordPage'],
+            'subjectsExtended' => [
+                'vufind.method' => 'Formatter::getExtendedSubjectHeadings'
+            ],
         ];
     }
 
@@ -113,7 +116,8 @@ class RecordFormatterTest extends \VuFindTest\Unit\TestCase
                 'DedupData' => [['id' => 'bar']],
                 'fullrecord' => 'xyzzy',
                 'spelling' => 's',
-                'Building' => ['foo', new TranslatableString('bar', 'xyzzy')]
+                'Building' => ['foo', new TranslatableString('bar', 'xyzzy')],
+                'AllSubjectHeadings' => [['heading' => 'subject']],
             ]
         );
         return $driver;
@@ -149,7 +153,8 @@ class RecordFormatterTest extends \VuFindTest\Unit\TestCase
                 'fullRecord' => 'xyzzy',
                 'rawData' => $expectedRaw,
                 'buildings' => ['foo', ['value' => 'bar', 'translated' => 'xyzzy']],
-                'recordPage' => 'http://record'
+                'recordPage' => 'http://record',
+                'subjectsExtended' => [['heading' => 'subject']],
             ],
         ];
         $this->assertEquals($expected, $results);
@@ -187,7 +192,8 @@ class RecordFormatterTest extends \VuFindTest\Unit\TestCase
             'fullRecord' => [],
             'rawData' => [],
             'buildings' => [],
-            'recordPage' => []
+            'recordPage' => [],
+            'subjectsExtended' => [],
         ];
         $this->assertEquals($expected, $results);
     }
diff --git a/themes/root/templates/searchapi/swagger.phtml b/themes/root/templates/searchapi/swagger.phtml
index e7b05359ade..abc463709de 100644
--- a/themes/root/templates/searchapi/swagger.phtml
+++ b/themes/root/templates/searchapi/swagger.phtml
@@ -267,26 +267,6 @@
                 }
             }
         },
-        "Record": {
-            "type": "object",
-            "properties": <?=json_encode($this->recordFields) ?>
-        },
-        "RecordLink": {
-            "type": "object",
-            "properties": {
-                "title": {
-                    "description": "Link title",
-                    "type": "string"
-                },
-                "value": {
-                    "description": "Link value",
-                    "type": "string"
-                },
-                "link": {
-                    "$ref": "#/definitions/Link"
-                }
-            }
-        },
         "Link": {
             "type": "object",
             "properties": {
@@ -307,6 +287,26 @@
                 }
             }
         },
+        "Record": {
+            "type": "object",
+            "properties": <?=json_encode($this->recordFields) ?>
+        },
+        "RecordLink": {
+            "type": "object",
+            "properties": {
+                "title": {
+                    "description": "Link title",
+                    "type": "string"
+                },
+                "value": {
+                    "description": "Link value",
+                    "type": "string"
+                },
+                "link": {
+                    "$ref": "#/definitions/Link"
+                }
+            }
+        },
         "SearchResponse": {
             "type": "object",
             "properties": {
@@ -336,6 +336,27 @@
             },
             "required": ["resultCount", "status"]
         },
+        "Subject": {
+            "type": "object",
+            "properties": {
+                "heading": {
+                    "description": "Subject heading parts as an array from the least specific to the most specific",
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "type": {
+                    "description": "Subject type",
+                    "type": "string",
+                    "enum": ["", "personal name", "corporate name", "meeting name", "uniform title", "chronological", "topic", "geographic", "genre\/form", "occupation", "keyword"]
+                },
+                "source": {
+                    "description": "Subject source/thesaurus (e.g. lcsh, mesh)",
+                    "type": "string"
+                }
+            }
+        },
         "Url": {
             "type": "object",
             "properties": {
-- 
GitLab