From f3f239e4106df246ba6ea0601c0c93d49c70e7a7 Mon Sep 17 00:00:00 2001
From: Demian Katz <demian.katz@villanova.edu>
Date: Fri, 30 Oct 2020 15:38:54 -0400
Subject: [PATCH] Improve MARC table of contents processing. (#1770)

---
 .../VuFind/RecordDriver/MarcAdvancedTrait.php | 23 ++++----
 module/VuFind/tests/fixtures/marc/toc1.xml    | 42 +++++++++++++++
 module/VuFind/tests/fixtures/marc/toc2.xml    | 13 +++++
 .../VuFindTest/RecordDriver/SolrMarcTest.php  | 52 +++++++++++++++++++
 4 files changed, 118 insertions(+), 12 deletions(-)
 create mode 100644 module/VuFind/tests/fixtures/marc/toc1.xml
 create mode 100644 module/VuFind/tests/fixtures/marc/toc2.xml

diff --git a/module/VuFind/src/VuFind/RecordDriver/MarcAdvancedTrait.php b/module/VuFind/src/VuFind/RecordDriver/MarcAdvancedTrait.php
index c3e2aa700fa..e83f8aa8adb 100644
--- a/module/VuFind/src/VuFind/RecordDriver/MarcAdvancedTrait.php
+++ b/module/VuFind/src/VuFind/RecordDriver/MarcAdvancedTrait.php
@@ -480,21 +480,20 @@ trait MarcAdvancedTrait
     public function getTOC()
     {
         // Return empty array if we have no table of contents:
-        $fields = $this->getMarcRecord()->getFields('505');
-        if (!$fields) {
-            return [];
-        }
-
-        // If we got this far, we have a table -- collect it as a string:
         $toc = [];
-        foreach ($fields as $field) {
-            $subfields = $field->getSubfields();
-            foreach ($subfields as $subfield) {
-                // Break the string into appropriate chunks, filtering empty strings,
-                // and merge them into return array:
+        if ($fields = $this->getMarcRecord()->getFields('505')) {
+            foreach ($fields as $field) {
+                // Implode all the subfields into a single string, then split on
+                // -- separators preceded by whitespace or a period (filtering out
+                // empty chunks). Due to inconsistent application of subfield
+                // codes, this is the most reliable way to split up a TOC.
+                $str = '';
+                foreach ($field->getSubfields() as $subfield) {
+                    $str .= trim($subfield->getData()) . ' ';
+                }
                 $toc = array_merge(
                     $toc,
-                    array_filter(explode('--', $subfield->getData()), 'trim')
+                    array_filter(array_map('trim', preg_split('/[.\s]--/', $str)))
                 );
             }
         }
diff --git a/module/VuFind/tests/fixtures/marc/toc1.xml b/module/VuFind/tests/fixtures/marc/toc1.xml
new file mode 100644
index 00000000000..643c318e926
--- /dev/null
+++ b/module/VuFind/tests/fixtures/marc/toc1.xml
@@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<collection xmlns='http://www.loc.gov/MARC21/slim'>
+
+<record
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd"
+    xmlns="http://www.loc.gov/MARC21/slim">
+  <datafield tag="505" ind1="0" ind2="0">
+    <subfield code="t">About the Association of Professors of Missions /</subfield>
+    <subfield code="r">Robert Danielson --</subfield>
+    <subfield code="g">Foreword /</subfield>
+    <subfield code="r">Angel Santiago-Vendrell --</subfield>
+    <subfield code="g">Conference theme -- Plenary Papers --</subfield>
+    <subfield code="t">Teaching missiology in and for world Christianity content and method /</subfield>
+    <subfield code="r">Peter C. Phan --</subfield>
+    <subfield code="t">The bodies we teach by: (en) gendering mission for global Christianities /</subfield>
+    <subfield code="r">Mai-Ahn Le --</subfield>
+    <subfield code="t">Teaching Christian mission in an age of world Christianity: a reflection on the centenary of the 1916 Panama Congress /</subfield>
+    <subfield code="r">Philip Wingeier-Rayo --</subfield>
+    <subfield code="g">Conference Papers --</subfield>
+    <subfield code="t">Theological metaphors of teaching mission in an age of world Christianity in the North American context /</subfield>
+    <subfield code="r">David Thang Moe --</subfield>
+    <subfield code="t">Mission shifts from Pope Benedict XVI to Pope Francis /</subfield>
+    <subfield code="r">William P. Gregory --</subfield>
+    <subfield code="t">The elephant in the room: towards a paradigm shift in missiological education /</subfield>
+    <subfield code="r">Sarita D. Gallagher --</subfield>
+    <subfield code="t">Historic models of teaching Christian mission: case studies informing an age of world Christianity /</subfield>
+    <subfield code="r">Robert L. Gallagher --</subfield>
+    <subfield code="t">How the West was won: world Christianity as historic reality /</subfield>
+    <subfield code="r">Matt Friedman --</subfield>
+    <subfield code="t">The world's Christians: strategies for teaching international graduate students in Kenya's Christian universities /</subfield>
+    <subfield code="r">Janice Horsager Rasmussen --</subfield>
+    <subfield code="t">Gendered mission: educational work or itinerating preaching? The mission practice of the Presbyterian Church USA in Barranquilla, Colombia, 1880-1920 /</subfield>
+    <subfield code="r">Angel Santiago-Vendrell --</subfield>
+    <subfield code="t">Mary McLeod Bethune: Christ did not designate any particular color to go /</subfield>
+    <subfield code="r">Mary Cloutier --</subfield>
+    <subfield code="t">Teaching mission in an age of world Christianity: history, theology, anthropology, and gender in the classroom /</subfield>
+    <subfield code="r">Angel Santiago-Vendrell --</subfield>
+    <subfield code="g">Conference Proceedings -- First Fruits report for the APM -- Minutes of 2016 meeting -- Secretary's treasury report -- Conference program.</subfield>
+  </datafield>
+</record>
+</collection>
diff --git a/module/VuFind/tests/fixtures/marc/toc2.xml b/module/VuFind/tests/fixtures/marc/toc2.xml
new file mode 100644
index 00000000000..61f5cb21247
--- /dev/null
+++ b/module/VuFind/tests/fixtures/marc/toc2.xml
@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<collection xmlns='http://www.loc.gov/MARC21/slim'>
+
+<record
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd"
+    xmlns="http://www.loc.gov/MARC21/slim">
+  <datafield tag="505" ind1="0" ind2="0">
+    <subfield code="a">Don't split the unspaced--separator. -- Do split the spaced one. --</subfield>
+    <subfield code="a">Respect pre-AACR2-style separation.--Even though it's old.</subfield>
+  </datafield>
+</record>
+</collection>
diff --git a/module/VuFind/tests/unit-tests/src/VuFindTest/RecordDriver/SolrMarcTest.php b/module/VuFind/tests/unit-tests/src/VuFindTest/RecordDriver/SolrMarcTest.php
index dfb65debf61..a484c6af0d9 100644
--- a/module/VuFind/tests/unit-tests/src/VuFindTest/RecordDriver/SolrMarcTest.php
+++ b/module/VuFind/tests/unit-tests/src/VuFindTest/RecordDriver/SolrMarcTest.php
@@ -122,6 +122,58 @@ class SolrMarcTest extends \VuFindTest\Unit\TestCase
         );
     }
 
+    /**
+     * Test that getTOC() splits MARC 505 fields into individual entries.
+     *
+     * @return void
+     */
+    public function testTOC()
+    {
+        $marc = $this->getFixture('marc/toc1.xml');
+        $config = new \Laminas\Config\Config([]);
+        $record = new \VuFind\RecordDriver\SolrMarc($config);
+        $record->setRawData(['fullrecord' => $marc]);
+        $this->assertEquals(
+            [
+                'About the Association of Professors of Missions / Robert Danielson',
+                'Foreword / Angel Santiago-Vendrell',
+                'Conference theme',
+                'Plenary Papers',
+                'Teaching missiology in and for world Christianity content and method / Peter C. Phan',
+                'The bodies we teach by: (en) gendering mission for global Christianities / Mai-Ahn Le',
+                'Teaching Christian mission in an age of world Christianity: a reflection on the centenary of the 1916 Panama Congress / Philip Wingeier-Rayo',
+                'Conference Papers',
+                'Theological metaphors of teaching mission in an age of world Christianity in the North American context / David Thang Moe',
+                'Mission shifts from Pope Benedict XVI to Pope Francis / William P. Gregory',
+                'The elephant in the room: towards a paradigm shift in missiological education / Sarita D. Gallagher',
+                'Historic models of teaching Christian mission: case studies informing an age of world Christianity / Robert L. Gallagher',
+                'How the West was won: world Christianity as historic reality / Matt Friedman',
+                'The world\'s Christians: strategies for teaching international graduate students in Kenya\'s Christian universities / Janice Horsager Rasmussen',
+                'Gendered mission: educational work or itinerating preaching? The mission practice of the Presbyterian Church USA in Barranquilla, Colombia, 1880-1920 / Angel Santiago-Vendrell',
+                'Mary McLeod Bethune: Christ did not designate any particular color to go / Mary Cloutier',
+                'Teaching mission in an age of world Christianity: history, theology, anthropology, and gender in the classroom / Angel Santiago-Vendrell',
+                'Conference Proceedings',
+                'First Fruits report for the APM',
+                'Minutes of 2016 meeting',
+                'Secretary\'s treasury report',
+                'Conference program.',
+            ],
+            $record->getTOC()
+        );
+        $marc2 = $this->getFixture('marc/toc2.xml');
+        $record2 = new \VuFind\RecordDriver\SolrMarc($config);
+        $record2->setRawData(['fullrecord' => $marc2]);
+        $this->assertEquals(
+            [
+                'Don\'t split the unspaced--separator.',
+                'Do split the spaced one.',
+                'Respect pre-AACR2-style separation',
+                'Even though it\'s old.',
+            ],
+            $record2->getTOC()
+        );
+    }
+
     /**
      * Test getFormattedMarcDetails() method.
      *
-- 
GitLab