From 95d23a7bf0facdbc424617852ba995d46a2cd4b5 Mon Sep 17 00:00:00 2001
From: Demian Katz <demian.katz@villanova.edu>
Date: Fri, 24 May 2013 13:21:39 -0400
Subject: [PATCH] Progress on VUFIND-749 (index RDA 264 field).

---
 import/index_scripts/getdate.bsh       | 67 ++++++++++++++++++++++++++
 import/index_scripts/getpublishers.bsh | 60 +++++++++++++++++++++++
 import/marc.properties                 |  8 +--
 3 files changed, 131 insertions(+), 4 deletions(-)
 create mode 100644 import/index_scripts/getpublishers.bsh

diff --git a/import/index_scripts/getdate.bsh b/import/index_scripts/getdate.bsh
index 55102a2d755..5d615c15f74 100644
--- a/import/index_scripts/getdate.bsh
+++ b/import/index_scripts/getdate.bsh
@@ -126,3 +126,70 @@ public String getDate(Record record)
         return (null);
     return cleanDate(date);
 }
+
+/**
+ * Get all available dates from the record.
+ *
+ * Combines the old-style date from getDate() with RDA-style 264c dates.
+ * 264 dates are grouped by second indicator ('1' = publication, '4' =
+ * copyright); copyright dates are used only when no publication date was
+ * found. Null or empty cleanDate() results are never added to the set.
+ *
+ * @param  Record          record
+ * @return Set             dates (possibly empty, never containing null)
+ */
+public Set getDates(Record record) {
+    Set dates = new LinkedHashSet();
+
+    // First check old-style 260c date:
+    String oldStyle = getDate(record);
+    if (oldStyle != null && oldStyle.length() > 0) {
+        dates.add(oldStyle);
+    }
+
+    // Now collect RDA-style 264c dates, grouped by second indicator:
+    Set pubDates = new LinkedHashSet();
+    Set copyDates = new LinkedHashSet();
+    List list264 = record.getVariableFields("264");
+    for (VariableField vf : list264) {
+        DataField df = (DataField) vf;
+        Subfield currentDate = df.getSubfield('c');
+        if (currentDate == null) {
+            continue;
+        }
+        // Guard the cleaned value the same way the getDate() result is guarded above:
+        String currentDateStr = cleanDate(currentDate.getData());
+        if (currentDateStr == null || currentDateStr.length() == 0) {
+            continue;
+        }
+        char ind2 = df.getIndicator2();
+        if (ind2 == '1') {
+            pubDates.add(currentDateStr);
+        } else if (ind2 == '4') {
+            copyDates.add(currentDateStr);
+        }
+    }
+
+    // Publication dates take priority over copyright dates:
+    dates.addAll(pubDates.size() > 0 ? pubDates : copyDates);
+    return dates;
+}
+
+/**
+ * Get the earliest date from getDates(), ignoring null/non-numeric values.
+ *
+ * @param  Record          record
+ * @return String          earliest date, or null if no numeric date exists
+ */
+public String getFirstDate(Record record) {
+    String result = null;
+    for (Object current : getDates(record)) {
+        // Only plain integers of at most 9 digits are compared, so parseInt cannot throw:
+        boolean numeric = current != null && ((String) current).matches("-?\\d{1,9}");
+        if (numeric && (result == null
+            || Integer.parseInt((String) current) < Integer.parseInt(result))) {
+            result = (String) current;
+        }
+    }
+    return result;
+}
\ No newline at end of file
diff --git a/import/index_scripts/getpublishers.bsh b/import/index_scripts/getpublishers.bsh
new file mode 100644
index 00000000000..aa5a6e5034a
--- /dev/null
+++ b/import/index_scripts/getpublishers.bsh
@@ -0,0 +1,60 @@
+/**
+ * Custom publisher script.
+ *
+ * This can be used to override built-in SolrMarc custom functions.  If you change
+ * this script, you will need to activate it in import/marc_local.properties before
+ * it will be applied during indexing.
+ */
+import org.marc4j.marc.*;
+
+/**
+ * Get all available publishers from the record.
+ *
+ * Collects every 260b value, then adds 264b values: 264 fields with second
+ * indicator '1' (publication) are preferred, and indicator '4' (copyright)
+ * names are used only when no publication name was found. Subfield b is
+ * repeatable, so all occurrences in a field are read, not just the first.
+ *
+ * @param  Record          record
+ * @return Set             publishers (insertion-ordered, possibly empty)
+ */
+public Set getPublishers(Record record) {
+    Set publishers = new LinkedHashSet();
+
+    // First check old-style 260b names:
+    List list260 = record.getVariableFields("260");
+    for (VariableField vf : list260) {
+        DataField df = (DataField) vf;
+        // Read every $b occurrence, as the former "260b" field spec did:
+        for (Subfield current : df.getSubfields('b')) {
+            publishers.add(current.getData());
+        }
+    }
+
+    // Now track down relevant RDA-style 264b names, grouped by second
+    // indicator; anything other than '1' or '4' is ignored.
+    Set pubNames = new LinkedHashSet();
+    Set copyNames = new LinkedHashSet();
+    List list264 = record.getVariableFields("264");
+    for (VariableField vf : list264) {
+        DataField df = (DataField) vf;
+        char ind2 = df.getIndicator2();
+        Set target = null;
+        if (ind2 == '1') {
+            target = pubNames;
+        } else if (ind2 == '4') {
+            target = copyNames;
+        }
+        if (target == null) {
+            continue;
+        }
+        for (Subfield currentName : df.getSubfields('b')) {
+            target.add(currentName.getData());
+        }
+    }
+
+    // Publication names take priority over copyright names:
+    publishers.addAll(pubNames.size() > 0 ? pubNames : copyNames);
+
+    return publishers;
+}
\ No newline at end of file
diff --git a/import/marc.properties b/import/marc.properties
index ede9390da16..570b21c708c 100644
--- a/import/marc.properties
+++ b/import/marc.properties
@@ -36,9 +36,9 @@ title_sort = custom, getSortableTitle
 series = 440ap:800abcdfpqt:830ap
 series2 = 490a
 
-publisher = 260b
-publishDate = custom, getDate
-publishDateSort = custom, getDate
+publisher = script(getpublishers.bsh), getPublishers
+publishDate = script(getdate.bsh), getDates
+publishDateSort = script(getdate.bsh), getFirstDate
 
 physical = 300abcefg:530abcd
 dateSpan = 362a
@@ -83,4 +83,4 @@ oclc_num = 035a, (pattern_map.oclc_num)
 pattern_map.oclc_num.pattern_0 = \\(OCoLC\\)[^0-9]*[0]*([0-9]+)=>$1
 pattern_map.oclc_num.pattern_1 = ocm[0]*([0-9]+)[ ]*[0-9]*=>$1
 pattern_map.oclc_num.pattern_2 = ocn[0]*([0-9]+).*=>$1
-pattern_map.oclc_num.pattern_3 = on[0]*([0-9]+).*=>$1
+pattern_map.oclc_num.pattern_3 = on[0]*([0-9]+).*=>$1
\ No newline at end of file
-- 
GitLab