diff --git a/import/index_scripts/getdate.bsh b/import/index_scripts/getdate.bsh index 55102a2d755f0edc015aeaa6dca4d2117dd29dac..5d615c15f744057dc895d788b61d3664fca3ba20 100644 --- a/import/index_scripts/getdate.bsh +++ b/import/index_scripts/getdate.bsh @@ -126,3 +126,70 @@ public String getDate(Record record) return (null); return cleanDate(date); } + +/** + * Get all available dates from the record (260c date first, then 264c dates). + * + * @param Record record + * @return Set dates (insertion-ordered; never contains null or empty strings) + */ +public Set getDates(Record record) { + Set dates = new LinkedHashSet(); + + // First check old-style 260c date: + String oldStyle = getDate(record); + if (oldStyle != null && oldStyle.length() > 0) { + dates.add(oldStyle); + } + + // Now track down relevant RDA-style 264c dates; we only care about + // copyright and publication dates (and ignore copyright dates if + // publication dates are present). Null/empty results from cleanDate() are skipped, matching the 260c check above. + Set pubDates = new LinkedHashSet(); + Set copyDates = new LinkedHashSet(); + List list264 = record.getVariableFields("264"); + for (VariableField vf : list264) + { + DataField df = (DataField) vf; + Subfield currentDate = df.getSubfield('c'); + if (currentDate != null) { + String currentDateStr = cleanDate(currentDate.getData()); + char ind2 = df.getIndicator2(); + if (currentDateStr != null && currentDateStr.length() > 0) switch (ind2) + { + case '1': + pubDates.add(currentDateStr); + break; + case '4': + copyDates.add(currentDateStr); + break; + } + } + } + if (pubDates.size() > 0) { + dates.addAll(pubDates); + } else if (copyDates.size() > 0) { + dates.addAll(copyDates); + } + + return dates; +} + +/** + * Get the earliest publication date from the record. 
+ * + * @param Record record + * @return String earliest date (null if the record has no usable dates) + */ +public String getFirstDate(Record record) { + String result = null; + Set dates = getDates(record); + Iterator datesIter = dates.iterator(); + while (datesIter.hasNext()) { + String current = datesIter.next(); + if (current != null && current.length() > 0 && (result == null || Integer.parseInt(current) < Integer.parseInt(result))) { + result = current; + } + } + return result; +} \ No newline at end of file diff --git a/import/index_scripts/getpublishers.bsh b/import/index_scripts/getpublishers.bsh new file mode 100644 index 0000000000000000000000000000000000000000..aa5a6e5034a41ebe8d95b2409cb74a2ac4ea0237 --- /dev/null +++ b/import/index_scripts/getpublishers.bsh @@ -0,0 +1,60 @@ +/** + * Custom publisher script. + * + * This can be used to override built-in SolrMarc custom functions. If you change + * this script, you will need to activate it in import/marc_local.properties before + * it will be applied during indexing. + */ +import org.marc4j.marc.*; + +/** + * Get all available publishers from the record. + * + * @param Record record + * @return Set publishers + */ +public Set getPublishers(Record record) { + Set publishers = new LinkedHashSet(); + + // First check old-style 260b name: + List list260 = record.getVariableFields("260"); + for (VariableField vf : list260) + { + DataField df = (DataField) vf; + Subfield current = df.getSubfield('b'); + if (current != null) { + publishers.add(current.getData()); + } + } + + // Now track down relevant RDA-style 264b names; we only care about + // copyright and publication names (and ignore copyright names if + // publication names are present). 
+ Set pubNames = new LinkedHashSet(); + Set copyNames = new LinkedHashSet(); + List list264 = record.getVariableFields("264"); + for (VariableField vf : list264) + { + DataField df = (DataField) vf; + Subfield currentName = df.getSubfield('b'); + if (currentName != null) { + char ind2 = df.getIndicator2(); + switch (ind2) + { + case '1': + pubNames.add(currentName.getData()); + break; + case '4': + copyNames.add(currentName.getData()); + break; + } + } + } + if (pubNames.size() > 0) { + publishers.addAll(pubNames); + } else if (copyNames.size() > 0) { + publishers.addAll(copyNames); + } + + return publishers; +} \ No newline at end of file diff --git a/import/marc.properties b/import/marc.properties index ede9390da16e784e24d4c2088a4a8b97b84be9c9..570b21c708caba21ccac315566c7f0326ec99921 100644 --- a/import/marc.properties +++ b/import/marc.properties @@ -36,9 +36,9 @@ title_sort = custom, getSortableTitle series = 440ap:800abcdfpqt:830ap series2 = 490a -publisher = 260b -publishDate = custom, getDate -publishDateSort = custom, getDate +publisher = script(getpublishers.bsh), getPublishers +publishDate = script(getdate.bsh), getDates +publishDateSort = script(getdate.bsh), getFirstDate physical = 300abcefg:530abcd dateSpan = 362a @@ -83,4 +83,4 @@ oclc_num = 035a, (pattern_map.oclc_num) pattern_map.oclc_num.pattern_0 = \\(OCoLC\\)[^0-9]*[0]*([0-9]+)=>$1 pattern_map.oclc_num.pattern_1 = ocm[0]*([0-9]+)[ ]*[0-9]*=>$1 pattern_map.oclc_num.pattern_2 = ocn[0]*([0-9]+).*=>$1 -pattern_map.oclc_num.pattern_3 = on[0]*([0-9]+).*=>$1 +pattern_map.oclc_num.pattern_3 = on[0]*([0-9]+).*=>$1 \ No newline at end of file