From 6785edfe87225444ea02639f73d0330fe0721c8f Mon Sep 17 00:00:00 2001
From: Demian Katz <demian.katz@villanova.edu>
Date: Thu, 5 Nov 2020 11:36:59 -0500
Subject: [PATCH] Index DOIs from 856 fields. (#1783)

---
 .../src/org/vufind/index/DoiTools.java        | 54 +++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 import/index_java/src/org/vufind/index/DoiTools.java

diff --git a/import/index_java/src/org/vufind/index/DoiTools.java b/import/index_java/src/org/vufind/index/DoiTools.java
new file mode 100644
index 00000000000..d558ece623f
--- /dev/null
+++ b/import/index_java/src/org/vufind/index/DoiTools.java
@@ -0,0 +1,54 @@
+package org.vufind.index;
+/**
+ * DOI indexing routines.
+ *
+ * Copyright (C) Villanova University 2020.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+import java.util.LinkedHashSet;
+import java.util.Set;
+import org.marc4j.marc.Record;
+import org.solrmarc.index.SolrIndexer;
+
+/**
+ * DOI indexing routines.
+ */
+public class DoiTools
+{
+    /**
+     * Collect DOIs by stripping a known base URL from matching field values.
+     * @param record MARC record to read from
+     * @param fieldSpec taglist for URL fields, passed to SolrIndexer.getFieldList
+     * @param baseUrl prefix that must start a value; the remainder is the DOI
+     * @return matching DOIs in encounter order, or null when nothing matched
+     */
+    public Set<String> getDoiFromUrl(final Record record, String fieldSpec, String baseUrl) {
+        // Matches are kept in encounter order with duplicates dropped:
+        Set<String> dois = new LinkedHashSet<String>();
+        for (String url : SolrIndexer.instance().getFieldList(record, fieldSpec)) {
+            // Values that do not begin with the base URL carry no DOI for us:
+            if (!url.startsWith(baseUrl)) {
+                continue;
+            }
+            dois.add(url.substring(baseUrl.length()));
+        }
+        // Report "nothing found" as null rather than as an empty set:
+        if (dois.isEmpty()) {
+            return null;
+        }
+        return dois;
+    }
+}
-- 
GitLab