From 6785edfe87225444ea02639f73d0330fe0721c8f Mon Sep 17 00:00:00 2001 From: Demian Katz <demian.katz@villanova.edu> Date: Thu, 5 Nov 2020 11:36:59 -0500 Subject: [PATCH] Index DOIs from 856 fields. (#1783) --- .../src/org/vufind/index/DoiTools.java | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 import/index_java/src/org/vufind/index/DoiTools.java diff --git a/import/index_java/src/org/vufind/index/DoiTools.java b/import/index_java/src/org/vufind/index/DoiTools.java new file mode 100644 index 00000000000..d558ece623f --- /dev/null +++ b/import/index_java/src/org/vufind/index/DoiTools.java @@ -0,0 +1,54 @@ +package org.vufind.index; +/** + * DOI indexing routines. + * + * Copyright (C) Villanova University 2020. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +import java.util.LinkedHashSet; +import java.util.Set; +import org.marc4j.marc.Record; +import org.solrmarc.index.SolrIndexer; + +/** + * DOI indexing routines. 
+ */ +public class DoiTools +{ + /** + * Extract DOIs from URLs with the specified prefix + * @param record MARC record + * @param fieldSpec taglist for URL fields + * @param baseUrl Base URL that will be followed by a DOI + * @return Set of DOIs + */ + public Set<String> getDoiFromUrl(final Record record, String fieldSpec, String baseUrl) { + // Initialize our return value: + Set<String> result = new LinkedHashSet<String>(); + + // Loop through the specified MARC fields: + Set<String> input = SolrIndexer.instance().getFieldList(record, fieldSpec); + for (String current: input) { + // If the base URL is found in the string, crop it off for our DOI! + if (current.startsWith(baseUrl)) { + result.add(current.substring(baseUrl.length())); + } + } + + // If we found no matches, return null; otherwise, return our results: + return result.isEmpty() ? null : result; + } +} -- GitLab